autonomous-app 0.3.40__tar.gz → 0.3.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/PKG-INFO +1 -1
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/__init__.py +1 -1
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/imageagent.py +1 -3
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/models/gemini.py +122 -145
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/models/local_model.py +25 -23
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/PKG-INFO +1 -1
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/README.md +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/pyproject.toml +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/requirements.txt +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/setup.cfg +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/setup.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/audioagent.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/baseagent.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/jsonagent.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/models/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/textagent.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/apis/version_control/GHCallbacks.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/apis/version_control/GHOrganization.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/apis/version_control/GHRepo.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/apis/version_control/GHVersionControl.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/apis/version_control/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/auth/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/auth/autoauth.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/auth/github.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/auth/google.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/auth/user.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/cli.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/common.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/datastructures.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/document.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/fields.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/metaclasses.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/base/utils.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/common.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/connection.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/context_managers.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/db_sync.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/dereference.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/document.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/errors.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/fields.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/mongodb_support.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/pymongo_support.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/base.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/field_list.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/manager.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/queryset.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/transform.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/queryset/visitor.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/db/signals.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/logger.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/model/autoattr.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/model/automodel.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/storage/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/storage/imagestorage.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/storage/localstorage.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/taskrunner/__init__.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/taskrunner/autotasks.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/taskrunner/task_router.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/utils/markdown.py +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/SOURCES.txt +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/dependency_links.txt +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/requires.txt +0 -0
- {autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/top_level.txt +0 -0
{autonomous_app-0.3.40 → autonomous_app-0.3.42}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: autonomous-app
-Version: 0.3.40
+Version: 0.3.42
 Summary: Containerized application framework built on Flask with additional libraries and tools for rapid development of web applications.
 Author-email: Steven A Moore <samoore@binghamton.edu>
 Project-URL: homepage, https://github.com/Sallenmoore/autonomous
{autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/imageagent.py
@@ -21,8 +21,7 @@ class ImageAgent(BaseAgent):
         self,
         prompt,
         negative_prompt="",
-        aspect_ratio="
-        image_size="2K",
+        aspect_ratio="Portrait",
         files=None,
     ):
         return self.get_client(
@@ -31,6 +30,5 @@ class ImageAgent(BaseAgent):
             prompt,
             aspect_ratio=aspect_ratio,
             negative_prompt=negative_prompt,
-            image_size=image_size,
             files=files,
         )
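Reading the two imageagent.py hunks together: the separate image_size parameter is gone and a single aspect_ratio keyword (default "Portrait") now carries both shape and size. As a rough illustration only (the ImageAgent method name and constructor usage below are assumptions, not taken from the diff), a caller sketch:

    # Hypothetical caller sketch; only the keyword set reflects this diff, the rest is assumed.
    agent = ImageAgent()
    portrait = agent.generate_image(
        "an overgrown castle courtyard, oil painting style",
        negative_prompt="text, watermark",
        aspect_ratio="2KPortrait",  # a single key now selects both ratio and size
    )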
{autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/models/gemini.py
@@ -18,12 +18,14 @@ from autonomous.model.automodel import AutoModel
 
 class GeminiAIModel(AutoModel):
     _client = None
-
-
-
-
-
-
+
+    # Model definitions
+    _text_model = "gemini-1.5-pro"
+    _summary_model = "gemini-1.5-flash"
+    _json_model = "gemini-1.5-pro"
+    _stt_model = "gemini-1.5-pro"
+    _image_model = "imagen-3.0-generate-001"
+    _tts_model = "gemini-2.0-flash-exp"
 
     messages = ListAttr(StringAttr(default=[]))
     name = StringAttr(default="agent")
@@ -37,6 +39,30 @@ class GeminiAIModel(AutoModel):
 
     MAX_FILES = 14
     MAX_SUMMARY_TOKEN_LENGTH = 10000
+
+    SAFETY_SETTINGS = [
+        types.SafetySetting(
+            category="HARM_CATEGORY_HATE_SPEECH",
+            threshold="BLOCK_NONE",
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
+            threshold="BLOCK_NONE",
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_DANGEROUS_CONTENT",
+            threshold="BLOCK_NONE",
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_HARASSMENT",
+            threshold="BLOCK_NONE",
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_CIVIC_INTEGRITY",
+            threshold="BLOCK_NONE",
+        ),
+    ]
+
     VOICES = {
         "Zephyr": ["female"],
         "Puck": ["male"],
@@ -73,52 +99,39 @@ class GeminiAIModel(AutoModel):
     @property
     def client(self):
         if not self._client:
-            # log("=== Initializing Gemini AI Client ===", _print=True)
             self._client = genai.Client(api_key=os.environ.get("GOOGLEAI_KEY"))
-            # log("=== Gemini AI Client Initialized ===", _print=True)
         return self._client
 
     def _add_function(self, user_function):
-        # This function is now a bit more advanced to conform to the Tool Use schema
         tool_schema = {
             "name": user_function.get("name"),
             "description": user_function.get("description"),
             "parameters": user_function.get("parameters"),
         }
-
-        # Validate that the schema has a name, description, and parameters
         if not all(
             [tool_schema["name"], tool_schema["description"], tool_schema["parameters"]]
         ):
             raise ValueError(
                 "Tool schema must have a 'name', 'description', and 'parameters' field."
             )
-
         return tool_schema
 
     def _create_wav_header(
         self, raw_audio_bytes, channels=1, rate=24000, sample_width=2
     ):
-        """Creates an in-memory WAV file from raw PCM audio bytes."""
         buffer = io.BytesIO()
         with wave.open(buffer, "wb") as wav_file:
-            # Set audio parameters
             wav_file.setnchannels(channels)
             wav_file.setsampwidth(sample_width)
-            wav_file.setframerate(rate)
-
-            # Write the raw audio data
+            wav_file.setframerate(rate)
             wav_file.writeframes(raw_audio_bytes)
-
         buffer.seek(0)
         return buffer
 
     def _add_context(self, context):
-        # Create in-memory file
         context_data = (
             json.dumps(context, indent=2) if isinstance(context, dict) else str(context)
         )
-
         f = io.BytesIO(context_data.encode("utf-8"))
         f.name = f"context-{self.pk}"
         return self._add_files([{"name": f.name, "file": f}])
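The _create_wav_header changes above are whitespace and comment cleanup only; the underlying technique of wrapping raw PCM in a WAV container with the stdlib wave module is unchanged. A minimal standalone sketch of that pattern, assuming 16-bit mono PCM at 24 kHz as in the method's defaults (the function name here is illustrative, not package code):

    import io
    import wave

    def pcm_to_wav(raw_audio_bytes: bytes, channels=1, rate=24000, sample_width=2) -> io.BytesIO:
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wav_file:
            wav_file.setnchannels(channels)      # mono
            wav_file.setsampwidth(sample_width)  # 2 bytes per sample = 16-bit audio
            wav_file.setframerate(rate)          # 24 kHz sample rate
            wav_file.writeframes(raw_audio_bytes)
        buffer.seek(0)  # rewind so downstream readers (e.g. pydub) start at the header
        return buffer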
@@ -128,31 +141,28 @@ class GeminiAIModel(AutoModel):
         for f in file_list[: self.MAX_FILES]:
             fn = f["name"]
             try:
-
-            except Exception
-                pass
-                # log(f"No existing file to delete for {fn}: {e}", _print=True)
-            else:
+                self.client.files.delete(name=fn)
+            except Exception:
                 pass
-                # log(f"Deleting old version of {fn}: {result}", _print=True)
 
-            # If the content is raw bytes, wrap it in BytesIO
             file_content = f["file"]
-
-
-
-
+            fileobj = (
+                io.BytesIO(file_content)
+                if isinstance(file_content, bytes)
+                else file_content
+            )
+
             uploaded_file = self.client.files.upload(
                 file=fileobj,
                 config={"mime_type": mime_type, "display_name": fn},
             )
             uploaded_files.append(uploaded_file)
 
-            # This ensures the file is 'ACTIVE' before you use it in a prompt.
             while uploaded_file.state.name == "PROCESSING":
                 time.sleep(0.5)
                 uploaded_file = self.client.get_file(uploaded_file.name)
-
+
+        self.file_refs = [f.name for f in self.client.files.list()]
         self.save()
         return uploaded_files
 
@@ -160,8 +170,8 @@ class GeminiAIModel(AutoModel):
         self, message, function, additional_instructions="", uri="", context={}
     ):
         function_definition = self._add_function(function)
-
         contents = [message]
+
         if context:
             contents.extend(self._add_context(context))
         additional_instructions += (
@@ -169,12 +179,7 @@ class GeminiAIModel(AutoModel):
         )
 
         if uri:
-            contents.append(
-                Part.from_uri(
-                    file_uri=uri,
-                    mime_type="application/json",
-                ),
-            )
+            contents.append(Part.from_uri(file_uri=uri, mime_type="application/json"))
             additional_instructions += "\nUse the provided uri file for reference\n"
 
         response = self.client.models.generate_content(
@@ -183,24 +188,20 @@ class GeminiAIModel(AutoModel):
             config=types.GenerateContentConfig(
                 system_instruction=f"{self.instructions}.{additional_instructions}",
                 tools=[types.Tool(function_declarations=[function_definition])],
-                tool_config=
-
-
-                    }
-                },
+                tool_config=types.ToolConfig(
+                    function_calling_config=types.FunctionCallingConfig(mode="ANY")
+                ),
             ),
         )
 
-        # The response is now a ToolCall, not a JSON string
         try:
-
+            if not response.candidates or not response.candidates[0].content.parts:
+                return {}
+
             tool_call = response.candidates[0].content.parts[0].function_call
             if tool_call and tool_call.name == function["name"]:
                 return tool_call.args
             else:
-                log(
-                    "==== Model did not return a tool call or returned the wrong one. ===="
-                )
                 log(f"Response: {response.text}", _print=True)
                 return {}
         except Exception as e:
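This hunk replaces the previous tool_config dict with the typed google-genai objects and guards against empty candidates before reading the tool call. A compact sketch of the same forced-function-calling pattern, assuming the google-genai SDK as used in the diff (the weather schema and prompt are made up for illustration):

    from google import genai
    from google.genai import types

    client = genai.Client(api_key="...")  # e.g. read from a GOOGLEAI_KEY-style credential
    get_weather = {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
    response = client.models.generate_content(
        model="gemini-1.5-pro",
        contents=["What is the weather in Oslo?"],
        config=types.GenerateContentConfig(
            tools=[types.Tool(function_declarations=[get_weather])],
            # mode="ANY" asks the model to respond with a tool call rather than free text
            tool_config=types.ToolConfig(
                function_calling_config=types.FunctionCallingConfig(mode="ANY")
            ),
        ),
    )
    part = response.candidates[0].content.parts[0]
    if part.function_call and part.function_call.name == "get_weather":
        print(dict(part.function_call.args))  # e.g. {'city': 'Oslo'}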
@@ -216,12 +217,7 @@ class GeminiAIModel(AutoModel):
         )
 
         if uri:
-            contents.append(
-                Part.from_uri(
-                    file_uri=uri,
-                    mime_type="application/json",
-                ),
-            )
+            contents.append(Part.from_uri(file_uri=uri, mime_type="application/json"))
 
         response = self.client.models.generate_content(
             model=self._text_model,
@@ -230,20 +226,14 @@ class GeminiAIModel(AutoModel):
             ),
             contents=contents,
         )
-
-        # log(results, _print=True)
-        # log("=================== END REPORT ===================", _print=True)
         return response.text
 
     def summarize_text(self, text, primer=""):
         primer = primer or self.instructions
-
         updated_prompt_list = []
-        # Find all words in the prompt
         words = re.findall(r"\w+", text)
-
+
         for i in range(0, len(words), self.MAX_SUMMARY_TOKEN_LENGTH):
-            # Join a chunk of words and add to the list
             updated_prompt_list.append(
                 " ".join(words[i : i + self.MAX_SUMMARY_TOKEN_LENGTH])
             )
@@ -252,40 +242,27 @@ class GeminiAIModel(AutoModel):
         for p in updated_prompt_list:
             response = self.client.models.generate_content(
                 model=self._summary_model,
-                config=types.GenerateContentConfig(
-
-                ),
-                contents=text,
+                config=types.GenerateContentConfig(system_instruction=f"{primer}"),
+                contents=p,
             )
             try:
                 summary = response.candidates[0].content.parts[0].text
+                full_summary += summary + "\n"
             except Exception as e:
-                log(f"
+                log(f"Summary Error: {e}", _print=True)
                 break
-
-            full_summary += summary + "\n"
-        return summary
+        return full_summary
 
     def generate_transcription(
-        self,
-        audio_file,
-        prompt="Transcribe this audio clip",
-        display_name="audio.mp3",
+        self, audio_file, prompt="Transcribe this audio clip", display_name="audio.mp3"
     ):
         myfile = self.client.files.upload(
             file=io.BytesIO(audio_file),
-            config={
-                "mime_type": "audio/mp3",
-                "display_name": display_name,
-            },
+            config={"mime_type": "audio/mp3", "display_name": display_name},
         )
-
         response = self.client.models.generate_content(
             model=self._stt_model,
-            contents=[
-                prompt,
-                myfile,
-            ],
+            contents=[prompt, myfile],
         )
         return response.text
 
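The summarize_text fix sends each chunk to the model (contents=p instead of the whole text) and accumulates every partial summary instead of returning only the last one. A standalone sketch of that chunk-and-accumulate pattern, with summarize_chunk standing in for the Gemini call (it is a placeholder, not package code):

    import re

    MAX_SUMMARY_TOKEN_LENGTH = 10000

    def summarize(text, summarize_chunk):
        words = re.findall(r"\w+", text)
        full_summary = ""
        # Walk the word list in fixed-size windows, summarize each window separately,
        # and concatenate the partial summaries.
        for i in range(0, len(words), MAX_SUMMARY_TOKEN_LENGTH):
            chunk = " ".join(words[i : i + MAX_SUMMARY_TOKEN_LENGTH])
            full_summary += summarize_chunk(chunk) + "\n"
        return full_summary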
@@ -309,44 +286,59 @@ class GeminiAIModel(AutoModel):
                     speech_config=types.SpeechConfig(
                         voice_config=types.VoiceConfig(
                             prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                                voice_name=voice
+                                voice_name=voice
                             )
                         )
                     ),
                 ),
             )
             blob = response.candidates[0].content.parts[0].inline_data
-
-            # Create a WAV file in memory from the raw audio bytes
             wav_buffer = self._create_wav_header(blob.data)
-
-            # 2. Load the WAV audio using pydub, which will now correctly read the header
             audio_segment = AudioSegment.from_file(wav_buffer, format="wav")
-
-            # 3. Create a new in-memory buffer for the MP3 output
             mp3_buffer = io.BytesIO()
-
-            # 4. Export the audio segment directly to the in-memory buffer
             audio_segment.export(mp3_buffer, format="mp3")
-
-            # 5. Return the bytes from the buffer, not the filename
             return mp3_buffer.getvalue()
-
         except Exception as e:
-            log(
-                f"==== Error: Unable to generate audio ====\n{type(e)}:{e}", _print=True
-            )
-            # You can return a default empty byte string or re-raise the exception
+            log(f"==== Audio Gen Error: {e} ====", _print=True)
            raise e
 
-    def 
-
-
-
-
-
-
-
+    def _get_image_config(self, aspect_ratio_input):
+        """
+        Parses custom aspect ratio keys (e.g., '2KPortrait') into valid
+        Google Gemini API parameters for ratio and size.
+        """
+        # Default fallback
+        ratio = "1:1"
+        size = "1K"
+
+        # Logic Mapping
+        # Keys match what your App sends in ttrpgbase.py
+        if aspect_ratio_input == "2KPortrait":
+            ratio = "9:16"
+            size = "2K"  # <--- THIS WAS MISSING BEFORE
+        elif aspect_ratio_input == "Portrait":
+            ratio = "9:16"
+            size = "1K"
+        elif aspect_ratio_input == "Landscape":
+            ratio = "16:9"
+            size = "1K"
+        elif aspect_ratio_input == "4K":
+            ratio = "16:9"
+            size = "4K"
+        elif aspect_ratio_input == "4KPortrait":
+            ratio = "9:16"
+            size = "4K"
+        elif aspect_ratio_input == "2K":
+            ratio = "1:1"
+            size = "2K"
+
+        # Pass-through for standard inputs
+        elif aspect_ratio_input in ["1:1", "3:4", "4:3", "9:16", "16:9"]:
+            ratio = aspect_ratio_input
+
+        return ratio, size
+
+    def generate_image(self, prompt, negative_prompt="", files=None, aspect_ratio="2K"):
         image = None
         contents = [prompt]
 
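The new _get_image_config helper maps the app-level keys to an (aspect_ratio, image_size) pair; the key fix is that "2KPortrait" now also sets the 2K size. Expected behaviour per the mapping above (calling the private helper directly on an instance is purely for illustration):

    model = GeminiAIModel()
    model._get_image_config("2KPortrait")  # -> ("9:16", "2K")
    model._get_image_config("Landscape")   # -> ("16:9", "1K")
    model._get_image_config("3:4")         # -> ("3:4", "1K")  standard ratios pass through
    model._get_image_config("banner")      # -> ("1:1", "1K")  unknown keys fall back to defaults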
@@ -355,51 +347,36 @@ class GeminiAIModel(AutoModel):
             contents.extend(filerefs)
 
         try:
-            # 
+            # 1. Resolve Aspect Ratio AND Size
+            valid_ratio, valid_size = self._get_image_config(aspect_ratio)
+
+            # 2. Call API with correct parameters
             response = self.client.models.generate_content(
                 model=self._image_model,
                 contents=contents,
                 config=types.GenerateContentConfig(
-                    safety_settings=
-                        types.SafetySetting(
-                            category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
-                        ),
-                        types.SafetySetting(
-                            category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
-                        ),
-                        types.SafetySetting(
-                            category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
-                        ),
-                        types.SafetySetting(
-                            category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
-                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
-                        ),
-                        types.SafetySetting(
-                            category=types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
-                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
-                        ),
-                    ],
+                    safety_settings=self.SAFETY_SETTINGS,
                     image_config=types.ImageConfig(
-                        aspect_ratio=
-                        image_size=
+                        aspect_ratio=valid_ratio,
+                        image_size=valid_size,  # Now passing "2K" or "4K" correctly
                     ),
                 ),
             )
-
-
-
-            part.
-
-
-
-
+
+            # 3. Extract Image Data
+            if response.candidates and response.candidates[0].content.parts:
+                for part in response.candidates[0].content.parts:
+                    if part.inline_data:
+                        image = part.inline_data.data
+                        break
+
+            if not image:
+                raise ValueError(
+                    f"API returned Success but no image data found. Response: {response}"
+                )
+
         except Exception as e:
-            log(
-                f"==== Error: Unable to create image ====\n\n{e}",
-                _print=True,
-            )
+            log(f"==== Error: Unable to create image ====\n\n{e}", _print=True)
             raise e
+
         return image
{autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous/ai/models/local_model.py
@@ -268,7 +268,11 @@ class LocalAIModel(AutoModel):
             "4:3": (1152, 896),
             "16:9": (1216, 832),
             "2K": (2048, 1080),
+            "2KPortrait": (1080, 2048),
+            "Portrait": (1080, 2048),
             "4K": (3840, 2160),
+            "Landscape": (3840, 2160),
+            "4KPortrait": (2160, 3840),
             "9:16": (832, 1216),
             "3:2": (1216, 832),
             "2:3": (832, 1216),
@@ -277,25 +281,20 @@ class LocalAIModel(AutoModel):
         return resolutions.get(aspect_ratio, (1024, 1024))
 
     def generate_image(
-        self,
-        prompt,
-        negative_prompt="",
-        files=None,
-        aspect_ratio="3:4",
-        image_size="2K",
+        self, prompt, negative_prompt="", files=None, aspect_ratio="2KPortrait"
     ):
-        # 1. CLIP Token Limit Fix (Auto-Summarize)
-        if len(prompt) >
-
-
-
-
-
-
-
-
-
+        # # 1. CLIP Token Limit Fix (Auto-Summarize)
+        # if len(prompt) > 800:
+        #     log("⚠️ Prompt exceeds CLIP limit. rewriting...", _print=True)
+        #     summary_instruction = (
+        #         "Convert the description into a comma-separated Stable Diffusion prompt. "
+        #         "Keep visual elements and style. Under 50 words."
+        #     )
+        #     new_prompt = self.generate_text(
+        #         message=prompt, additional_instructions=summary_instruction, context={}
+        #     )
+        #     if new_prompt and len(new_prompt) > 10:
+        #         prompt = new_prompt
 
         # 2. Resolution Calculation
         width, height = self._get_dimensions(aspect_ratio)
@@ -312,25 +311,28 @@ class LocalAIModel(AutoModel):
         }
 
         try:
-            # Handle Files (
-
+            # Handle Files (Corrected List Logic)
+            # requests.post expects a list of tuples for multiple files with same key
+            files_list = []
             if files and isinstance(files, dict):
                 for fn, f_bytes in files.items():
                     if isinstance(f_bytes, bytes):
                         file_obj = io.BytesIO(f_bytes)
                     else:
                         file_obj = f_bytes
-
+                    # Appending to list instead of overwriting dict key
+                    files_list.append(("files", (fn, file_obj, "image/png")))
 
             # Send Request
-            if 
+            if files_list:
                 response = requests.post(
-                    f"{self._media_url}/generate-image", data=data, files=
+                    f"{self._media_url}/generate-image", data=data, files=files_list
                 )
             else:
                 response = requests.post(f"{self._media_url}/generate-image", data=data)
 
             response.raise_for_status()
+            log("==== LocalAI Image Payload ====", data, _print=True)
             return response.content
 
         except Exception as e:
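The local model's generate_image now builds a list of ("files", ...) tuples so several reference images can share the same multipart field rather than overwriting a single dict key. A sketch of that request shape, assuming a reachable media service; the URL and data payload keys below are placeholders, only the field name "files" and the image/png content type come from the diff:

    import io
    import requests

    data = {"prompt": "a lighthouse at dusk", "width": 1080, "height": 2048}  # assumed payload keys
    files_list = [
        # requests accepts (field_name, (filename, fileobj, content_type)) tuples, which
        # lets multiple files be posted under the same "files" field.
        ("files", ("ref1.png", io.BytesIO(b"..."), "image/png")),
        ("files", ("ref2.png", io.BytesIO(b"..."), "image/png")),
    ]
    response = requests.post("http://media-service/generate-image", data=data, files=files_list)
    response.raise_for_status()
    image_bytes = response.content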
{autonomous_app-0.3.40 → autonomous_app-0.3.42}/src/autonomous_app.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: autonomous-app
-Version: 0.3.40
+Version: 0.3.42
 Summary: Containerized application framework built on Flask with additional libraries and tools for rapid development of web applications.
 Author-email: Steven A Moore <samoore@binghamton.edu>
 Project-URL: homepage, https://github.com/Sallenmoore/autonomous