autonomous-app 0.3.41__py3-none-any.whl → 0.3.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
autonomous/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.3.41"
+ __version__ = "0.3.42"

  from dotenv import load_dotenv

autonomous/ai/imageagent.py CHANGED
@@ -21,7 +21,7 @@ class ImageAgent(BaseAgent):
  self,
  prompt,
  negative_prompt="",
- aspect_ratio="2KPortrait",
+ aspect_ratio="Portrait",
  files=None,
  ):
  return self.get_client(
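
The default aspect_ratio preset here moves from "2KPortrait" to "Portrait". Going by the _get_image_config mapping added to gemini.py further down in this diff, the two keys resolve to different output sizes; a lookup sketch (ASPECT_PRESETS is an illustrative name, not something shipped in the package):

    # Illustrative only: what the old and new defaults resolve to once they
    # reach GeminiAIModel._get_image_config (see the gemini.py hunks below).
    ASPECT_PRESETS = {"2KPortrait": ("9:16", "2K"), "Portrait": ("9:16", "1K")}
    ratio, size = ASPECT_PRESETS["Portrait"]    # new default -> ("9:16", "1K")
    ratio, size = ASPECT_PRESETS["2KPortrait"]  # old default -> ("9:16", "2K")
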
autonomous/ai/models/gemini.py CHANGED
@@ -18,12 +18,14 @@ from autonomous.model.automodel import AutoModel

  class GeminiAIModel(AutoModel):
  _client = None
- _text_model = "gemini-3-pro-preview"
- _summary_model = "gemini-2.5-flash"
- _image_model = "gemini-3-pro-image-preview"
- _json_model = "gemini-3-pro-preview"
- _stt_model = "gemini-3-pro-preview"
- _tts_model = "gemini-2.5-flash-preview-tts"
+
+ # Model definitions
+ _text_model = "gemini-1.5-pro"
+ _summary_model = "gemini-1.5-flash"
+ _json_model = "gemini-1.5-pro"
+ _stt_model = "gemini-1.5-pro"
+ _image_model = "imagen-3.0-generate-001"
+ _tts_model = "gemini-2.0-flash-exp"

  messages = ListAttr(StringAttr(default=[]))
  name = StringAttr(default="agent")
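
The model identifiers are grouped under a "Model definitions" comment and repointed at different model names. These class attributes are what the generate_content calls in the later hunks pass as model=. A minimal sketch of that call pattern, assuming the usual google-genai imports (the import lines and the prompt text are assumptions; the call shape mirrors the hunks below):

    import os

    from google import genai          # assumed import, per the google-genai SDK
    from google.genai import types

    client = genai.Client(api_key=os.environ.get("GOOGLEAI_KEY"))
    response = client.models.generate_content(
        model="gemini-1.5-pro",  # value of GeminiAIModel._text_model after this change
        config=types.GenerateContentConfig(system_instruction="You are a helpful agent."),
        contents=["Summarize the changes in release 0.3.42."],
    )
    print(response.text)
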
@@ -37,6 +39,30 @@ class GeminiAIModel(AutoModel):

  MAX_FILES = 14
  MAX_SUMMARY_TOKEN_LENGTH = 10000
+
+ SAFETY_SETTINGS = [
+ types.SafetySetting(
+ category="HARM_CATEGORY_HATE_SPEECH",
+ threshold="BLOCK_NONE",
+ ),
+ types.SafetySetting(
+ category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
+ threshold="BLOCK_NONE",
+ ),
+ types.SafetySetting(
+ category="HARM_CATEGORY_DANGEROUS_CONTENT",
+ threshold="BLOCK_NONE",
+ ),
+ types.SafetySetting(
+ category="HARM_CATEGORY_HARASSMENT",
+ threshold="BLOCK_NONE",
+ ),
+ types.SafetySetting(
+ category="HARM_CATEGORY_CIVIC_INTEGRITY",
+ threshold="BLOCK_NONE",
+ ),
+ ]
+
  VOICES = {
  "Zephyr": ["female"],
  "Puck": ["male"],
@@ -73,52 +99,39 @@ class GeminiAIModel(AutoModel):
  @property
  def client(self):
  if not self._client:
- # log("=== Initializing Gemini AI Client ===", _print=True)
  self._client = genai.Client(api_key=os.environ.get("GOOGLEAI_KEY"))
- # log("=== Gemini AI Client Initialized ===", _print=True)
  return self._client

  def _add_function(self, user_function):
- # This function is now a bit more advanced to conform to the Tool Use schema
  tool_schema = {
  "name": user_function.get("name"),
  "description": user_function.get("description"),
  "parameters": user_function.get("parameters"),
  }
-
- # Validate that the schema has a name, description, and parameters
  if not all(
  [tool_schema["name"], tool_schema["description"], tool_schema["parameters"]]
  ):
  raise ValueError(
  "Tool schema must have a 'name', 'description', and 'parameters' field."
  )
-
  return tool_schema

  def _create_wav_header(
  self, raw_audio_bytes, channels=1, rate=24000, sample_width=2
  ):
- """Creates an in-memory WAV file from raw PCM audio bytes."""
  buffer = io.BytesIO()
  with wave.open(buffer, "wb") as wav_file:
- # Set audio parameters
  wav_file.setnchannels(channels)
  wav_file.setsampwidth(sample_width)
- wav_file.setframerate(rate) # 16,000 Hz sample rate
-
- # Write the raw audio data
+ wav_file.setframerate(rate)
  wav_file.writeframes(raw_audio_bytes)
-
  buffer.seek(0)
  return buffer

  def _add_context(self, context):
- # Create in-memory file
  context_data = (
  json.dumps(context, indent=2) if isinstance(context, dict) else str(context)
  )
-
  f = io.BytesIO(context_data.encode("utf-8"))
  f.name = f"context-{self.pk}"
  return self._add_files([{"name": f.name, "file": f}])
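
_create_wav_header keeps its behavior and only sheds stale comments: it wraps raw 16-bit mono PCM at 24 kHz in a WAV container. A standalone sketch of that flow, including the WAV-to-MP3 step that generate_audio performs further down with pydub (pcm_to_wav is a stand-in name, the silence payload is illustrative, and the MP3 export requires ffmpeg to be available):

    import io
    import wave

    from pydub import AudioSegment

    def pcm_to_wav(raw_audio_bytes, channels=1, rate=24000, sample_width=2):
        # Same logic as GeminiAIModel._create_wav_header: add a WAV header so
        # downstream readers (wave, pydub) can parse the raw PCM stream.
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(sample_width)
            wav_file.setframerate(rate)
            wav_file.writeframes(raw_audio_bytes)
        buffer.seek(0)
        return buffer

    wav_buffer = pcm_to_wav(b"\x00\x00" * 24000)  # one second of silence
    mp3_buffer = io.BytesIO()
    AudioSegment.from_file(wav_buffer, format="wav").export(mp3_buffer, format="mp3")
    mp3_bytes = mp3_buffer.getvalue()
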
@@ -128,31 +141,28 @@ class GeminiAIModel(AutoModel):
  for f in file_list[: self.MAX_FILES]:
  fn = f["name"]
  try:
- result = self.client.files.delete(name=fn)
- except Exception as e:
- pass
- # log(f"No existing file to delete for {fn}: {e}", _print=True)
- else:
+ self.client.files.delete(name=fn)
+ except Exception:
  pass
- # log(f"Deleting old version of {fn}: {result}", _print=True)

- # If the content is raw bytes, wrap it in BytesIO
  file_content = f["file"]
- if isinstance(file_content, bytes):
- fileobj = io.BytesIO(file_content)
- else:
- fileobj = file_content
+ fileobj = (
+ io.BytesIO(file_content)
+ if isinstance(file_content, bytes)
+ else file_content
+ )
+
  uploaded_file = self.client.files.upload(
  file=fileobj,
  config={"mime_type": mime_type, "display_name": fn},
  )
  uploaded_files.append(uploaded_file)

- # This ensures the file is 'ACTIVE' before you use it in a prompt.
  while uploaded_file.state.name == "PROCESSING":
  time.sleep(0.5)
  uploaded_file = self.client.get_file(uploaded_file.name)
- self.file_refs = [f.name for f in self.client.files.list()] # Update file_refs
+
+ self.file_refs = [f.name for f in self.client.files.list()]
  self.save()
  return uploaded_files

@@ -160,8 +170,8 @@ class GeminiAIModel(AutoModel):
  self, message, function, additional_instructions="", uri="", context={}
  ):
  function_definition = self._add_function(function)
-
  contents = [message]
+
  if context:
  contents.extend(self._add_context(context))
  additional_instructions += (
@@ -169,12 +179,7 @@ class GeminiAIModel(AutoModel):
  )

  if uri:
- contents.append(
- Part.from_uri(
- file_uri=uri,
- mime_type="application/json",
- ),
- )
+ contents.append(Part.from_uri(file_uri=uri, mime_type="application/json"))
  additional_instructions += "\nUse the provided uri file for reference\n"

  response = self.client.models.generate_content(
@@ -183,24 +188,20 @@ class GeminiAIModel(AutoModel):
  config=types.GenerateContentConfig(
  system_instruction=f"{self.instructions}.{additional_instructions}",
  tools=[types.Tool(function_declarations=[function_definition])],
- tool_config={
- "function_calling_config": {
- "mode": "ANY", # Force a function call
- }
- },
+ tool_config=types.ToolConfig(
+ function_calling_config=types.FunctionCallingConfig(mode="ANY")
+ ),
  ),
  )

- # The response is now a ToolCall, not a JSON string
  try:
- # log(response.candidates[0].content.parts[0].function_call, _print=True)
+ if not response.candidates or not response.candidates[0].content.parts:
+ return {}
+
  tool_call = response.candidates[0].content.parts[0].function_call
  if tool_call and tool_call.name == function["name"]:
  return tool_call.args
  else:
- log(
- "==== Model did not return a tool call or returned the wrong one. ===="
- )
  log(f"Response: {response.text}", _print=True)
  return {}
  except Exception as e:
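
The dict-based tool_config becomes a typed ToolConfig with FunctionCallingConfig, and empty candidate lists are now handled before indexing. A sketch of the function schema this flow expects from _add_function and of the forced tool-call configuration, mirroring the calls above (the weather schema and its field names are illustrative):

    from google.genai import types

    weather_tool = {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "OBJECT",
            "properties": {"city": {"type": "STRING"}},
            "required": ["city"],
        },
    }

    # mode="ANY" forces the model to respond with a tool call.
    config = types.GenerateContentConfig(
        tools=[types.Tool(function_declarations=[weather_tool])],
        tool_config=types.ToolConfig(
            function_calling_config=types.FunctionCallingConfig(mode="ANY")
        ),
    )
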
@@ -216,12 +217,7 @@ class GeminiAIModel(AutoModel):
  )

  if uri:
- contents.append(
- Part.from_uri(
- file_uri=uri,
- mime_type="application/json",
- ),
- )
+ contents.append(Part.from_uri(file_uri=uri, mime_type="application/json"))

  response = self.client.models.generate_content(
  model=self._text_model,
@@ -230,20 +226,14 @@ class GeminiAIModel(AutoModel):
  ),
  contents=contents,
  )
-
- # log(results, _print=True)
- # log("=================== END REPORT ===================", _print=True)
  return response.text

  def summarize_text(self, text, primer=""):
  primer = primer or self.instructions
-
  updated_prompt_list = []
- # Find all words in the prompt
  words = re.findall(r"\w+", text)
- # Split the words into chunks
+
  for i in range(0, len(words), self.MAX_SUMMARY_TOKEN_LENGTH):
- # Join a chunk of words and add to the list
  updated_prompt_list.append(
  " ".join(words[i : i + self.MAX_SUMMARY_TOKEN_LENGTH])
  )
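
The loop above splits the input into word windows of MAX_SUMMARY_TOKEN_LENGTH (10000). A small worked example of that arithmetic (the 25,000-word input is chosen only for illustration):

    # A 25,000-word input produces three chunks of 10,000 / 10,000 / 5,000 words.
    MAX_SUMMARY_TOKEN_LENGTH = 10000
    words = ["word"] * 25000
    chunks = [
        " ".join(words[i : i + MAX_SUMMARY_TOKEN_LENGTH])
        for i in range(0, len(words), MAX_SUMMARY_TOKEN_LENGTH)
    ]
    print([len(c.split()) for c in chunks])  # -> [10000, 10000, 5000]
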
@@ -252,40 +242,27 @@ class GeminiAIModel(AutoModel):
  for p in updated_prompt_list:
  response = self.client.models.generate_content(
  model=self._summary_model,
- config=types.GenerateContentConfig(
- system_instruction=f"{primer}",
- ),
- contents=text,
+ config=types.GenerateContentConfig(system_instruction=f"{primer}"),
+ contents=p,
  )
  try:
  summary = response.candidates[0].content.parts[0].text
+ full_summary += summary + "\n"
  except Exception as e:
- log(f"{type(e)}:{e}\n\n Unable to generate content ====", _print=True)
+ log(f"Summary Error: {e}", _print=True)
  break
- else:
- full_summary += summary + "\n"
- return summary
+ return full_summary

  def generate_transcription(
- self,
- audio_file,
- prompt="Transcribe this audio clip",
- display_name="audio.mp3",
+ self, audio_file, prompt="Transcribe this audio clip", display_name="audio.mp3"
  ):
  myfile = self.client.files.upload(
  file=io.BytesIO(audio_file),
- config={
- "mime_type": "audio/mp3",
- "display_name": display_name,
- },
+ config={"mime_type": "audio/mp3", "display_name": display_name},
  )
-
  response = self.client.models.generate_content(
  model=self._stt_model,
- contents=[
- prompt,
- myfile,
- ],
+ contents=[prompt, myfile],
  )
  return response.text

@@ -309,44 +286,59 @@ class GeminiAIModel(AutoModel):
  speech_config=types.SpeechConfig(
  voice_config=types.VoiceConfig(
  prebuilt_voice_config=types.PrebuiltVoiceConfig(
- voice_name=voice,
+ voice_name=voice
  )
  )
  ),
  ),
  )
  blob = response.candidates[0].content.parts[0].inline_data
-
- # Create a WAV file in memory from the raw audio bytes
  wav_buffer = self._create_wav_header(blob.data)
-
- # 2. Load the WAV audio using pydub, which will now correctly read the header
  audio_segment = AudioSegment.from_file(wav_buffer, format="wav")
-
- # 3. Create a new in-memory buffer for the MP3 output
  mp3_buffer = io.BytesIO()
-
- # 4. Export the audio segment directly to the in-memory buffer
  audio_segment.export(mp3_buffer, format="mp3")
-
- # 5. Return the bytes from the buffer, not the filename
  return mp3_buffer.getvalue()
-
  except Exception as e:
- log(
- f"==== Error: Unable to generate audio ====\n{type(e)}:{e}", _print=True
- )
- # You can return a default empty byte string or re-raise the exception
+ log(f"==== Audio Gen Error: {e} ====", _print=True)
  raise e

- def generate_image(
- self,
- prompt,
- negative_prompt="",
- files=None,
- aspect_ratio="3:4",
- image_size="2K",
- ):
+ def _get_image_config(self, aspect_ratio_input):
+ """
+ Parses custom aspect ratio keys (e.g., '2KPortrait') into valid
+ Google Gemini API parameters for ratio and size.
+ """
+ # Default fallback
+ ratio = "1:1"
+ size = "1K"
+
+ # Logic Mapping
+ # Keys match what your App sends in ttrpgbase.py
+ if aspect_ratio_input == "2KPortrait":
+ ratio = "9:16"
+ size = "2K" # <--- THIS WAS MISSING BEFORE
+ elif aspect_ratio_input == "Portrait":
+ ratio = "9:16"
+ size = "1K"
+ elif aspect_ratio_input == "Landscape":
+ ratio = "16:9"
+ size = "1K"
+ elif aspect_ratio_input == "4K":
+ ratio = "16:9"
+ size = "4K"
+ elif aspect_ratio_input == "4KPortrait":
+ ratio = "9:16"
+ size = "4K"
+ elif aspect_ratio_input == "2K":
+ ratio = "1:1"
+ size = "2K"
+
+ # Pass-through for standard inputs
+ elif aspect_ratio_input in ["1:1", "3:4", "4:3", "9:16", "16:9"]:
+ ratio = aspect_ratio_input
+
+ return ratio, size
+
+ def generate_image(self, prompt, negative_prompt="", files=None, aspect_ratio="2K"):
  image = None
  contents = [prompt]

@@ -355,51 +347,36 @@ class GeminiAIModel(AutoModel):
  contents.extend(filerefs)

  try:
- # log(self._image_model, contents, _print=True)
+ # 1. Resolve Aspect Ratio AND Size
+ valid_ratio, valid_size = self._get_image_config(aspect_ratio)
+
+ # 2. Call API with correct parameters
  response = self.client.models.generate_content(
  model=self._image_model,
  contents=contents,
  config=types.GenerateContentConfig(
- safety_settings=[
- types.SafetySetting(
- category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
- threshold=types.HarmBlockThreshold.BLOCK_NONE,
- ),
- types.SafetySetting(
- category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
- threshold=types.HarmBlockThreshold.BLOCK_NONE,
- ),
- types.SafetySetting(
- category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
- threshold=types.HarmBlockThreshold.BLOCK_NONE,
- ),
- types.SafetySetting(
- category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
- threshold=types.HarmBlockThreshold.BLOCK_NONE,
- ),
- types.SafetySetting(
- category=types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
- threshold=types.HarmBlockThreshold.BLOCK_NONE,
- ),
- ],
+ safety_settings=self.SAFETY_SETTINGS,
  image_config=types.ImageConfig(
- aspect_ratio=aspect_ratio,
- image_size=image_size,
+ aspect_ratio=valid_ratio,
+ image_size=valid_size, # Now passing "2K" or "4K" correctly
  ),
  ),
  )
- log(response, _print=True)
- log(response.candidates, _print=True)
- image_parts = [
- part.inline_data.data
- for part in response.candidates[0].content.parts
- if part.inline_data
- ]
- image = image_parts[0]
+
+ # 3. Extract Image Data
+ if response.candidates and response.candidates[0].content.parts:
+ for part in response.candidates[0].content.parts:
+ if part.inline_data:
+ image = part.inline_data.data
+ break
+
+ if not image:
+ raise ValueError(
+ f"API returned Success but no image data found. Response: {response}"
+ )
+
  except Exception as e:
- log(
- f"==== Error: Unable to create image ====\n\n{e}",
- _print=True,
- )
+ log(f"==== Error: Unable to create image ====\n\n{e}", _print=True)
  raise e
+
  return image
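
Taken together, generate_image now resolves the preset key via _get_image_config, reuses the shared SAFETY_SETTINGS, and scans candidate parts for inline image data. A hypothetical call for illustration (agent stands in for an already-instantiated GeminiAIModel, and the output file name and image format are assumptions that depend on the response's mime type):

    # Hypothetical usage; `agent` is assumed to be a GeminiAIModel instance.
    image_bytes = agent.generate_image(
        "A watercolor map of a coastal fortress",
        aspect_ratio="4KPortrait",  # resolved by _get_image_config to ("9:16", "4K")
    )
    with open("fortress.png", "wb") as fh:
        fh.write(image_bytes)
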
autonomous/ai/models/local_model.py CHANGED
@@ -269,7 +269,9 @@ class LocalAIModel(AutoModel):
  "16:9": (1216, 832),
  "2K": (2048, 1080),
  "2KPortrait": (1080, 2048),
+ "Portrait": (1080, 2048),
  "4K": (3840, 2160),
+ "Landscape": (3840, 2160),
  "4KPortrait": (2160, 3840),
  "9:16": (832, 1216),
  "3:2": (1216, 832),
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: autonomous-app
- Version: 0.3.41
+ Version: 0.3.42
  Summary: Containerized application framework built on Flask with additional libraries and tools for rapid development of web applications.
  Author-email: Steven A Moore <samoore@binghamton.edu>
  Project-URL: homepage, https://github.com/Sallenmoore/autonomous
@@ -1,15 +1,15 @@
- autonomous/__init__.py,sha256=2Py6rnpUfu4t8c9Y9n3oQLD2-RGCbghqEkVltb7JTpw,95
+ autonomous/__init__.py,sha256=XvTINoMxlBE_wUWlNeWGg7W7PDm0SpflzDGROfnBvEA,95
  autonomous/cli.py,sha256=z4AaGeWNW_uBLFAHng0J_lfS9v3fXemK1PeT85u4Eo4,42
  autonomous/logger.py,sha256=NQtgEaTWNAWfLSgqSP7ksXj1GpOuCgoUV711kSMm-WA,2022
  autonomous/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autonomous/ai/audioagent.py,sha256=aZ25eEdze8S060-a4S0k319tgyl2aDTUa8dJu07mXn0,1092
  autonomous/ai/baseagent.py,sha256=icOPygr1NdH64u1ZYbwHHywYIY1ZtaLY9HtfNmUbx4k,4702
- autonomous/ai/imageagent.py,sha256=5eb4irUcWOD1HnquHX3jOTjOsed5Yn98rxSol84Lf18,902
+ autonomous/ai/imageagent.py,sha256=1RT7OYTnRUo3q5k5w83A3cOh3hXUlrx0jRkg0YJSgZ0,900
  autonomous/ai/jsonagent.py,sha256=DNfZHMVCfc5nrkWJm2OebTYDkBwm_ZCeVWGIFGjB_Cg,1208
  autonomous/ai/textagent.py,sha256=0y2Hvb9pup1OnsA51hGPcD8yllZOZtztDLQvCNYABaw,1043
  autonomous/ai/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autonomous/ai/models/gemini.py,sha256=eu48gywNFpUFaqBt-4MFX2oRM5IED9rUTgtavM_HRG0,14468
- autonomous/ai/models/local_model.py,sha256=HjHDZqmdhlT29e5Q7GXNbZYbM9GVNiSzEJyBDpXWxtU,12431
+ autonomous/ai/models/gemini.py,sha256=jCfOovhSBG0G9rg2DQ3plRV66VKPzW7w56UVjuMoXvU,13106
+ autonomous/ai/models/local_model.py,sha256=uvevUIQgCwwrrFub-LoV4vbk71psdEPEzfzmuztn3sI,12508
  autonomous/apis/version_control/GHCallbacks.py,sha256=AyiUlYfV5JePi11GVyqYyXoj5UTbPKzS-HRRI94rjJo,1069
  autonomous/apis/version_control/GHOrganization.py,sha256=mi2livdsGurKiifbvuLwiFbdDzL77IlEfhwEa-tG77I,1155
  autonomous/apis/version_control/GHRepo.py,sha256=hTFHMkxSbSlVELfh8S6mq6ijkIKPRQO-Q5775ZjRKD4,4622
@@ -55,7 +55,7 @@ autonomous/taskrunner/__init__.py,sha256=ughX-QfWBas5W3aB2SiF887SWJ3Dzc2X43Yxtmp
  autonomous/taskrunner/autotasks.py,sha256=2zRaqHYqfdlgC_BQm6B6D2svN1ukyWeJJHwweZFHVoo,2616
  autonomous/taskrunner/task_router.py,sha256=W09HtRUuhwlnGxM5w4l6Hzw6mfS6L4ljWiMzD3ZVFeU,601
  autonomous/utils/markdown.py,sha256=tf8vlHARiQO1X_aGbqlYozzP_TbdiDRT9EEP6aFRQo0,2153
- autonomous_app-0.3.41.dist-info/METADATA,sha256=bF72uTuECc-BhEP0w7uDgm7gqnLsx-cf4eIH1uIxJhY,3024
- autonomous_app-0.3.41.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- autonomous_app-0.3.41.dist-info/top_level.txt,sha256=ZyxWWDdbvZekF3UFunxl4BQsVDb_FOW3eTn0vun_jb4,11
- autonomous_app-0.3.41.dist-info/RECORD,,
+ autonomous_app-0.3.42.dist-info/METADATA,sha256=wQyVXrufozI92_4Yv4zfuPya9LYxav2u8jHjUogkQRA,3024
+ autonomous_app-0.3.42.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ autonomous_app-0.3.42.dist-info/top_level.txt,sha256=ZyxWWDdbvZekF3UFunxl4BQsVDb_FOW3eTn0vun_jb4,11
+ autonomous_app-0.3.42.dist-info/RECORD,,