iatoolkit 1.7.0__py3-none-any.whl → 1.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. iatoolkit/__init__.py +1 -1
  2. iatoolkit/common/routes.py +16 -3
  3. iatoolkit/common/util.py +8 -123
  4. iatoolkit/core.py +1 -0
  5. iatoolkit/infra/connectors/file_connector.py +10 -2
  6. iatoolkit/infra/connectors/google_drive_connector.py +3 -0
  7. iatoolkit/infra/connectors/local_file_connector.py +3 -0
  8. iatoolkit/infra/connectors/s3_connector.py +24 -1
  9. iatoolkit/infra/llm_providers/deepseek_adapter.py +17 -1
  10. iatoolkit/infra/llm_providers/gemini_adapter.py +117 -18
  11. iatoolkit/infra/llm_providers/openai_adapter.py +175 -18
  12. iatoolkit/infra/llm_response.py +13 -0
  13. iatoolkit/locales/en.yaml +82 -4
  14. iatoolkit/locales/es.yaml +79 -4
  15. iatoolkit/repositories/llm_query_repo.py +51 -18
  16. iatoolkit/repositories/models.py +16 -7
  17. iatoolkit/services/company_context_service.py +294 -133
  18. iatoolkit/services/configuration_service.py +140 -121
  19. iatoolkit/services/dispatcher_service.py +1 -4
  20. iatoolkit/services/knowledge_base_service.py +26 -4
  21. iatoolkit/services/llm_client_service.py +58 -2
  22. iatoolkit/services/prompt_service.py +251 -164
  23. iatoolkit/services/query_service.py +37 -18
  24. iatoolkit/services/storage_service.py +92 -0
  25. iatoolkit/static/js/chat_filepond.js +188 -63
  26. iatoolkit/static/js/chat_main.js +105 -52
  27. iatoolkit/static/styles/chat_iatoolkit.css +96 -0
  28. iatoolkit/system_prompts/query_main.prompt +24 -41
  29. iatoolkit/templates/chat.html +15 -6
  30. iatoolkit/views/base_login_view.py +1 -1
  31. iatoolkit/views/categories_api_view.py +111 -0
  32. iatoolkit/views/chat_view.py +1 -1
  33. iatoolkit/views/configuration_api_view.py +1 -1
  34. iatoolkit/views/login_view.py +1 -1
  35. iatoolkit/views/prompt_api_view.py +88 -7
  36. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/METADATA +1 -1
  37. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/RECORD +41 -39
  38. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/WHEEL +0 -0
  39. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE +0 -0
  40. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/licenses/LICENSE_COMMUNITY.md +0 -0
  41. {iatoolkit-1.7.0.dist-info → iatoolkit-1.15.3.dist-info}/top_level.txt +0 -0
iatoolkit/__init__.py CHANGED
@@ -3,7 +3,7 @@
 #
 # IAToolkit is open source software.
 
-__version__ = "1.7.0"
+__version__ = "1.15.3"
 
 # Expose main classes and functions at the top level of the package
 
@@ -32,6 +32,7 @@ def register_views(app):
     from iatoolkit.views.root_redirect_view import RootRedirectView
     from iatoolkit.views.users_api_view import UsersApiView
     from iatoolkit.views.rag_api_view import RagApiView
+    from iatoolkit.views.categories_api_view import CategoriesApiView
 
     # assign root '/' to our new redirect logic
     app.add_url_rule('/home', view_func=RootRedirectView.as_view('root_redirect'))
@@ -85,9 +86,21 @@ def register_views(app):
     # can be used also for executing iatoolkit prompts
     app.add_url_rule('/<company_short_name>/api/llm_query', view_func=LLMQueryApiView.as_view('llm_query_api'))
 
-    # open the prompt directory
-    app.add_url_rule('/<company_short_name>/api/prompts', view_func=PromptApiView.as_view('prompt'))
-
+    # Categories Endpoint
+    app.add_url_rule('/<company_short_name>/api/categories',
+                     view_func=CategoriesApiView.as_view('categories_api'),
+                     methods=['GET', 'POST'])
+
+    # open the prompt directory and specific prompt management
+    prompt_view = PromptApiView.as_view('prompt')
+    app.add_url_rule('/<company_short_name>/api/prompts',
+                     view_func=prompt_view,
+                     methods=['GET', 'POST'],
+                     defaults={'prompt_name': None})
+
+    app.add_url_rule('/<company_short_name>/api/prompts/<prompt_name>',
+                     view_func=prompt_view,
+                     methods=['GET', 'POST', 'PUT', 'DELETE'])
     # toolbar buttons
     app.add_url_rule('/<company_short_name>/api/feedback', view_func=UserFeedbackApiView.as_view('feedback'))
     app.add_url_rule('/<company_short_name>/api/history', view_func=HistoryApiView.as_view('history'))
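Note: a minimal sketch of how a client might exercise the routes added above. Only the URL rules and allowed methods come from the diff; the host, the 'acme' company short name, and the request payloads are hypothetical.

import requests  # hypothetical client code, not part of the package

BASE = "http://localhost:5000/acme"  # 'acme' stands in for <company_short_name>

# Prompt directory (collection route; prompt_name defaults to None on the server side)
requests.get(f"{BASE}/api/prompts")

# Single-prompt management through the new /api/prompts/<prompt_name> rule (PUT/DELETE now allowed)
requests.put(f"{BASE}/api/prompts/welcome", json={"content": "..."})   # payload shape is assumed
requests.delete(f"{BASE}/api/prompts/welcome")

# New categories endpoint (GET and POST)
requests.get(f"{BASE}/api/categories")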
iatoolkit/common/util.py CHANGED
@@ -162,8 +162,15 @@ class Utility:
         Parses a YAML string into a dictionary securely.
         """
         try:
+            if not yaml_content:
+                return {}
+
+            # Normalize tabs, which break YAML parsing
             yaml_content = yaml_content.replace('\t', ' ')
-            return yaml.safe_load(yaml_content) or {}
+
+            loaded = yaml.safe_load(yaml_content)
+            # Make sure we always return a dict, even if the YAML is a list or a scalar
+            return loaded if isinstance(loaded, dict) else {}
         except yaml.YAMLError as e:
             logging.error(f"Error parsing YAML string: {e}")
             return {}
@@ -182,128 +189,6 @@ class Utility:
             raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
                                      f"Failed to generate YAML: {e}")
 
-    def generate_context_for_schema(self, entity_name: str, schema_file: str = None, schema: dict = {}) -> str:
-        if not schema_file and not schema:
-            raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
-                                     f'No se pudo obtener schema de la entidad: {entity_name}')
-
-        try:
-            if schema_file:
-                schema = self.load_schema_from_yaml(schema_file)
-            table_schema = self.generate_schema_table(schema)
-            return table_schema
-        except Exception as e:
-            logging.exception(e)
-            raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
-                                     f'No se pudo leer el schema de la entidad: {entity_name}') from e
-
-    def generate_schema_table(self, schema: dict) -> str:
-        """
-        Generates a detailed, Markdown-formatted description of a schema.
-        This function is designed to handle the specific format of our
-        YAML files, where the schema is defined under a single root key.
-        """
-        if not schema or not isinstance(schema, dict):
-            return ""
-
-        # We assume the YAML has a single root key that names the entity.
-        if len(schema) == 1:
-            root_name = list(schema.keys())[0]
-            root_details = schema[root_name]
-
-            # support this format
-            if root_details.get('columns'):
-                root_details = root_details['columns']
-
-            if isinstance(root_details, dict):
-                # The metadata keys describe the object itself, not its child properties.
-                METADATA_KEYS = ['description', 'type', 'format', 'items', 'properties', 'pk']
-
-                # The properties are the remaining keys in the dictionary.
-                properties = {
-                    k: v for k, v in root_details.items() if k not in METADATA_KEYS
-                }
-
-                # The description of the root object.
-                root_description = root_details.get('description', '')
-
-                # Format the extracted properties using the recursive helper function.
-                formatted_properties = self._format_json_schema(properties, 0)
-
-                # Build the final result, including the name of the root object.
-                output_parts = [f"\n\n### Objeto: `{root_name}`"]
-                if root_description:
-                    # Clean up the description so it displays properly
-                    cleaned_description = '\n'.join(line.strip() for line in root_description.strip().split('\n'))
-                    output_parts.append(f"{cleaned_description}")
-
-                if formatted_properties:
-                    output_parts.append(f"**Campos del objeto `{root_name}`:**\n{formatted_properties}")
-
-                return "\n".join(output_parts)
-
-        # If the schema (like tender_schema.yaml) has no root object,
-        # it is formatted directly as a list of properties.
-        return self._format_json_schema(schema, 0)
-
-    def _format_json_schema(self, properties: dict, indent_level: int) -> str:
-        """
-        Recursively formats the properties of a JSON/YAML schema.
-        """
-        output = []
-        indent_str = ' ' * indent_level
-
-        for name, details in properties.items():
-            if not isinstance(details, dict):
-                continue
-
-            description = details.get('description', '')
-            data_type = details.get('type', 'any')
-            output.append(f"{indent_str}- **`{name.lower()}`** ({data_type}): {description}")
-            # if 'pk' in details and details['pk']:
-            #     output.append(f"{indent_str}- **Primary Key**: {details['pk']}")
-
-            child_indent_str = ' ' * (indent_level + 1)
-
-            # Handle 'oneOf' to show constant values
-            if 'oneOf' in details:
-                for item in details['oneOf']:
-                    if 'const' in item:
-                        const_desc = item.get('description', '')
-                        output.append(f"{child_indent_str}- `{item['const']}`: {const_desc}")
-
-            # Handle 'items' for arrays
-            if 'items' in details:
-                items_details = details.get('items', {})
-                if isinstance(items_details, dict):
-                    item_description = items_details.get('description')
-                    if item_description:
-                        # Clean up and append the item description
-                        cleaned_description = '\n'.join(
-                            f"{line.strip()}" for line in item_description.strip().split('\n')
-                        )
-                        output.append(
-                            f"{child_indent_str}*Descripción de los elementos del array:*\n{child_indent_str}{cleaned_description}")
-
-                    if 'properties' in items_details:
-                        nested_properties = self._format_json_schema(items_details['properties'], indent_level + 1)
-                        output.append(nested_properties)
-
-            # Handle 'properties' for standard nested objects
-            if 'properties' in details:
-                nested_properties = self._format_json_schema(details['properties'], indent_level + 1)
-                output.append(nested_properties)
-
-            elif 'additionalProperties' in details and 'properties' in details.get('additionalProperties', {}):
-                # Emit a placeholder for the dynamic key.
-                output.append(
-                    f"{child_indent_str}- **[*]** (object): Las claves de este objeto son dinámicas (ej. un ID).")
-                # Process the properties of the nested object.
-                nested_properties = self._format_json_schema(details['additionalProperties']['properties'],
-                                                             indent_level + 2)
-                output.append(nested_properties)
-
-        return '\n'.join(output)
 
     def load_markdown_context(self, filepath: str) -> str:
         with open(filepath, 'r', encoding='utf-8') as f:
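Note: a rough sketch of the stricter parsing contract introduced above. The hunk starts inside the method's docstring, so its real name is not visible; parse_yaml_string below is a hypothetical stand-in, and a no-argument Utility() constructor is assumed for illustration.

util = Utility()  # assumed constructor, for illustration only

util.parse_yaml_string("")              # -> {} (empty input short-circuits)
util.parse_yaml_string("- a\n- b")      # -> {} (a YAML list or scalar is no longer returned as-is)
util.parse_yaml_string("key:\tvalue")   # tabs are normalized before yaml.safe_load
util.parse_yaml_string("name: x")       # -> {'name': 'x'}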
iatoolkit/core.py CHANGED
@@ -58,6 +58,7 @@ class IAToolkit:
         self._injector = Injector()  # init empty injector
         self.version = IATOOLKIT_VERSION
         self.license = "Community Edition"
+        self.is_community = True
 
     @classmethod
     def get_instance(cls) -> 'IAToolkit':
iatoolkit/infra/connectors/file_connector.py CHANGED
@@ -4,7 +4,7 @@
 # IAToolkit is open source software.
 
 from abc import ABC, abstractmethod
-from typing import List
+from typing import List, Optional
 
 
 class FileConnector(ABC):
@@ -14,4 +14,12 @@ class FileConnector(ABC):
 
     @abstractmethod
     def get_file_content(self, file_path: str) -> bytes:
-        pass
+        pass
+
+
+    @abstractmethod
+    def upload_file(self, file_path: str, content: bytes, content_type: str = None) -> None:
+        pass
+
+    def generate_presigned_url(self, file_path: str, expiration: int = 3600) -> Optional[str]:
+        return None
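Note: with this change every concrete connector must implement upload_file, while generate_presigned_url stays optional (the base implementation returns None). A minimal sketch of a custom connector under the updated contract; the in-memory backend is made up for illustration.

class InMemoryConnector(FileConnector):
    # Hypothetical connector, only to illustrate the new abstract interface
    def __init__(self):
        self._files: dict = {}

    def get_file_content(self, file_path: str) -> bytes:
        return self._files[file_path]

    def upload_file(self, file_path: str, content: bytes, content_type: str = None) -> None:
        self._files[file_path] = content

    # generate_presigned_url is inherited from FileConnector and returns None for this backend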
iatoolkit/infra/connectors/google_drive_connector.py CHANGED
@@ -66,3 +66,6 @@ class GoogleDriveConnector(FileConnector):
             status, done = downloader.next_chunk()
 
         return file_buffer.getvalue()
+
+    def upload_file(self, file_path: str, content: bytes, content_type: str = None) -> None:
+        return
iatoolkit/infra/connectors/local_file_connector.py CHANGED
@@ -44,3 +44,6 @@ class LocalFileConnector(FileConnector):
         except Exception as e:
             raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
                                      f"Error leyendo el archivo {file_path}: {e}")
+
+    def upload_file(self, file_path: str, content: bytes, content_type: str = None) -> None:
+        return
iatoolkit/infra/connectors/s3_connector.py CHANGED
@@ -32,4 +32,27 @@ class S3Connector(FileConnector):
 
     def get_file_content(self, file_path: str) -> bytes:
         response = self.s3.get_object(Bucket=self.bucket, Key=file_path)
-        return response['Body'].read()
+        return response['Body'].read()
+
+    def upload_file(self, file_path: str, content: bytes, content_type: str = None) -> None:
+        # If the path doesn't start with the prefix, add it (optional, depends on your logic).
+        # file_path is assumed to be either a full path or relative to the bucket root, for flexibility.
+        full_path = file_path
+
+        extra_args = {}
+        if content_type:
+            extra_args['ContentType'] = content_type
+
+        self.s3.put_object(
+            Bucket=self.bucket,
+            Key=full_path,
+            Body=content,
+            **extra_args
+        )
+
+    def generate_presigned_url(self, file_path: str, expiration: int = 3600) -> str:
+        return self.s3.generate_presigned_url(
+            'get_object',
+            Params={'Bucket': self.bucket, 'Key': file_path},
+            ExpiresIn=expiration
+        )
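Note: a short usage sketch of the two new S3Connector methods, assuming an already constructed connector instance (its constructor arguments are not part of this diff). put_object and generate_presigned_url are standard boto3 client calls.

def share_report(connector: S3Connector, pdf_bytes: bytes) -> str:
    # Hypothetical helper: upload a document and hand back a temporary download link
    connector.upload_file("reports/summary.pdf", pdf_bytes, content_type="application/pdf")
    # Link valid for 15 minutes instead of the default hour
    return connector.generate_presigned_url("reports/summary.pdf", expiration=900)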
iatoolkit/infra/llm_providers/deepseek_adapter.py CHANGED
@@ -40,6 +40,13 @@ class DeepseekAdapter:
         tools = kwargs.get("tools") or []
         tool_choice = kwargs.get("tool_choice", "auto")
         context_history = kwargs.get("context_history") or []
+        images = kwargs.get("images") or []
+
+        if images:
+            logging.warning(
+                f"[DeepseekAdapter] Images provided but DeepSeek models are not multimodal. "
+                f"Ignoring {len(images)} images."
+            )
 
         try:
             # 1) Build messages from history (if any)
@@ -232,6 +239,7 @@ class DeepseekAdapter:
 
         # If the model produced tool calls, fills this list
        tool_calls_out: List[ToolCall] = []
+        content_parts: List[Dict] = []  # Initialize content_parts
 
         tool_calls = getattr(message, "tool_calls", None) or []
        if not tool_calls:
@@ -239,6 +247,13 @@ class DeepseekAdapter:
             output_text = getattr(message, "content", "") or ""
             status = "completed"
 
+            # Fill content_parts for text response
+            if output_text:
+                content_parts.append({
+                    "type": "text",
+                    "text": output_text
+                })
+
 
         else:
             logging.debug(f"[DeepSeek] RAW tool_calls: {tool_calls}")
@@ -274,5 +289,6 @@ class DeepseekAdapter:
             output_text=output_text,
             output=tool_calls_out,
             usage=usage,
-            reasoning_content=reasoning_content
+            reasoning_content=reasoning_content,
+            content_parts=content_parts  # Pass content_parts
         )
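Note: for a plain text completion, content_parts as built above holds at most a single text part, since DeepSeek requests drop any provided images. A sketch of the resulting shape, based only on the code in this hunk:

# content_parts for a text-only DeepSeek completion
content_parts = [
    {"type": "text", "text": "...model output..."}
]
# When the model returns tool calls, content_parts stays empty and the
# ToolCall objects are passed through the `output` field instead.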
iatoolkit/infra/llm_providers/gemini_adapter.py CHANGED
@@ -11,6 +11,9 @@ from iatoolkit.common.exceptions import IAToolkitException
 import logging
 import json
 import uuid
+import mimetypes
+import re
+
 
 class GeminiAdapter:
 
@@ -34,6 +37,7 @@ class GeminiAdapter:
                  text: Optional[Dict] = None,
                  reasoning: Optional[Dict] = None,
                  tool_choice: str = "auto",
+                 images: Optional[List[Dict]] = None,
                  ) -> LLMResponse:
         try:
             # init the model with the configured client
@@ -43,11 +47,12 @@ class GeminiAdapter:
             )
 
             # prepare the content for gemini
+            # We pass images here because they need to be merged into the content
             if context_history:
                 # concat the history with the current input
-                contents = self._prepare_gemini_contents(context_history + input)
+                contents = self._prepare_gemini_contents(context_history + input, images)
             else:
-                contents = self._prepare_gemini_contents(input)
+                contents = self._prepare_gemini_contents(input, images)
 
             # prepare tools
             gemini_tools = self._prepare_gemini_tools(tools) if tools else None
@@ -108,31 +113,67 @@ class GeminiAdapter:
         }
         return model_mapping.get(model.lower(), model)
 
-    def _prepare_gemini_contents(self, input: List[Dict]) -> List[Dict]:
+    def _prepare_gemini_contents(self, input: List[Dict], images: Optional[List[Dict]] = None) -> List[Dict]:
         # convert input messages to Gemini format
         gemini_contents = []
 
-        for message in input:
+        # Find the last user message to attach images to
+        last_user_msg_index = -1
+        if images:
+            for i in range(len(input) - 1, -1, -1):
+                if input[i].get("role") == "user":
+                    last_user_msg_index = i
+                    break
+
+        for i, message in enumerate(input):
+            parts = []
+
             if message.get("role") == "system":
+                # System prompts are usually passed as user role with special text in Gemini 1.0/1.5 API
+                # unless using the explicit system_instruction parameter (which is model-init time).
+                # Here we keep the existing logic of prepending to user role.
                 gemini_contents.append({
                     "role": "user",
                     "parts": [{"text": f"[INSTRUCCIONES DEL SISTEMA]\n{message.get('content', '')}"}]
                 })
+                continue  # Skip the rest for this iteration
+
             elif message.get("role") == "user":
-                gemini_contents.append({
-                    "role": "user",
-                    "parts": [{"text": message.get("content", "")}]
-                })
+                role = "user"
+                parts.append({"text": message.get("content", "")})
+
+                # Attach images to the LAST user message only
+                if images and i == last_user_msg_index:
+                    for img in images:
+                        filename = img.get('name', '')
+                        mime_type, _ = mimetypes.guess_type(filename)
+                        if not mime_type:
+                            mime_type = 'image/jpeg'
+
+                        parts.append({
+                            "inline_data": {
+                                "mime_type": mime_type,
+                                "data": img.get('base64', '')
+                            }
+                        })
+
             elif message.get("type") == "function_call_output":
-                gemini_contents.append({
-                    "role": "function",
-                    "parts": [{
-                        "function_response": {
-                            "name": "tool_result",
-                            "response": {"output": message.get("output", "")}
-                        }
-                    }]
+                role = "function"
+                parts.append({
+                    "function_response": {
+                        "name": "tool_result",
+                        "response": {"output": message.get("output", "")}
+                    }
                 })
+            else:
+                # Handle assistant messages or others if present in history
+                # Assuming role mapping is correct or handled elsewhere if needed
+                continue
+
+            gemini_contents.append({
+                "role": role,
+                "parts": parts
+            })
 
         return gemini_contents
 
@@ -245,13 +286,37 @@ class GeminiAdapter:
         response_id = str(uuid.uuid4())
         output_text = ""
         tool_calls = []
+        content_parts = []
 
         if gemini_response.candidates and len(gemini_response.candidates) > 0:
             candidate = gemini_response.candidates[0]
 
             for part in candidate.content.parts:
+                # 1. Text case
                 if hasattr(part, 'text') and part.text:
-                    output_text += part.text
+                    text_chunk = part.text
+
+                    # Look for images embedded in the text as Markdown
+                    # Pattern: ![Alt text](URL)
+                    markdown_images = re.findall(r'!\[([^\]]*)\]\((https?://[^)]+)\)', text_chunk)
+
+                    for alt_text, url in markdown_images:
+                        content_parts.append({
+                            "type": "image",
+                            "source": {
+                                "type": "url",
+                                "media_type": "image/webp",  # assume webp by default for modern image generation
+                                "url": url
+                            }
+                        })
+
+                    output_text += text_chunk
+                    content_parts.append({
+                        "type": "text",
+                        "text": text_chunk
+                    })
+
+                # 2. Function case (tool call)
                 elif hasattr(part, 'function_call') and part.function_call:
                     func_call = part.function_call
                     tool_calls.append(ToolCall(
@@ -261,6 +326,39 @@ class GeminiAdapter:
                         arguments=json.dumps(MessageToDict(func_call._pb).get('args', {}))
                     ))
 
+                # 3. Image case (inline data / base64 returned directly by Gemini)
+                elif hasattr(part, 'inline_data') and part.inline_data:
+                    # Gemini returns generated images here
+                    mime_type = part.inline_data.mime_type
+                    data_base64 = part.inline_data.data  # bytes or a base64 str
+
+                    content_parts.append({
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": mime_type,
+                            "data": data_base64
+                        }
+                    })
+
+                    # Optional: add a placeholder to the plain text for logs
+                    output_text += "\n[Imagen Generada]\n"
+
+                # 4. File case (file data / URI)
+                elif hasattr(part, 'file_data') and part.file_data:
+                    mime_type = part.file_data.mime_type
+                    file_uri = part.file_data.file_uri
+
+                    content_parts.append({
+                        "type": "image",
+                        "source": {
+                            "type": "url",
+                            "media_type": mime_type,
+                            "url": file_uri
+                        }
+                    })
+                    output_text += f"\n[Imagen Generada: {file_uri}]\n"
+
         # Determine status
         status = "completed"
         if gemini_response.candidates:
@@ -299,7 +397,8 @@ class GeminiAdapter:
             status=status,
             output_text=output_text,
             output=tool_calls,
-            usage=usage
+            usage=usage,
+            content_parts=content_parts
         )
 
     def _extract_usage_metadata(self, gemini_response) -> Usage:
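Note: for reference, the images argument consumed by _prepare_gemini_contents above is a list of dicts carrying a file name (used only for MIME-type guessing) and base64-encoded data. The values below are placeholders.

images = [
    {
        "name": "invoice.png",    # mimetypes.guess_type() falls back to image/jpeg when it can't guess
        "base64": "<base64-encoded image bytes>",
    }
]
# Each entry is attached as an inline_data part on the last user message in the conversation.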