iatoolkit 0.66.2__py3-none-any.whl → 0.71.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. iatoolkit/__init__.py +2 -6
  2. iatoolkit/base_company.py +3 -31
  3. iatoolkit/cli_commands.py +1 -1
  4. iatoolkit/common/routes.py +5 -1
  5. iatoolkit/common/session_manager.py +2 -0
  6. iatoolkit/company_registry.py +1 -2
  7. iatoolkit/iatoolkit.py +13 -13
  8. iatoolkit/infra/llm_client.py +8 -12
  9. iatoolkit/infra/llm_proxy.py +38 -10
  10. iatoolkit/locales/en.yaml +25 -2
  11. iatoolkit/locales/es.yaml +27 -4
  12. iatoolkit/repositories/database_manager.py +8 -3
  13. iatoolkit/repositories/document_repo.py +1 -1
  14. iatoolkit/repositories/models.py +6 -8
  15. iatoolkit/repositories/profile_repo.py +0 -4
  16. iatoolkit/repositories/vs_repo.py +26 -20
  17. iatoolkit/services/auth_service.py +2 -2
  18. iatoolkit/services/branding_service.py +11 -7
  19. iatoolkit/services/company_context_service.py +155 -0
  20. iatoolkit/services/configuration_service.py +133 -0
  21. iatoolkit/services/dispatcher_service.py +75 -70
  22. iatoolkit/services/document_service.py +5 -2
  23. iatoolkit/services/embedding_service.py +145 -0
  24. iatoolkit/services/excel_service.py +15 -11
  25. iatoolkit/services/file_processor_service.py +4 -12
  26. iatoolkit/services/history_service.py +7 -7
  27. iatoolkit/services/i18n_service.py +4 -4
  28. iatoolkit/services/jwt_service.py +7 -9
  29. iatoolkit/services/language_service.py +29 -23
  30. iatoolkit/services/load_documents_service.py +100 -113
  31. iatoolkit/services/mail_service.py +9 -4
  32. iatoolkit/services/profile_service.py +10 -7
  33. iatoolkit/services/prompt_manager_service.py +20 -16
  34. iatoolkit/services/query_service.py +112 -43
  35. iatoolkit/services/search_service.py +11 -4
  36. iatoolkit/services/sql_service.py +57 -25
  37. iatoolkit/services/user_feedback_service.py +15 -13
  38. iatoolkit/static/js/chat_history_button.js +3 -5
  39. iatoolkit/static/js/chat_main.js +2 -17
  40. iatoolkit/static/js/chat_onboarding_button.js +6 -0
  41. iatoolkit/static/styles/chat_iatoolkit.css +69 -158
  42. iatoolkit/static/styles/chat_modal.css +1 -37
  43. iatoolkit/static/styles/onboarding.css +7 -0
  44. iatoolkit/system_prompts/query_main.prompt +2 -10
  45. iatoolkit/templates/change_password.html +1 -1
  46. iatoolkit/templates/chat.html +12 -4
  47. iatoolkit/templates/chat_modals.html +4 -0
  48. iatoolkit/templates/error.html +1 -1
  49. iatoolkit/templates/login_simulation.html +17 -6
  50. iatoolkit/templates/onboarding_shell.html +4 -1
  51. iatoolkit/views/base_login_view.py +7 -8
  52. iatoolkit/views/change_password_view.py +2 -3
  53. iatoolkit/views/embedding_api_view.py +65 -0
  54. iatoolkit/views/external_login_view.py +1 -1
  55. iatoolkit/views/file_store_api_view.py +1 -1
  56. iatoolkit/views/forgot_password_view.py +2 -4
  57. iatoolkit/views/help_content_api_view.py +9 -9
  58. iatoolkit/views/history_api_view.py +1 -1
  59. iatoolkit/views/home_view.py +2 -2
  60. iatoolkit/views/init_context_api_view.py +18 -17
  61. iatoolkit/views/llmquery_api_view.py +3 -2
  62. iatoolkit/views/login_simulation_view.py +14 -2
  63. iatoolkit/views/login_view.py +9 -9
  64. iatoolkit/views/signup_view.py +2 -4
  65. iatoolkit/views/verify_user_view.py +2 -4
  66. {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.4.dist-info}/METADATA +40 -22
  67. iatoolkit-0.71.4.dist-info/RECORD +122 -0
  68. iatoolkit-0.71.4.dist-info/licenses/LICENSE +21 -0
  69. iatoolkit/services/help_content_service.py +0 -30
  70. iatoolkit/services/onboarding_service.py +0 -43
  71. iatoolkit-0.66.2.dist-info/RECORD +0 -119
  72. {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.4.dist-info}/WHEEL +0 -0
  73. {iatoolkit-0.66.2.dist-info → iatoolkit-0.71.4.dist-info}/top_level.txt +0 -0
@@ -4,40 +4,38 @@
4
4
  # IAToolkit is open source software.
5
5
 
6
6
  from sqlalchemy import text
7
- from huggingface_hub import InferenceClient
8
7
  from injector import inject
9
8
  from iatoolkit.common.exceptions import IAToolkitException
10
9
  from iatoolkit.repositories.database_manager import DatabaseManager
11
- from iatoolkit.repositories.models import Document, VSDoc
12
- import os
10
+ from iatoolkit.services.embedding_service import EmbeddingService
11
+ from iatoolkit.repositories.models import Document, VSDoc, Company
13
12
  import logging
14
13
 
14
+
15
15
  class VSRepo:
16
16
  @inject
17
- def __init__(self, db_manager: DatabaseManager):
17
+ def __init__(self,
18
+ db_manager: DatabaseManager,
19
+ embedding_service: EmbeddingService):
18
20
  self.session = db_manager.get_session()
19
-
20
- # Inicializar el modelo de embeddings
21
- self.embedder = InferenceClient(
22
- model="sentence-transformers/all-MiniLM-L6-v2",
23
- token=os.getenv('HF_TOKEN'))
21
+ self.embedding_service = embedding_service
24
22
 
25
23
 
26
- def add_document(self, vs_chunk_list: list[VSDoc]):
24
+ def add_document(self, company_short_name, vs_chunk_list: list[VSDoc]):
27
25
  try:
28
26
  for doc in vs_chunk_list:
29
27
  # calculate the embedding for the text
30
- doc.embedding = self.embedder.feature_extraction(doc.text)
28
+ doc.embedding = self.embedding_service.embed_text(company_short_name, doc.text)
31
29
  self.session.add(doc)
32
30
  self.session.commit()
33
31
  except Exception as e:
34
- logging.error(f"Error insertando documentos en PostgreSQL: {str(e)}")
32
+ logging.error(f"Error inserting documents into PostgreSQL: {str(e)}")
35
33
  self.session.rollback()
36
34
  raise IAToolkitException(IAToolkitException.ErrorType.VECTOR_STORE_ERROR,
37
- f"Error insertando documentos en PostgreSQL: {str(e)}")
35
+ f"Error inserting documents into PostgreSQL: {str(e)}")
38
36
 
39
37
  def query(self,
40
- company_id: int,
38
+ company_short_name: str,
41
39
  query_text: str,
42
40
  n_results=5,
43
41
  metadata_filter=None
@@ -46,18 +44,25 @@ class VSRepo:
46
44
  search documents similar to the query for a company
47
45
 
48
46
  Args:
49
- company_id:
47
+ company_short_name: The company's unique short name.
50
48
  query_text: query text
51
49
  n_results: max number of results to return
52
- metadata_filter: (ej: {"document_type": "certificate"})
50
+ metadata_filter: (e.g., {"document_type": "certificate"})
53
51
 
54
52
  Returns:
55
53
  list of documents matching the query and filters
56
54
  """
57
- # Generate the embedding with the query text
58
- query_embedding = self.embedder.feature_extraction([query_text])[0]
55
+ # Generate the embedding with the query text for the specific company
56
+ query_embedding = self.embedding_service.embed_text(company_short_name, query_text)
59
57
 
58
+ sql_query, params = None, None
60
59
  try:
60
+ # Get company ID from its short name for the SQL query
61
+ company = self.session.query(Company).filter(Company.short_name == company_short_name).one_or_none()
62
+ if not company:
63
+ raise IAToolkitException(IAToolkitException.ErrorType.VECTOR_STORE_ERROR,
64
+ f"Company with short name '{company_short_name}' not found.")
65
+
61
66
  # build the SQL query
62
67
  sql_query_parts = ["""
63
68
  SELECT iat_documents.id, \
@@ -73,11 +78,12 @@ class VSRepo:
73
78
 
74
79
  # query parameters
75
80
  params = {
76
- "company_id": company_id,
81
+ "company_id": company.id,
77
82
  "query_embedding": query_embedding,
78
83
  "n_results": n_results
79
84
  }
80
85
 
86
+
81
87
  # add metadata filter, if exists
82
88
  if metadata_filter and isinstance(metadata_filter, dict):
83
89
  for key, value in metadata_filter.items():
@@ -108,7 +114,7 @@ class VSRepo:
108
114
  meta_data = row[4] if len(row) > 4 and row[4] is not None else {}
109
115
  doc = Document(
110
116
  id=row[0],
111
- company_id=company_id,
117
+ company_id=company.id,
112
118
  filename=row[1],
113
119
  content=row[2],
114
120
  content_b64=row[3],
@@ -84,7 +84,7 @@ class AuthService:
84
84
  )
85
85
  return {'success': True, 'user_identifier': user_identifier}
86
86
  except Exception as e:
87
- logging.error(f"Error al crear la sesión desde token para {user_identifier}: {e}")
87
+ logging.error(f"error creeating session for Token of {user_identifier}: {e}")
88
88
  self.log_access(
89
89
  company_short_name=company_short_name,
90
90
  auth_type='redeem_token',
@@ -189,5 +189,5 @@ class AuthService:
189
189
  session.commit()
190
190
 
191
191
  except Exception as e:
192
- logging.error(f"Fallo al escribir en AccessLog: {e}", exc_info=False)
192
+ logging.error(f"error writting to AccessLog: {e}", exc_info=False)
193
193
  session.rollback()
@@ -4,16 +4,17 @@
4
4
  # IAToolkit is open source software.
5
5
 
6
6
  from iatoolkit.repositories.models import Company
7
+ from iatoolkit.services.configuration_service import ConfigurationService
7
8
  from injector import inject
8
9
 
9
10
 
10
11
  class BrandingService:
11
12
  """
12
- Servicio centralizado que gestiona la configuración de branding.
13
+ Branding configuration for IAToolkit
13
14
  """
14
-
15
15
  @inject
16
- def __init__(self):
16
+ def __init__(self, config_service: ConfigurationService):
17
+ self.config_service = config_service
17
18
  """
18
19
  Define los estilos de branding por defecto para la aplicación.
19
20
  """
@@ -68,15 +69,15 @@ class BrandingService:
68
69
  "send_button_color": "#212529" # Gris oscuro/casi negro por defecto
69
70
  }
70
71
 
71
- def get_company_branding(self, company: Company | None) -> dict:
72
+ def get_company_branding(self, company_short_name: str) -> dict:
72
73
  """
73
74
  Retorna los estilos de branding finales para una compañía,
74
75
  fusionando los valores por defecto con los personalizados.
75
76
  """
76
77
  final_branding_values = self._default_branding.copy()
78
+ branding_data = self.config_service.get_configuration(company_short_name, 'branding')
79
+ final_branding_values.update(branding_data)
77
80
 
78
- if company and company.branding:
79
- final_branding_values.update(company.branding)
80
81
 
81
82
  # Función para convertir HEX a RGB
82
83
  def hex_to_rgb(hex_color):
@@ -138,8 +139,11 @@ class BrandingService:
138
139
  }}
139
140
  """
140
141
 
142
+ # get the company name from configuration for the branding render
143
+ company_name = self.config_service.get_configuration(company_short_name, 'name')
144
+
141
145
  return {
142
- "name": company.name if company else "IAToolkit",
146
+ "name": company_name,
143
147
  "primary_text_style": primary_text_style,
144
148
  "secondary_text_style": secondary_text_style,
145
149
  "tertiary_text_style": tertiary_text_style,
@@ -0,0 +1,155 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Product: IAToolkit
3
+ #
4
+ # IAToolkit is open source software.
5
+
6
+ from iatoolkit.common.util import Utility
7
+ from iatoolkit.services.configuration_service import ConfigurationService
8
+ from iatoolkit.services.sql_service import SqlService
9
+ from iatoolkit.common.exceptions import IAToolkitException
10
+ import logging
11
+ from injector import inject
12
+ import os
13
+
14
+
15
class CompanyContextService:
    """
    Builds the complete context string for a given company, to be sent to
    the Language Model.

    The context is assembled from static files found under
    ``companies/<company_short_name>/`` and from the schemas of the SQL
    data sources declared in the company configuration.
    """

    @inject
    def __init__(self,
                 sql_service: SqlService,
                 utility: Utility,
                 config_service: ConfigurationService):
        self.sql_service = sql_service
        self.utility = utility
        self.config_service = config_service

    def get_company_context(self, company_short_name: str) -> str:
        """
        Build the full context by aggregating:
          1. Static context files (Markdown) and static schema files (YAML).
          2. SQL schemas of the data sources declared in the configuration.

        Each source is best-effort: a failure is logged as a warning and the
        remaining sources are still included.

        Args:
            company_short_name: The company's short name (used as directory
                name and as configuration key).

        Returns:
            The non-empty parts joined by a ``---`` separator; an empty
            string when no part produced any content.
        """
        context_parts = []

        # 1. Context from Markdown (context/*.md) and yaml (schema/*.yaml) files
        try:
            md_context = self._get_static_file_context(company_short_name)
            if md_context:
                context_parts.append(md_context)
        except Exception as e:
            logging.warning(f"Could not load Markdown context for '{company_short_name}': {e}")

        # 2. Context from the SQL data sources declared in the configuration
        try:
            sql_context = self._get_sql_schema_context(company_short_name)
            if sql_context:
                context_parts.append(sql_context)
        except Exception as e:
            logging.warning(f"Could not generate SQL context for '{company_short_name}': {e}")

        # Join all parts with a clear separator
        return "\n\n---\n\n".join(context_parts)

    def _get_static_file_context(self, company_short_name: str) -> str:
        """
        Concatenate the context obtained from the company's ``.md`` context
        files and ``.yaml`` schema files. Missing directories are skipped.
        """
        static_context = ''

        # Part 1: Markdown context files
        context_dir = f'companies/{company_short_name}/context'
        if os.path.exists(context_dir):
            context_files = self.utility.get_files_by_extension(context_dir, '.md', return_extension=True)
            for file in context_files:
                filepath = os.path.join(context_dir, file)
                static_context += self.utility.load_markdown_context(filepath)

        # Part 2: YAML schema files
        schema_dir = f'companies/{company_short_name}/schema'
        if os.path.exists(schema_dir):
            schema_files = self.utility.get_files_by_extension(schema_dir, '.yaml', return_extension=True)
            for file in schema_files:
                # The entity name is the filename up to its first dot.
                schema_name = file.split('.')[0]
                filepath = os.path.join(schema_dir, file)
                static_context += self.utility.generate_context_for_schema(schema_name, filepath)

        return static_context

    def _get_sql_schema_context(self, company_short_name: str) -> str:
        """
        Generate the SQL schema context for every entry under
        ``data_sources.sql`` in the company configuration.

        For each source, either all tables reported by the database manager
        are used (minus ``exclude_tables``) or only the tables listed in the
        ``tables`` map. Per-table entries in ``tables`` may override the
        schema name and the list of excluded columns.

        Returns:
            The concatenated context of all processable sources, or an empty
            string when no SQL source is configured.
        """
        data_sources_config = self.config_service.get_configuration(company_short_name, 'data_sources')
        if not data_sources_config or not data_sources_config.get('sql'):
            return ''

        sql_context = ''
        for source in data_sources_config.get('sql') or []:
            db_name = source.get('database')
            if not db_name:
                continue

            try:
                db_manager = self.sql_service.get_database_manager(db_name)
            except IAToolkitException as e:
                logging.warning(f"Could not get DB manager for '{db_name}': {e}")
                continue

            db_description = source.get('description', '')
            # Accumulate (+=) instead of assigning: a plain assignment here
            # would discard the context of every previously processed source.
            sql_context += f'***Base de datos (database_name)***: {db_name}\n'
            sql_context += f"**Descripción:**: {db_description}\n" if db_description else ""
            sql_context += "Para consultar esta base de datos debes utilizar el servicio ***iat_sql_query***.\n"

            # 1. get the global settings and overrides.
            # A key present in the YAML with a null value yields None, so
            # normalise to empty containers before using them.
            global_exclude_columns = source.get('exclude_columns') or []
            table_prefix = source.get('table_prefix')
            table_overrides = source.get('tables') or {}

            # 2. get the list of tables to process.
            tables_to_process = []
            if source.get('include_all_tables', False):
                all_tables = db_manager.get_all_table_names()
                tables_to_exclude = set(source.get('exclude_tables') or [])
                tables_to_process = [t for t in all_tables if t not in tables_to_exclude]
            elif 'tables' in source:
                # if not include_all_tables, use the tables explicitly listed in the map.
                tables_to_process = list(table_overrides)

            # 3. iterate over the tables.
            for table_name in tables_to_process:
                try:
                    # 4. get the table specific configuration.
                    table_config = table_overrides.get(table_name) or {}

                    # 5. define the schema name.
                    # Priority 1: explicit override from the 'tables' map.
                    schema_name = table_config.get('schema_name')

                    if not schema_name:
                        if table_prefix and table_name.startswith(table_prefix):
                            # Priority 2: automatic prefix stripping.
                            schema_name = table_name[len(table_prefix):]
                        else:
                            # Priority 3: default to the table name itself.
                            schema_name = table_name

                    # 6. columns to exclude: a local list (even an empty one)
                    # takes precedence over the global list.
                    local_exclude_columns = table_config.get('exclude_columns')
                    final_exclude_columns = (
                        local_exclude_columns
                        if local_exclude_columns is not None
                        else global_exclude_columns
                    )

                    # 7. get the table schema definition.
                    table_definition = db_manager.get_table_schema(
                        table_name=table_name,
                        schema_name=schema_name,
                        exclude_columns=final_exclude_columns
                    )
                    sql_context += table_definition
                except (KeyError, RuntimeError) as e:
                    # One failing table must not prevent the others from being described.
                    logging.warning(f"Could not generate schema for table '{table_name}': {e}")

        return sql_context
@@ -0,0 +1,133 @@
1
+ # iatoolkit/services/configuration_service.py
2
+ # Copyright (c) 2024 Fernando Libedinsky
3
+ # Product: IAToolkit
4
+
5
+ from pathlib import Path
6
+ from iatoolkit.repositories.models import Company
7
+ from iatoolkit.common.util import Utility
8
+ from injector import inject
9
+ import logging
10
+
11
class ConfigurationService:
    """
    Orchestrates the configuration of a Company by reading its YAML files
    and calling the company instance's ``_create_*`` methods to register
    the settings (company, tools, prompt categories and prompts).
    """

    @inject
    def __init__(self,
                 utility: Utility):
        self.utility = utility
        # cache of loaded configurations, keyed by company short name
        self._loaded_configs = {}

    def get_configuration(self, company_short_name: str, content_key: str):
        """
        Return one section of a company's configuration.

        The configuration is read from disk only the first time it is
        requested for a company; later calls are served from the cache.

        Args:
            company_short_name: The company's short name.
            content_key: Top-level key of the merged configuration.

        Returns:
            The value stored under ``content_key``, or None when the key is
            not present.

        Raises:
            FileNotFoundError: if the company's main ``company.yaml`` does
                not exist.
        """
        self._ensure_config_loaded(company_short_name)
        return self._loaded_configs[company_short_name].get(content_key)

    def load_configuration(self, company_short_name: str, company_instance):
        """
        Main entry point for configuring a company instance.

        Loads the company's YAML configuration, registers the company, its
        tools and its prompts through ``company_instance``, and finally links
        the persisted Company object back to the instance.

        Args:
            company_short_name: The company's short name.
            company_instance: Object exposing ``_create_company``,
                ``_create_function``, ``_create_prompt_category`` and
                ``_create_prompt``.
        """
        logging.info(f"⚙️ Starting configuration for company '{company_short_name}'...")

        # 1. Load the main configuration file and supplementary content files.
        # Store it in the cache as well, so get_configuration() neither
        # re-reads the files nor keeps serving an older cached copy.
        config = self._load_and_merge_configs(company_short_name)
        self._loaded_configs[company_short_name] = config

        # 2. Register core company details and get the database object
        company_db_object = self._register_core_details(company_instance, config)

        # 3. Register tools (functions)
        self._register_tools(company_instance, config.get('tools') or [])

        # 4. Register prompt categories and prompts
        self._register_prompts(company_instance, config)

        # 5. Link the persisted Company object back to the running instance
        company_instance.company_short_name = company_short_name
        company_instance.company = company_db_object
        company_instance.id = company_instance.company.id

        logging.info(f"✅ Company '{company_short_name}' configured successfully.")

    def _ensure_config_loaded(self, company_short_name: str):
        """
        Load the company's configuration from files and cache it, unless it
        is already in the cache.
        """
        if company_short_name not in self._loaded_configs:
            self._loaded_configs[company_short_name] = self._load_and_merge_configs(company_short_name)

    def _load_and_merge_configs(self, company_short_name: str) -> dict:
        """
        Load ``companies/<company_short_name>/config/company.yaml`` and merge
        into it the supplementary files listed in its ``help_files`` section.

        ``help_files`` maps a configuration key to a file path relative to
        the config directory; the parsed content of each file is stored under
        that key. A missing supplementary file is logged and its key is set
        to None.

        Raises:
            FileNotFoundError: if ``company.yaml`` does not exist.
        """
        config_dir = Path("companies") / company_short_name / "config"
        main_config_path = config_dir / "company.yaml"

        if not main_config_path.exists():
            raise FileNotFoundError(f"Main configuration file not found: {main_config_path}")

        # NOTE(review): `or {}` guards against the loader returning None for
        # an empty file — confirm against Utility.load_schema_from_yaml.
        config = self.utility.load_schema_from_yaml(main_config_path) or {}

        # Load and merge supplementary content files (e.g., onboarding_cards).
        # A `help_files:` key with no value is treated as "no files".
        help_files = config.get('help_files') or {}
        for key, file_path in help_files.items():
            supplementary_path = config_dir / file_path
            if supplementary_path.exists():
                config[key] = self.utility.load_schema_from_yaml(supplementary_path)
            else:
                logging.warning(f"⚠️ Warning: Content file not found: {supplementary_path}")
                config[key] = None  # Ensure the key exists but is empty

        return config

    def _register_core_details(self, company_instance, config: dict) -> Company:
        """Call ``_create_company`` with data from the merged YAML config."""
        return company_instance._create_company(
            short_name=config['id'],
            name=config['name'],
            parameters=config.get('parameters', {})
        )

    def _register_tools(self, company_instance, tools_config: list):
        """Call ``_create_function`` for each tool defined in the YAML."""
        for tool in tools_config or []:
            company_instance._create_function(
                function_name=tool['function_name'],
                description=tool['description'],
                params=tool['params']
            )

    def _register_prompts(self, company_instance, config: dict):
        """
        Create the prompt categories first, then create each prompt and
        assign it to its category.

        Categories receive a 1-based order following their position in the
        ``prompt_categories`` list. Prompts whose category is missing or was
        not declared are skipped with a warning.
        """
        prompts_config = config.get('prompts') or []
        categories_config = config.get('prompt_categories') or []

        created_categories = {}
        for i, category_name in enumerate(categories_config):
            category_obj = company_instance._create_prompt_category(name=category_name, order=i + 1)
            created_categories[category_name] = category_obj

        for prompt_data in prompts_config:
            category_name = prompt_data.get('category')
            if not category_name or category_name not in created_categories:
                # .get(): a prompt that also lacks a name must still be
                # skipped cleanly instead of raising inside the log call.
                logging.warning(f"⚠️ Warning: Prompt '{prompt_data.get('name')}' has an invalid or missing category. Skipping.")
                continue

            category_obj = created_categories[category_name]
            company_instance._create_prompt(
                prompt_name=prompt_data['name'],
                description=prompt_data['description'],
                order=prompt_data['order'],
                category=category_obj,
                active=prompt_data.get('active', True),
                custom_fields=prompt_data.get('custom_fields', [])
            )