iatoolkit 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iatoolkit might be problematic. Click here for more details.

@@ -0,0 +1,312 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from iatoolkit import current_iatoolkit
7
+ from common.exceptions import IAToolkitException
8
+ from services.prompt_manager_service import PromptService
9
+ from repositories.llm_query_repo import LLMQueryRepo
10
+ from repositories.models import Company, Function
11
+ from services.excel_service import ExcelService
12
+ from services.mail_service import MailService
13
+ from iatoolkit.company_registry import get_company_registry
14
+ from common.util import Utility
15
+ from injector import inject
16
+ import logging
17
+ import os
18
+
19
+
20
class Dispatcher:
    """Central router between the LLM layer and the registered company classes.

    The dispatcher owns the built-in ("system") tool handlers, instantiates
    every company registered in the global company registry, and forwards
    function calls, context requests and user-info requests to the matching
    company instance.
    """

    @inject
    def __init__(self,
                 prompt_service: PromptService,
                 llmquery_repo: LLMQueryRepo,
                 util: Utility,
                 excel_service: ExcelService,
                 mail_service: MailService):
        """Store the injected services and instantiate the registered companies."""
        self.prompt_service = prompt_service
        self.llmquery_repo = llmquery_repo
        self.util = util
        self.excel_service = excel_service
        self.mail_service = mail_service
        self.system_functions = _FUNCTION_LIST
        self.system_prompts = _SYSTEM_PROMPT

        # Use the global registry
        self.company_registry = get_company_registry()

        # Start with no company instances and populate them right away;
        # initialize_companies() is a no-op once the mapping is non-empty.
        self.company_classes = {}
        self.initialize_companies()

        # Built-in tools, keyed by the function name exposed to the LLM.
        self.tool_handlers = {
            "iat_generate_excel": self.excel_service.excel_generator,
            "iat_send_email": self.mail_service.send_mail,
        }

    def initialize_companies(self):
        """
        Initializes and instantiates all registered company classes.
        This method should be called *after* the main injector is fully configured.
        """
        if self.company_classes:  # Prevent re-initialization
            return

        injector = current_iatoolkit()._get_injector()
        self.company_registry.set_injector(injector)
        self.company_classes = self.company_registry.instantiate_companies()

    def start_execution(self):
        """Run the startup logic of every registered company and return True."""
        # Ensure companies are initialized before starting them
        if not self.company_classes:
            self.initialize_companies()

        for company_name, company_instance in self.company_classes.items():
            logging.info(f'Starting execution for company: {company_name}')
            company_instance.start_execution()

        return True

    def init_db(self):
        """Persist the system functions and prompts, then let each company seed its data."""
        # create system functions
        for function in self.system_functions:
            self.llmquery_repo.create_or_update_function(
                Function(
                    company_id=None,
                    system_function=True,
                    name=function['function_name'],
                    description=function['description'],
                    parameters=function['parameters']
                )
            )

        # create the system prompts; the original kept a running counter but
        # always passed order=1, so the counter is now what gets stored.
        for position, prompt in enumerate(self.system_prompts, start=1):
            self.prompt_service.create_prompt(
                prompt_name=prompt['name'],
                description=prompt['description'],
                order=position,
                is_system_prompt=True
            )

        # initialize the database for every company class
        for company in self.company_classes.values():
            print(f'inicializando clase: {company.__class__.__name__}')
            company.init_db()

    def dispatch(self, company_name: str, action: str, **kwargs) -> str:
        """Execute ``action`` for ``company_name`` and return the handler's result.

        The company name is matched case-insensitively (lower-cased key).
        System tools registered in ``self.tool_handlers`` take precedence over
        the company's own ``handle_request``.

        Raises:
            IAToolkitException: if the company is not configured, or if the
                company handler fails (non-toolkit errors are wrapped as
                EXTERNAL_SOURCE_ERROR; toolkit errors are re-raised untouched).
        """
        company_key = company_name.lower()

        if company_key not in self.company_classes:
            available_companies = list(self.company_classes.keys())
            raise IAToolkitException(
                IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                f"Empresa '{company_name}' no configurada. Empresas disponibles: {available_companies}"
            )

        # check if action is a system function
        if action in self.tool_handlers:
            return self.tool_handlers[action](**kwargs)

        # Look up with the same normalized key that was validated above;
        # indexing with the raw name raised KeyError for mixed-case input.
        company_instance = self.company_classes[company_key]
        try:
            return company_instance.handle_request(action, **kwargs)
        except IAToolkitException:
            # Already a toolkit error: re-raise to preserve the original error type.
            raise
        except Exception as e:
            logging.exception(e)
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Error en function call '{action}': {str(e)}") from e

    def get_company_context(self, company_name: str, **kwargs) -> str:
        """Build the company context string.

        Concatenates, in order: every ``.md`` file under
        ``companies/<company_name>/context``, the context generated for every
        ``.yaml`` file under ``companies/<company_name>/schema``, and the
        text returned by the company instance's ``get_company_context``.
        Both directories are resolved relative to the current working directory.

        Raises:
            IAToolkitException: if the company is not configured or the
                company instance fails while producing its context.
        """
        if company_name not in self.company_classes:
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Empresa no configurada: {company_name}")

        context_parts = []

        # read the company context from this list of markdown files,
        # company brief, credits, operation description, etc.
        context_dir = os.path.join(os.getcwd(), f'companies/{company_name}/context')
        context_files = self.util.get_files_by_extension(context_dir, '.md', return_extension=True)
        for file in context_files:
            filepath = os.path.join(context_dir, file)
            context_parts.append(self.util.load_markdown_context(filepath))

        # add the schemas for every table or function call responses
        schema_dir = os.path.join(os.getcwd(), f'companies/{company_name}/schema')
        schema_files = self.util.get_files_by_extension(schema_dir, '.yaml', return_extension=True)
        for file in schema_files:
            # the schema name is the part of the file name before the first underscore
            schema_name = file.split('_')[0]
            filepath = os.path.join(schema_dir, file)
            context_parts.append(self.util.generate_context_for_schema(schema_name, filepath))

        company_context = ''.join(context_parts)
        company_instance = self.company_classes[company_name]
        try:
            return company_context + company_instance.get_company_context(**kwargs)
        except Exception as e:
            logging.exception(e)
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Error en get_company_context de {company_name}: {str(e)}") from e

    def get_company_services(self, company: Company) -> list[dict]:
        """Return the company's functions as a list of tool definitions for the LLM.

        Note: this sets ``additionalProperties`` to False on each function's
        ``parameters`` mapping in place before building the tool entry.
        """
        # create the syntax with openai response syntax, for the company function list
        tools = []
        functions = self.llmquery_repo.get_company_functions(company)

        for function in functions:
            # make sure is always on
            function.parameters["additionalProperties"] = False

            ai_tool = {
                "type": "function",
                "name": function.name,
                "description": function.description,
                "parameters": function.parameters,
                "strict": True
            }
            tools.append(ai_tool)
        return tools

    def get_user_info(self, company_name: str, **kwargs) -> dict:
        """Delegate to the company instance's ``get_user_info``.

        Raises:
            IAToolkitException: if the company is not configured or the
                company instance raises.
        """
        if company_name not in self.company_classes:
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Empresa no configurada: {company_name}")

        company_instance = self.company_classes[company_name]
        try:
            return company_instance.get_user_info(**kwargs)
        except Exception as e:
            logging.exception(e)
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Error en get_user_info de {company_name}: {str(e)}") from e

    def get_metadata_from_filename(self, company_name: str, filename: str) -> dict:
        """Delegate to the company instance's ``get_metadata_from_filename``.

        Raises:
            IAToolkitException: if the company is not configured or the
                company instance raises.
        """
        if company_name not in self.company_classes:
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Empresa no configurada: {company_name}")

        company_instance = self.company_classes[company_name]
        try:
            return company_instance.get_metadata_from_filename(filename)
        except Exception as e:
            logging.exception(e)
            raise IAToolkitException(IAToolkitException.ErrorType.EXTERNAL_SOURCE_ERROR,
                                     f"Error en get_metadata_from_filename de {company_name}: {str(e)}") from e

    def get_registered_companies(self) -> dict:
        """Return the registered and instantiated company names (for debugging/admin)."""
        return {
            "registered_classes": list(self.company_registry.get_registered_companies().keys()),
            "instantiated": list(self.company_classes.keys()),
            "count": len(self.company_classes)
        }
213
+
214
+
215
+ # iatoolkit system prompts
216
+ _SYSTEM_PROMPT = [
217
+ {'name': 'query_main', 'description':'main prompt de iatoolkit'},
218
+ {'name': 'format_styles', 'description':'formatos y estilos de salida'},
219
+ {'name': 'sql_rules', 'description':'instrucciones para generar sql'}
220
+ ]
221
+
222
+
223
+ # iatoolkit function calls
224
+ _FUNCTION_LIST = [
225
+ {
226
+ "name": "iat_generate_excel",
227
+ "description": "Generador de Excel."
228
+ "Genera un archivo Excel (.xlsx) a partir de una lista de diccionarios. "
229
+ "Cada diccionario representa una fila del archivo. "
230
+ "el archivo se guarda en directorio de descargas."
231
+ "retorna diccionario con filename, attachment_token (para enviar archivo por mail)"
232
+ "content_type y download_link",
233
+ "function_name": "iat_generate_excel",
234
+ "parameters": {
235
+ "type": "object",
236
+ "properties": {
237
+ "filename": {
238
+ "type": "string",
239
+ "description": "Nombre del archivo de salida (ejemplo: 'reporte.xlsx')",
240
+ "pattern": "^.+\\.xlsx?$"
241
+ },
242
+ "sheet_name": {
243
+ "type": "string",
244
+ "description": "Nombre de la hoja dentro del Excel",
245
+ "minLength": 1
246
+ },
247
+ "data": {
248
+ "type": "array",
249
+ "description": "Lista de diccionarios. Cada diccionario representa una fila.",
250
+ "minItems": 1,
251
+ "items": {
252
+ "type": "object",
253
+ "properties": {},
254
+ "additionalProperties": {
255
+ "anyOf": [
256
+ {"type": "string"},
257
+ {"type": "number"},
258
+ {"type": "boolean"},
259
+ {"type": "null"},
260
+ {
261
+ "type": "string",
262
+ "format": "date"
263
+ }
264
+ ]
265
+ }
266
+ }
267
+ }
268
+ },
269
+ "required": ["filename", "sheet_name", "data"]
270
+ }
271
+ },
272
+ {
273
+ 'name': 'Envio de mails',
274
+ 'description': "iatoolkit mail system. "
275
+ "envia mails cuando un usuario lo solicita."
276
+ "Si no te indican quien envia el correo utiliza la dirección iatoolkit@iatoolkit.com",
277
+ 'function_name': "iat_send_email",
278
+ 'parameters': {
279
+ "type": "object",
280
+ "properties": {
281
+ "from_email": {"type": "string","description": "dirección de correo electrónico que esta enviando el email."},
282
+ "recipient": {"type": "string", "description": "email del destinatario"},
283
+ "subject": {"type": "string", "description": "asunto del email"},
284
+ "body": {"type": "string", "description": "HTML del email"},
285
+ "attachments": {
286
+ "type": "array",
287
+ "description": "Lista de archivos adjuntos codificados en base64",
288
+ "items": {
289
+ "type": "object",
290
+ "properties": {
291
+ "filename": {
292
+ "type": "string",
293
+ "description": "Nombre del archivo con su extensión (ej. informe.pdf)"
294
+ },
295
+ "content": {
296
+ "type": "string",
297
+ "description": "Contenido del archivo en b64."
298
+ },
299
+ "attachment_token": {
300
+ "type": "string",
301
+ "description": "token para descargar el archivo."
302
+ }
303
+ },
304
+ "required": ["filename", "content", "attachment_token"],
305
+ "additionalProperties": False
306
+ }
307
+ }
308
+ },
309
+ "required": ["from_email","recipient", "subject", "body", "attachments"]
310
+ }
311
+ }
312
+ ]
@@ -0,0 +1,159 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from docx import Document
7
+ import fitz # PyMuPDF
8
+ from PIL import Image
9
+ import io
10
+ import os
11
+ import pytesseract
12
+ from injector import inject
13
+ from common.exceptions import IAToolkitException
14
+
15
class DocumentService:
    """Extract plain text / Markdown from .docx, .txt and .pdf file contents.

    PDFs with no extractable text on any page are treated as scans and are
    run through OCR (pytesseract, Spanish language pack).
    """

    @inject
    def __init__(self):
        # Maximum number of pages accepted for a scanned (OCR) PDF; read from
        # the MAX_DOC_PAGES environment variable, defaulting to 10.
        self.max_doc_pages = int(os.getenv("MAX_DOC_PAGES", "10"))

    def file_to_txt(self, filename, file_content):
        """Convert ``file_content`` to text, choosing the reader from the file extension.

        Args:
            filename: name used only to detect the extension (case-insensitive).
            file_content: raw bytes of the file (a ``str`` is also accepted
                for ``.txt`` and returned unchanged).

        Raises:
            IAToolkitException: FILE_FORMAT_ERROR for an unknown extension or
                a ``.txt`` payload that is not valid UTF-8; FILE_IO_ERROR for
                any other failure while reading the document.
        """
        lowered = filename.lower()
        try:
            if lowered.endswith('.docx'):
                return self.read_docx(file_content)

            if lowered.endswith('.txt'):
                if isinstance(file_content, bytes):
                    try:
                        # decode using UTF-8
                        file_content = file_content.decode('utf-8')
                    except UnicodeDecodeError:
                        raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
                                                 "El archivo no es texto o la codificación no es UTF-8")
                return file_content

            if lowered.endswith('.pdf'):
                if self.is_scanned_pdf(file_content):
                    return self.read_scanned_pdf(file_content)
                return self.read_pdf(file_content)

            raise IAToolkitException(IAToolkitException.ErrorType.FILE_FORMAT_ERROR,
                                     "Formato de archivo desconocido")
        except IAToolkitException:
            # Known toolkit error: propagate unchanged.
            raise
        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.FILE_IO_ERROR,
                                     f"Error processing file: {e}") from e

    @staticmethod
    def _heading_level(style_name):
        """Return the numeric level of a 'Heading N' style name, defaulting to 1."""
        suffix = style_name[len("Heading"):].strip()
        if suffix.isdecimal() and int(suffix) > 0:
            return int(suffix)
        # A heading style without a usable number used to abort the whole
        # document with a ValueError; render it as a top-level heading instead.
        return 1

    def read_docx(self, file_content):
        """Render the paragraphs of a .docx file (given as bytes) as Markdown.

        Headings become ``#`` titles, bullet/list paragraphs become ``- ``
        items, numbered list paragraphs become ``1. `` items, and everything
        else is emitted as a plain paragraph.

        Raises:
            ValueError: if the document cannot be opened or parsed.
        """
        try:
            # Build an in-memory file object from the raw bytes
            doc = Document(io.BytesIO(file_content))

            chunks = []
            for para in doc.paragraphs:
                style_name = para.style.name
                if style_name.startswith("Heading"):
                    level = self._heading_level(style_name)
                    chunks.append(f"{'#' * level} {para.text}\n\n")
                elif style_name in ("List Bullet", "List Paragraph"):
                    chunks.append(f"- {para.text}\n")
                elif style_name == "List Number":
                    chunks.append(f"1. {para.text}\n")
                else:
                    chunks.append(f"{para.text}\n\n")
            return "".join(chunks)
        except Exception as e:
            raise ValueError(f"Error reading .docx file: {e}") from e

    def read_pdf(self, file_content):
        """Return the concatenated text of every page of a text-based PDF.

        Raises:
            ValueError: if the PDF cannot be opened or read.
        """
        try:
            with fitz.open(stream=file_content, filetype="pdf") as pdf:
                return "".join(page.get_text() for page in pdf)
        except Exception as e:
            raise ValueError(f"Error reading .pdf file: {e}") from e

    def is_scanned_pdf(self, file_content):
        """Return True when no page of the PDF has extractable text.

        A document is considered scanned only if *every* page yields empty
        (whitespace-only) text; a single page with text makes it False.
        """
        # The context manager closes the document, which was previously leaked.
        with fitz.open(stream=io.BytesIO(file_content), filetype='pdf') as doc:
            return not any(page.get_text().strip() for page in doc)

    def read_scanned_pdf(self, file_content):
        """OCR every embedded image of a scanned PDF and return the joined text.

        Returns an empty string when the PDF has no images or exceeds
        ``self.max_doc_pages``.
        """
        images = self.pdf_to_images(file_content)
        if not images:
            return ''

        return ''.join(self.image_to_text(image) for image in images)

    def pdf_to_images(self, file_content):
        """Extract every embedded image of the PDF as a ``fitz.Pixmap``.

        Returns:
            A list of pixmaps, or ``None`` when the document has more pages
            than ``self.max_doc_pages``.
        """
        # `with` guarantees the document is closed on the early return and on
        # errors; the original only closed it on the success path.
        with fitz.open(stream=io.BytesIO(file_content), filetype='pdf') as pdf_document:
            if pdf_document.page_count > self.max_doc_pages:
                return None

            images = []
            for page in pdf_document:
                # img[0] is the xref of the image inside the PDF
                for img in page.get_images(full=True):
                    pix = fitz.Pixmap(pdf_document, img[0])

                    # CMYK has 4 colour components (plus an optional alpha
                    # channel). The previous `pix.n > 4` test missed CMYK
                    # without alpha, which was then decoded as RGBA.
                    if pix.n - pix.alpha >= 4:
                        pix = fitz.Pixmap(fitz.csRGB, pix)

                    images.append(pix)

        return images

    def image_to_text(self, image):
        """Run OCR (Spanish) over a pixmap and return the recognized text.

        Raises:
            ValueError: if the pixmap has a channel count with no PIL mode mapping.
        """
        # Map the number of components per pixel (pix.n) to a PIL mode
        pil_modes = {1: "L", 2: "LA", 3: "RGB", 4: "RGBA"}
        pil_mode = pil_modes.get(image.n)
        if pil_mode is None:
            # Special case (the earlier colourspace conversion should prevent it)
            raise ValueError(f"Canales desconocidos: {image.n}")

        img = Image.frombytes(pil_mode, (image.width, image.height), image.samples)
        return pytesseract.image_to_string(img, lang="spa")
156
+
157
+
158
+
159
+
@@ -0,0 +1,98 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from common.util import Utility
7
+ import pandas as pd
8
+ from uuid import uuid4
9
+ from pathlib import Path
10
+ from common.exceptions import IAToolkitException
11
+ from injector import inject
12
+ import os
13
+ import logging
14
+ from flask import current_app, jsonify
15
+
16
# MIME type for .xlsx workbooks; returned as "content_type" by ExcelService.excel_generator.
EXCEL_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
17
+
18
+
19
class ExcelService:
    """Generate Excel files for the LLM and validate access to generated files."""

    @inject
    def __init__(self, util: Utility):
        self.util = util

    def excel_generator(self, **kwargs) -> "dict | str":
        """
        Generate an Excel file from a list of dictionaries.

        Expected keyword arguments:
            - filename: str, logical name shown to the user,
              e.g. "reporte_clientes.xlsx" [required]
            - data: list[dict], one dictionary per row [required]
            - sheet_name: str = "hoja 1"

        Returns:
            On success, a dictionary such as:
            {
                "filename": "reporte.xlsx",
                "attachment_token": "8b7f8a66-...-c1c3.xlsx",
                "content_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "download_link": "/download/8b7f8a66-...-c1c3.xlsx"
            }
            When a required argument is missing or invalid, a plain error
            message string is returned instead (no exception is raised).

        Raises:
            IAToolkitException: CALL_ERROR if building or saving the workbook fails.
        """
        try:
            # get the parameters
            fname = kwargs.get('filename')
            if not fname:
                return 'falta el nombre del archivo de salida'

            data = kwargs.get('data')
            if not data or not isinstance(data, list):
                return 'faltan los datos o no es una lista de diccionarios'

            sheet_name = kwargs.get('sheet_name', 'hoja 1')

            # 1. convert the list of dictionaries to a dataframe
            df = pd.DataFrame(data)

            # 2. create a unique temporary name; it doubles as the download token
            token = f"{uuid4()}.xlsx"
            # NOTE(review): this path is relative to the current working
            # directory, while validate_file_access() looks under
            # current_app.root_path — confirm both resolve to the same folder.
            filepath = Path("static/temp") / token
            filepath.parent.mkdir(parents=True, exist_ok=True)

            # 3. save the excel file in the temporary directory
            df.to_excel(filepath, index=False, sheet_name=sheet_name)

            # 4. return the link to the LLM
            return {
                "filename": fname,
                "attachment_token": token,
                "content_type": EXCEL_MIME,
                "download_link": f"/download/{token}"
            }

        except Exception as e:
            raise IAToolkitException(IAToolkitException.ErrorType.CALL_ERROR,
                                     'error generating excel file') from e

    def validate_file_access(self, filename):
        """Check that ``filename`` names an existing file in the app's static/temp folder.

        Returns:
            ``None`` when the file may be served, otherwise a ``jsonify``
            response whose "error" field describes the problem.
        """
        try:
            if not filename:
                return jsonify({"error": "Nombre de archivo inválido"})

            # Prevent path traversal attacks
            if '..' in filename or filename.startswith('/') or '\\' in filename:
                return jsonify({"error": "Nombre de archivo inválido"})

            temp_dir = os.path.join(current_app.root_path, 'static', 'temp')
            file_path = os.path.join(temp_dir, filename)

            if not os.path.exists(file_path):
                return jsonify({"error": "Archivo no encontrado"})

            if not os.path.isfile(file_path):
                return jsonify({"error": "La ruta no corresponde a un archivo"})

            return None

        except Exception as e:
            # Log the details, but return only a generic message to the client.
            error_msg = f"Error validando acceso al archivo {filename}: {str(e)}"
            logging.error(error_msg)
            return jsonify({"error": "Error validando archivo"})
@@ -0,0 +1,92 @@
1
+ # Copyright (c) 2024 Fernando Libedinsky
2
+ # Producto: IAToolkit
3
+ # Todos los derechos reservados.
4
+ # En trámite de registro en el Registro de Propiedad Intelectual de Chile.
5
+
6
+ from infra.connectors.file_connector import FileConnector
7
+ import logging
8
+ import os
9
+ from typing import Optional, Callable, Dict
10
+
11
+
12
class FileProcessorConfig:
    """Settings consumed by FileProcessor.

    Attributes are plain copies of the constructor arguments:
        filters: mapping of filter options; FileProcessor reads the keys
            'filename_contains' and 'custom_filter'.
        action: callable invoked as ``action(filename, content, context)``
            for every file that passes the filters.
        continue_on_error: when False, the first failing file aborts the run.
        log_file: file used when FileProcessor has to configure logging itself.
        echo: when True, progress is also printed to stdout.
        context: dictionary handed to ``action`` on every call.
    """

    def __init__(
            self,
            filters: Dict,
            action: Callable[[str, bytes, dict], None],
            continue_on_error: bool = True,
            log_file: str = 'file_processor.log',
            echo: bool = False,
            context: Optional[dict] = None,
    ):
        self.filters = filters
        self.action = action
        self.continue_on_error = continue_on_error
        self.log_file = log_file
        self.echo = echo
        # Only replace a missing context. `context or {}` also replaced an
        # empty dict supplied by the caller, so anything the action stored in
        # it was invisible to the caller afterwards.
        self.context = {} if context is None else context
28
+
29
class FileProcessor:
    """Apply a configured action to every file a connector lists.

    Files are obtained from ``connector.list_files()``, filtered by name
    according to ``config.filters``, fetched with
    ``connector.get_file_content()`` and passed to ``config.action``.
    ``processed_files`` counts the files whose action completed.
    """

    def __init__(self,
                 connector: FileConnector,
                 config: FileProcessorConfig,
                 logger: Optional[logging.Logger] = None):
        self.connector = connector
        self.config = config
        self.logger = logger or self._setup_logger()
        self.processed_files = 0

    def _setup_logger(self):
        """Configure logging to ``config.log_file`` and return this module's logger.

        Note: this calls ``logging.basicConfig``, which affects the root
        logger and does nothing if the root logger already has handlers.
        """
        logging.basicConfig(
            filename=self.config.log_file,
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        return logging.getLogger(__name__)

    def process_files(self):
        """Run the configured action over every file that passes the filters.

        Returns:
            False if the file list could not be fetched, True once the list
            has been walked. (The success path used to fall off the end and
            return None, so callers could not tell the outcomes apart by
            truthiness.)

        Raises:
            Exception: whatever a file's processing raised, but only when
                ``config.continue_on_error`` is False; otherwise errors are
                logged and the loop moves on.
        """
        try:
            files = self.connector.list_files()
        except Exception as e:
            self.logger.error(f"Error fetching files: {e}")
            return False

        if self.config.echo:
            print(f'cargando un total de {len(files)} archivos')

        for file_info in files:
            file_path = file_info['path']
            file_name = file_info['name']

            try:
                if not self._apply_filters(file_name):
                    continue

                if self.config.echo:
                    print(f'loading: {file_name}')

                content = self.connector.get_file_content(file_path)

                # execute the action defined; it receives only the base name
                filename = os.path.basename(file_name)
                self.config.action(filename, content, self.config.context)
                self.processed_files += 1

                self.logger.info(f"Successfully processed file: {file_path}")

            except Exception as e:
                self.logger.error(f"Error processing {file_path}: {e}")
                if not self.config.continue_on_error:
                    # bare raise keeps the original traceback intact
                    raise

        return True

    def _apply_filters(self, file_path: str) -> bool:
        """Return True when ``file_path`` satisfies every configured filter.

        Supported filter keys:
            'filename_contains': substring that must appear in ``file_path``.
            'custom_filter': callable returning a truthy value to accept the
                file; ignored when the value is not callable.
        """
        filters = self.config.filters

        if 'filename_contains' in filters and filters['filename_contains'] not in file_path:
            return False

        if 'custom_filter' in filters and callable(filters['custom_filter']):
            if not filters['custom_filter'](file_path):
                return False

        return True