cwyodmodules 0.3.36__py3-none-any.whl → 0.3.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/api/chat_history.py +10 -9
- cwyodmodules/batch/utilities/chat_history/auth_utils.py +3 -2
- cwyodmodules/batch/utilities/chat_history/cosmosdb.py +13 -12
- cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +13 -12
- cwyodmodules/batch/utilities/common/source_document.py +9 -8
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +2 -1
- cwyodmodules/batch/utilities/document_chunking/layout.py +2 -1
- cwyodmodules/batch/utilities/document_chunking/page.py +2 -1
- cwyodmodules/batch/utilities/document_loading/read.py +2 -1
- cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +7 -6
- cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +3 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +11 -10
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +11 -10
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +10 -9
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +20 -19
- cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py +4 -3
- cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py +4 -3
- cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py +7 -6
- cwyodmodules/batch/utilities/helpers/env_helper.py +4 -1
- cwyodmodules/batch/utilities/helpers/lightrag_helper.py +5 -4
- cwyodmodules/batch/utilities/helpers/llm_helper.py +10 -9
- cwyodmodules/batch/utilities/helpers/secret_helper.py +3 -3
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +5 -4
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +4 -3
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +2 -1
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +4 -3
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +2 -1
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +5 -4
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +5 -4
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +2 -1
- cwyodmodules/batch/utilities/parser/output_parser_tool.py +5 -4
- cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +4 -3
- cwyodmodules/batch/utilities/search/azure_search_handler.py +12 -11
- cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +10 -9
- cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +12 -11
- cwyodmodules/batch/utilities/search/lightrag_search_handler.py +9 -8
- cwyodmodules/batch/utilities/search/postgres_search_handler.py +13 -12
- cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +14 -13
- cwyodmodules/batch/utilities/search/search.py +3 -2
- cwyodmodules/batch/utilities/tools/content_safety_checker.py +5 -4
- cwyodmodules/batch/utilities/tools/post_prompt_tool.py +2 -1
- cwyodmodules/batch/utilities/tools/question_answer_tool.py +8 -7
- cwyodmodules/batch/utilities/tools/text_processing_tool.py +2 -1
- {cwyodmodules-0.3.36.dist-info → cwyodmodules-0.3.37.dist-info}/METADATA +2 -2
- {cwyodmodules-0.3.36.dist-info → cwyodmodules-0.3.37.dist-info}/RECORD +48 -48
- {cwyodmodules-0.3.36.dist-info → cwyodmodules-0.3.37.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.36.dist-info → cwyodmodules-0.3.37.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.36.dist-info → cwyodmodules-0.3.37.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from ..helpers.azure_identity_helper import AzureIdentityHelper
|
|
7
7
|
|
8
8
|
from logging_config import logger
|
9
9
|
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_execution = env_helper.LOG_EXECUTION
|
10
11
|
log_args = env_helper.LOG_ARGS
|
11
12
|
log_result = env_helper.LOG_RESULT
|
12
13
|
|
@@ -19,7 +20,7 @@ class AzurePostgresHelper:
|
|
19
20
|
self.azure_identity_helper = AzureIdentityHelper()
|
20
21
|
self.conn = None
|
21
22
|
|
22
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
23
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
23
24
|
def _create_search_client(self):
|
24
25
|
"""
|
25
26
|
Establishes a connection to Azure PostgreSQL using AAD authentication.
|
@@ -49,7 +50,7 @@ class AzurePostgresHelper:
|
|
49
50
|
logger.error(f"Error establishing a connection to PostgreSQL: {e}")
|
50
51
|
raise
|
51
52
|
|
52
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
53
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
53
54
|
def get_search_client(self):
|
54
55
|
"""
|
55
56
|
Provides a reusable database connection.
|
@@ -58,7 +59,7 @@ class AzurePostgresHelper:
|
|
58
59
|
self.conn = self._create_search_client()
|
59
60
|
return self.conn
|
60
61
|
|
61
|
-
@logger.trace_function(log_args=False, log_result=False)
|
62
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
62
63
|
def get_vector_store(self, embedding_array):
|
63
64
|
"""
|
64
65
|
Fetches search indexes from PostgreSQL based on an embedding vector.
|
@@ -87,7 +88,7 @@ class AzurePostgresHelper:
|
|
87
88
|
finally:
|
88
89
|
conn.close()
|
89
90
|
|
90
|
-
@logger.trace_function(log_args=False, log_result=log_result)
|
91
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
91
92
|
def create_vector_store(self, documents_to_upload):
|
92
93
|
"""
|
93
94
|
Inserts documents into the `vector_store` table in batch mode.
|
@@ -131,7 +132,7 @@ class AzurePostgresHelper:
|
|
131
132
|
finally:
|
132
133
|
conn.close()
|
133
134
|
|
134
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
135
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
135
136
|
def get_files(self):
|
136
137
|
"""
|
137
138
|
Fetches distinct titles from the PostgreSQL database.
|
@@ -164,7 +165,7 @@ class AzurePostgresHelper:
|
|
164
165
|
finally:
|
165
166
|
conn.close()
|
166
167
|
|
167
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
168
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
168
169
|
def delete_documents(self, ids_to_delete):
|
169
170
|
"""
|
170
171
|
Deletes documents from the PostgreSQL database based on the provided ids.
|
@@ -212,7 +213,7 @@ class AzurePostgresHelper:
|
|
212
213
|
finally:
|
213
214
|
conn.close()
|
214
215
|
|
215
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
216
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
216
217
|
def perform_search(self, title):
|
217
218
|
"""
|
218
219
|
Fetches search results from PostgreSQL based on the title.
|
@@ -239,7 +240,7 @@ class AzurePostgresHelper:
|
|
239
240
|
finally:
|
240
241
|
conn.close()
|
241
242
|
|
242
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
243
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
243
244
|
def get_unique_files(self):
|
244
245
|
"""
|
245
246
|
Fetches unique titles from PostgreSQL.
|
@@ -264,7 +265,7 @@ class AzurePostgresHelper:
|
|
264
265
|
finally:
|
265
266
|
conn.close()
|
266
267
|
|
267
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
268
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
268
269
|
def search_by_blob_url(self, blob_url):
|
269
270
|
"""
|
270
271
|
Fetches unique titles from PostgreSQL based on a given blob URL.
|
@@ -291,7 +292,7 @@ class AzurePostgresHelper:
|
|
291
292
|
finally:
|
292
293
|
conn.close()
|
293
294
|
|
294
|
-
@logger.trace_function(log_args=False, log_result=False)
|
295
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
295
296
|
def store_with_lightrag(self, documents_to_upload):
|
296
297
|
"""
|
297
298
|
Stores documents using LightRAG for enhanced vector and text storage capabilities.
|
@@ -30,6 +30,7 @@ from ..helpers.llm_helper import LLMHelper
|
|
30
30
|
from ..helpers.env_helper import EnvHelper
|
31
31
|
from logging_config import logger
|
32
32
|
env_helper: EnvHelper = EnvHelper()
|
33
|
+
log_execution = env_helper.LOG_EXECUTION
|
33
34
|
log_args = env_helper.LOG_ARGS
|
34
35
|
log_result = env_helper.LOG_RESULT
|
35
36
|
|
@@ -47,7 +48,7 @@ class AzureSearchHelper:
|
|
47
48
|
self.search_index_client = self._create_search_index_client(search_credential)
|
48
49
|
self.azure_computer_vision_client = AzureComputerVisionClient(self.env_helper)
|
49
50
|
|
50
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
51
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
51
52
|
def _search_credential(self):
|
52
53
|
if self.env_helper.is_auth_type_keys():
|
53
54
|
return AzureKeyCredential(self.env_helper.AZURE_SEARCH_KEY)
|
@@ -55,7 +56,7 @@ class AzureSearchHelper:
|
|
55
56
|
credential = self.azure_identity_helper.get_credential()
|
56
57
|
return credential
|
57
58
|
|
58
|
-
@logger.trace_function(log_args=False, log_result=False)
|
59
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
59
60
|
def _create_search_client(
|
60
61
|
self, search_credential: Union[AzureKeyCredential, ChainedTokenCredential, DefaultAzureCredential]
|
61
62
|
) -> SearchClient:
|
@@ -65,7 +66,7 @@ class AzureSearchHelper:
|
|
65
66
|
credential=search_credential,
|
66
67
|
)
|
67
68
|
|
68
|
-
@logger.trace_function(log_args=False, log_result=False)
|
69
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
69
70
|
def _create_search_index_client(
|
70
71
|
self, search_credential: Union[AzureKeyCredential, ChainedTokenCredential, DefaultAzureCredential]
|
71
72
|
):
|
@@ -73,13 +74,13 @@ class AzureSearchHelper:
|
|
73
74
|
endpoint=self.env_helper.AZURE_SEARCH_SERVICE, credential=search_credential
|
74
75
|
)
|
75
76
|
|
76
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
77
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
77
78
|
def get_search_client(self) -> SearchClient:
|
78
79
|
self.create_index()
|
79
80
|
return self.search_client
|
80
81
|
|
81
82
|
@property
|
82
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
83
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
83
84
|
def search_dimensions(self) -> int:
|
84
85
|
if AzureSearchHelper._search_dimension is None:
|
85
86
|
AzureSearchHelper._search_dimension = len(
|
@@ -88,7 +89,7 @@ class AzureSearchHelper:
|
|
88
89
|
return AzureSearchHelper._search_dimension
|
89
90
|
|
90
91
|
@property
|
91
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
92
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
92
93
|
def image_search_dimensions(self) -> int:
|
93
94
|
if AzureSearchHelper._image_search_dimension is None:
|
94
95
|
AzureSearchHelper._image_search_dimension = len(
|
@@ -96,7 +97,7 @@ class AzureSearchHelper:
|
|
96
97
|
)
|
97
98
|
return AzureSearchHelper._image_search_dimension
|
98
99
|
|
99
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
100
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
100
101
|
def create_index(self):
|
101
102
|
fields = [
|
102
103
|
SimpleField(
|
@@ -211,13 +212,13 @@ class AzureSearchHelper:
|
|
211
212
|
)
|
212
213
|
self.search_index_client.create_index(index)
|
213
214
|
|
214
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
215
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
215
216
|
def _index_not_exists(self, index_name: str) -> bool:
|
216
217
|
return index_name not in [
|
217
218
|
name for name in self.search_index_client.list_index_names()
|
218
219
|
]
|
219
220
|
|
220
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
221
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
221
222
|
def get_conversation_logger(self):
|
222
223
|
fields = [
|
223
224
|
SimpleField(
|
@@ -20,6 +20,7 @@ ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"]
|
|
20
20
|
|
21
21
|
from logging_config import logger
|
22
22
|
env_helper: EnvHelper = EnvHelper()
|
23
|
+
log_execution = env_helper.LOG_EXECUTION
|
23
24
|
log_args = env_helper.LOG_ARGS
|
24
25
|
log_result = env_helper.LOG_RESULT
|
25
26
|
|
@@ -59,7 +60,7 @@ class Config:
|
|
59
60
|
"conversational_flow", self.env_helper.CONVERSATION_FLOW
|
60
61
|
)
|
61
62
|
|
62
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
63
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
63
64
|
def get_available_document_types(self) -> list[str]:
|
64
65
|
document_types = {
|
65
66
|
"txt",
|
@@ -78,27 +79,27 @@ class Config:
|
|
78
79
|
|
79
80
|
return sorted(document_types)
|
80
81
|
|
81
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
82
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
82
83
|
def get_advanced_image_processing_image_types(self):
|
83
84
|
return ADVANCED_IMAGE_PROCESSING_FILE_TYPES
|
84
85
|
|
85
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
86
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
86
87
|
def get_available_chunking_strategies(self):
|
87
88
|
return [c.value for c in ChunkingStrategy]
|
88
89
|
|
89
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
90
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
90
91
|
def get_available_loading_strategies(self):
|
91
92
|
return [c.value for c in LoadingStrategy]
|
92
93
|
|
93
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
94
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
94
95
|
def get_available_orchestration_strategies(self):
|
95
96
|
return [c.value for c in OrchestrationStrategy]
|
96
97
|
|
97
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
98
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
98
99
|
def get_available_ai_assistant_types(self):
|
99
100
|
return [c.value for c in AssistantStrategy]
|
100
101
|
|
101
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
102
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
102
103
|
def get_available_conversational_flows(self):
|
103
104
|
return [c.value for c in ConversationFlow]
|
104
105
|
|
@@ -149,7 +150,7 @@ class ConfigHelper:
|
|
149
150
|
_default_config = None
|
150
151
|
|
151
152
|
@staticmethod
|
152
|
-
@logger.trace_function(log_args=False, log_result=False)
|
153
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
153
154
|
def _set_new_config_properties(config: dict, default_config: dict):
|
154
155
|
"""
|
155
156
|
Function used to set newer properties that will not be present in older configs.
|
@@ -200,7 +201,7 @@ class ConfigHelper:
|
|
200
201
|
|
201
202
|
@staticmethod
|
202
203
|
@functools.cache
|
203
|
-
@logger.trace_function(log_args=False, log_result=False)
|
204
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
204
205
|
def get_active_config_or_default():
|
205
206
|
logger.info("Method get_active_config_or_default started")
|
206
207
|
env_helper = EnvHelper()
|
@@ -226,13 +227,13 @@ class ConfigHelper:
|
|
226
227
|
|
227
228
|
@staticmethod
|
228
229
|
@functools.cache
|
229
|
-
@logger.trace_function(log_args=False, log_result=False)
|
230
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
230
231
|
def get_default_assistant_prompt():
|
231
232
|
config = ConfigHelper.get_default_config()
|
232
233
|
return config["prompts"]["answering_user_prompt"]
|
233
234
|
|
234
235
|
@staticmethod
|
235
|
-
@logger.trace_function(log_args=False, log_result=log_result)
|
236
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
236
237
|
def save_config_as_active(config):
|
237
238
|
ConfigHelper.validate_config(config)
|
238
239
|
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
|
@@ -244,7 +245,7 @@ class ConfigHelper:
|
|
244
245
|
ConfigHelper.get_active_config_or_default.cache_clear()
|
245
246
|
|
246
247
|
@staticmethod
|
247
|
-
@logger.trace_function(log_args=False, log_result=log_result)
|
248
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
248
249
|
def validate_config(config: dict):
|
249
250
|
for document_processor in config.get("document_processors"):
|
250
251
|
document_type = document_processor.get("document_type")
|
@@ -260,7 +261,7 @@ class ConfigHelper:
|
|
260
261
|
)
|
261
262
|
|
262
263
|
@staticmethod
|
263
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
264
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
264
265
|
def get_default_config():
|
265
266
|
if ConfigHelper._default_config is None:
|
266
267
|
env_helper = EnvHelper()
|
@@ -292,7 +293,7 @@ class ConfigHelper:
|
|
292
293
|
|
293
294
|
@staticmethod
|
294
295
|
@functools.cache
|
295
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
296
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
296
297
|
def get_default_contract_assistant():
|
297
298
|
contract_file_path = os.path.join(
|
298
299
|
os.path.dirname(__file__), "default_contract_assistant_prompt.txt"
|
@@ -305,7 +306,7 @@ class ConfigHelper:
|
|
305
306
|
|
306
307
|
@staticmethod
|
307
308
|
@functools.cache
|
308
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
309
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
309
310
|
def get_default_employee_assistant():
|
310
311
|
employee_file_path = os.path.join(
|
311
312
|
os.path.dirname(__file__), "default_employee_assistant_prompt.txt"
|
@@ -317,13 +318,13 @@ class ConfigHelper:
|
|
317
318
|
return "".join([str(elem) for elem in employee_assistant])
|
318
319
|
|
319
320
|
@staticmethod
|
320
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
321
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
321
322
|
def clear_config():
|
322
323
|
ConfigHelper._default_config = None
|
323
324
|
ConfigHelper.get_active_config_or_default.cache_clear()
|
324
325
|
|
325
326
|
@staticmethod
|
326
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
327
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
327
328
|
def _append_advanced_image_processors():
|
328
329
|
image_file_types = ["jpeg", "jpg", "png", "tiff", "bmp"]
|
329
330
|
ConfigHelper._remove_processors_for_file_types(image_file_types)
|
@@ -335,7 +336,7 @@ class ConfigHelper:
|
|
335
336
|
)
|
336
337
|
|
337
338
|
@staticmethod
|
338
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
339
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
339
340
|
def _remove_processors_for_file_types(file_types: list[str]):
|
340
341
|
document_processors = ConfigHelper._default_config["document_processors"]
|
341
342
|
document_processors = [
|
@@ -346,7 +347,7 @@ class ConfigHelper:
|
|
346
347
|
ConfigHelper._default_config["document_processors"] = document_processors
|
347
348
|
|
348
349
|
@staticmethod
|
349
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
350
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
350
351
|
def delete_config():
|
351
352
|
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
|
352
353
|
blob_client.delete_file(CONFIG_FILE_NAME)
|
@@ -10,6 +10,7 @@ from ..config.config_helper import ConfigHelper
|
|
10
10
|
|
11
11
|
from logging_config import logger
|
12
12
|
env_helper: EnvHelper = EnvHelper()
|
13
|
+
log_execution = env_helper.LOG_EXECUTION
|
13
14
|
log_args = env_helper.LOG_ARGS
|
14
15
|
log_result = env_helper.LOG_RESULT
|
15
16
|
|
@@ -22,14 +23,14 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
|
|
22
23
|
self.llm_helper: LLMHelper = LLMHelper()
|
23
24
|
logger.info("Initialized IntegratedVectorizationEmbedder.")
|
24
25
|
|
25
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
26
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
26
27
|
def embed_file(self, source_url: str, file_name: str = None):
|
27
28
|
logger.info(
|
28
29
|
f"Starting embed_file for source_url: {source_url}, file_name: {file_name}."
|
29
30
|
)
|
30
31
|
self.process_using_integrated_vectorization(source_url=source_url)
|
31
32
|
|
32
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
33
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
33
34
|
def process_using_integrated_vectorization(self, source_url: str):
|
34
35
|
logger.info(f"Starting integrated vectorization for source_url: {source_url}.")
|
35
36
|
config = ConfigHelper.get_active_config_or_default()
|
@@ -53,7 +54,7 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
|
|
53
54
|
logger.error(f"Error processing {source_url}: {e}")
|
54
55
|
raise e
|
55
56
|
|
56
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
57
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
57
58
|
def reprocess_all(self):
|
58
59
|
logger.info("Starting reprocess_all operation.")
|
59
60
|
search_indexer = AzureSearchIndexer(self.env_helper)
|
@@ -16,6 +16,7 @@ from ...common.source_document import SourceDocument
|
|
16
16
|
|
17
17
|
from logging_config import logger
|
18
18
|
env_helper: EnvHelper = EnvHelper()
|
19
|
+
log_execution = env_helper.LOG_EXECUTION
|
19
20
|
log_args = env_helper.LOG_ARGS
|
20
21
|
log_result = env_helper.LOG_RESULT
|
21
22
|
|
@@ -35,7 +36,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
35
36
|
ext = processor.document_type.lower()
|
36
37
|
self.embedding_configs[ext] = processor
|
37
38
|
|
38
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
39
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
39
40
|
def embed_file(self, source_url: str, file_name: str):
|
40
41
|
logger.info(f"Embedding file: {file_name} from source: {source_url}")
|
41
42
|
file_extension = file_name.split(".")[-1].lower()
|
@@ -50,7 +51,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
50
51
|
file_name, {"embeddings_added": "true"}
|
51
52
|
)
|
52
53
|
|
53
|
-
@logger.trace_function(log_args=False, log_result=log_result)
|
54
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
54
55
|
def __embed(
|
55
56
|
self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
|
56
57
|
):
|
@@ -88,7 +89,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
88
89
|
else:
|
89
90
|
logger.warning("No documents to upload.")
|
90
91
|
|
91
|
-
@logger.trace_function(log_args=False, log_result=False)
|
92
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
92
93
|
def __convert_to_search_document(self, document: SourceDocument):
|
93
94
|
logger.info(f"Generating embeddings for document ID: {document.id}")
|
94
95
|
embedded_content = self.llm_helper.generate_embeddings(document.content)
|
@@ -20,6 +20,7 @@ from ...common.source_document import SourceDocument
|
|
20
20
|
|
21
21
|
from logging_config import logger
|
22
22
|
env_helper: EnvHelper = EnvHelper()
|
23
|
+
log_execution = env_helper.LOG_EXECUTION
|
23
24
|
log_args = env_helper.LOG_ARGS
|
24
25
|
log_result = env_helper.LOG_RESULT
|
25
26
|
|
@@ -42,7 +43,7 @@ class PushEmbedder(EmbedderBase):
|
|
42
43
|
self.embedding_configs[ext] = processor
|
43
44
|
logger.info("Document processors loaded")
|
44
45
|
|
45
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
46
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
46
47
|
def embed_file(self, source_url: str, file_name: str):
|
47
48
|
logger.info(f"Embedding file: {file_name} from URL: {source_url}")
|
48
49
|
file_extension = file_name.split(".")[-1].lower()
|
@@ -58,7 +59,7 @@ class PushEmbedder(EmbedderBase):
|
|
58
59
|
file_name, {"embeddings_added": "true"}
|
59
60
|
)
|
60
61
|
|
61
|
-
@logger.trace_function(log_args=False, log_result=log_result)
|
62
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
62
63
|
def __embed(
|
63
64
|
self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
|
64
65
|
):
|
@@ -105,7 +106,7 @@ class PushEmbedder(EmbedderBase):
|
|
105
106
|
else:
|
106
107
|
logger.warning("No documents to upload.")
|
107
108
|
|
108
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
109
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
109
110
|
def __generate_image_caption(self, source_url):
|
110
111
|
logger.info(f"Generating image caption for URL: {source_url}")
|
111
112
|
model = self.env_helper.AZURE_OPENAI_VISION_MODEL
|
@@ -134,7 +135,7 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
|
|
134
135
|
logger.info("Caption generation completed")
|
135
136
|
return caption
|
136
137
|
|
137
|
-
@logger.trace_function(log_args=False, log_result=False)
|
138
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
138
139
|
def __convert_to_search_document(self, document: SourceDocument):
|
139
140
|
logger.info(f"Converting document ID {document.id} to search document format")
|
140
141
|
embedded_content = self.llm_helper.generate_embeddings(document.content)
|
@@ -158,12 +159,12 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
|
|
158
159
|
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
|
159
160
|
}
|
160
161
|
|
161
|
-
@logger.trace_function(log_args=log_args, log_result=log_result)
|
162
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
162
163
|
def __generate_document_id(self, source_url: str) -> str:
|
163
164
|
hash_key = hashlib.sha1(f"{source_url}_1".encode("utf-8")).hexdigest()
|
164
165
|
return f"doc_{hash_key}"
|
165
166
|
|
166
|
-
@logger.trace_function(log_args=False, log_result=False)
|
167
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
167
168
|
def __create_image_document(
|
168
169
|
self,
|
169
170
|
source_url: str,
|
@@ -25,7 +25,7 @@ class EnvHelper:
|
|
25
25
|
cls._instance = instance
|
26
26
|
return cls._instance
|
27
27
|
|
28
|
-
@logger.trace_function(log_args=False, log_result=False)
|
28
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
29
29
|
def __load_config(self, **kwargs) -> None:
|
30
30
|
# load_dotenv()
|
31
31
|
|
@@ -48,6 +48,9 @@ class EnvHelper:
|
|
48
48
|
# )
|
49
49
|
|
50
50
|
self.LOGLEVEL = self.secretHelper.get_secret("logging-level")
|
51
|
+
self.LOG_EXECUTION = self.get_env_var_bool(
|
52
|
+
"LOG_EXECUTION", "True"
|
53
|
+
)
|
51
54
|
self.LOG_ARGS = self.get_env_var_bool(
|
52
55
|
"LOG_ARGS", "True"
|
53
56
|
)
|
@@ -5,6 +5,7 @@ from ..helpers.azure_identity_helper import AzureIdentityHelper
|
|
5
5
|
from ...utilities.helpers.env_helper import EnvHelper
|
6
6
|
from logging_config import logger
|
7
7
|
env_helper: EnvHelper = EnvHelper()
|
8
|
+
log_execution = env_helper.LOG_EXECUTION
|
8
9
|
log_args = env_helper.LOG_ARGS
|
9
10
|
log_result = env_helper.LOG_RESULT
|
10
11
|
|
@@ -15,7 +16,7 @@ class LightRAGHelper:
|
|
15
16
|
self.azure_identity_helper = AzureIdentityHelper()
|
16
17
|
self.conn = None
|
17
18
|
|
18
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
19
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
19
20
|
def _create_connection(self):
|
20
21
|
"""
|
21
22
|
Establishes a connection to PostgreSQL using AAD authentication.
|
@@ -45,7 +46,7 @@ class LightRAGHelper:
|
|
45
46
|
logger.error(f"Error establishing a connection to PostgreSQL: {e}")
|
46
47
|
raise
|
47
48
|
|
48
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
49
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
49
50
|
def get_connection(self):
|
50
51
|
"""
|
51
52
|
Provides a reusable database connection.
|
@@ -54,7 +55,7 @@ class LightRAGHelper:
|
|
54
55
|
self.conn = self._create_connection()
|
55
56
|
return self.conn
|
56
57
|
|
57
|
-
@logger.trace_function(log_args=False, log_result=False)
|
58
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
58
59
|
def store_vector_and_text(self, vector, text, metadata):
|
59
60
|
"""
|
60
61
|
Stores a vector and associated text in the PostgreSQL database.
|
@@ -76,7 +77,7 @@ class LightRAGHelper:
|
|
76
77
|
finally:
|
77
78
|
conn.close()
|
78
79
|
|
79
|
-
@logger.trace_function(log_args=False, log_result=False)
|
80
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
80
81
|
def retrieve_vectors(self, query_vector, top_k):
|
81
82
|
"""
|
82
83
|
Retrieves the top K vectors similar to the provided query vector.
|
@@ -12,6 +12,7 @@ from .env_helper import EnvHelper
|
|
12
12
|
|
13
13
|
from logging_config import logger
|
14
14
|
env_helper: EnvHelper = EnvHelper()
|
15
|
+
log_execution = env_helper.LOG_EXECUTION
|
15
16
|
log_args = env_helper.LOG_ARGS
|
16
17
|
log_result = env_helper.LOG_RESULT
|
17
18
|
|
@@ -48,7 +49,7 @@ class LLMHelper:
|
|
48
49
|
|
49
50
|
logger.info("Initializing LLMHelper completed")
|
50
51
|
|
51
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
52
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
52
53
|
def get_llm(self):
|
53
54
|
if self.auth_type_keys:
|
54
55
|
return AzureChatOpenAI(
|
@@ -70,7 +71,7 @@ class LLMHelper:
|
|
70
71
|
)
|
71
72
|
|
72
73
|
# TODO: This needs to have a custom callback to stream back to the UI
|
73
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
74
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
74
75
|
def get_streaming_llm(self):
|
75
76
|
if self.auth_type_keys:
|
76
77
|
return AzureChatOpenAI(
|
@@ -96,7 +97,7 @@ class LLMHelper:
|
|
96
97
|
azure_ad_token_provider=self.token_provider,
|
97
98
|
)
|
98
99
|
|
99
|
-
@logger.trace_function(log_args=log_args, log_result=False)
|
100
|
+
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
100
101
|
def get_embedding_model(self):
|
101
102
|
if self.auth_type_keys:
|
102
103
|
return AzureOpenAIEmbeddings(
|
@@ -113,7 +114,7 @@ class LLMHelper:
|
|
113
114
|
azure_ad_token_provider=self.token_provider,
|
114
115
|
)
|
115
116
|
|
116
|
-
@logger.trace_function(log_args=False, log_result=False)
|
117
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
117
118
|
def generate_embeddings(self, input: Union[str, list[int]]) -> List[float]:
|
118
119
|
return (
|
119
120
|
self.openai_client.embeddings.create(
|
@@ -123,7 +124,7 @@ class LLMHelper:
|
|
123
124
|
.embedding
|
124
125
|
)
|
125
126
|
|
126
|
-
@logger.trace_function(log_args=False, log_result=False)
|
127
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
127
128
|
def get_chat_completion_with_functions(
|
128
129
|
self, messages: list[dict], functions: list[dict], function_call: str = "auto"
|
129
130
|
):
|
@@ -134,7 +135,7 @@ class LLMHelper:
|
|
134
135
|
function_call=function_call,
|
135
136
|
)
|
136
137
|
|
137
|
-
@logger.trace_function(log_args=False, log_result=False)
|
138
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
138
139
|
def get_chat_completion(
|
139
140
|
self, messages: list[dict], model: str | None = None, **kwargs
|
140
141
|
):
|
@@ -145,7 +146,7 @@ class LLMHelper:
|
|
145
146
|
**kwargs,
|
146
147
|
)
|
147
148
|
|
148
|
-
@logger.trace_function(log_args=False, log_result=False)
|
149
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
149
150
|
def get_sk_chat_completion_service(self, service_id: str):
|
150
151
|
if self.auth_type_keys:
|
151
152
|
return AzureChatCompletion(
|
@@ -175,7 +176,7 @@ class LLMHelper:
|
|
175
176
|
ad_token_provider=self.token_provider,
|
176
177
|
)
|
177
178
|
|
178
|
-
@logger.trace_function(log_args=False, log_result=False)
|
179
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
179
180
|
def get_sk_service_settings(self, service: AzureChatCompletion):
|
180
181
|
return cast(
|
181
182
|
AzureChatPromptExecutionSettings,
|
@@ -186,7 +187,7 @@ class LLMHelper:
|
|
186
187
|
),
|
187
188
|
)
|
188
189
|
|
189
|
-
@logger.trace_function(log_args=False, log_result=False)
|
190
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
190
191
|
def get_ml_client(self):
|
191
192
|
if not hasattr(self, "_ml_client"):
|
192
193
|
credential = self.azure_identity_helper.get_credential()
|
@@ -30,7 +30,7 @@ class SecretHelper:
|
|
30
30
|
connection_verify=True,
|
31
31
|
)
|
32
32
|
|
33
|
-
@logger.trace_function(log_args=False, log_result=False)
|
33
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
34
34
|
def get_secret(self, secret_name: str) -> str:
|
35
35
|
"""
|
36
36
|
Retrieves the value of a secret from the environment variables or Azure Key Vault.
|
@@ -48,7 +48,7 @@ class SecretHelper:
|
|
48
48
|
secret_value = self.secret_client.get_secret(name=secret_name).value
|
49
49
|
return secret_value
|
50
50
|
|
51
|
-
@logger.trace_function(log_args=False, log_result=False)
|
51
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
52
52
|
def set_secret(self, secret_name: str, secret_value: str) -> None:
|
53
53
|
"""
|
54
54
|
Sets the value of a secret in Azure Key Vault only if it doesn't exist or has a different value.
|
@@ -76,7 +76,7 @@ class SecretHelper:
|
|
76
76
|
logger.warning(f"Secret {secret_name} has been created")
|
77
77
|
|
78
78
|
|
79
|
-
@logger.trace_function(log_args=False, log_result=False)
|
79
|
+
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
80
80
|
def get_secret_from_json(self, secret_name: str) -> str:
|
81
81
|
secret_value = self.secret_client.get_secret(secret_name).value
|
82
82
|
return secret_value
|