cwyodmodules 0.3.44__py3-none-any.whl → 0.3.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,362 +1,362 @@
1
- import os
2
- import json
3
- import functools
4
- from string import Template
5
-
6
- from ..azure_blob_storage_client import AzureBlobStorageClient
7
- from ...document_chunking.chunking_strategy import ChunkingStrategy, ChunkingSettings
8
- from ...document_loading import LoadingSettings, LoadingStrategy
9
- from .embedding_config import EmbeddingConfig
10
- from ...orchestrator.orchestration_strategy import OrchestrationStrategy
11
- from ...orchestrator import OrchestrationSettings
12
- from ..env_helper import EnvHelper
13
- from .assistant_strategy import AssistantStrategy
14
- from .conversation_flow import ConversationFlow
15
- from .database_type import DatabaseType
16
- from .agent_mode import AgentMode
17
-
18
- CONFIG_CONTAINER_NAME = "config"
19
- CONFIG_FILE_NAME = "active.json"
20
- ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"]
21
-
22
- from mgmt_config import logger
23
- env_helper: EnvHelper = EnvHelper()
24
- log_execution = env_helper.LOG_EXECUTION
25
- log_args = env_helper.LOG_ARGS
26
- log_result = env_helper.LOG_RESULT
27
-
28
-
29
- class Config:
30
- def __init__(self, config: dict):
31
- self.prompts = Prompts(config["prompts"])
32
- self.messages = Messages(config["messages"])
33
- self.example = Example(config["example"])
34
- self.logging = Logging(config["logging"])
35
- self.document_processors = [
36
- EmbeddingConfig(
37
- document_type=c["document_type"],
38
- chunking=ChunkingSettings(c["chunking"]),
39
- loading=LoadingSettings(c["loading"]),
40
- use_advanced_image_processing=c.get(
41
- "use_advanced_image_processing", False
42
- ),
43
- )
44
- for c in config["document_processors"]
45
- ]
46
- self.env_helper = EnvHelper()
47
- self.default_orchestration_settings = {
48
- "strategy": self.env_helper.ORCHESTRATION_STRATEGY
49
- }
50
- self.orchestrator = OrchestrationSettings(
51
- config.get("orchestrator", self.default_orchestration_settings)
52
- )
53
- self.integrated_vectorization_config = (
54
- IntegratedVectorizationConfig(config["integrated_vectorization_config"])
55
- if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
56
- else None
57
- )
58
- self.enable_chat_history = config["enable_chat_history"]
59
- self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
60
- self.conversational_flow = config.get(
61
- "conversational_flow", self.env_helper.CONVERSATION_FLOW
62
- )
63
- self.agent_mode = config.get("agent_mode", AgentMode.NORMAL.value)
64
-
65
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
66
- def get_available_document_types(self) -> list[str]:
67
- document_types = {
68
- "txt",
69
- "pdf",
70
- "url",
71
- "html",
72
- "htm",
73
- "md",
74
- "jpeg",
75
- "jpg",
76
- "png",
77
- "docx",
78
- }
79
- if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
80
- document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES)
81
-
82
- return sorted(document_types)
83
-
84
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
85
- def get_advanced_image_processing_image_types(self):
86
- return ADVANCED_IMAGE_PROCESSING_FILE_TYPES
87
-
88
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
89
- def get_available_chunking_strategies(self):
90
- return [c.value for c in ChunkingStrategy]
91
-
92
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
93
- def get_available_loading_strategies(self):
94
- return [c.value for c in LoadingStrategy]
95
-
96
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
97
- def get_available_orchestration_strategies(self):
98
- return [c.value for c in OrchestrationStrategy]
99
-
100
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
101
- def get_available_ai_assistant_types(self):
102
- return [c.value for c in AssistantStrategy]
103
-
104
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
105
- def get_available_conversational_flows(self):
106
- return [c.value for c in ConversationFlow]
107
-
108
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
109
- def get_available_agent_modes(self):
110
- return [c.value for c in AgentMode]
111
-
112
-
113
- # TODO: Change to AnsweringChain or something, Prompts is not a good name
114
- class Prompts:
115
- def __init__(self, prompts: dict):
116
- self.condense_question_prompt = prompts["condense_question_prompt"]
117
- self.answering_system_prompt = prompts["answering_system_prompt"]
118
- self.answering_user_prompt = prompts["answering_user_prompt"]
119
- self.post_answering_prompt = prompts["post_answering_prompt"]
120
- self.use_on_your_data_format = prompts["use_on_your_data_format"]
121
- self.enable_post_answering_prompt = prompts["enable_post_answering_prompt"]
122
- self.enable_content_safety = prompts["enable_content_safety"]
123
- self.ai_assistant_type = prompts["ai_assistant_type"]
124
- self.conversational_flow = prompts["conversational_flow"]
125
-
126
-
127
- class Example:
128
- def __init__(self, example: dict):
129
- self.documents = example["documents"]
130
- self.user_question = example["user_question"]
131
- self.answer = example["answer"]
132
-
133
-
134
- class Messages:
135
- def __init__(self, messages: dict):
136
- self.post_answering_filter = messages["post_answering_filter"]
137
-
138
-
139
- class Logging:
140
- def __init__(self, logging: dict):
141
- self.log_user_interactions = (
142
- str(logging["log_user_interactions"]).lower() == "true"
143
- )
144
- self.log_tokens = str(logging["log_tokens"]).lower() == "true"
145
-
146
-
147
- class IntegratedVectorizationConfig:
148
- def __init__(self, integrated_vectorization_config: dict):
149
- self.max_page_length = integrated_vectorization_config["max_page_length"]
150
- self.page_overlap_length = integrated_vectorization_config[
151
- "page_overlap_length"
152
- ]
153
-
154
-
155
- class ConfigHelper:
156
- _default_config = None
157
-
158
- @staticmethod
159
- @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
160
- def _set_new_config_properties(config: dict, default_config: dict):
161
- """
162
- Function used to set newer properties that will not be present in older configs.
163
- The function mutates the config object.
164
- """
165
- if config["prompts"].get("answering_system_prompt") is None:
166
- config["prompts"]["answering_system_prompt"] = default_config["prompts"][
167
- "answering_system_prompt"
168
- ]
169
-
170
- prompt_modified = (
171
- config["prompts"].get("answering_prompt")
172
- != default_config["prompts"]["answering_prompt"]
173
- )
174
-
175
- if config["prompts"].get("answering_user_prompt") is None:
176
- if prompt_modified:
177
- config["prompts"]["answering_user_prompt"] = config["prompts"].get(
178
- "answering_prompt"
179
- )
180
- else:
181
- config["prompts"]["answering_user_prompt"] = default_config["prompts"][
182
- "answering_user_prompt"
183
- ]
184
-
185
- if config["prompts"].get("use_on_your_data_format") is None:
186
- config["prompts"]["use_on_your_data_format"] = not prompt_modified
187
-
188
- if config.get("example") is None:
189
- config["example"] = default_config["example"]
190
-
191
- if config["prompts"].get("ai_assistant_type") is None:
192
- config["prompts"]["ai_assistant_type"] = default_config["prompts"][
193
- "ai_assistant_type"
194
- ]
195
-
196
- if config.get("integrated_vectorization_config") is None:
197
- config["integrated_vectorization_config"] = default_config[
198
- "integrated_vectorization_config"
199
- ]
200
-
201
- if config["prompts"].get("conversational_flow") is None:
202
- config["prompts"]["conversational_flow"] = default_config["prompts"][
203
- "conversational_flow"
204
- ]
205
- if config.get("enable_chat_history") is None:
206
- config["enable_chat_history"] = default_config["enable_chat_history"]
207
-
208
- if config.get("agent_mode") is None:
209
- config["agent_mode"] = default_config["agent_mode"]
210
-
211
- @staticmethod
212
- @functools.cache
213
- @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
214
- def get_active_config_or_default():
215
- logger.info("Method get_active_config_or_default started")
216
- env_helper = EnvHelper()
217
- config = ConfigHelper.get_default_config()
218
- if env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE:
219
- logger.info("Loading configuration from Blob Storage")
220
- blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
221
-
222
- if blob_client.file_exists(CONFIG_FILE_NAME):
223
- logger.info("Configuration file found in Blob Storage")
224
- default_config = config
225
- config_file = blob_client.download_file(CONFIG_FILE_NAME)
226
- config = json.loads(config_file)
227
-
228
- ConfigHelper._set_new_config_properties(config, default_config)
229
- else:
230
- logger.info(
231
- "Configuration file not found in Blob Storage, using default configuration"
232
- )
233
-
234
- logger.info("Method get_active_config_or_default ended")
235
- return Config(config)
236
-
237
- @staticmethod
238
- @functools.cache
239
- @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
240
- def get_default_assistant_prompt():
241
- config = ConfigHelper.get_default_config()
242
- return config["prompts"]["answering_user_prompt"]
243
-
244
- @staticmethod
245
- @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
246
- def save_config_as_active(config):
247
- ConfigHelper.validate_config(config)
248
- blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
249
- blob_client = blob_client.upload_file(
250
- json.dumps(config, indent=2),
251
- CONFIG_FILE_NAME,
252
- content_type="application/json",
253
- )
254
- ConfigHelper.get_active_config_or_default.cache_clear()
255
-
256
- @staticmethod
257
- @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
258
- def validate_config(config: dict):
259
- for document_processor in config.get("document_processors"):
260
- document_type = document_processor.get("document_type")
261
- unsupported_advanced_image_processing_file_type = (
262
- document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES
263
- )
264
- if (
265
- document_processor.get("use_advanced_image_processing")
266
- and unsupported_advanced_image_processing_file_type
267
- ):
268
- raise Exception(
269
- f"Advanced image processing has not been enabled for document type {document_type}, as only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file types are supported."
270
- )
271
-
272
- @staticmethod
273
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
274
- def get_default_config():
275
- if ConfigHelper._default_config is None:
276
- env_helper = EnvHelper()
277
-
278
- config_file_path = os.path.join(os.path.dirname(__file__), "default.json")
279
- logger.info("Loading default config from %s", config_file_path)
280
- with open(config_file_path, encoding="utf-8") as f:
281
- ConfigHelper._default_config = json.loads(
282
- Template(f.read()).substitute(
283
- ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
284
- LOG_USER_INTERACTIONS=(
285
- False
286
- if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
287
- else True
288
- ),
289
- LOG_TOKENS=(
290
- False
291
- if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
292
- else True
293
- ),
294
- CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
295
- DATABASE_TYPE=env_helper.DATABASE_TYPE,
296
- )
297
- )
298
- if env_helper.USE_ADVANCED_IMAGE_PROCESSING:
299
- ConfigHelper._append_advanced_image_processors()
300
-
301
- return ConfigHelper._default_config
302
-
303
- @staticmethod
304
- @functools.cache
305
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
306
- def get_default_contract_assistant():
307
- contract_file_path = os.path.join(
308
- os.path.dirname(__file__), "default_contract_assistant_prompt.txt"
309
- )
310
- contract_assistant = ""
311
- with open(contract_file_path, encoding="utf-8") as f:
312
- contract_assistant = f.readlines()
313
-
314
- return "".join([str(elem) for elem in contract_assistant])
315
-
316
- @staticmethod
317
- @functools.cache
318
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
319
- def get_default_employee_assistant():
320
- employee_file_path = os.path.join(
321
- os.path.dirname(__file__), "default_employee_assistant_prompt.txt"
322
- )
323
- employee_assistant = ""
324
- with open(employee_file_path, encoding="utf-8") as f:
325
- employee_assistant = f.readlines()
326
-
327
- return "".join([str(elem) for elem in employee_assistant])
328
-
329
- @staticmethod
330
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
331
- def clear_config():
332
- ConfigHelper._default_config = None
333
- ConfigHelper.get_active_config_or_default.cache_clear()
334
-
335
- @staticmethod
336
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
337
- def _append_advanced_image_processors():
338
- image_file_types = ["jpeg", "jpg", "png", "tiff", "bmp"]
339
- ConfigHelper._remove_processors_for_file_types(image_file_types)
340
- ConfigHelper._default_config["document_processors"].extend(
341
- [
342
- {"document_type": file_type, "use_advanced_image_processing": True}
343
- for file_type in image_file_types
344
- ]
345
- )
346
-
347
- @staticmethod
348
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
349
- def _remove_processors_for_file_types(file_types: list[str]):
350
- document_processors = ConfigHelper._default_config["document_processors"]
351
- document_processors = [
352
- document_processor
353
- for document_processor in document_processors
354
- if document_processor["document_type"] not in file_types
355
- ]
356
- ConfigHelper._default_config["document_processors"] = document_processors
357
-
358
- @staticmethod
359
- @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
360
- def delete_config():
361
- blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
362
- blob_client.delete_file(CONFIG_FILE_NAME)
1
+ import os
2
+ import json
3
+ import functools
4
+ from string import Template
5
+
6
+ from ..azure_blob_storage_client import AzureBlobStorageClient
7
+ from ...document_chunking.chunking_strategy import ChunkingStrategy, ChunkingSettings
8
+ from ...document_loading import LoadingSettings, LoadingStrategy
9
+ from .embedding_config import EmbeddingConfig
10
+ from ...orchestrator.orchestration_strategy import OrchestrationStrategy
11
+ from ...orchestrator import OrchestrationSettings
12
+ from ..env_helper import EnvHelper
13
+ from .assistant_strategy import AssistantStrategy
14
+ from .conversation_flow import ConversationFlow
15
+ from .database_type import DatabaseType
16
+ from .agent_mode import AgentMode
17
+
18
+ CONFIG_CONTAINER_NAME = "config"
19
+ CONFIG_FILE_NAME = "active.json"
20
+ ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"]
21
+
22
+ from mgmt_config import logger
23
+ env_helper: EnvHelper = EnvHelper()
24
+ log_execution = env_helper.LOG_EXECUTION
25
+ log_args = env_helper.LOG_ARGS
26
+ log_result = env_helper.LOG_RESULT
27
+
28
+
29
+ class Config:
30
+ def __init__(self, config: dict):
31
+ self.prompts = Prompts(config["prompts"])
32
+ self.messages = Messages(config["messages"])
33
+ self.example = Example(config["example"])
34
+ self.logging = Logging(config["logging"])
35
+ self.document_processors = [
36
+ EmbeddingConfig(
37
+ document_type=c["document_type"],
38
+ chunking=ChunkingSettings(c["chunking"]),
39
+ loading=LoadingSettings(c["loading"]),
40
+ use_advanced_image_processing=c.get(
41
+ "use_advanced_image_processing", False
42
+ ),
43
+ )
44
+ for c in config["document_processors"]
45
+ ]
46
+ self.env_helper = EnvHelper()
47
+ self.default_orchestration_settings = {
48
+ "strategy": self.env_helper.ORCHESTRATION_STRATEGY
49
+ }
50
+ self.orchestrator = OrchestrationSettings(
51
+ config.get("orchestrator", self.default_orchestration_settings)
52
+ )
53
+ self.integrated_vectorization_config = (
54
+ IntegratedVectorizationConfig(config["integrated_vectorization_config"])
55
+ if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
56
+ else None
57
+ )
58
+ self.enable_chat_history = config["enable_chat_history"]
59
+ self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
60
+ self.conversational_flow = config.get(
61
+ "conversational_flow", self.env_helper.CONVERSATION_FLOW
62
+ )
63
+ self.agent_mode = config.get("agent_mode", AgentMode.NORMAL.value)
64
+
65
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
66
+ def get_available_document_types(self) -> list[str]:
67
+ document_types = {
68
+ "txt",
69
+ "pdf",
70
+ "url",
71
+ "html",
72
+ "htm",
73
+ "md",
74
+ "jpeg",
75
+ "jpg",
76
+ "png",
77
+ "docx",
78
+ }
79
+ if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
80
+ document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES)
81
+
82
+ return sorted(document_types)
83
+
84
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
85
+ def get_advanced_image_processing_image_types(self):
86
+ return ADVANCED_IMAGE_PROCESSING_FILE_TYPES
87
+
88
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
89
+ def get_available_chunking_strategies(self):
90
+ return [c.value for c in ChunkingStrategy]
91
+
92
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
93
+ def get_available_loading_strategies(self):
94
+ return [c.value for c in LoadingStrategy]
95
+
96
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
97
+ def get_available_orchestration_strategies(self):
98
+ return [c.value for c in OrchestrationStrategy]
99
+
100
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
101
+ def get_available_ai_assistant_types(self):
102
+ return [c.value for c in AssistantStrategy]
103
+
104
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
105
+ def get_available_conversational_flows(self):
106
+ return [c.value for c in ConversationFlow]
107
+
108
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
109
+ def get_available_agent_modes(self):
110
+ return [c.value for c in AgentMode]
111
+
112
+
113
+ # TODO: Change to AnsweringChain or something, Prompts is not a good name
114
+ class Prompts:
115
+ def __init__(self, prompts: dict):
116
+ self.condense_question_prompt = prompts["condense_question_prompt"]
117
+ self.answering_system_prompt = prompts["answering_system_prompt"]
118
+ self.answering_user_prompt = prompts["answering_user_prompt"]
119
+ self.post_answering_prompt = prompts["post_answering_prompt"]
120
+ self.use_on_your_data_format = prompts["use_on_your_data_format"]
121
+ self.enable_post_answering_prompt = prompts["enable_post_answering_prompt"]
122
+ self.enable_content_safety = prompts["enable_content_safety"]
123
+ self.ai_assistant_type = prompts["ai_assistant_type"]
124
+ self.conversational_flow = prompts["conversational_flow"]
125
+
126
+
127
+ class Example:
128
+ def __init__(self, example: dict):
129
+ self.documents = example["documents"]
130
+ self.user_question = example["user_question"]
131
+ self.answer = example["answer"]
132
+
133
+
134
+ class Messages:
135
+ def __init__(self, messages: dict):
136
+ self.post_answering_filter = messages["post_answering_filter"]
137
+
138
+
139
+ class Logging:
140
+ def __init__(self, logging: dict):
141
+ self.log_user_interactions = (
142
+ str(logging["log_user_interactions"]).lower() == "true"
143
+ )
144
+ self.log_tokens = str(logging["log_tokens"]).lower() == "true"
145
+
146
+
147
+ class IntegratedVectorizationConfig:
148
+ def __init__(self, integrated_vectorization_config: dict):
149
+ self.max_page_length = integrated_vectorization_config["max_page_length"]
150
+ self.page_overlap_length = integrated_vectorization_config[
151
+ "page_overlap_length"
152
+ ]
153
+
154
+
155
+ class ConfigHelper:
156
+ _default_config = None
157
+
158
+ @staticmethod
159
+ @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
160
+ def _set_new_config_properties(config: dict, default_config: dict):
161
+ """
162
+ Function used to set newer properties that will not be present in older configs.
163
+ The function mutates the config object.
164
+ """
165
+ if config["prompts"].get("answering_system_prompt") is None:
166
+ config["prompts"]["answering_system_prompt"] = default_config["prompts"][
167
+ "answering_system_prompt"
168
+ ]
169
+
170
+ prompt_modified = (
171
+ config["prompts"].get("answering_prompt")
172
+ != default_config["prompts"]["answering_prompt"]
173
+ )
174
+
175
+ if config["prompts"].get("answering_user_prompt") is None:
176
+ if prompt_modified:
177
+ config["prompts"]["answering_user_prompt"] = config["prompts"].get(
178
+ "answering_prompt"
179
+ )
180
+ else:
181
+ config["prompts"]["answering_user_prompt"] = default_config["prompts"][
182
+ "answering_user_prompt"
183
+ ]
184
+
185
+ if config["prompts"].get("use_on_your_data_format") is None:
186
+ config["prompts"]["use_on_your_data_format"] = not prompt_modified
187
+
188
+ if config.get("example") is None:
189
+ config["example"] = default_config["example"]
190
+
191
+ if config["prompts"].get("ai_assistant_type") is None:
192
+ config["prompts"]["ai_assistant_type"] = default_config["prompts"][
193
+ "ai_assistant_type"
194
+ ]
195
+
196
+ if config.get("integrated_vectorization_config") is None:
197
+ config["integrated_vectorization_config"] = default_config[
198
+ "integrated_vectorization_config"
199
+ ]
200
+
201
+ if config["prompts"].get("conversational_flow") is None:
202
+ config["prompts"]["conversational_flow"] = default_config["prompts"][
203
+ "conversational_flow"
204
+ ]
205
+ if config.get("enable_chat_history") is None:
206
+ config["enable_chat_history"] = default_config["enable_chat_history"]
207
+
208
+ if config.get("agent_mode") is None:
209
+ config["agent_mode"] = default_config["agent_mode"]
210
+
211
+ @staticmethod
212
+ @functools.cache
213
+ @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
214
+ def get_active_config_or_default():
215
+ logger.info("Method get_active_config_or_default started")
216
+ env_helper = EnvHelper()
217
+ config = ConfigHelper.get_default_config()
218
+ if env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE:
219
+ logger.info("Loading configuration from Blob Storage")
220
+ blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
221
+
222
+ if blob_client.file_exists(CONFIG_FILE_NAME):
223
+ logger.info("Configuration file found in Blob Storage")
224
+ default_config = config
225
+ config_file = blob_client.download_file(CONFIG_FILE_NAME)
226
+ config = json.loads(config_file)
227
+
228
+ ConfigHelper._set_new_config_properties(config, default_config)
229
+ else:
230
+ logger.info(
231
+ "Configuration file not found in Blob Storage, using default configuration"
232
+ )
233
+
234
+ logger.info("Method get_active_config_or_default ended")
235
+ return Config(config)
236
+
237
+ @staticmethod
238
+ @functools.cache
239
+ @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
240
+ def get_default_assistant_prompt():
241
+ config = ConfigHelper.get_default_config()
242
+ return config["prompts"]["answering_user_prompt"]
243
+
244
+ @staticmethod
245
+ @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
246
+ def save_config_as_active(config):
247
+ ConfigHelper.validate_config(config)
248
+ blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
249
+ blob_client = blob_client.upload_file(
250
+ json.dumps(config, indent=2),
251
+ CONFIG_FILE_NAME,
252
+ content_type="application/json",
253
+ )
254
+ ConfigHelper.get_active_config_or_default.cache_clear()
255
+
256
+ @staticmethod
257
+ @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
258
+ def validate_config(config: dict):
259
+ for document_processor in config.get("document_processors"):
260
+ document_type = document_processor.get("document_type")
261
+ unsupported_advanced_image_processing_file_type = (
262
+ document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES
263
+ )
264
+ if (
265
+ document_processor.get("use_advanced_image_processing")
266
+ and unsupported_advanced_image_processing_file_type
267
+ ):
268
+ raise Exception(
269
+ f"Advanced image processing has not been enabled for document type {document_type}, as only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file types are supported."
270
+ )
271
+
272
+ @staticmethod
273
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
274
+ def get_default_config():
275
+ if ConfigHelper._default_config is None:
276
+ env_helper = EnvHelper()
277
+
278
+ config_file_path = os.path.join(os.path.dirname(__file__), "default.json")
279
+ logger.info("Loading default config from %s", config_file_path)
280
+ with open(config_file_path, encoding="utf-8") as f:
281
+ ConfigHelper._default_config = json.loads(
282
+ Template(f.read()).substitute(
283
+ ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
284
+ LOG_USER_INTERACTIONS=(
285
+ False
286
+ if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
287
+ else True
288
+ ),
289
+ LOG_TOKENS=(
290
+ False
291
+ if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
292
+ else True
293
+ ),
294
+ CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
295
+ DATABASE_TYPE=env_helper.DATABASE_TYPE,
296
+ )
297
+ )
298
+ if env_helper.USE_ADVANCED_IMAGE_PROCESSING:
299
+ ConfigHelper._append_advanced_image_processors()
300
+
301
+ return ConfigHelper._default_config
302
+
303
+ @staticmethod
304
+ @functools.cache
305
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
306
+ def get_default_contract_assistant():
307
+ contract_file_path = os.path.join(
308
+ os.path.dirname(__file__), "default_contract_assistant_prompt.txt"
309
+ )
310
+ contract_assistant = ""
311
+ with open(contract_file_path, encoding="utf-8") as f:
312
+ contract_assistant = f.readlines()
313
+
314
+ return "".join([str(elem) for elem in contract_assistant])
315
+
316
+ @staticmethod
317
+ @functools.cache
318
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
319
+ def get_default_employee_assistant():
320
+ employee_file_path = os.path.join(
321
+ os.path.dirname(__file__), "default_employee_assistant_prompt.txt"
322
+ )
323
+ employee_assistant = ""
324
+ with open(employee_file_path, encoding="utf-8") as f:
325
+ employee_assistant = f.readlines()
326
+
327
+ return "".join([str(elem) for elem in employee_assistant])
328
+
329
+ @staticmethod
330
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
331
+ def clear_config():
332
+ ConfigHelper._default_config = None
333
+ ConfigHelper.get_active_config_or_default.cache_clear()
334
+
335
+ @staticmethod
336
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
337
+ def _append_advanced_image_processors():
338
+ image_file_types = ["jpeg", "jpg", "png", "tiff", "bmp"]
339
+ ConfigHelper._remove_processors_for_file_types(image_file_types)
340
+ ConfigHelper._default_config["document_processors"].extend(
341
+ [
342
+ {"document_type": file_type, "use_advanced_image_processing": True}
343
+ for file_type in image_file_types
344
+ ]
345
+ )
346
+
347
+ @staticmethod
348
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
349
+ def _remove_processors_for_file_types(file_types: list[str]):
350
+ document_processors = ConfigHelper._default_config["document_processors"]
351
+ document_processors = [
352
+ document_processor
353
+ for document_processor in document_processors
354
+ if document_processor["document_type"] not in file_types
355
+ ]
356
+ ConfigHelper._default_config["document_processors"] = document_processors
357
+
358
+ @staticmethod
359
+ @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
360
+ def delete_config():
361
+ blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
362
+ blob_client.delete_file(CONFIG_FILE_NAME)