cwyodmodules 0.3.44__py3-none-any.whl → 0.3.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/__init__.py +0 -0
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +362 -362
- cwyodmodules/batch/utilities/helpers/config/default.json +148 -148
- cwyodmodules/batch/utilities/helpers/secret_helper.py +79 -80
- cwyodmodules/graphrag/indexing/extraction.py +237 -230
- cwyodmodules/graphrag/main.py +34 -34
- cwyodmodules/graphrag/query/generate.py +106 -91
- {cwyodmodules-0.3.44.dist-info → cwyodmodules-0.3.45.dist-info}/METADATA +1 -1
- {cwyodmodules-0.3.44.dist-info → cwyodmodules-0.3.45.dist-info}/RECORD +12 -11
- {cwyodmodules-0.3.44.dist-info → cwyodmodules-0.3.45.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.44.dist-info → cwyodmodules-0.3.45.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.44.dist-info → cwyodmodules-0.3.45.dist-info}/top_level.txt +0 -0
@@ -1,362 +1,362 @@
|
|
1
|
-
import os
|
2
|
-
import json
|
3
|
-
import functools
|
4
|
-
from string import Template
|
5
|
-
|
6
|
-
from ..azure_blob_storage_client import AzureBlobStorageClient
|
7
|
-
from ...document_chunking.chunking_strategy import ChunkingStrategy, ChunkingSettings
|
8
|
-
from ...document_loading import LoadingSettings, LoadingStrategy
|
9
|
-
from .embedding_config import EmbeddingConfig
|
10
|
-
from ...orchestrator.orchestration_strategy import OrchestrationStrategy
|
11
|
-
from ...orchestrator import OrchestrationSettings
|
12
|
-
from ..env_helper import EnvHelper
|
13
|
-
from .assistant_strategy import AssistantStrategy
|
14
|
-
from .conversation_flow import ConversationFlow
|
15
|
-
from .database_type import DatabaseType
|
16
|
-
from .agent_mode import AgentMode
|
17
|
-
|
18
|
-
CONFIG_CONTAINER_NAME = "config"
|
19
|
-
CONFIG_FILE_NAME = "active.json"
|
20
|
-
ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"]
|
21
|
-
|
22
|
-
from mgmt_config import logger
|
23
|
-
env_helper: EnvHelper = EnvHelper()
|
24
|
-
log_execution = env_helper.LOG_EXECUTION
|
25
|
-
log_args = env_helper.LOG_ARGS
|
26
|
-
log_result = env_helper.LOG_RESULT
|
27
|
-
|
28
|
-
|
29
|
-
class Config:
|
30
|
-
def __init__(self, config: dict):
|
31
|
-
self.prompts = Prompts(config["prompts"])
|
32
|
-
self.messages = Messages(config["messages"])
|
33
|
-
self.example = Example(config["example"])
|
34
|
-
self.logging = Logging(config["logging"])
|
35
|
-
self.document_processors = [
|
36
|
-
EmbeddingConfig(
|
37
|
-
document_type=c["document_type"],
|
38
|
-
chunking=ChunkingSettings(c["chunking"]),
|
39
|
-
loading=LoadingSettings(c["loading"]),
|
40
|
-
use_advanced_image_processing=c.get(
|
41
|
-
"use_advanced_image_processing", False
|
42
|
-
),
|
43
|
-
)
|
44
|
-
for c in config["document_processors"]
|
45
|
-
]
|
46
|
-
self.env_helper = EnvHelper()
|
47
|
-
self.default_orchestration_settings = {
|
48
|
-
"strategy": self.env_helper.ORCHESTRATION_STRATEGY
|
49
|
-
}
|
50
|
-
self.orchestrator = OrchestrationSettings(
|
51
|
-
config.get("orchestrator", self.default_orchestration_settings)
|
52
|
-
)
|
53
|
-
self.integrated_vectorization_config = (
|
54
|
-
IntegratedVectorizationConfig(config["integrated_vectorization_config"])
|
55
|
-
if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
|
56
|
-
else None
|
57
|
-
)
|
58
|
-
self.enable_chat_history = config["enable_chat_history"]
|
59
|
-
self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
|
60
|
-
self.conversational_flow = config.get(
|
61
|
-
"conversational_flow", self.env_helper.CONVERSATION_FLOW
|
62
|
-
)
|
63
|
-
self.agent_mode = config.get("agent_mode", AgentMode.NORMAL.value)
|
64
|
-
|
65
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
66
|
-
def get_available_document_types(self) -> list[str]:
|
67
|
-
document_types = {
|
68
|
-
"txt",
|
69
|
-
"pdf",
|
70
|
-
"url",
|
71
|
-
"html",
|
72
|
-
"htm",
|
73
|
-
"md",
|
74
|
-
"jpeg",
|
75
|
-
"jpg",
|
76
|
-
"png",
|
77
|
-
"docx",
|
78
|
-
}
|
79
|
-
if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
|
80
|
-
document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES)
|
81
|
-
|
82
|
-
return sorted(document_types)
|
83
|
-
|
84
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
85
|
-
def get_advanced_image_processing_image_types(self):
|
86
|
-
return ADVANCED_IMAGE_PROCESSING_FILE_TYPES
|
87
|
-
|
88
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
89
|
-
def get_available_chunking_strategies(self):
|
90
|
-
return [c.value for c in ChunkingStrategy]
|
91
|
-
|
92
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
93
|
-
def get_available_loading_strategies(self):
|
94
|
-
return [c.value for c in LoadingStrategy]
|
95
|
-
|
96
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
97
|
-
def get_available_orchestration_strategies(self):
|
98
|
-
return [c.value for c in OrchestrationStrategy]
|
99
|
-
|
100
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
101
|
-
def get_available_ai_assistant_types(self):
|
102
|
-
return [c.value for c in AssistantStrategy]
|
103
|
-
|
104
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
105
|
-
def get_available_conversational_flows(self):
|
106
|
-
return [c.value for c in ConversationFlow]
|
107
|
-
|
108
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
109
|
-
def get_available_agent_modes(self):
|
110
|
-
return [c.value for c in AgentMode]
|
111
|
-
|
112
|
-
|
113
|
-
# TODO: Change to AnsweringChain or something, Prompts is not a good name
|
114
|
-
class Prompts:
|
115
|
-
def __init__(self, prompts: dict):
|
116
|
-
self.condense_question_prompt = prompts["condense_question_prompt"]
|
117
|
-
self.answering_system_prompt = prompts["answering_system_prompt"]
|
118
|
-
self.answering_user_prompt = prompts["answering_user_prompt"]
|
119
|
-
self.post_answering_prompt = prompts["post_answering_prompt"]
|
120
|
-
self.use_on_your_data_format = prompts["use_on_your_data_format"]
|
121
|
-
self.enable_post_answering_prompt = prompts["enable_post_answering_prompt"]
|
122
|
-
self.enable_content_safety = prompts["enable_content_safety"]
|
123
|
-
self.ai_assistant_type = prompts["ai_assistant_type"]
|
124
|
-
self.conversational_flow = prompts["conversational_flow"]
|
125
|
-
|
126
|
-
|
127
|
-
class Example:
|
128
|
-
def __init__(self, example: dict):
|
129
|
-
self.documents = example["documents"]
|
130
|
-
self.user_question = example["user_question"]
|
131
|
-
self.answer = example["answer"]
|
132
|
-
|
133
|
-
|
134
|
-
class Messages:
|
135
|
-
def __init__(self, messages: dict):
|
136
|
-
self.post_answering_filter = messages["post_answering_filter"]
|
137
|
-
|
138
|
-
|
139
|
-
class Logging:
|
140
|
-
def __init__(self, logging: dict):
|
141
|
-
self.log_user_interactions = (
|
142
|
-
str(logging["log_user_interactions"]).lower() == "true"
|
143
|
-
)
|
144
|
-
self.log_tokens = str(logging["log_tokens"]).lower() == "true"
|
145
|
-
|
146
|
-
|
147
|
-
class IntegratedVectorizationConfig:
|
148
|
-
def __init__(self, integrated_vectorization_config: dict):
|
149
|
-
self.max_page_length = integrated_vectorization_config["max_page_length"]
|
150
|
-
self.page_overlap_length = integrated_vectorization_config[
|
151
|
-
"page_overlap_length"
|
152
|
-
]
|
153
|
-
|
154
|
-
|
155
|
-
class ConfigHelper:
|
156
|
-
_default_config = None
|
157
|
-
|
158
|
-
@staticmethod
|
159
|
-
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
160
|
-
def _set_new_config_properties(config: dict, default_config: dict):
|
161
|
-
"""
|
162
|
-
Function used to set newer properties that will not be present in older configs.
|
163
|
-
The function mutates the config object.
|
164
|
-
"""
|
165
|
-
if config["prompts"].get("answering_system_prompt") is None:
|
166
|
-
config["prompts"]["answering_system_prompt"] = default_config["prompts"][
|
167
|
-
"answering_system_prompt"
|
168
|
-
]
|
169
|
-
|
170
|
-
prompt_modified = (
|
171
|
-
config["prompts"].get("answering_prompt")
|
172
|
-
!= default_config["prompts"]["answering_prompt"]
|
173
|
-
)
|
174
|
-
|
175
|
-
if config["prompts"].get("answering_user_prompt") is None:
|
176
|
-
if prompt_modified:
|
177
|
-
config["prompts"]["answering_user_prompt"] = config["prompts"].get(
|
178
|
-
"answering_prompt"
|
179
|
-
)
|
180
|
-
else:
|
181
|
-
config["prompts"]["answering_user_prompt"] = default_config["prompts"][
|
182
|
-
"answering_user_prompt"
|
183
|
-
]
|
184
|
-
|
185
|
-
if config["prompts"].get("use_on_your_data_format") is None:
|
186
|
-
config["prompts"]["use_on_your_data_format"] = not prompt_modified
|
187
|
-
|
188
|
-
if config.get("example") is None:
|
189
|
-
config["example"] = default_config["example"]
|
190
|
-
|
191
|
-
if config["prompts"].get("ai_assistant_type") is None:
|
192
|
-
config["prompts"]["ai_assistant_type"] = default_config["prompts"][
|
193
|
-
"ai_assistant_type"
|
194
|
-
]
|
195
|
-
|
196
|
-
if config.get("integrated_vectorization_config") is None:
|
197
|
-
config["integrated_vectorization_config"] = default_config[
|
198
|
-
"integrated_vectorization_config"
|
199
|
-
]
|
200
|
-
|
201
|
-
if config["prompts"].get("conversational_flow") is None:
|
202
|
-
config["prompts"]["conversational_flow"] = default_config["prompts"][
|
203
|
-
"conversational_flow"
|
204
|
-
]
|
205
|
-
if config.get("enable_chat_history") is None:
|
206
|
-
config["enable_chat_history"] = default_config["enable_chat_history"]
|
207
|
-
|
208
|
-
if config.get("agent_mode") is None:
|
209
|
-
config["agent_mode"] = default_config["agent_mode"]
|
210
|
-
|
211
|
-
@staticmethod
|
212
|
-
@functools.cache
|
213
|
-
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
214
|
-
def get_active_config_or_default():
|
215
|
-
logger.info("Method get_active_config_or_default started")
|
216
|
-
env_helper = EnvHelper()
|
217
|
-
config = ConfigHelper.get_default_config()
|
218
|
-
if env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE:
|
219
|
-
logger.info("Loading configuration from Blob Storage")
|
220
|
-
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
|
221
|
-
|
222
|
-
if blob_client.file_exists(CONFIG_FILE_NAME):
|
223
|
-
logger.info("Configuration file found in Blob Storage")
|
224
|
-
default_config = config
|
225
|
-
config_file = blob_client.download_file(CONFIG_FILE_NAME)
|
226
|
-
config = json.loads(config_file)
|
227
|
-
|
228
|
-
ConfigHelper._set_new_config_properties(config, default_config)
|
229
|
-
else:
|
230
|
-
logger.info(
|
231
|
-
"Configuration file not found in Blob Storage, using default configuration"
|
232
|
-
)
|
233
|
-
|
234
|
-
logger.info("Method get_active_config_or_default ended")
|
235
|
-
return Config(config)
|
236
|
-
|
237
|
-
@staticmethod
|
238
|
-
@functools.cache
|
239
|
-
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
240
|
-
def get_default_assistant_prompt():
|
241
|
-
config = ConfigHelper.get_default_config()
|
242
|
-
return config["prompts"]["answering_user_prompt"]
|
243
|
-
|
244
|
-
@staticmethod
|
245
|
-
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
246
|
-
def save_config_as_active(config):
|
247
|
-
ConfigHelper.validate_config(config)
|
248
|
-
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
|
249
|
-
blob_client = blob_client.upload_file(
|
250
|
-
json.dumps(config, indent=2),
|
251
|
-
CONFIG_FILE_NAME,
|
252
|
-
content_type="application/json",
|
253
|
-
)
|
254
|
-
ConfigHelper.get_active_config_or_default.cache_clear()
|
255
|
-
|
256
|
-
@staticmethod
|
257
|
-
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
|
258
|
-
def validate_config(config: dict):
|
259
|
-
for document_processor in config.get("document_processors"):
|
260
|
-
document_type = document_processor.get("document_type")
|
261
|
-
unsupported_advanced_image_processing_file_type = (
|
262
|
-
document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES
|
263
|
-
)
|
264
|
-
if (
|
265
|
-
document_processor.get("use_advanced_image_processing")
|
266
|
-
and unsupported_advanced_image_processing_file_type
|
267
|
-
):
|
268
|
-
raise Exception(
|
269
|
-
f"Advanced image processing has not been enabled for document type {document_type}, as only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file types are supported."
|
270
|
-
)
|
271
|
-
|
272
|
-
@staticmethod
|
273
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
274
|
-
def get_default_config():
|
275
|
-
if ConfigHelper._default_config is None:
|
276
|
-
env_helper = EnvHelper()
|
277
|
-
|
278
|
-
config_file_path = os.path.join(os.path.dirname(__file__), "default.json")
|
279
|
-
logger.info("Loading default config from %s", config_file_path)
|
280
|
-
with open(config_file_path, encoding="utf-8") as f:
|
281
|
-
ConfigHelper._default_config = json.loads(
|
282
|
-
Template(f.read()).substitute(
|
283
|
-
ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
|
284
|
-
LOG_USER_INTERACTIONS=(
|
285
|
-
False
|
286
|
-
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
|
287
|
-
else True
|
288
|
-
),
|
289
|
-
LOG_TOKENS=(
|
290
|
-
False
|
291
|
-
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
|
292
|
-
else True
|
293
|
-
),
|
294
|
-
CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
|
295
|
-
DATABASE_TYPE=env_helper.DATABASE_TYPE,
|
296
|
-
)
|
297
|
-
)
|
298
|
-
if env_helper.USE_ADVANCED_IMAGE_PROCESSING:
|
299
|
-
ConfigHelper._append_advanced_image_processors()
|
300
|
-
|
301
|
-
return ConfigHelper._default_config
|
302
|
-
|
303
|
-
@staticmethod
|
304
|
-
@functools.cache
|
305
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
306
|
-
def get_default_contract_assistant():
|
307
|
-
contract_file_path = os.path.join(
|
308
|
-
os.path.dirname(__file__), "default_contract_assistant_prompt.txt"
|
309
|
-
)
|
310
|
-
contract_assistant = ""
|
311
|
-
with open(contract_file_path, encoding="utf-8") as f:
|
312
|
-
contract_assistant = f.readlines()
|
313
|
-
|
314
|
-
return "".join([str(elem) for elem in contract_assistant])
|
315
|
-
|
316
|
-
@staticmethod
|
317
|
-
@functools.cache
|
318
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
319
|
-
def get_default_employee_assistant():
|
320
|
-
employee_file_path = os.path.join(
|
321
|
-
os.path.dirname(__file__), "default_employee_assistant_prompt.txt"
|
322
|
-
)
|
323
|
-
employee_assistant = ""
|
324
|
-
with open(employee_file_path, encoding="utf-8") as f:
|
325
|
-
employee_assistant = f.readlines()
|
326
|
-
|
327
|
-
return "".join([str(elem) for elem in employee_assistant])
|
328
|
-
|
329
|
-
@staticmethod
|
330
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
331
|
-
def clear_config():
|
332
|
-
ConfigHelper._default_config = None
|
333
|
-
ConfigHelper.get_active_config_or_default.cache_clear()
|
334
|
-
|
335
|
-
@staticmethod
|
336
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
337
|
-
def _append_advanced_image_processors():
|
338
|
-
image_file_types = ["jpeg", "jpg", "png", "tiff", "bmp"]
|
339
|
-
ConfigHelper._remove_processors_for_file_types(image_file_types)
|
340
|
-
ConfigHelper._default_config["document_processors"].extend(
|
341
|
-
[
|
342
|
-
{"document_type": file_type, "use_advanced_image_processing": True}
|
343
|
-
for file_type in image_file_types
|
344
|
-
]
|
345
|
-
)
|
346
|
-
|
347
|
-
@staticmethod
|
348
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
349
|
-
def _remove_processors_for_file_types(file_types: list[str]):
|
350
|
-
document_processors = ConfigHelper._default_config["document_processors"]
|
351
|
-
document_processors = [
|
352
|
-
document_processor
|
353
|
-
for document_processor in document_processors
|
354
|
-
if document_processor["document_type"] not in file_types
|
355
|
-
]
|
356
|
-
ConfigHelper._default_config["document_processors"] = document_processors
|
357
|
-
|
358
|
-
@staticmethod
|
359
|
-
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
360
|
-
def delete_config():
|
361
|
-
blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
|
362
|
-
blob_client.delete_file(CONFIG_FILE_NAME)
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import functools
|
4
|
+
from string import Template
|
5
|
+
|
6
|
+
from ..azure_blob_storage_client import AzureBlobStorageClient
|
7
|
+
from ...document_chunking.chunking_strategy import ChunkingStrategy, ChunkingSettings
|
8
|
+
from ...document_loading import LoadingSettings, LoadingStrategy
|
9
|
+
from .embedding_config import EmbeddingConfig
|
10
|
+
from ...orchestrator.orchestration_strategy import OrchestrationStrategy
|
11
|
+
from ...orchestrator import OrchestrationSettings
|
12
|
+
from ..env_helper import EnvHelper
|
13
|
+
from .assistant_strategy import AssistantStrategy
|
14
|
+
from .conversation_flow import ConversationFlow
|
15
|
+
from .database_type import DatabaseType
|
16
|
+
from .agent_mode import AgentMode
|
17
|
+
|
18
|
+
# Blob Storage container that holds the persisted ("active") configuration.
CONFIG_CONTAINER_NAME = "config"
# Name of the blob, inside CONFIG_CONTAINER_NAME, that stores the active configuration.
CONFIG_FILE_NAME = "active.json"
# Document types for which advanced image processing may be enabled
# (checked by ConfigHelper.validate_config).
ADVANCED_IMAGE_PROCESSING_FILE_TYPES = ["jpeg", "jpg", "png", "tiff", "bmp"]
|
21
|
+
|
22
|
+
from mgmt_config import logger
|
23
|
+
# Module-level environment helper, read here for the tracing flags below.
env_helper: EnvHelper = EnvHelper()
# Flags forwarded to logger.trace_function by the decorators in this module.
log_execution = env_helper.LOG_EXECUTION
log_args = env_helper.LOG_ARGS
log_result = env_helper.LOG_RESULT
|
27
|
+
|
28
|
+
|
29
|
+
class Config:
    """Object view over a parsed configuration dictionary.

    Each configuration section is wrapped in its own small class (``Prompts``,
    ``Messages``, ``Example``, ``Logging``, ...). A few top-level settings fall
    back to values read from ``EnvHelper`` when the dictionary omits them.
    """

    def __init__(self, config: dict):
        """Build every configuration section from ``config``.

        Args:
            config: Parsed configuration. The keys ``prompts``, ``messages``,
                ``example``, ``logging``, ``document_processors`` and
                ``enable_chat_history`` are always read with ``[]``;
                ``integrated_vectorization_config`` is read with ``[]`` only
                when integrated vectorization is enabled in the environment.

        Raises:
            KeyError: If one of the keys read with ``[]`` is missing.
        """
        self.prompts = Prompts(config["prompts"])
        self.messages = Messages(config["messages"])
        self.example = Example(config["example"])
        self.logging = Logging(config["logging"])
        # NOTE(review): every processor entry must carry "chunking" and
        # "loading"; the entries built by
        # ConfigHelper._append_advanced_image_processors contain neither --
        # confirm they are completed before reaching this constructor.
        self.document_processors = [
            EmbeddingConfig(
                document_type=c["document_type"],
                chunking=ChunkingSettings(c["chunking"]),
                loading=LoadingSettings(c["loading"]),
                use_advanced_image_processing=c.get(
                    "use_advanced_image_processing", False
                ),
            )
            for c in config["document_processors"]
        ]
        self.env_helper = EnvHelper()
        # Used when the configuration has no "orchestrator" section.
        self.default_orchestration_settings = {
            "strategy": self.env_helper.ORCHESTRATION_STRATEGY
        }
        self.orchestrator = OrchestrationSettings(
            config.get("orchestrator", self.default_orchestration_settings)
        )
        # Only materialised when integrated vectorization is switched on.
        self.integrated_vectorization_config = (
            IntegratedVectorizationConfig(config["integrated_vectorization_config"])
            if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
            else None
        )
        self.enable_chat_history = config["enable_chat_history"]
        self.database_type = config.get("database_type", self.env_helper.DATABASE_TYPE)
        self.conversational_flow = config.get(
            "conversational_flow", self.env_helper.CONVERSATION_FLOW
        )
        self.agent_mode = config.get("agent_mode", AgentMode.NORMAL.value)

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_document_types(self) -> list[str]:
        """Return the supported document types, sorted alphabetically.

        The advanced image processing file types are added to the base set
        when ``USE_ADVANCED_IMAGE_PROCESSING`` is enabled in the environment.
        """
        document_types = {
            "txt",
            "pdf",
            "url",
            "html",
            "htm",
            "md",
            "jpeg",
            "jpg",
            "png",
            "docx",
        }
        if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
            document_types.update(ADVANCED_IMAGE_PROCESSING_FILE_TYPES)

        return sorted(document_types)

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_advanced_image_processing_image_types(self) -> list[str]:
        """Return the module-level list of advanced image processing file types.

        The shared list object itself is returned, not a copy.
        """
        return ADVANCED_IMAGE_PROCESSING_FILE_TYPES

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_chunking_strategies(self) -> list:
        """Return the value of every ``ChunkingStrategy`` member."""
        return [c.value for c in ChunkingStrategy]

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_loading_strategies(self) -> list:
        """Return the value of every ``LoadingStrategy`` member."""
        return [c.value for c in LoadingStrategy]

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_orchestration_strategies(self) -> list:
        """Return the value of every ``OrchestrationStrategy`` member."""
        return [c.value for c in OrchestrationStrategy]

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_ai_assistant_types(self) -> list:
        """Return the value of every ``AssistantStrategy`` member."""
        return [c.value for c in AssistantStrategy]

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_conversational_flows(self) -> list:
        """Return the value of every ``ConversationFlow`` member."""
        return [c.value for c in ConversationFlow]

    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def get_available_agent_modes(self) -> list:
        """Return the value of every ``AgentMode`` member."""
        return [c.value for c in AgentMode]
|
111
|
+
|
112
|
+
|
113
|
+
# TODO: Change to AnsweringChain or something, Prompts is not a good name
|
114
|
+
class Prompts:
    """Attribute view over the ``prompts`` section of the configuration."""

    def __init__(self, prompts: dict):
        """Copy every prompt setting from ``prompts`` onto the instance.

        Args:
            prompts: The ``prompts`` section; all nine keys read below are
                required.

        Raises:
            KeyError: If any of the keys is missing.
        """
        self.condense_question_prompt = prompts["condense_question_prompt"]
        self.answering_system_prompt = prompts["answering_system_prompt"]
        self.answering_user_prompt = prompts["answering_user_prompt"]
        self.post_answering_prompt = prompts["post_answering_prompt"]
        self.use_on_your_data_format = prompts["use_on_your_data_format"]
        self.enable_post_answering_prompt = prompts["enable_post_answering_prompt"]
        self.enable_content_safety = prompts["enable_content_safety"]
        self.ai_assistant_type = prompts["ai_assistant_type"]
        self.conversational_flow = prompts["conversational_flow"]
|
125
|
+
|
126
|
+
|
127
|
+
class Example:
    """Attribute view over the ``example`` section of the configuration."""

    def __init__(self, example: dict):
        """Copy the example fields from ``example`` onto the instance.

        Args:
            example: The ``example`` section; ``documents``, ``user_question``
                and ``answer`` are required.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        self.documents = example["documents"]
        self.user_question = example["user_question"]
        self.answer = example["answer"]
|
132
|
+
|
133
|
+
|
134
|
+
class Messages:
    """Attribute view over the ``messages`` section of the configuration."""

    def __init__(self, messages: dict):
        """Copy ``post_answering_filter`` from ``messages`` onto the instance.

        Raises:
            KeyError: If ``post_answering_filter`` is missing.
        """
        self.post_answering_filter = messages["post_answering_filter"]
|
137
|
+
|
138
|
+
|
139
|
+
class Logging:
    """Boolean view over the ``logging`` section of the configuration."""

    def __init__(self, logging: dict):
        """Normalise the two logging switches to real booleans.

        Each value is converted with ``str(...).lower()`` and compared to
        ``"true"``, so ``True``, ``"True"`` and ``"true"`` all enable a switch
        and every other value disables it.

        Args:
            logging: The ``logging`` section; ``log_user_interactions`` and
                ``log_tokens`` are required.

        Raises:
            KeyError: If either key is missing.
        """
        self.log_user_interactions = (
            str(logging["log_user_interactions"]).lower() == "true"
        )
        self.log_tokens = str(logging["log_tokens"]).lower() == "true"
|
145
|
+
|
146
|
+
|
147
|
+
class IntegratedVectorizationConfig:
    """Attribute view over the ``integrated_vectorization_config`` section."""

    def __init__(self, integrated_vectorization_config: dict):
        """Copy the page sizing settings onto the instance.

        Args:
            integrated_vectorization_config: The section dictionary;
                ``max_page_length`` and ``page_overlap_length`` are required.

        Raises:
            KeyError: If either key is missing.
        """
        self.max_page_length = integrated_vectorization_config["max_page_length"]
        self.page_overlap_length = integrated_vectorization_config[
            "page_overlap_length"
        ]
|
153
|
+
|
154
|
+
|
155
|
+
class ConfigHelper:
    """Static helpers to load, validate, persist and reset the configuration."""

    # Parsed default.json; loaded lazily by get_default_config() and reset by
    # clear_config().
    _default_config = None

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
    def _set_new_config_properties(config: dict, default_config: dict):
        """Fill in properties that older stored configurations do not contain.

        ``config`` is mutated in place; ``default_config`` supplies the
        fallback values.

        Args:
            config: Stored configuration to upgrade. Must contain ``prompts``.
            default_config: Default configuration used as the fallback source.
        """
        prompts = config["prompts"]
        default_prompts = default_config["prompts"]

        if prompts.get("answering_system_prompt") is None:
            prompts["answering_system_prompt"] = default_prompts[
                "answering_system_prompt"
            ]

        # A legacy "answering_prompt" that differs from the default one is
        # treated as a user customisation and carried over below.
        prompt_modified = (
            prompts.get("answering_prompt") != default_prompts["answering_prompt"]
        )

        if prompts.get("answering_user_prompt") is None:
            if prompt_modified:
                prompts["answering_user_prompt"] = prompts.get("answering_prompt")
            else:
                prompts["answering_user_prompt"] = default_prompts[
                    "answering_user_prompt"
                ]

        if prompts.get("use_on_your_data_format") is None:
            prompts["use_on_your_data_format"] = not prompt_modified

        if config.get("example") is None:
            config["example"] = default_config["example"]

        if prompts.get("ai_assistant_type") is None:
            prompts["ai_assistant_type"] = default_prompts["ai_assistant_type"]

        if config.get("integrated_vectorization_config") is None:
            config["integrated_vectorization_config"] = default_config[
                "integrated_vectorization_config"
            ]

        if prompts.get("conversational_flow") is None:
            prompts["conversational_flow"] = default_prompts["conversational_flow"]

        if config.get("enable_chat_history") is None:
            config["enable_chat_history"] = default_config["enable_chat_history"]

        if config.get("agent_mode") is None:
            config["agent_mode"] = default_config["agent_mode"]

    @staticmethod
    @functools.cache
    @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
    def get_active_config_or_default():
        """Return the active configuration, falling back to the default one.

        When ``LOAD_CONFIG_FROM_BLOB_STORAGE`` is enabled and the active
        configuration blob exists, it is downloaded, parsed and upgraded with
        any missing newer properties. Otherwise the default configuration is
        used. The result is cached until ``cache_clear()`` is called on this
        function.

        Returns:
            Config: Wrapper around the selected configuration dictionary.
        """
        logger.info("Method get_active_config_or_default started")
        env_helper = EnvHelper()
        config = ConfigHelper.get_default_config()
        if env_helper.LOAD_CONFIG_FROM_BLOB_STORAGE:
            logger.info("Loading configuration from Blob Storage")
            blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)

            if blob_client.file_exists(CONFIG_FILE_NAME):
                logger.info("Configuration file found in Blob Storage")
                default_config = config
                config_file = blob_client.download_file(CONFIG_FILE_NAME)
                config = json.loads(config_file)

                ConfigHelper._set_new_config_properties(config, default_config)
            else:
                logger.info(
                    "Configuration file not found in Blob Storage, using default configuration"
                )

        logger.info("Method get_active_config_or_default ended")
        return Config(config)

    @staticmethod
    @functools.cache
    @logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
    def get_default_assistant_prompt():
        """Return ``answering_user_prompt`` from the default configuration (cached)."""
        config = ConfigHelper.get_default_config()
        return config["prompts"]["answering_user_prompt"]

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
    def save_config_as_active(config):
        """Validate ``config`` and upload it as the active configuration.

        The cached active configuration is dropped afterwards so the next read
        picks up the saved one.

        Args:
            config: JSON-serialisable configuration dictionary.

        Raises:
            Exception: Propagated from ``validate_config`` for an invalid
                document processor.
        """
        ConfigHelper.validate_config(config)
        blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
        # The return value of upload_file is not needed here; it used to be
        # bound over ``blob_client`` for no purpose.
        blob_client.upload_file(
            json.dumps(config, indent=2),
            CONFIG_FILE_NAME,
            content_type="application/json",
        )
        ConfigHelper.get_active_config_or_default.cache_clear()

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=False, log_result=log_result)
    def validate_config(config: dict):
        """Reject advanced image processing on unsupported document types.

        Args:
            config: Configuration dictionary. A missing or empty
                ``document_processors`` entry means there is nothing to check.

        Raises:
            Exception: If a processor enables ``use_advanced_image_processing``
                for a type outside ``ADVANCED_IMAGE_PROCESSING_FILE_TYPES``.
        """
        # ``or []`` keeps a missing key from failing with an unrelated
        # "NoneType is not iterable" TypeError.
        for document_processor in config.get("document_processors") or []:
            if not document_processor.get("use_advanced_image_processing"):
                continue
            document_type = document_processor.get("document_type")
            if document_type not in ADVANCED_IMAGE_PROCESSING_FILE_TYPES:
                raise Exception(
                    f"Advanced image processing has not been enabled for document type {document_type}, as only {ADVANCED_IMAGE_PROCESSING_FILE_TYPES} file types are supported."
                )

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
    def get_default_config():
        """Return the default configuration, loading it on first use.

        ``default.json`` (next to this module) is treated as a
        ``string.Template``; its placeholders are substituted with values read
        from ``EnvHelper`` before the text is parsed as JSON. The parsed
        dictionary is stored on the class and reused until ``clear_config()``.

        Returns:
            dict: The shared default configuration dictionary (not a copy).
        """
        if ConfigHelper._default_config is None:
            env_helper = EnvHelper()
            # Both logging switches default to off for PostgreSQL and on for
            # every other database type.
            logging_enabled = (
                env_helper.DATABASE_TYPE != DatabaseType.POSTGRESQL.value
            )

            config_file_path = os.path.join(os.path.dirname(__file__), "default.json")
            logger.info("Loading default config from %s", config_file_path)
            with open(config_file_path, encoding="utf-8") as f:
                ConfigHelper._default_config = json.loads(
                    Template(f.read()).substitute(
                        ORCHESTRATION_STRATEGY=env_helper.ORCHESTRATION_STRATEGY,
                        LOG_USER_INTERACTIONS=logging_enabled,
                        LOG_TOKENS=logging_enabled,
                        CONVERSATION_FLOW=env_helper.CONVERSATION_FLOW,
                        DATABASE_TYPE=env_helper.DATABASE_TYPE,
                    )
                )
                if env_helper.USE_ADVANCED_IMAGE_PROCESSING:
                    ConfigHelper._append_advanced_image_processors()

        return ConfigHelper._default_config

    @staticmethod
    def _read_sibling_text_file(file_name: str) -> str:
        """Return the UTF-8 text of ``file_name`` located next to this module."""
        file_path = os.path.join(os.path.dirname(__file__), file_name)
        with open(file_path, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    @functools.cache
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
    def get_default_contract_assistant():
        """Return the text of ``default_contract_assistant_prompt.txt`` (cached)."""
        return ConfigHelper._read_sibling_text_file(
            "default_contract_assistant_prompt.txt"
        )

    @staticmethod
    @functools.cache
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
    def get_default_employee_assistant():
        """Return the text of ``default_employee_assistant_prompt.txt`` (cached)."""
        return ConfigHelper._read_sibling_text_file(
            "default_employee_assistant_prompt.txt"
        )

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def clear_config():
        """Forget the loaded default configuration and everything cached from it."""
        ConfigHelper._default_config = None
        ConfigHelper.get_active_config_or_default.cache_clear()
        # The cached default prompt is derived from the default configuration,
        # so it has to be dropped together with it.
        ConfigHelper.get_default_assistant_prompt.cache_clear()

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def _append_advanced_image_processors():
        """Replace the image processors of the default config with advanced ones.

        Existing processors for the advanced image file types are removed and
        one processor per type is appended with
        ``use_advanced_image_processing`` set to ``True``.
        """
        ConfigHelper._remove_processors_for_file_types(
            ADVANCED_IMAGE_PROCESSING_FILE_TYPES
        )
        # NOTE(review): these entries carry no "chunking"/"loading" keys,
        # which Config.__init__ reads with [] -- confirm they are filled in
        # before a Config is built from the default configuration.
        ConfigHelper._default_config["document_processors"].extend(
            [
                {"document_type": file_type, "use_advanced_image_processing": True}
                for file_type in ADVANCED_IMAGE_PROCESSING_FILE_TYPES
            ]
        )

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def _remove_processors_for_file_types(file_types: list[str]):
        """Drop default-config processors whose ``document_type`` is in ``file_types``.

        The ``document_processors`` entry is rebound to a new list; the
        previous list object is not modified.
        """
        ConfigHelper._default_config["document_processors"] = [
            document_processor
            for document_processor in ConfigHelper._default_config[
                "document_processors"
            ]
            if document_processor["document_type"] not in file_types
        ]

    @staticmethod
    @logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
    def delete_config():
        """Delete the active configuration blob and drop the cached active config."""
        blob_client = AzureBlobStorageClient(container_name=CONFIG_CONTAINER_NAME)
        blob_client.delete_file(CONFIG_FILE_NAME)
        # Mirror save_config_as_active: without this the deleted configuration
        # would keep being served from the cache.
        ConfigHelper.get_active_config_or_default.cache_clear()
|