cwyodmodules 0.3.31__py3-none-any.whl → 0.3.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/api/chat_history.py +14 -7
- cwyodmodules/batch/utilities/chat_history/auth_utils.py +7 -3
- cwyodmodules/batch/utilities/chat_history/cosmosdb.py +17 -1
- cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +239 -254
- cwyodmodules/batch/utilities/common/source_document.py +60 -61
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/layout.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/page.py +8 -3
- cwyodmodules/batch/utilities/document_loading/read.py +30 -34
- cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +10 -3
- cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +6 -2
- cwyodmodules/batch/utilities/helpers/azure_identity_helper.py +3 -34
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +15 -6
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +24 -2
- cwyodmodules/batch/utilities/helpers/env_helper.py +9 -9
- cwyodmodules/batch/utilities/helpers/lightrag_helper.py +9 -2
- cwyodmodules/batch/utilities/helpers/llm_helper.py +13 -2
- cwyodmodules/batch/utilities/helpers/secret_helper.py +9 -9
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +8 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +9 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +9 -3
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +135 -138
- cwyodmodules/batch/utilities/parser/output_parser_tool.py +64 -64
- cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +91 -93
- cwyodmodules/batch/utilities/search/azure_search_handler.py +16 -3
- cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +14 -2
- cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +36 -24
- cwyodmodules/batch/utilities/search/lightrag_search_handler.py +14 -2
- cwyodmodules/batch/utilities/search/postgres_search_handler.py +100 -97
- cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +103 -104
- cwyodmodules/batch/utilities/search/search.py +21 -24
- cwyodmodules/batch/utilities/tools/content_safety_checker.py +66 -78
- cwyodmodules/batch/utilities/tools/post_prompt_tool.py +48 -60
- cwyodmodules/batch/utilities/tools/question_answer_tool.py +196 -206
- cwyodmodules/batch/utilities/tools/text_processing_tool.py +36 -39
- cwyodmodules/logging_config.py +15 -0
- {cwyodmodules-0.3.31.dist-info → cwyodmodules-0.3.33.dist-info}/METADATA +2 -1
- {cwyodmodules-0.3.31.dist-info → cwyodmodules-0.3.33.dist-info}/RECORD +47 -46
- {cwyodmodules-0.3.31.dist-info → cwyodmodules-0.3.33.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.31.dist-info → cwyodmodules-0.3.33.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.31.dist-info → cwyodmodules-0.3.33.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,11 @@ import json
|
|
4
4
|
from urllib.parse import urlparse, quote
|
5
5
|
from ..helpers.azure_blob_storage_client import AzureBlobStorageClient
|
6
6
|
|
7
|
-
from
|
8
|
-
from
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
logger = getLogger("__main__")
|
13
|
-
##tracer = trace.get_tracer("__main__" + ".base_package")
|
14
|
-
tracer = trace.get_tracer("__main__")
|
7
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
15
12
|
|
16
13
|
class SourceDocument:
|
17
14
|
def __init__(
|
@@ -54,19 +51,20 @@ class SourceDocument:
|
|
54
51
|
)
|
55
52
|
return False
|
56
53
|
|
54
|
+
@logger.trace_function(log_args=False, log_result=False)
|
57
55
|
def to_json(self):
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
return json_string
|
56
|
+
json_string = json.dumps(self, cls=SourceDocumentEncoder)
|
57
|
+
logger.debug(f"Serialized SourceDocument to JSON: {json_string}")
|
58
|
+
return json_string
|
62
59
|
|
63
60
|
@classmethod
|
61
|
+
@logger.trace_function(log_args=False, log_result=False)
|
64
62
|
def from_json(cls, json_string):
|
65
|
-
logger.debug(f"Deserializing SourceDocument from JSON: {json_string}")
|
66
63
|
source_document = json.loads(json_string, cls=SourceDocumentDecoder)
|
67
64
|
return source_document
|
68
65
|
|
69
66
|
@classmethod
|
67
|
+
@logger.trace_function(log_args=False, log_result=False)
|
70
68
|
def from_dict(cls, dict_obj):
|
71
69
|
logger.debug(f"Creating SourceDocument from dict: {dict_obj}")
|
72
70
|
return cls(
|
@@ -81,6 +79,7 @@ class SourceDocument:
|
|
81
79
|
)
|
82
80
|
|
83
81
|
@classmethod
|
82
|
+
@logger.trace_function(log_args=False, log_result=False)
|
84
83
|
def from_metadata(
|
85
84
|
cls: Type["SourceDocument"],
|
86
85
|
content: str,
|
@@ -114,62 +113,62 @@ class SourceDocument:
|
|
114
113
|
)
|
115
114
|
return source_document
|
116
115
|
|
116
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
117
117
|
def get_filename(self, include_path=False):
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
return filename
|
118
|
+
filename = self.source.replace("_SAS_TOKEN_PLACEHOLDER_", "").replace(
|
119
|
+
"http://", ""
|
120
|
+
)
|
121
|
+
if include_path:
|
122
|
+
filename = filename.split("/")[-1]
|
123
|
+
else:
|
124
|
+
filename = filename.split("/")[-1].split(".")[0]
|
125
|
+
logger.debug(
|
126
|
+
f"Extracted filename: {filename}, include_path: {include_path}"
|
127
|
+
)
|
128
|
+
return filename
|
130
129
|
|
130
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
131
131
|
def get_markdown_url(self):
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
return f"[{self.title}]({url})"
|
132
|
+
url = quote(self.source, safe=":/")
|
133
|
+
if "_SAS_TOKEN_PLACEHOLDER_" in url:
|
134
|
+
blob_client = AzureBlobStorageClient()
|
135
|
+
container_sas = blob_client.get_container_sas()
|
136
|
+
url = url.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
|
137
|
+
logger.debug(f"Generated markdown URL: {url}")
|
138
|
+
return f"[{self.title}]({url})"
|
140
139
|
|
141
140
|
|
142
141
|
class SourceDocumentEncoder(json.JSONEncoder):
|
142
|
+
@logger.trace_function(log_args=False, log_result=False)
|
143
143
|
def default(self, obj):
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
return super().default(obj)
|
144
|
+
if isinstance(obj, SourceDocument):
|
145
|
+
logger.debug(f"Encoding SourceDocument: {obj}")
|
146
|
+
return {
|
147
|
+
"id": obj.id,
|
148
|
+
"content": obj.content,
|
149
|
+
"source": obj.source,
|
150
|
+
"title": obj.title,
|
151
|
+
"chunk": obj.chunk,
|
152
|
+
"offset": obj.offset,
|
153
|
+
"page_number": obj.page_number,
|
154
|
+
"chunk_id": obj.chunk_id,
|
155
|
+
}
|
156
|
+
return super().default(obj)
|
158
157
|
|
159
158
|
|
160
159
|
class SourceDocumentDecoder(json.JSONDecoder):
|
160
|
+
@logger.trace_function(log_args=False, log_result=False)
|
161
161
|
def decode(self, s, **kwargs):
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
return source_document
|
162
|
+
logger.debug(f"Decoding JSON string: {s}")
|
163
|
+
obj = super().decode(s, **kwargs)
|
164
|
+
source_document = SourceDocument(
|
165
|
+
id=obj["id"],
|
166
|
+
content=obj["content"],
|
167
|
+
source=obj["source"],
|
168
|
+
title=obj["title"],
|
169
|
+
chunk=obj["chunk"],
|
170
|
+
offset=obj["offset"],
|
171
|
+
page_number=obj["page_number"],
|
172
|
+
chunk_id=obj["chunk_id"],
|
173
|
+
)
|
174
|
+
return source_document
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import TokenTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -19,8 +24,8 @@ class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
|
19
24
|
document_url = documents[0].source
|
20
25
|
except IndexError as e:
|
21
26
|
# If no documents are provided, set document_url to None
|
22
|
-
|
23
|
-
|
27
|
+
logger.error("No documents provided for chunking.")
|
28
|
+
logger.debug(e)
|
24
29
|
document_url = None
|
25
30
|
splitter = TokenTextSplitter.from_tiktoken_encoder(
|
26
31
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import MarkdownTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class LayoutDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -19,8 +24,8 @@ class LayoutDocumentChunking(DocumentChunkingBase):
|
|
19
24
|
document_url = documents[0].source
|
20
25
|
except IndexError as e:
|
21
26
|
# If no documents are provided, set document_url to None
|
22
|
-
|
23
|
-
|
27
|
+
logger.error("No documents provided for chunking.")
|
28
|
+
logger.debug(e)
|
24
29
|
document_url = None
|
25
30
|
splitter = MarkdownTextSplitter.from_tiktoken_encoder(
|
26
31
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import MarkdownTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class PageDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -16,8 +21,8 @@ class PageDocumentChunking(DocumentChunkingBase):
|
|
16
21
|
document_url = documents[0].source
|
17
22
|
except IndexError as e:
|
18
23
|
# If no documents are provided, set document_url to None
|
19
|
-
|
20
|
-
|
24
|
+
logger.error("No documents provided for chunking.")
|
25
|
+
logger.debug(e)
|
21
26
|
document_url = None
|
22
27
|
splitter = MarkdownTextSplitter.from_tiktoken_encoder(
|
23
28
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,46 +3,42 @@ from .document_loading_base import DocumentLoadingBase
|
|
3
3
|
from ..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient
|
4
4
|
from ..common.source_document import SourceDocument
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
logger = getLogger("__main__")
|
12
|
-
# tracer = trace.get_tracer("__main__" + ".base_package")
|
13
|
-
tracer = trace.get_tracer("__main__")
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
14
11
|
|
15
12
|
|
16
13
|
class ReadDocumentLoading(DocumentLoadingBase):
|
17
14
|
def __init__(self) -> None:
|
18
15
|
super().__init__()
|
19
16
|
|
17
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
20
18
|
def load(self, document_url: str) -> List[SourceDocument]:
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
document_url, use_layout=False
|
28
|
-
)
|
29
|
-
)
|
30
|
-
documents = [
|
31
|
-
SourceDocument(
|
32
|
-
content=page["page_text"],
|
33
|
-
source=document_url,
|
34
|
-
page_number=page["page_number"],
|
35
|
-
offset=page["offset"],
|
36
|
-
)
|
37
|
-
for page in pages_content
|
38
|
-
]
|
39
|
-
logger.info(
|
40
|
-
f"Successfully loaded {len(documents)} pages from {document_url}"
|
19
|
+
logger.info(f"Loading document from URL: {document_url}")
|
20
|
+
try:
|
21
|
+
azure_form_recognizer_client = AzureFormRecognizerClient()
|
22
|
+
pages_content = (
|
23
|
+
azure_form_recognizer_client.begin_analyze_document_from_url(
|
24
|
+
document_url, use_layout=False
|
41
25
|
)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
26
|
+
)
|
27
|
+
documents = [
|
28
|
+
SourceDocument(
|
29
|
+
content=page["page_text"],
|
30
|
+
source=document_url,
|
31
|
+
page_number=page["page_number"],
|
32
|
+
offset=page["offset"],
|
46
33
|
)
|
47
|
-
|
48
|
-
|
34
|
+
for page in pages_content
|
35
|
+
]
|
36
|
+
logger.info(
|
37
|
+
f"Successfully loaded {len(documents)} pages from {document_url}"
|
38
|
+
)
|
39
|
+
return documents
|
40
|
+
except Exception as e:
|
41
|
+
logger.error(
|
42
|
+
f"Error loading document from {document_url}: {e}", exc_info=True
|
43
|
+
)
|
44
|
+
raise
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from urllib.parse import urljoin
|
3
2
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
4
3
|
|
@@ -6,8 +5,10 @@ import requests
|
|
6
5
|
from requests import Response
|
7
6
|
|
8
7
|
from .env_helper import EnvHelper
|
9
|
-
|
10
|
-
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
11
12
|
|
12
13
|
|
13
14
|
class AzureComputerVisionClient:
|
@@ -28,6 +29,7 @@ class AzureComputerVisionClient:
|
|
28
29
|
env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION
|
29
30
|
)
|
30
31
|
|
32
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
31
33
|
def vectorize_image(self, image_url: str) -> list[float]:
|
32
34
|
logger.info(f"Making call to computer vision to vectorize image: {image_url}")
|
33
35
|
response = self.__make_request(
|
@@ -39,6 +41,7 @@ class AzureComputerVisionClient:
|
|
39
41
|
response_json = self.__get_json_body(response)
|
40
42
|
return self.__get_vectors(response_json)
|
41
43
|
|
44
|
+
@logger.trace_function(log_args=False, log_result=False)
|
42
45
|
def vectorize_text(self, text: str) -> list[float]:
|
43
46
|
logger.debug(f"Making call to computer vision to vectorize text: {text}")
|
44
47
|
response = self.__make_request(
|
@@ -50,6 +53,7 @@ class AzureComputerVisionClient:
|
|
50
53
|
response_json = self.__get_json_body(response)
|
51
54
|
return self.__get_vectors(response_json)
|
52
55
|
|
56
|
+
@logger.trace_function(log_args=False, log_result=False)
|
53
57
|
def __make_request(self, path: str, body) -> Response:
|
54
58
|
try:
|
55
59
|
headers = {}
|
@@ -74,12 +78,14 @@ class AzureComputerVisionClient:
|
|
74
78
|
except Exception as e:
|
75
79
|
raise Exception("Call to Azure Computer Vision failed") from e
|
76
80
|
|
81
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
77
82
|
def __validate_response(self, response: Response):
|
78
83
|
if response.status_code != 200:
|
79
84
|
raise Exception(
|
80
85
|
f"Call to Azure Computer Vision failed with status: {response.status_code}, body: {response.text}"
|
81
86
|
)
|
82
87
|
|
88
|
+
@logger.trace_function(log_args=False, log_result=False)
|
83
89
|
def __get_json_body(self, response: Response) -> dict:
|
84
90
|
try:
|
85
91
|
return response.json()
|
@@ -88,6 +94,7 @@ class AzureComputerVisionClient:
|
|
88
94
|
f"Call to Azure Computer Vision returned malformed response body: {response.text}",
|
89
95
|
) from e
|
90
96
|
|
97
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
91
98
|
def __get_vectors(self, response_json: dict) -> list[float]:
|
92
99
|
if self.__RESPONSE_VECTOR_KEY in response_json:
|
93
100
|
return response_json[self.__RESPONSE_VECTOR_KEY]
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from azure.core.credentials import AzureKeyCredential
|
3
2
|
from azure.ai.formrecognizer import DocumentAnalysisClient
|
4
3
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
@@ -6,7 +5,10 @@ import html
|
|
6
5
|
import traceback
|
7
6
|
from .env_helper import EnvHelper
|
8
7
|
|
9
|
-
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
10
12
|
|
11
13
|
|
12
14
|
class AzureFormRecognizerClient:
|
@@ -45,6 +47,7 @@ class AzureFormRecognizerClient:
|
|
45
47
|
"paragraph": "p",
|
46
48
|
}
|
47
49
|
|
50
|
+
@logger.trace_function(log_args=False, log_result=False)
|
48
51
|
def _table_to_html(self, table):
|
49
52
|
table_html = "<table>"
|
50
53
|
rows = [
|
@@ -72,6 +75,7 @@ class AzureFormRecognizerClient:
|
|
72
75
|
table_html += "</table>"
|
73
76
|
return table_html
|
74
77
|
|
78
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
75
79
|
def begin_analyze_document_from_url(
|
76
80
|
self, source_url: str, use_layout: bool = True, paragraph_separator: str = ""
|
77
81
|
):
|
@@ -1,8 +1,4 @@
|
|
1
|
-
import os
|
2
1
|
from azure.identity import (
|
3
|
-
ChainedTokenCredential,
|
4
|
-
ManagedIdentityCredential,
|
5
|
-
EnvironmentCredential,
|
6
2
|
TokenCachePersistenceOptions,
|
7
3
|
get_bearer_token_provider,
|
8
4
|
DefaultAzureCredential
|
@@ -14,6 +10,7 @@ from opentelemetry.propagate import extract
|
|
14
10
|
|
15
11
|
logger = getLogger("__main__")
|
16
12
|
tracer = trace.get_tracer("__main__")
|
13
|
+
|
17
14
|
class AzureIdentityHelper:
|
18
15
|
"""
|
19
16
|
A helper class to provide a chained Azure token credential.
|
@@ -21,40 +18,12 @@ class AzureIdentityHelper:
|
|
21
18
|
Token caching is configured for in-memory persistence.
|
22
19
|
"""
|
23
20
|
def __init__(self):
|
24
|
-
# Configure in-memory token cache persistence
|
25
|
-
# For in-memory, unencrypted storage is typically allowed for simplicity during development.
|
26
|
-
# In production, especially with shared environments, consider the security implications.
|
27
|
-
client_secret_available = os.getenv("AZURE_CLIENT_SECRET") is not None
|
28
21
|
|
29
|
-
token_cache_options = TokenCachePersistenceOptions(allow_unencrypted_storage=True)
|
22
|
+
token_cache_options = TokenCachePersistenceOptions(allow_unencrypted_storage=True)
|
30
23
|
|
31
|
-
|
32
|
-
managed_identity_credential = ManagedIdentityCredential(
|
33
|
-
token_cache_persistence_options=token_cache_options
|
34
|
-
)
|
35
|
-
environment_credential = EnvironmentCredential(
|
24
|
+
self._credential = DefaultAzureCredential(
|
36
25
|
token_cache_persistence_options=token_cache_options
|
37
26
|
)
|
38
|
-
|
39
|
-
|
40
|
-
# Create a chain of credentials
|
41
|
-
# The chain will try credentials in the order they are provided.
|
42
|
-
if client_secret_available:
|
43
|
-
logger.info("Using Environment Credential first with token cache persistence.")
|
44
|
-
self._credential = ChainedTokenCredential(
|
45
|
-
environment_credential,
|
46
|
-
managed_identity_credential
|
47
|
-
)
|
48
|
-
else:
|
49
|
-
logger.info("Using Managed Identity Credential first with token cache persistence.")
|
50
|
-
# self._credential = ChainedTokenCredential(
|
51
|
-
|
52
|
-
# managed_identity_credential,
|
53
|
-
# environment_credential
|
54
|
-
# )
|
55
|
-
self._credential = DefaultAzureCredential(
|
56
|
-
token_cache_persistence_options=token_cache_options
|
57
|
-
)
|
58
27
|
|
59
28
|
def get_credential(self):
|
60
29
|
"""
|
@@ -1,11 +1,13 @@
|
|
1
|
-
import logging
|
2
1
|
import psycopg2
|
3
2
|
from psycopg2.extras import execute_values, RealDictCursor
|
4
3
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
5
4
|
from .llm_helper import LLMHelper
|
6
5
|
from .env_helper import EnvHelper
|
7
6
|
|
8
|
-
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
9
11
|
|
10
12
|
|
11
13
|
class AzurePostgresHelper:
|
@@ -15,6 +17,7 @@ class AzurePostgresHelper:
|
|
15
17
|
self.azure_identity_helper = AzureIdentityHelper()
|
16
18
|
self.conn = None
|
17
19
|
|
20
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
18
21
|
def _create_search_client(self):
|
19
22
|
"""
|
20
23
|
Establishes a connection to Azure PostgreSQL using AAD authentication.
|
@@ -44,6 +47,7 @@ class AzurePostgresHelper:
|
|
44
47
|
logger.error(f"Error establishing a connection to PostgreSQL: {e}")
|
45
48
|
raise
|
46
49
|
|
50
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
47
51
|
def get_search_client(self):
|
48
52
|
"""
|
49
53
|
Provides a reusable database connection.
|
@@ -52,6 +56,7 @@ class AzurePostgresHelper:
|
|
52
56
|
self.conn = self._create_search_client()
|
53
57
|
return self.conn
|
54
58
|
|
59
|
+
@logger.trace_function(log_args=False, log_result=False)
|
55
60
|
def get_vector_store(self, embedding_array):
|
56
61
|
"""
|
57
62
|
Fetches search indexes from PostgreSQL based on an embedding vector.
|
@@ -80,6 +85,7 @@ class AzurePostgresHelper:
|
|
80
85
|
finally:
|
81
86
|
conn.close()
|
82
87
|
|
88
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
83
89
|
def create_vector_store(self, documents_to_upload):
|
84
90
|
"""
|
85
91
|
Inserts documents into the `vector_store` table in batch mode.
|
@@ -123,6 +129,7 @@ class AzurePostgresHelper:
|
|
123
129
|
finally:
|
124
130
|
conn.close()
|
125
131
|
|
132
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
126
133
|
def get_files(self):
|
127
134
|
"""
|
128
135
|
Fetches distinct titles from the PostgreSQL database.
|
@@ -155,6 +162,7 @@ class AzurePostgresHelper:
|
|
155
162
|
finally:
|
156
163
|
conn.close()
|
157
164
|
|
165
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
158
166
|
def delete_documents(self, ids_to_delete):
|
159
167
|
"""
|
160
168
|
Deletes documents from the PostgreSQL database based on the provided ids.
|
@@ -202,6 +210,7 @@ class AzurePostgresHelper:
|
|
202
210
|
finally:
|
203
211
|
conn.close()
|
204
212
|
|
213
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
205
214
|
def perform_search(self, title):
|
206
215
|
"""
|
207
216
|
Fetches search results from PostgreSQL based on the title.
|
@@ -228,6 +237,7 @@ class AzurePostgresHelper:
|
|
228
237
|
finally:
|
229
238
|
conn.close()
|
230
239
|
|
240
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
231
241
|
def get_unique_files(self):
|
232
242
|
"""
|
233
243
|
Fetches unique titles from PostgreSQL.
|
@@ -252,6 +262,7 @@ class AzurePostgresHelper:
|
|
252
262
|
finally:
|
253
263
|
conn.close()
|
254
264
|
|
265
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
255
266
|
def search_by_blob_url(self, blob_url):
|
256
267
|
"""
|
257
268
|
Fetches unique titles from PostgreSQL based on a given blob URL.
|
@@ -278,6 +289,7 @@ class AzurePostgresHelper:
|
|
278
289
|
finally:
|
279
290
|
conn.close()
|
280
291
|
|
292
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
281
293
|
def store_with_lightrag(self, documents_to_upload):
|
282
294
|
"""
|
283
295
|
Stores documents using LightRAG for enhanced vector and text storage capabilities.
|