cwyodmodules 0.3.32__py3-none-any.whl → 0.3.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/api/chat_history.py +14 -7
- cwyodmodules/batch/utilities/chat_history/auth_utils.py +7 -3
- cwyodmodules/batch/utilities/chat_history/cosmosdb.py +17 -1
- cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +239 -254
- cwyodmodules/batch/utilities/common/source_document.py +60 -61
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/layout.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/page.py +8 -3
- cwyodmodules/batch/utilities/document_loading/read.py +30 -34
- cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +10 -3
- cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +6 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +15 -6
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +24 -2
- cwyodmodules/batch/utilities/helpers/env_helper.py +9 -9
- cwyodmodules/batch/utilities/helpers/lightrag_helper.py +9 -2
- cwyodmodules/batch/utilities/helpers/llm_helper.py +13 -2
- cwyodmodules/batch/utilities/helpers/secret_helper.py +9 -9
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +8 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +9 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +9 -3
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +135 -138
- cwyodmodules/batch/utilities/parser/output_parser_tool.py +64 -64
- cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +91 -93
- cwyodmodules/batch/utilities/search/azure_search_handler.py +16 -3
- cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +14 -2
- cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +36 -24
- cwyodmodules/batch/utilities/search/lightrag_search_handler.py +14 -2
- cwyodmodules/batch/utilities/search/postgres_search_handler.py +100 -97
- cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +103 -104
- cwyodmodules/batch/utilities/search/search.py +21 -24
- cwyodmodules/batch/utilities/tools/content_safety_checker.py +66 -78
- cwyodmodules/batch/utilities/tools/post_prompt_tool.py +48 -60
- cwyodmodules/batch/utilities/tools/question_answer_tool.py +196 -206
- cwyodmodules/batch/utilities/tools/text_processing_tool.py +36 -39
- cwyodmodules/logging_config.py +15 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/METADATA +2 -1
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/RECORD +46 -45
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,11 @@ import json
|
|
4
4
|
from urllib.parse import urlparse, quote
|
5
5
|
from ..helpers.azure_blob_storage_client import AzureBlobStorageClient
|
6
6
|
|
7
|
-
from
|
8
|
-
from
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
logger = getLogger("__main__")
|
13
|
-
##tracer = trace.get_tracer("__main__" + ".base_package")
|
14
|
-
tracer = trace.get_tracer("__main__")
|
7
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
15
12
|
|
16
13
|
class SourceDocument:
|
17
14
|
def __init__(
|
@@ -54,19 +51,20 @@ class SourceDocument:
|
|
54
51
|
)
|
55
52
|
return False
|
56
53
|
|
54
|
+
@logger.trace_function(log_args=False, log_result=False)
|
57
55
|
def to_json(self):
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
return json_string
|
56
|
+
json_string = json.dumps(self, cls=SourceDocumentEncoder)
|
57
|
+
logger.debug(f"Serialized SourceDocument to JSON: {json_string}")
|
58
|
+
return json_string
|
62
59
|
|
63
60
|
@classmethod
|
61
|
+
@logger.trace_function(log_args=False, log_result=False)
|
64
62
|
def from_json(cls, json_string):
|
65
|
-
logger.debug(f"Deserializing SourceDocument from JSON: {json_string}")
|
66
63
|
source_document = json.loads(json_string, cls=SourceDocumentDecoder)
|
67
64
|
return source_document
|
68
65
|
|
69
66
|
@classmethod
|
67
|
+
@logger.trace_function(log_args=False, log_result=False)
|
70
68
|
def from_dict(cls, dict_obj):
|
71
69
|
logger.debug(f"Creating SourceDocument from dict: {dict_obj}")
|
72
70
|
return cls(
|
@@ -81,6 +79,7 @@ class SourceDocument:
|
|
81
79
|
)
|
82
80
|
|
83
81
|
@classmethod
|
82
|
+
@logger.trace_function(log_args=False, log_result=False)
|
84
83
|
def from_metadata(
|
85
84
|
cls: Type["SourceDocument"],
|
86
85
|
content: str,
|
@@ -114,62 +113,62 @@ class SourceDocument:
|
|
114
113
|
)
|
115
114
|
return source_document
|
116
115
|
|
116
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
117
117
|
def get_filename(self, include_path=False):
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
return filename
|
118
|
+
filename = self.source.replace("_SAS_TOKEN_PLACEHOLDER_", "").replace(
|
119
|
+
"http://", ""
|
120
|
+
)
|
121
|
+
if include_path:
|
122
|
+
filename = filename.split("/")[-1]
|
123
|
+
else:
|
124
|
+
filename = filename.split("/")[-1].split(".")[0]
|
125
|
+
logger.debug(
|
126
|
+
f"Extracted filename: {filename}, include_path: {include_path}"
|
127
|
+
)
|
128
|
+
return filename
|
130
129
|
|
130
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
131
131
|
def get_markdown_url(self):
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
return f"[{self.title}]({url})"
|
132
|
+
url = quote(self.source, safe=":/")
|
133
|
+
if "_SAS_TOKEN_PLACEHOLDER_" in url:
|
134
|
+
blob_client = AzureBlobStorageClient()
|
135
|
+
container_sas = blob_client.get_container_sas()
|
136
|
+
url = url.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
|
137
|
+
logger.debug(f"Generated markdown URL: {url}")
|
138
|
+
return f"[{self.title}]({url})"
|
140
139
|
|
141
140
|
|
142
141
|
class SourceDocumentEncoder(json.JSONEncoder):
|
142
|
+
@logger.trace_function(log_args=False, log_result=False)
|
143
143
|
def default(self, obj):
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
return super().default(obj)
|
144
|
+
if isinstance(obj, SourceDocument):
|
145
|
+
logger.debug(f"Encoding SourceDocument: {obj}")
|
146
|
+
return {
|
147
|
+
"id": obj.id,
|
148
|
+
"content": obj.content,
|
149
|
+
"source": obj.source,
|
150
|
+
"title": obj.title,
|
151
|
+
"chunk": obj.chunk,
|
152
|
+
"offset": obj.offset,
|
153
|
+
"page_number": obj.page_number,
|
154
|
+
"chunk_id": obj.chunk_id,
|
155
|
+
}
|
156
|
+
return super().default(obj)
|
158
157
|
|
159
158
|
|
160
159
|
class SourceDocumentDecoder(json.JSONDecoder):
|
160
|
+
@logger.trace_function(log_args=False, log_result=False)
|
161
161
|
def decode(self, s, **kwargs):
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
return source_document
|
162
|
+
logger.debug(f"Decoding JSON string: {s}")
|
163
|
+
obj = super().decode(s, **kwargs)
|
164
|
+
source_document = SourceDocument(
|
165
|
+
id=obj["id"],
|
166
|
+
content=obj["content"],
|
167
|
+
source=obj["source"],
|
168
|
+
title=obj["title"],
|
169
|
+
chunk=obj["chunk"],
|
170
|
+
offset=obj["offset"],
|
171
|
+
page_number=obj["page_number"],
|
172
|
+
chunk_id=obj["chunk_id"],
|
173
|
+
)
|
174
|
+
return source_document
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import TokenTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -19,8 +24,8 @@ class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
|
19
24
|
document_url = documents[0].source
|
20
25
|
except IndexError as e:
|
21
26
|
# If no documents are provided, set document_url to None
|
22
|
-
|
23
|
-
|
27
|
+
logger.error("No documents provided for chunking.")
|
28
|
+
logger.debug(e)
|
24
29
|
document_url = None
|
25
30
|
splitter = TokenTextSplitter.from_tiktoken_encoder(
|
26
31
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import MarkdownTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class LayoutDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -19,8 +24,8 @@ class LayoutDocumentChunking(DocumentChunkingBase):
|
|
19
24
|
document_url = documents[0].source
|
20
25
|
except IndexError as e:
|
21
26
|
# If no documents are provided, set document_url to None
|
22
|
-
|
23
|
-
|
27
|
+
logger.error("No documents provided for chunking.")
|
28
|
+
logger.debug(e)
|
24
29
|
document_url = None
|
25
30
|
splitter = MarkdownTextSplitter.from_tiktoken_encoder(
|
26
31
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,12 +3,17 @@ from .document_chunking_base import DocumentChunkingBase
|
|
3
3
|
from langchain.text_splitter import MarkdownTextSplitter
|
4
4
|
from .chunking_strategy import ChunkingSettings
|
5
5
|
from ..common.source_document import SourceDocument
|
6
|
-
import
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
7
11
|
|
8
12
|
class PageDocumentChunking(DocumentChunkingBase):
|
9
13
|
def __init__(self) -> None:
|
10
14
|
pass
|
11
15
|
|
16
|
+
@logger.trace_function(log_args=False, log_result=False)
|
12
17
|
def chunk(
|
13
18
|
self, documents: List[SourceDocument], chunking: ChunkingSettings
|
14
19
|
) -> List[SourceDocument]:
|
@@ -16,8 +21,8 @@ class PageDocumentChunking(DocumentChunkingBase):
|
|
16
21
|
document_url = documents[0].source
|
17
22
|
except IndexError as e:
|
18
23
|
# If no documents are provided, set document_url to None
|
19
|
-
|
20
|
-
|
24
|
+
logger.error("No documents provided for chunking.")
|
25
|
+
logger.debug(e)
|
21
26
|
document_url = None
|
22
27
|
splitter = MarkdownTextSplitter.from_tiktoken_encoder(
|
23
28
|
chunk_size=chunking.chunk_size, chunk_overlap=chunking.chunk_overlap
|
@@ -3,46 +3,42 @@ from .document_loading_base import DocumentLoadingBase
|
|
3
3
|
from ..helpers.azure_form_recognizer_helper import AzureFormRecognizerClient
|
4
4
|
from ..common.source_document import SourceDocument
|
5
5
|
|
6
|
-
from
|
7
|
-
from
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
logger = getLogger("__main__")
|
12
|
-
# tracer = trace.get_tracer("__main__" + ".base_package")
|
13
|
-
tracer = trace.get_tracer("__main__")
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
14
11
|
|
15
12
|
|
16
13
|
class ReadDocumentLoading(DocumentLoadingBase):
|
17
14
|
def __init__(self) -> None:
|
18
15
|
super().__init__()
|
19
16
|
|
17
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
20
18
|
def load(self, document_url: str) -> List[SourceDocument]:
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
document_url, use_layout=False
|
28
|
-
)
|
29
|
-
)
|
30
|
-
documents = [
|
31
|
-
SourceDocument(
|
32
|
-
content=page["page_text"],
|
33
|
-
source=document_url,
|
34
|
-
page_number=page["page_number"],
|
35
|
-
offset=page["offset"],
|
36
|
-
)
|
37
|
-
for page in pages_content
|
38
|
-
]
|
39
|
-
logger.info(
|
40
|
-
f"Successfully loaded {len(documents)} pages from {document_url}"
|
19
|
+
logger.info(f"Loading document from URL: {document_url}")
|
20
|
+
try:
|
21
|
+
azure_form_recognizer_client = AzureFormRecognizerClient()
|
22
|
+
pages_content = (
|
23
|
+
azure_form_recognizer_client.begin_analyze_document_from_url(
|
24
|
+
document_url, use_layout=False
|
41
25
|
)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
26
|
+
)
|
27
|
+
documents = [
|
28
|
+
SourceDocument(
|
29
|
+
content=page["page_text"],
|
30
|
+
source=document_url,
|
31
|
+
page_number=page["page_number"],
|
32
|
+
offset=page["offset"],
|
46
33
|
)
|
47
|
-
|
48
|
-
|
34
|
+
for page in pages_content
|
35
|
+
]
|
36
|
+
logger.info(
|
37
|
+
f"Successfully loaded {len(documents)} pages from {document_url}"
|
38
|
+
)
|
39
|
+
return documents
|
40
|
+
except Exception as e:
|
41
|
+
logger.error(
|
42
|
+
f"Error loading document from {document_url}: {e}", exc_info=True
|
43
|
+
)
|
44
|
+
raise
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from urllib.parse import urljoin
|
3
2
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
4
3
|
|
@@ -6,8 +5,10 @@ import requests
|
|
6
5
|
from requests import Response
|
7
6
|
|
8
7
|
from .env_helper import EnvHelper
|
9
|
-
|
10
|
-
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
11
12
|
|
12
13
|
|
13
14
|
class AzureComputerVisionClient:
|
@@ -28,6 +29,7 @@ class AzureComputerVisionClient:
|
|
28
29
|
env_helper.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION
|
29
30
|
)
|
30
31
|
|
32
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
31
33
|
def vectorize_image(self, image_url: str) -> list[float]:
|
32
34
|
logger.info(f"Making call to computer vision to vectorize image: {image_url}")
|
33
35
|
response = self.__make_request(
|
@@ -39,6 +41,7 @@ class AzureComputerVisionClient:
|
|
39
41
|
response_json = self.__get_json_body(response)
|
40
42
|
return self.__get_vectors(response_json)
|
41
43
|
|
44
|
+
@logger.trace_function(log_args=False, log_result=False)
|
42
45
|
def vectorize_text(self, text: str) -> list[float]:
|
43
46
|
logger.debug(f"Making call to computer vision to vectorize text: {text}")
|
44
47
|
response = self.__make_request(
|
@@ -50,6 +53,7 @@ class AzureComputerVisionClient:
|
|
50
53
|
response_json = self.__get_json_body(response)
|
51
54
|
return self.__get_vectors(response_json)
|
52
55
|
|
56
|
+
@logger.trace_function(log_args=False, log_result=False)
|
53
57
|
def __make_request(self, path: str, body) -> Response:
|
54
58
|
try:
|
55
59
|
headers = {}
|
@@ -74,12 +78,14 @@ class AzureComputerVisionClient:
|
|
74
78
|
except Exception as e:
|
75
79
|
raise Exception("Call to Azure Computer Vision failed") from e
|
76
80
|
|
81
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
77
82
|
def __validate_response(self, response: Response):
|
78
83
|
if response.status_code != 200:
|
79
84
|
raise Exception(
|
80
85
|
f"Call to Azure Computer Vision failed with status: {response.status_code}, body: {response.text}"
|
81
86
|
)
|
82
87
|
|
88
|
+
@logger.trace_function(log_args=False, log_result=False)
|
83
89
|
def __get_json_body(self, response: Response) -> dict:
|
84
90
|
try:
|
85
91
|
return response.json()
|
@@ -88,6 +94,7 @@ class AzureComputerVisionClient:
|
|
88
94
|
f"Call to Azure Computer Vision returned malformed response body: {response.text}",
|
89
95
|
) from e
|
90
96
|
|
97
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
91
98
|
def __get_vectors(self, response_json: dict) -> list[float]:
|
92
99
|
if self.__RESPONSE_VECTOR_KEY in response_json:
|
93
100
|
return response_json[self.__RESPONSE_VECTOR_KEY]
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from azure.core.credentials import AzureKeyCredential
|
3
2
|
from azure.ai.formrecognizer import DocumentAnalysisClient
|
4
3
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
@@ -6,7 +5,10 @@ import html
|
|
6
5
|
import traceback
|
7
6
|
from .env_helper import EnvHelper
|
8
7
|
|
9
|
-
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
10
12
|
|
11
13
|
|
12
14
|
class AzureFormRecognizerClient:
|
@@ -45,6 +47,7 @@ class AzureFormRecognizerClient:
|
|
45
47
|
"paragraph": "p",
|
46
48
|
}
|
47
49
|
|
50
|
+
@logger.trace_function(log_args=False, log_result=False)
|
48
51
|
def _table_to_html(self, table):
|
49
52
|
table_html = "<table>"
|
50
53
|
rows = [
|
@@ -72,6 +75,7 @@ class AzureFormRecognizerClient:
|
|
72
75
|
table_html += "</table>"
|
73
76
|
return table_html
|
74
77
|
|
78
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
75
79
|
def begin_analyze_document_from_url(
|
76
80
|
self, source_url: str, use_layout: bool = True, paragraph_separator: str = ""
|
77
81
|
):
|
@@ -1,11 +1,13 @@
|
|
1
|
-
import logging
|
2
1
|
import psycopg2
|
3
2
|
from psycopg2.extras import execute_values, RealDictCursor
|
4
3
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
5
4
|
from .llm_helper import LLMHelper
|
6
5
|
from .env_helper import EnvHelper
|
7
6
|
|
8
|
-
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
9
11
|
|
10
12
|
|
11
13
|
class AzurePostgresHelper:
|
@@ -15,6 +17,7 @@ class AzurePostgresHelper:
|
|
15
17
|
self.azure_identity_helper = AzureIdentityHelper()
|
16
18
|
self.conn = None
|
17
19
|
|
20
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
18
21
|
def _create_search_client(self):
|
19
22
|
"""
|
20
23
|
Establishes a connection to Azure PostgreSQL using AAD authentication.
|
@@ -44,6 +47,7 @@ class AzurePostgresHelper:
|
|
44
47
|
logger.error(f"Error establishing a connection to PostgreSQL: {e}")
|
45
48
|
raise
|
46
49
|
|
50
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
47
51
|
def get_search_client(self):
|
48
52
|
"""
|
49
53
|
Provides a reusable database connection.
|
@@ -52,6 +56,7 @@ class AzurePostgresHelper:
|
|
52
56
|
self.conn = self._create_search_client()
|
53
57
|
return self.conn
|
54
58
|
|
59
|
+
@logger.trace_function(log_args=False, log_result=False)
|
55
60
|
def get_vector_store(self, embedding_array):
|
56
61
|
"""
|
57
62
|
Fetches search indexes from PostgreSQL based on an embedding vector.
|
@@ -80,6 +85,7 @@ class AzurePostgresHelper:
|
|
80
85
|
finally:
|
81
86
|
conn.close()
|
82
87
|
|
88
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
83
89
|
def create_vector_store(self, documents_to_upload):
|
84
90
|
"""
|
85
91
|
Inserts documents into the `vector_store` table in batch mode.
|
@@ -123,6 +129,7 @@ class AzurePostgresHelper:
|
|
123
129
|
finally:
|
124
130
|
conn.close()
|
125
131
|
|
132
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
126
133
|
def get_files(self):
|
127
134
|
"""
|
128
135
|
Fetches distinct titles from the PostgreSQL database.
|
@@ -155,6 +162,7 @@ class AzurePostgresHelper:
|
|
155
162
|
finally:
|
156
163
|
conn.close()
|
157
164
|
|
165
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
158
166
|
def delete_documents(self, ids_to_delete):
|
159
167
|
"""
|
160
168
|
Deletes documents from the PostgreSQL database based on the provided ids.
|
@@ -202,6 +210,7 @@ class AzurePostgresHelper:
|
|
202
210
|
finally:
|
203
211
|
conn.close()
|
204
212
|
|
213
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
205
214
|
def perform_search(self, title):
|
206
215
|
"""
|
207
216
|
Fetches search results from PostgreSQL based on the title.
|
@@ -228,6 +237,7 @@ class AzurePostgresHelper:
|
|
228
237
|
finally:
|
229
238
|
conn.close()
|
230
239
|
|
240
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
231
241
|
def get_unique_files(self):
|
232
242
|
"""
|
233
243
|
Fetches unique titles from PostgreSQL.
|
@@ -252,6 +262,7 @@ class AzurePostgresHelper:
|
|
252
262
|
finally:
|
253
263
|
conn.close()
|
254
264
|
|
265
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
255
266
|
def search_by_blob_url(self, blob_url):
|
256
267
|
"""
|
257
268
|
Fetches unique titles from PostgreSQL based on a given blob URL.
|
@@ -278,6 +289,7 @@ class AzurePostgresHelper:
|
|
278
289
|
finally:
|
279
290
|
conn.close()
|
280
291
|
|
292
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
281
293
|
def store_with_lightrag(self, documents_to_upload):
|
282
294
|
"""
|
283
295
|
Stores documents using LightRAG for enhanced vector and text storage capabilities.
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
import psycopg2
|
3
2
|
from psycopg2.extras import execute_values, RealDictCursor
|
4
3
|
from .llm_helper import LLMHelper
|
@@ -6,7 +5,10 @@ from .env_helper import EnvHelper
|
|
6
5
|
from .lightrag_helper import LightRAGHelper
|
7
6
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
8
7
|
|
9
|
-
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
10
12
|
|
11
13
|
|
12
14
|
class AzurePostgresHelper:
|
@@ -17,6 +19,7 @@ class AzurePostgresHelper:
|
|
17
19
|
self.azure_identity_helper = AzureIdentityHelper()
|
18
20
|
self.conn = None
|
19
21
|
|
22
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
20
23
|
def _create_search_client(self):
|
21
24
|
"""
|
22
25
|
Establishes a connection to Azure PostgreSQL using AAD authentication.
|
@@ -46,6 +49,7 @@ class AzurePostgresHelper:
|
|
46
49
|
logger.error(f"Error establishing a connection to PostgreSQL: {e}")
|
47
50
|
raise
|
48
51
|
|
52
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
49
53
|
def get_search_client(self):
|
50
54
|
"""
|
51
55
|
Provides a reusable database connection.
|
@@ -54,6 +58,7 @@ class AzurePostgresHelper:
|
|
54
58
|
self.conn = self._create_search_client()
|
55
59
|
return self.conn
|
56
60
|
|
61
|
+
@logger.trace_function(log_args=False, log_result=False)
|
57
62
|
def get_vector_store(self, embedding_array):
|
58
63
|
"""
|
59
64
|
Fetches search indexes from PostgreSQL based on an embedding vector.
|
@@ -82,6 +87,7 @@ class AzurePostgresHelper:
|
|
82
87
|
finally:
|
83
88
|
conn.close()
|
84
89
|
|
90
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
85
91
|
def create_vector_store(self, documents_to_upload):
|
86
92
|
"""
|
87
93
|
Inserts documents into the `vector_store` table in batch mode.
|
@@ -125,6 +131,7 @@ class AzurePostgresHelper:
|
|
125
131
|
finally:
|
126
132
|
conn.close()
|
127
133
|
|
134
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
128
135
|
def get_files(self):
|
129
136
|
"""
|
130
137
|
Fetches distinct titles from the PostgreSQL database.
|
@@ -157,6 +164,7 @@ class AzurePostgresHelper:
|
|
157
164
|
finally:
|
158
165
|
conn.close()
|
159
166
|
|
167
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
160
168
|
def delete_documents(self, ids_to_delete):
|
161
169
|
"""
|
162
170
|
Deletes documents from the PostgreSQL database based on the provided ids.
|
@@ -204,6 +212,7 @@ class AzurePostgresHelper:
|
|
204
212
|
finally:
|
205
213
|
conn.close()
|
206
214
|
|
215
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
207
216
|
def perform_search(self, title):
|
208
217
|
"""
|
209
218
|
Fetches search results from PostgreSQL based on the title.
|
@@ -230,6 +239,7 @@ class AzurePostgresHelper:
|
|
230
239
|
finally:
|
231
240
|
conn.close()
|
232
241
|
|
242
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
233
243
|
def get_unique_files(self):
|
234
244
|
"""
|
235
245
|
Fetches unique titles from PostgreSQL.
|
@@ -254,6 +264,7 @@ class AzurePostgresHelper:
|
|
254
264
|
finally:
|
255
265
|
conn.close()
|
256
266
|
|
267
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
257
268
|
def search_by_blob_url(self, blob_url):
|
258
269
|
"""
|
259
270
|
Fetches unique titles from PostgreSQL based on a given blob URL.
|
@@ -280,6 +291,7 @@ class AzurePostgresHelper:
|
|
280
291
|
finally:
|
281
292
|
conn.close()
|
282
293
|
|
294
|
+
@logger.trace_function(log_args=False, log_result=False)
|
283
295
|
def store_with_lightrag(self, documents_to_upload):
|
284
296
|
"""
|
285
297
|
Stores documents using LightRAG for enhanced vector and text storage capabilities.
|