cwyodmodules-0.3.35-py3-none-any.whl → cwyodmodules-0.3.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py +11 -3
- cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py +7 -2
- cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py +10 -2
- {cwyodmodules-0.3.35.dist-info → cwyodmodules-0.3.36.dist-info}/METADATA +1 -1
- {cwyodmodules-0.3.35.dist-info → cwyodmodules-0.3.36.dist-info}/RECORD +8 -8
- {cwyodmodules-0.3.35.dist-info → cwyodmodules-0.3.36.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.35.dist-info → cwyodmodules-0.3.36.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.35.dist-info → cwyodmodules-0.3.36.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
import azure.functions as func
|
1
2
|
from .embedder_base import EmbedderBase
|
2
3
|
from ..env_helper import EnvHelper
|
3
4
|
from ..llm_helper import LLMHelper
|
@@ -6,9 +7,13 @@ from ...integrated_vectorization.azure_search_indexer import AzureSearchIndexer
|
|
6
7
|
from ...integrated_vectorization.azure_search_datasource import AzureSearchDatasource
|
7
8
|
from ...integrated_vectorization.azure_search_skillset import AzureSearchSkillset
|
8
9
|
from ..config.config_helper import ConfigHelper
|
9
|
-
import logging
|
10
10
|
|
11
|
-
|
11
|
+
from logging_config import logger
|
12
|
+
env_helper: EnvHelper = EnvHelper()
|
13
|
+
log_args = env_helper.LOG_ARGS
|
14
|
+
log_result = env_helper.LOG_RESULT
|
15
|
+
|
16
|
+
bp_add_url_embeddings = func.Blueprint()
|
12
17
|
|
13
18
|
|
14
19
|
class IntegratedVectorizationEmbedder(EmbedderBase):
|
@@ -16,13 +21,15 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
|
|
16
21
|
self.env_helper = env_helper
|
17
22
|
self.llm_helper: LLMHelper = LLMHelper()
|
18
23
|
logger.info("Initialized IntegratedVectorizationEmbedder.")
|
19
|
-
|
24
|
+
|
25
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
20
26
|
def embed_file(self, source_url: str, file_name: str = None):
|
21
27
|
logger.info(
|
22
28
|
f"Starting embed_file for source_url: {source_url}, file_name: {file_name}."
|
23
29
|
)
|
24
30
|
self.process_using_integrated_vectorization(source_url=source_url)
|
25
31
|
|
32
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
26
33
|
def process_using_integrated_vectorization(self, source_url: str):
|
27
34
|
logger.info(f"Starting integrated vectorization for source_url: {source_url}.")
|
28
35
|
config = ConfigHelper.get_active_config_or_default()
|
@@ -46,6 +53,7 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
|
|
46
53
|
logger.error(f"Error processing {source_url}: {e}")
|
47
54
|
raise e
|
48
55
|
|
56
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
49
57
|
def reprocess_all(self):
|
50
58
|
logger.info("Starting reprocess_all operation.")
|
51
59
|
search_indexer = AzureSearchIndexer(self.env_helper)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import json
|
2
|
-
import logging
|
3
2
|
from typing import List
|
4
3
|
|
5
4
|
from ...helpers.llm_helper import LLMHelper
|
@@ -15,7 +14,10 @@ from ..document_loading_helper import DocumentLoading
|
|
15
14
|
from ..document_chunking_helper import DocumentChunking
|
16
15
|
from ...common.source_document import SourceDocument
|
17
16
|
|
18
|
-
|
17
|
+
from logging_config import logger
|
18
|
+
env_helper: EnvHelper = EnvHelper()
|
19
|
+
log_args = env_helper.LOG_ARGS
|
20
|
+
log_result = env_helper.LOG_RESULT
|
19
21
|
|
20
22
|
|
21
23
|
class PostgresEmbedder(EmbedderBase):
|
@@ -33,6 +35,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
33
35
|
ext = processor.document_type.lower()
|
34
36
|
self.embedding_configs[ext] = processor
|
35
37
|
|
38
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
36
39
|
def embed_file(self, source_url: str, file_name: str):
|
37
40
|
logger.info(f"Embedding file: {file_name} from source: {source_url}")
|
38
41
|
file_extension = file_name.split(".")[-1].lower()
|
@@ -47,6 +50,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
47
50
|
file_name, {"embeddings_added": "true"}
|
48
51
|
)
|
49
52
|
|
53
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
50
54
|
def __embed(
|
51
55
|
self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
|
52
56
|
):
|
@@ -84,6 +88,7 @@ class PostgresEmbedder(EmbedderBase):
|
|
84
88
|
else:
|
85
89
|
logger.warning("No documents to upload.")
|
86
90
|
|
91
|
+
@logger.trace_function(log_args=False, log_result=False)
|
87
92
|
def __convert_to_search_document(self, document: SourceDocument):
|
88
93
|
logger.info(f"Generating embeddings for document ID: {document.id}")
|
89
94
|
embedded_content = self.llm_helper.generate_embeddings(document.content)
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import hashlib
|
2
2
|
import json
|
3
|
-
import logging
|
4
3
|
from typing import List
|
5
4
|
from urllib.parse import urlparse
|
6
5
|
|
@@ -19,7 +18,10 @@ from ..document_loading_helper import DocumentLoading
|
|
19
18
|
from ..document_chunking_helper import DocumentChunking
|
20
19
|
from ...common.source_document import SourceDocument
|
21
20
|
|
22
|
-
|
21
|
+
from logging_config import logger
|
22
|
+
env_helper: EnvHelper = EnvHelper()
|
23
|
+
log_args = env_helper.LOG_ARGS
|
24
|
+
log_result = env_helper.LOG_RESULT
|
23
25
|
|
24
26
|
|
25
27
|
class PushEmbedder(EmbedderBase):
|
@@ -40,6 +42,7 @@ class PushEmbedder(EmbedderBase):
|
|
40
42
|
self.embedding_configs[ext] = processor
|
41
43
|
logger.info("Document processors loaded")
|
42
44
|
|
45
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
43
46
|
def embed_file(self, source_url: str, file_name: str):
|
44
47
|
logger.info(f"Embedding file: {file_name} from URL: {source_url}")
|
45
48
|
file_extension = file_name.split(".")[-1].lower()
|
@@ -55,6 +58,7 @@ class PushEmbedder(EmbedderBase):
|
|
55
58
|
file_name, {"embeddings_added": "true"}
|
56
59
|
)
|
57
60
|
|
61
|
+
@logger.trace_function(log_args=False, log_result=log_result)
|
58
62
|
def __embed(
|
59
63
|
self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
|
60
64
|
):
|
@@ -101,6 +105,7 @@ class PushEmbedder(EmbedderBase):
|
|
101
105
|
else:
|
102
106
|
logger.warning("No documents to upload.")
|
103
107
|
|
108
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
104
109
|
def __generate_image_caption(self, source_url):
|
105
110
|
logger.info(f"Generating image caption for URL: {source_url}")
|
106
111
|
model = self.env_helper.AZURE_OPENAI_VISION_MODEL
|
@@ -129,6 +134,7 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
|
|
129
134
|
logger.info("Caption generation completed")
|
130
135
|
return caption
|
131
136
|
|
137
|
+
@logger.trace_function(log_args=False, log_result=False)
|
132
138
|
def __convert_to_search_document(self, document: SourceDocument):
|
133
139
|
logger.info(f"Converting document ID {document.id} to search document format")
|
134
140
|
embedded_content = self.llm_helper.generate_embeddings(document.content)
|
@@ -152,10 +158,12 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
|
|
152
158
|
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
|
153
159
|
}
|
154
160
|
|
161
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
155
162
|
def __generate_document_id(self, source_url: str) -> str:
|
156
163
|
hash_key = hashlib.sha1(f"{source_url}_1".encode("utf-8")).hexdigest()
|
157
164
|
return f"doc_{hash_key}"
|
158
165
|
|
166
|
+
@logger.trace_function(log_args=False, log_result=False)
|
159
167
|
def __create_image_document(
|
160
168
|
self,
|
161
169
|
source_url: str,
|
@@ -52,9 +52,9 @@ cwyodmodules/batch/utilities/helpers/config/default_employee_assistant_prompt.tx
|
|
52
52
|
cwyodmodules/batch/utilities/helpers/config/embedding_config.py,sha256=9pCJxpsouln9dngjVHaKGFYP14PrwmSts_UFDytSiVk,950
|
53
53
|
cwyodmodules/batch/utilities/helpers/embedders/embedder_base.py,sha256=z34LTNGzjiHr6_YWZ8NejUsX1KKYqXPWcqZ8mW_3CHI,699
|
54
54
|
cwyodmodules/batch/utilities/helpers/embedders/embedder_factory.py,sha256=cJ9ZTXZEyOJ5TLB6pDsb9zvUZrJYY-LD40n0l1-qHcw,790
|
55
|
-
cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py,sha256=
|
56
|
-
cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py,sha256=
|
57
|
-
cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py,sha256=
|
55
|
+
cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py,sha256=SWm81GhH149_E9Nd2hi53a-e2uTRz6QIVFQ4dFakCs0,3213
|
56
|
+
cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py,sha256=fUYRui6whPtoWpaCrJeDvvm3C3F48kuktwwVS18h4kQ,4740
|
57
|
+
cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py,sha256=lUc89j5tCYh8EUGsLULnVkI5jM51rgxLwezylPYP__w,8931
|
58
58
|
cwyodmodules/batch/utilities/integrated_vectorization/azure_search_datasource.py,sha256=rDwPgr-UCSYscc7hPOUJMwP09a9rX1MXAGf94TubdQo,2231
|
59
59
|
cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py,sha256=6jNz1nnIAPV57mMSWeKC-A6JQ5bLZW63rjgauF0wtY0,6579
|
60
60
|
cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py,sha256=rSDwIONB3BlpkXk6pm7-B2LpJ8WkZKJJ2WSr1YttVFk,3236
|
@@ -109,8 +109,8 @@ cwyodmodules/graphrag/query/generate.py,sha256=xBnZs2U9xFWtPk4AfAZgYKbHdcxNcIO6Q
|
|
109
109
|
cwyodmodules/graphrag/query/graph_search.py,sha256=95h3ecSWx4864XgKABtG0fh3Nk8HkqJVzoCrO8daJ-Y,7724
|
110
110
|
cwyodmodules/graphrag/query/types.py,sha256=1Iq1dp4I4a56_cuFjOZ0NTgd0A2_MpVFznp_czgt6cI,617
|
111
111
|
cwyodmodules/graphrag/query/vector_search.py,sha256=9Gwu9LPjtoAYUU8WKqCvbCHAIg3dpk71reoYd1scLnQ,1807
|
112
|
-
cwyodmodules-0.3.35.dist-info/licenses/LICENSE,sha256=UqBDTipijsSW2ZSOXyTZnMsXmLoEHTgNEM0tL4g-Sso,1150
|
113
|
-
cwyodmodules-0.3.
|
114
|
-
cwyodmodules-0.3.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
115
|
-
cwyodmodules-0.3.35.dist-info/top_level.txt,sha256=99RENLbkdRX-qpJvsxZ5AfmTL5s6shSaKOWYpz1vwzg,13
|
116
|
-
cwyodmodules-0.3.35.dist-info/RECORD,,
|
112
|
+
cwyodmodules-0.3.36.dist-info/licenses/LICENSE,sha256=UqBDTipijsSW2ZSOXyTZnMsXmLoEHTgNEM0tL4g-Sso,1150
|
113
|
+
cwyodmodules-0.3.36.dist-info/METADATA,sha256=TLCeuQ_6a1nn0kX2gvSfD1UtIuZ53eP90zbcV8svSaE,2002
|
114
|
+
cwyodmodules-0.3.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
115
|
+
cwyodmodules-0.3.36.dist-info/top_level.txt,sha256=99RENLbkdRX-qpJvsxZ5AfmTL5s6shSaKOWYpz1vwzg,13
|
116
|
+
cwyodmodules-0.3.36.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|