cwyodmodules 0.3.35__py3-none-any.whl → 0.3.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import azure.functions as func
1
2
  from .embedder_base import EmbedderBase
2
3
  from ..env_helper import EnvHelper
3
4
  from ..llm_helper import LLMHelper
@@ -6,9 +7,13 @@ from ...integrated_vectorization.azure_search_indexer import AzureSearchIndexer
6
7
  from ...integrated_vectorization.azure_search_datasource import AzureSearchDatasource
7
8
  from ...integrated_vectorization.azure_search_skillset import AzureSearchSkillset
8
9
  from ..config.config_helper import ConfigHelper
9
- import logging
10
10
 
11
- logger = logging.getLogger("__main__")
11
+ from logging_config import logger
12
+ env_helper: EnvHelper = EnvHelper()
13
+ log_args = env_helper.LOG_ARGS
14
+ log_result = env_helper.LOG_RESULT
15
+
16
+ bp_add_url_embeddings = func.Blueprint()
12
17
 
13
18
 
14
19
  class IntegratedVectorizationEmbedder(EmbedderBase):
@@ -16,13 +21,15 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
16
21
  self.env_helper = env_helper
17
22
  self.llm_helper: LLMHelper = LLMHelper()
18
23
  logger.info("Initialized IntegratedVectorizationEmbedder.")
19
-
24
+
25
+ @logger.trace_function(log_args=log_args, log_result=False)
20
26
  def embed_file(self, source_url: str, file_name: str = None):
21
27
  logger.info(
22
28
  f"Starting embed_file for source_url: {source_url}, file_name: {file_name}."
23
29
  )
24
30
  self.process_using_integrated_vectorization(source_url=source_url)
25
31
 
32
+ @logger.trace_function(log_args=log_args, log_result=False)
26
33
  def process_using_integrated_vectorization(self, source_url: str):
27
34
  logger.info(f"Starting integrated vectorization for source_url: {source_url}.")
28
35
  config = ConfigHelper.get_active_config_or_default()
@@ -46,6 +53,7 @@ class IntegratedVectorizationEmbedder(EmbedderBase):
46
53
  logger.error(f"Error processing {source_url}: {e}")
47
54
  raise e
48
55
 
56
+ @logger.trace_function(log_args=log_args, log_result=log_result)
49
57
  def reprocess_all(self):
50
58
  logger.info("Starting reprocess_all operation.")
51
59
  search_indexer = AzureSearchIndexer(self.env_helper)
@@ -1,5 +1,4 @@
1
1
  import json
2
- import logging
3
2
  from typing import List
4
3
 
5
4
  from ...helpers.llm_helper import LLMHelper
@@ -15,7 +14,10 @@ from ..document_loading_helper import DocumentLoading
15
14
  from ..document_chunking_helper import DocumentChunking
16
15
  from ...common.source_document import SourceDocument
17
16
 
18
- logger = logging.getLogger("__main__")
17
+ from logging_config import logger
18
+ env_helper: EnvHelper = EnvHelper()
19
+ log_args = env_helper.LOG_ARGS
20
+ log_result = env_helper.LOG_RESULT
19
21
 
20
22
 
21
23
  class PostgresEmbedder(EmbedderBase):
@@ -33,6 +35,7 @@ class PostgresEmbedder(EmbedderBase):
33
35
  ext = processor.document_type.lower()
34
36
  self.embedding_configs[ext] = processor
35
37
 
38
+ @logger.trace_function(log_args=log_args, log_result=False)
36
39
  def embed_file(self, source_url: str, file_name: str):
37
40
  logger.info(f"Embedding file: {file_name} from source: {source_url}")
38
41
  file_extension = file_name.split(".")[-1].lower()
@@ -47,6 +50,7 @@ class PostgresEmbedder(EmbedderBase):
47
50
  file_name, {"embeddings_added": "true"}
48
51
  )
49
52
 
53
+ @logger.trace_function(log_args=False, log_result=log_result)
50
54
  def __embed(
51
55
  self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
52
56
  ):
@@ -84,6 +88,7 @@ class PostgresEmbedder(EmbedderBase):
84
88
  else:
85
89
  logger.warning("No documents to upload.")
86
90
 
91
+ @logger.trace_function(log_args=False, log_result=False)
87
92
  def __convert_to_search_document(self, document: SourceDocument):
88
93
  logger.info(f"Generating embeddings for document ID: {document.id}")
89
94
  embedded_content = self.llm_helper.generate_embeddings(document.content)
@@ -1,6 +1,5 @@
1
1
  import hashlib
2
2
  import json
3
- import logging
4
3
  from typing import List
5
4
  from urllib.parse import urlparse
6
5
 
@@ -19,7 +18,10 @@ from ..document_loading_helper import DocumentLoading
19
18
  from ..document_chunking_helper import DocumentChunking
20
19
  from ...common.source_document import SourceDocument
21
20
 
22
- logger = logging.getLogger("__main__")
21
+ from logging_config import logger
22
+ env_helper: EnvHelper = EnvHelper()
23
+ log_args = env_helper.LOG_ARGS
24
+ log_result = env_helper.LOG_RESULT
23
25
 
24
26
 
25
27
  class PushEmbedder(EmbedderBase):
@@ -40,6 +42,7 @@ class PushEmbedder(EmbedderBase):
40
42
  self.embedding_configs[ext] = processor
41
43
  logger.info("Document processors loaded")
42
44
 
45
+ @logger.trace_function(log_args=log_args, log_result=log_result)
43
46
  def embed_file(self, source_url: str, file_name: str):
44
47
  logger.info(f"Embedding file: {file_name} from URL: {source_url}")
45
48
  file_extension = file_name.split(".")[-1].lower()
@@ -55,6 +58,7 @@ class PushEmbedder(EmbedderBase):
55
58
  file_name, {"embeddings_added": "true"}
56
59
  )
57
60
 
61
+ @logger.trace_function(log_args=False, log_result=log_result)
58
62
  def __embed(
59
63
  self, source_url: str, file_extension: str, embedding_config: EmbeddingConfig
60
64
  ):
@@ -101,6 +105,7 @@ class PushEmbedder(EmbedderBase):
101
105
  else:
102
106
  logger.warning("No documents to upload.")
103
107
 
108
+ @logger.trace_function(log_args=log_args, log_result=False)
104
109
  def __generate_image_caption(self, source_url):
105
110
  logger.info(f"Generating image caption for URL: {source_url}")
106
111
  model = self.env_helper.AZURE_OPENAI_VISION_MODEL
@@ -129,6 +134,7 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
129
134
  logger.info("Caption generation completed")
130
135
  return caption
131
136
 
137
+ @logger.trace_function(log_args=False, log_result=False)
132
138
  def __convert_to_search_document(self, document: SourceDocument):
133
139
  logger.info(f"Converting document ID {document.id} to search document format")
134
140
  embedded_content = self.llm_helper.generate_embeddings(document.content)
@@ -152,10 +158,12 @@ If the image is mostly text, use OCR to extract the text as it is displayed in t
152
158
  self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
153
159
  }
154
160
 
161
+ @logger.trace_function(log_args=log_args, log_result=log_result)
155
162
  def __generate_document_id(self, source_url: str) -> str:
156
163
  hash_key = hashlib.sha1(f"{source_url}_1".encode("utf-8")).hexdigest()
157
164
  return f"doc_{hash_key}"
158
165
 
166
+ @logger.trace_function(log_args=False, log_result=False)
159
167
  def __create_image_document(
160
168
  self,
161
169
  source_url: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cwyodmodules
3
- Version: 0.3.35
3
+ Version: 0.3.36
4
4
  Summary: Add your description here
5
5
  Author-email: Patrik <patrikhartl@gmail.com>
6
6
  Classifier: Operating System :: OS Independent
@@ -52,9 +52,9 @@ cwyodmodules/batch/utilities/helpers/config/default_employee_assistant_prompt.tx
52
52
  cwyodmodules/batch/utilities/helpers/config/embedding_config.py,sha256=9pCJxpsouln9dngjVHaKGFYP14PrwmSts_UFDytSiVk,950
53
53
  cwyodmodules/batch/utilities/helpers/embedders/embedder_base.py,sha256=z34LTNGzjiHr6_YWZ8NejUsX1KKYqXPWcqZ8mW_3CHI,699
54
54
  cwyodmodules/batch/utilities/helpers/embedders/embedder_factory.py,sha256=cJ9ZTXZEyOJ5TLB6pDsb9zvUZrJYY-LD40n0l1-qHcw,790
55
- cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py,sha256=sJr3ZT1ijn0and2OEgcQONkcHF_OxPchNFBTv3XKcZk,2856
56
- cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py,sha256=aVXn0UxuaIeBfOvIqByqoLkZn1BOsiJpFxDhLymMAkA,4467
57
- cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py,sha256=uh-JYx1C7MBpMYzsHFfPqZlALzj6GxkkSvSFAsL3NpI,8459
55
+ cwyodmodules/batch/utilities/helpers/embedders/integrated_vectorization_embedder.py,sha256=SWm81GhH149_E9Nd2hi53a-e2uTRz6QIVFQ4dFakCs0,3213
56
+ cwyodmodules/batch/utilities/helpers/embedders/postgres_embedder.py,sha256=fUYRui6whPtoWpaCrJeDvvm3C3F48kuktwwVS18h4kQ,4740
57
+ cwyodmodules/batch/utilities/helpers/embedders/push_embedder.py,sha256=lUc89j5tCYh8EUGsLULnVkI5jM51rgxLwezylPYP__w,8931
58
58
  cwyodmodules/batch/utilities/integrated_vectorization/azure_search_datasource.py,sha256=rDwPgr-UCSYscc7hPOUJMwP09a9rX1MXAGf94TubdQo,2231
59
59
  cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py,sha256=6jNz1nnIAPV57mMSWeKC-A6JQ5bLZW63rjgauF0wtY0,6579
60
60
  cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py,sha256=rSDwIONB3BlpkXk6pm7-B2LpJ8WkZKJJ2WSr1YttVFk,3236
@@ -109,8 +109,8 @@ cwyodmodules/graphrag/query/generate.py,sha256=xBnZs2U9xFWtPk4AfAZgYKbHdcxNcIO6Q
109
109
  cwyodmodules/graphrag/query/graph_search.py,sha256=95h3ecSWx4864XgKABtG0fh3Nk8HkqJVzoCrO8daJ-Y,7724
110
110
  cwyodmodules/graphrag/query/types.py,sha256=1Iq1dp4I4a56_cuFjOZ0NTgd0A2_MpVFznp_czgt6cI,617
111
111
  cwyodmodules/graphrag/query/vector_search.py,sha256=9Gwu9LPjtoAYUU8WKqCvbCHAIg3dpk71reoYd1scLnQ,1807
112
- cwyodmodules-0.3.35.dist-info/licenses/LICENSE,sha256=UqBDTipijsSW2ZSOXyTZnMsXmLoEHTgNEM0tL4g-Sso,1150
113
- cwyodmodules-0.3.35.dist-info/METADATA,sha256=Fcv9mPqvwG7iufW4-v9ryWyuG_7L2DTFK9nb2jOQqLM,2002
114
- cwyodmodules-0.3.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
115
- cwyodmodules-0.3.35.dist-info/top_level.txt,sha256=99RENLbkdRX-qpJvsxZ5AfmTL5s6shSaKOWYpz1vwzg,13
116
- cwyodmodules-0.3.35.dist-info/RECORD,,
112
+ cwyodmodules-0.3.36.dist-info/licenses/LICENSE,sha256=UqBDTipijsSW2ZSOXyTZnMsXmLoEHTgNEM0tL4g-Sso,1150
113
+ cwyodmodules-0.3.36.dist-info/METADATA,sha256=TLCeuQ_6a1nn0kX2gvSfD1UtIuZ53eP90zbcV8svSaE,2002
114
+ cwyodmodules-0.3.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
115
+ cwyodmodules-0.3.36.dist-info/top_level.txt,sha256=99RENLbkdRX-qpJvsxZ5AfmTL5s6shSaKOWYpz1vwzg,13
116
+ cwyodmodules-0.3.36.dist-info/RECORD,,