cwyodmodules 0.3.32__py3-none-any.whl → 0.3.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/api/chat_history.py +14 -7
- cwyodmodules/batch/utilities/chat_history/auth_utils.py +7 -3
- cwyodmodules/batch/utilities/chat_history/cosmosdb.py +17 -1
- cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +239 -254
- cwyodmodules/batch/utilities/common/source_document.py +60 -61
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/layout.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/page.py +8 -3
- cwyodmodules/batch/utilities/document_loading/read.py +30 -34
- cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +10 -3
- cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +6 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +15 -6
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +24 -2
- cwyodmodules/batch/utilities/helpers/env_helper.py +9 -9
- cwyodmodules/batch/utilities/helpers/lightrag_helper.py +9 -2
- cwyodmodules/batch/utilities/helpers/llm_helper.py +13 -2
- cwyodmodules/batch/utilities/helpers/secret_helper.py +9 -9
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +8 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +9 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +9 -3
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +135 -138
- cwyodmodules/batch/utilities/parser/output_parser_tool.py +64 -64
- cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +91 -93
- cwyodmodules/batch/utilities/search/azure_search_handler.py +16 -3
- cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +14 -2
- cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +36 -24
- cwyodmodules/batch/utilities/search/lightrag_search_handler.py +14 -2
- cwyodmodules/batch/utilities/search/postgres_search_handler.py +100 -97
- cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +103 -104
- cwyodmodules/batch/utilities/search/search.py +21 -24
- cwyodmodules/batch/utilities/tools/content_safety_checker.py +66 -78
- cwyodmodules/batch/utilities/tools/post_prompt_tool.py +48 -60
- cwyodmodules/batch/utilities/tools/question_answer_tool.py +196 -206
- cwyodmodules/batch/utilities/tools/text_processing_tool.py +36 -39
- cwyodmodules/logging_config.py +15 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/METADATA +2 -1
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/RECORD +46 -45
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,13 @@
|
|
1
1
|
import requests
|
2
2
|
import datetime
|
3
3
|
from semantic_kernel.functions import kernel_function
|
4
|
-
from logging import getLogger
|
5
|
-
from opentelemetry import trace, baggage
|
6
|
-
from opentelemetry.propagate import extract
|
7
|
-
from typing import Annotated
|
8
4
|
from ..tools.text_processing_tool import TextProcessingTool
|
9
|
-
from
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
6
|
+
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
14
11
|
|
15
12
|
class OutlookCalendarPlugin:
|
16
13
|
def __init__(self, question: str, chat_history: list[dict], user_info: dict):
|
@@ -19,6 +16,7 @@ class OutlookCalendarPlugin:
|
|
19
16
|
self.user_info = user_info
|
20
17
|
self.env_helper = EnvHelper()
|
21
18
|
|
19
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
22
20
|
def _get_access_token(self) -> str:
|
23
21
|
logger.info("Retrieving access token from user info")
|
24
22
|
access_token = self.user_info.get("access_token", None)
|
@@ -29,93 +27,93 @@ class OutlookCalendarPlugin:
|
|
29
27
|
return access_token
|
30
28
|
|
31
29
|
@kernel_function(name="get_calendar_events", description="Get upcoming Outlook calendar events, appointments, metings, etc.")
|
30
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
32
31
|
def get_calendar_events(self, days: int = 1) -> str:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
if resp.status_code != 200:
|
54
|
-
answer = TextProcessingTool().answer_question(
|
55
|
-
question=self.question,
|
56
|
-
chat_history=self.chat_history,
|
57
|
-
text=f"Failed to fetch events: {resp.text[0:120]}",
|
58
|
-
operation=f"Explain the user in his language {language} that you failed to fetch calendar events due to an error.",
|
59
|
-
)
|
60
|
-
return answer
|
61
|
-
events = resp.json().get("value", [])
|
62
|
-
if not events:
|
63
|
-
answer = TextProcessingTool().answer_question(
|
64
|
-
question=self.question,
|
65
|
-
chat_history=self.chat_history,
|
66
|
-
text="No events found.",
|
67
|
-
operation=f"Explain the user in his language {language} that no events were found in the calendar.",
|
68
|
-
)
|
69
|
-
return answer
|
70
|
-
events_text = "\n".join([f"{e.get('subject', 'No subject')} at {e.get('start', {}).get('dateTime', 'Unknown time')}" for e in events])
|
32
|
+
language = self.env_helper.AZURE_MAIN_CHAT_LANGUAGE
|
33
|
+
logger.info("Method get_calendar_events of OutlookCalendarPlugin started")
|
34
|
+
try:
|
35
|
+
logger.info("Retrieving access token for calendar events")
|
36
|
+
token = self._get_access_token()
|
37
|
+
except Exception as e:
|
38
|
+
answer = TextProcessingTool().answer_question(
|
39
|
+
question=self.question,
|
40
|
+
chat_history=self.chat_history,
|
41
|
+
text=f"Authentication error: {str(e)}",
|
42
|
+
operation="Explain the user in his language {language} that you failed to get calendar appointment due to an error.",
|
43
|
+
)
|
44
|
+
return answer
|
45
|
+
headers = {"Authorization": f"Bearer {token}"}
|
46
|
+
now = datetime.datetime.utcnow().isoformat() + "Z"
|
47
|
+
end = (datetime.datetime.utcnow() + datetime.timedelta(days=days)).isoformat() + "Z"
|
48
|
+
url = f"https://graph.microsoft.com/v1.0/me/calendarview?startDateTime={now}&endDateTime={end}"
|
49
|
+
resp = requests.get(url, headers=headers)
|
50
|
+
logger.info("Calendar get results: %s", resp.text[0:120])
|
51
|
+
if resp.status_code != 200:
|
71
52
|
answer = TextProcessingTool().answer_question(
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
53
|
+
question=self.question,
|
54
|
+
chat_history=self.chat_history,
|
55
|
+
text=f"Failed to fetch events: {resp.text[0:120]}",
|
56
|
+
operation=f"Explain the user in his language {language} that you failed to fetch calendar events due to an error.",
|
57
|
+
)
|
77
58
|
return answer
|
59
|
+
events = resp.json().get("value", [])
|
60
|
+
if not events:
|
61
|
+
answer = TextProcessingTool().answer_question(
|
62
|
+
question=self.question,
|
63
|
+
chat_history=self.chat_history,
|
64
|
+
text="No events found.",
|
65
|
+
operation=f"Explain the user in his language {language} that no events were found in the calendar.",
|
66
|
+
)
|
67
|
+
return answer
|
68
|
+
events_text = "\n".join([f"{e.get('subject', 'No subject')} at {e.get('start', {}).get('dateTime', 'Unknown time')}" for e in events])
|
69
|
+
answer = TextProcessingTool().answer_question(
|
70
|
+
question=self.question,
|
71
|
+
chat_history=self.chat_history,
|
72
|
+
text=events_text,
|
73
|
+
operation=f"Summarize the calendar schedule in the user's {language}.",
|
74
|
+
)
|
75
|
+
return answer
|
78
76
|
|
79
77
|
@kernel_function(name="schedule_appointment", description="Schedule a new Outlook calendar appointment, meeting, etc.")
|
78
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
80
79
|
def schedule_appointment(self, subject: str, start_time: str, end_time: str) -> str:
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
return answer
|
80
|
+
logger.info("Method schedule_appointment of OutlookCalendarPlugin started")
|
81
|
+
language = self.env_helper.AZURE_MAIN_CHAT_LANGUAGE
|
82
|
+
try:
|
83
|
+
token = self._get_access_token()
|
84
|
+
except Exception as e:
|
85
|
+
answer = TextProcessingTool().answer_question(
|
86
|
+
question=self.question,
|
87
|
+
chat_history=self.chat_history,
|
88
|
+
text=f"Failed to schedule appointment: {str(e)}",
|
89
|
+
operation=f"Explain the user in his language {language} that you failed to schedule a calendar appointment due to an error.",
|
90
|
+
)
|
91
|
+
return answer
|
92
|
+
headers = {
|
93
|
+
"Authorization": f"Bearer {token}",
|
94
|
+
"Content-Type": "application/json"
|
95
|
+
}
|
96
|
+
url = "https://graph.microsoft.com/v1.0/me/events"
|
97
|
+
event = {
|
98
|
+
"subject": subject,
|
99
|
+
"start": {"dateTime": start_time, "timeZone": "UTC"},
|
100
|
+
"end": {"dateTime": end_time, "timeZone": "UTC"},
|
101
|
+
}
|
102
|
+
resp = requests.post(url, headers=headers, json=event)
|
103
|
+
logger.info("Calendar set results: %s", resp.text[0:120])
|
104
|
+
if resp.status_code == 201:
|
105
|
+
answer = TextProcessingTool().answer_question(
|
106
|
+
question=self.question,
|
107
|
+
chat_history=self.chat_history,
|
108
|
+
text="Appointment scheduled successfully.",
|
109
|
+
operation=f"Explain to the user in his language {language} that the appointment was scheduled successfully. And summarize the appointment details. And all appointments shortly before and after the scheduled appointment.",
|
110
|
+
)
|
111
|
+
return answer
|
112
|
+
else:
|
113
|
+
answer = TextProcessingTool().answer_question(
|
114
|
+
question=self.question,
|
115
|
+
chat_history=self.chat_history,
|
116
|
+
text=f"Failed to schedule appointment: {resp.text[0:120]}",
|
117
|
+
operation=f"Explain to the user in his language {language} that the appointment scheduling failed.",
|
118
|
+
)
|
119
|
+
return answer
|
@@ -1,6 +1,4 @@
|
|
1
|
-
import logging
|
2
1
|
from typing import List
|
3
|
-
|
4
2
|
from .search_handler_base import SearchHandlerBase
|
5
3
|
from ..helpers.llm_helper import LLMHelper
|
6
4
|
from ..helpers.azure_computer_vision_client import AzureComputerVisionClient
|
@@ -10,7 +8,11 @@ import json
|
|
10
8
|
from azure.search.documents.models import VectorizedQuery
|
11
9
|
import tiktoken
|
12
10
|
|
13
|
-
|
11
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
12
|
+
from logging_config import logger
|
13
|
+
env_helper: EnvHelper = EnvHelper()
|
14
|
+
log_args = env_helper.LOG_ARGS
|
15
|
+
log_result = env_helper.LOG_RESULT
|
14
16
|
|
15
17
|
|
16
18
|
class AzureSearchHandler(SearchHandlerBase):
|
@@ -21,14 +23,17 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
21
23
|
self.llm_helper = LLMHelper()
|
22
24
|
self.azure_computer_vision_client = AzureComputerVisionClient(env_helper)
|
23
25
|
|
26
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
24
27
|
def create_search_client(self):
|
25
28
|
return AzureSearchHelper().get_search_client()
|
26
29
|
|
30
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
27
31
|
def perform_search(self, filename):
|
28
32
|
return self.search_client.search(
|
29
33
|
"*", select="title, content, metadata", filter=f"title eq '{filename}'"
|
30
34
|
)
|
31
35
|
|
36
|
+
@logger.trace_function(log_args=False, log_result=False)
|
32
37
|
def process_results(self, results):
|
33
38
|
logger.info("Processing search results")
|
34
39
|
if results is None:
|
@@ -42,11 +47,13 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
42
47
|
logger.info("Processed results")
|
43
48
|
return data
|
44
49
|
|
50
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
45
51
|
def get_files(self):
|
46
52
|
return self.search_client.search(
|
47
53
|
"*", select="id, title", include_total_count=True
|
48
54
|
)
|
49
55
|
|
56
|
+
@logger.trace_function(log_args=False, log_result=False)
|
50
57
|
def output_results(self, results):
|
51
58
|
files = {}
|
52
59
|
for result in results:
|
@@ -59,6 +66,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
59
66
|
|
60
67
|
return files
|
61
68
|
|
69
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
62
70
|
def delete_files(self, files):
|
63
71
|
ids_to_delete = []
|
64
72
|
files_to_delete = []
|
@@ -70,6 +78,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
70
78
|
|
71
79
|
return ", ".join(files_to_delete)
|
72
80
|
|
81
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
73
82
|
def search_by_blob_url(self, blob_url):
|
74
83
|
return self.search_client.search(
|
75
84
|
"*",
|
@@ -78,6 +87,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
78
87
|
filter=f"source eq '{blob_url}_SAS_TOKEN_PLACEHOLDER_'",
|
79
88
|
)
|
80
89
|
|
90
|
+
@logger.trace_function(log_args=False, log_result=False)
|
81
91
|
def query_search(self, question) -> List[SourceDocument]:
|
82
92
|
logger.info(f"Performing query search for question: {question}")
|
83
93
|
encoding = tiktoken.get_encoding(self._ENCODER_NAME)
|
@@ -106,6 +116,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
106
116
|
logger.info("Converting search results to SourceDocument list")
|
107
117
|
return self._convert_to_source_documents(results)
|
108
118
|
|
119
|
+
@logger.trace_function(log_args=False, log_result=False)
|
109
120
|
def _semantic_search(
|
110
121
|
self,
|
111
122
|
question: str,
|
@@ -140,6 +151,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
140
151
|
top=self.env_helper.AZURE_SEARCH_TOP_K,
|
141
152
|
)
|
142
153
|
|
154
|
+
@logger.trace_function(log_args=False, log_result=False)
|
143
155
|
def _hybrid_search(
|
144
156
|
self,
|
145
157
|
question: str,
|
@@ -172,6 +184,7 @@ class AzureSearchHandler(SearchHandlerBase):
|
|
172
184
|
top=self.env_helper.AZURE_SEARCH_TOP_K,
|
173
185
|
)
|
174
186
|
|
187
|
+
@logger.trace_function(log_args=False, log_result=False)
|
175
188
|
def _convert_to_source_documents(self, search_results) -> List[SourceDocument]:
|
176
189
|
source_documents = []
|
177
190
|
for source in search_results:
|
@@ -1,11 +1,14 @@
|
|
1
|
-
import logging
|
2
1
|
from typing import List
|
3
2
|
from ..helpers.lightrag_helper import LightRAGHelper
|
4
3
|
from .search_handler_base import SearchHandlerBase
|
5
4
|
from ..common.source_document import SourceDocument
|
6
5
|
import json
|
7
6
|
|
8
|
-
|
7
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
9
12
|
|
10
13
|
|
11
14
|
class AzureSearchHandlerLightRag(SearchHandlerBase):
|
@@ -13,14 +16,17 @@ class AzureSearchHandlerLightRag(SearchHandlerBase):
|
|
13
16
|
super().__init__(env_helper)
|
14
17
|
self.light_rag_helper = LightRAGHelper(env_helper)
|
15
18
|
|
19
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
16
20
|
def create_search_client(self):
|
17
21
|
return self.light_rag_helper.get_search_client()
|
18
22
|
|
23
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
19
24
|
def perform_search(self, filename):
|
20
25
|
return self.light_rag_helper.search(
|
21
26
|
"*", select="title, content, metadata", filter=f"title eq '{filename}'"
|
22
27
|
)
|
23
28
|
|
29
|
+
@logger.trace_function(log_args=False, log_result=False)
|
24
30
|
def process_results(self, results):
|
25
31
|
logger.info("Processing search results")
|
26
32
|
if results is None:
|
@@ -33,9 +39,11 @@ class AzureSearchHandlerLightRag(SearchHandlerBase):
|
|
33
39
|
logger.info("Processed results")
|
34
40
|
return data
|
35
41
|
|
42
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
36
43
|
def get_files(self):
|
37
44
|
return self.light_rag_helper.get_files()
|
38
45
|
|
46
|
+
@logger.trace_function(log_args=False, log_result=False)
|
39
47
|
def output_results(self, results):
|
40
48
|
files = {}
|
41
49
|
for result in results:
|
@@ -48,6 +56,7 @@ class AzureSearchHandlerLightRag(SearchHandlerBase):
|
|
48
56
|
|
49
57
|
return files
|
50
58
|
|
59
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
51
60
|
def delete_files(self, files):
|
52
61
|
ids_to_delete = []
|
53
62
|
files_to_delete = []
|
@@ -59,15 +68,18 @@ class AzureSearchHandlerLightRag(SearchHandlerBase):
|
|
59
68
|
|
60
69
|
return ", ".join(files_to_delete)
|
61
70
|
|
71
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
62
72
|
def search_by_blob_url(self, blob_url):
|
63
73
|
return self.light_rag_helper.search_by_blob_url(blob_url)
|
64
74
|
|
75
|
+
@logger.trace_function(log_args=False, log_result=False)
|
65
76
|
def query_search(self, question) -> List[SourceDocument]:
|
66
77
|
logger.info(f"Performing query search for question: {question}")
|
67
78
|
results = self.light_rag_helper.query_search(question)
|
68
79
|
logger.info("Converting search results to SourceDocument list")
|
69
80
|
return self._convert_to_source_documents(results)
|
70
81
|
|
82
|
+
@logger.trace_function(log_args=False, log_result=False)
|
71
83
|
def _convert_to_source_documents(self, search_results) -> List[SourceDocument]:
|
72
84
|
source_documents = []
|
73
85
|
for source in search_results:
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from typing import List
|
3
2
|
from .search_handler_base import SearchHandlerBase
|
4
3
|
from azure.search.documents import SearchClient
|
@@ -8,16 +7,20 @@ from azure.core.credentials import AzureKeyCredential
|
|
8
7
|
from ..helpers.azure_identity_helper import AzureIdentityHelper
|
9
8
|
from ..common.source_document import SourceDocument
|
10
9
|
import re
|
11
|
-
|
10
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
11
|
+
from logging_config import logger
|
12
|
+
env_helper: EnvHelper = EnvHelper()
|
13
|
+
log_args = env_helper.LOG_ARGS
|
14
|
+
log_result = env_helper.LOG_RESULT
|
12
15
|
|
13
16
|
class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
14
17
|
def __init__(self):
|
15
18
|
self.azure_identity_helper = AzureIdentityHelper()
|
16
|
-
|
19
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
17
20
|
def create_search_client(self):
|
18
|
-
|
21
|
+
logger.info("Creating Azure Search Client.")
|
19
22
|
if self._check_index_exists():
|
20
|
-
|
23
|
+
logger.info("Search index exists. Returning Search Client.")
|
21
24
|
return SearchClient(
|
22
25
|
endpoint=self.env_helper.AZURE_SEARCH_SERVICE,
|
23
26
|
index_name=self.env_helper.AZURE_SEARCH_INDEX,
|
@@ -28,8 +31,9 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
28
31
|
),
|
29
32
|
)
|
30
33
|
|
34
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
31
35
|
def perform_search(self, filename):
|
32
|
-
|
36
|
+
logger.info(f"Performing search for file: {filename}.")
|
33
37
|
if self._check_index_exists():
|
34
38
|
return self.search_client.search(
|
35
39
|
search_text="*",
|
@@ -37,27 +41,30 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
37
41
|
filter=f"title eq '{filename}'",
|
38
42
|
)
|
39
43
|
|
44
|
+
@logger.trace_function(log_args=False, log_result=False)
|
40
45
|
def process_results(self, results):
|
41
|
-
|
46
|
+
logger.info("Processing search results.")
|
42
47
|
if results is None:
|
43
|
-
|
48
|
+
logger.warning("No results found to process.")
|
44
49
|
return []
|
45
50
|
data = [
|
46
51
|
[re.findall(r"\d+", result["chunk_id"])[-1], result["content"]]
|
47
52
|
for result in results
|
48
53
|
]
|
49
|
-
|
54
|
+
logger.info(f"Processed {len(data)} results.")
|
50
55
|
return data
|
51
56
|
|
57
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
52
58
|
def get_files(self):
|
53
|
-
|
59
|
+
logger.info("Fetching files from search index.")
|
54
60
|
if self._check_index_exists():
|
55
61
|
return self.search_client.search(
|
56
62
|
"*", select="id, chunk_id, title", include_total_count=True
|
57
63
|
)
|
58
64
|
|
65
|
+
@logger.trace_function(log_args=False, log_result=False)
|
59
66
|
def output_results(self, results):
|
60
|
-
|
67
|
+
logger.info("Organizing search results into output format.")
|
61
68
|
files = {}
|
62
69
|
for result in results:
|
63
70
|
id = result["chunk_id"]
|
@@ -68,8 +75,9 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
68
75
|
files[filename] = [id]
|
69
76
|
return files
|
70
77
|
|
78
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
71
79
|
def search_by_blob_url(self, blob_url: str):
|
72
|
-
|
80
|
+
logger.info(f"Searching by blob URL: {blob_url}.")
|
73
81
|
if self._check_index_exists():
|
74
82
|
title = blob_url.split(f"{self.env_helper.AZURE_BLOB_CONTAINER_NAME}/")[1]
|
75
83
|
return self.search_client.search(
|
@@ -79,8 +87,9 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
79
87
|
filter=f"title eq '{title}'",
|
80
88
|
)
|
81
89
|
|
90
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
82
91
|
def delete_files(self, files):
|
83
|
-
|
92
|
+
logger.info("Deleting files.")
|
84
93
|
ids_to_delete = []
|
85
94
|
files_to_delete = []
|
86
95
|
|
@@ -90,24 +99,26 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
90
99
|
|
91
100
|
self.search_client.delete_documents(ids_to_delete)
|
92
101
|
|
93
|
-
|
102
|
+
logger.info(f"Deleted files: {', '.join(files_to_delete)}.")
|
94
103
|
return ", ".join(files_to_delete)
|
95
104
|
|
105
|
+
@logger.trace_function(log_args=False, log_result=False)
|
96
106
|
def query_search(self, question) -> List[SourceDocument]:
|
97
|
-
|
107
|
+
logger.info(f"Querying search for question: {question}.")
|
98
108
|
if self._check_index_exists():
|
99
|
-
|
109
|
+
logger.info("Search index exists. Proceeding with search.")
|
100
110
|
if self.env_helper.AZURE_SEARCH_USE_SEMANTIC_SEARCH:
|
101
|
-
|
111
|
+
logger.info("Using semantic search.")
|
102
112
|
search_results = self._semantic_search(question)
|
103
113
|
else:
|
104
|
-
|
114
|
+
logger.info("Using hybrid search.")
|
105
115
|
search_results = self._hybrid_search(question)
|
106
|
-
|
116
|
+
logger.info("Search completed. Converting results to SourceDocuments.")
|
107
117
|
return self._convert_to_source_documents(search_results)
|
108
118
|
|
119
|
+
@logger.trace_function(log_args=False, log_result=False)
|
109
120
|
def _hybrid_search(self, question: str):
|
110
|
-
|
121
|
+
logger.info(f"Performing hybrid search for question: {question}.")
|
111
122
|
vector_query = VectorizableTextQuery(
|
112
123
|
text=question,
|
113
124
|
k_nearest_neighbors=self.env_helper.AZURE_SEARCH_TOP_K,
|
@@ -120,8 +131,9 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
120
131
|
top=self.env_helper.AZURE_SEARCH_TOP_K,
|
121
132
|
)
|
122
133
|
|
134
|
+
@logger.trace_function(log_args=False, log_result=False)
|
123
135
|
def _semantic_search(self, question: str):
|
124
|
-
|
136
|
+
logger.info(f"Performing semantic search for question: {question}.")
|
125
137
|
vector_query = VectorizableTextQuery(
|
126
138
|
text=question,
|
127
139
|
k_nearest_neighbors=self.env_helper.AZURE_SEARCH_TOP_K,
|
@@ -138,9 +150,9 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
138
150
|
query_answer="extractive",
|
139
151
|
top=self.env_helper.AZURE_SEARCH_TOP_K,
|
140
152
|
)
|
141
|
-
|
153
|
+
@logger.trace_function(log_args=False, log_result=False)
|
142
154
|
def _convert_to_source_documents(self, search_results) -> List[SourceDocument]:
|
143
|
-
|
155
|
+
logger.info("Converting search results to SourceDocument objects.")
|
144
156
|
source_documents = []
|
145
157
|
for source in search_results:
|
146
158
|
source_documents.append(
|
@@ -152,7 +164,7 @@ class IntegratedVectorizationSearchHandler(SearchHandlerBase):
|
|
152
164
|
chunk_id=source.get("chunk_id"),
|
153
165
|
)
|
154
166
|
)
|
155
|
-
|
167
|
+
logger.info("Converted SourceDocument objects.")
|
156
168
|
return source_documents
|
157
169
|
|
158
170
|
def _extract_source_url(self, original_source: str) -> str:
|
@@ -1,10 +1,14 @@
|
|
1
|
-
import logging
|
2
1
|
from typing import List
|
3
2
|
from ..helpers.lightrag_helper import LightRAGHelper
|
4
3
|
from .search_handler_base import SearchHandlerBase
|
5
4
|
from ..common.source_document import SourceDocument
|
6
5
|
|
7
|
-
|
6
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
7
|
+
from logging_config import logger
|
8
|
+
env_helper: EnvHelper = EnvHelper()
|
9
|
+
log_args = env_helper.LOG_ARGS
|
10
|
+
log_result = env_helper.LOG_RESULT
|
11
|
+
|
8
12
|
|
9
13
|
|
10
14
|
class LightRAGSearchHandler(SearchHandlerBase):
|
@@ -12,6 +16,7 @@ class LightRAGSearchHandler(SearchHandlerBase):
|
|
12
16
|
super().__init__(env_helper)
|
13
17
|
self.lightrag_helper = LightRAGHelper()
|
14
18
|
|
19
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
15
20
|
def query_search(self, question) -> List[SourceDocument]:
|
16
21
|
logger.info(f"Performing query search for question: {question}")
|
17
22
|
search_results = self.lightrag_helper.search(question)
|
@@ -19,6 +24,7 @@ class LightRAGSearchHandler(SearchHandlerBase):
|
|
19
24
|
logger.info(f"Found {len(source_documents)} source documents.")
|
20
25
|
return source_documents
|
21
26
|
|
27
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
22
28
|
def _convert_to_source_documents(self, search_results) -> List[SourceDocument]:
|
23
29
|
source_documents = []
|
24
30
|
for source in search_results:
|
@@ -35,26 +41,32 @@ class LightRAGSearchHandler(SearchHandlerBase):
|
|
35
41
|
)
|
36
42
|
return source_documents
|
37
43
|
|
44
|
+
@logger.trace_function(log_args=False, log_result=False)
|
38
45
|
def create_vector_store(self, documents_to_upload):
|
39
46
|
logger.info(f"Creating vector store with {len(documents_to_upload)} documents.")
|
40
47
|
return self.lightrag_helper.create_vector_store(documents_to_upload)
|
41
48
|
|
49
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
42
50
|
def perform_search(self, filename):
|
43
51
|
logger.info(f"Performing search for filename: {filename}")
|
44
52
|
return self.lightrag_helper.perform_search(filename)
|
45
53
|
|
54
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
46
55
|
def get_files(self):
|
47
56
|
logger.info("Fetching files from LightRAG.")
|
48
57
|
return self.lightrag_helper.get_files()
|
49
58
|
|
59
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
50
60
|
def delete_files(self, files):
|
51
61
|
logger.info(f"Deleting files: {files}")
|
52
62
|
return self.lightrag_helper.delete_files(files)
|
53
63
|
|
64
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
54
65
|
def search_by_blob_url(self, blob_url):
|
55
66
|
logger.info(f"Searching by blob URL: {blob_url}")
|
56
67
|
return self.lightrag_helper.search_by_blob_url(blob_url)
|
57
68
|
|
69
|
+
@logger.trace_function(log_args=log_args, log_result=False)
|
58
70
|
def get_unique_files(self):
|
59
71
|
logger.info("Fetching unique files from LightRAG.")
|
60
72
|
return self.lightrag_helper.get_unique_files()
|