cwyodmodules 0.3.32__py3-none-any.whl → 0.3.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/api/chat_history.py +14 -7
- cwyodmodules/batch/utilities/chat_history/auth_utils.py +7 -3
- cwyodmodules/batch/utilities/chat_history/cosmosdb.py +17 -1
- cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +239 -254
- cwyodmodules/batch/utilities/common/source_document.py +60 -61
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/layout.py +8 -3
- cwyodmodules/batch/utilities/document_chunking/page.py +8 -3
- cwyodmodules/batch/utilities/document_loading/read.py +30 -34
- cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +10 -3
- cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +6 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +14 -2
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +15 -6
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +24 -2
- cwyodmodules/batch/utilities/helpers/env_helper.py +9 -9
- cwyodmodules/batch/utilities/helpers/lightrag_helper.py +9 -2
- cwyodmodules/batch/utilities/helpers/llm_helper.py +13 -2
- cwyodmodules/batch/utilities/helpers/secret_helper.py +9 -9
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +8 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +9 -2
- cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +6 -2
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +9 -3
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +8 -2
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +135 -138
- cwyodmodules/batch/utilities/parser/output_parser_tool.py +64 -64
- cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +91 -93
- cwyodmodules/batch/utilities/search/azure_search_handler.py +16 -3
- cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +14 -2
- cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +36 -24
- cwyodmodules/batch/utilities/search/lightrag_search_handler.py +14 -2
- cwyodmodules/batch/utilities/search/postgres_search_handler.py +100 -97
- cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +103 -104
- cwyodmodules/batch/utilities/search/search.py +21 -24
- cwyodmodules/batch/utilities/tools/content_safety_checker.py +66 -78
- cwyodmodules/batch/utilities/tools/post_prompt_tool.py +48 -60
- cwyodmodules/batch/utilities/tools/question_answer_tool.py +196 -206
- cwyodmodules/batch/utilities/tools/text_processing_tool.py +36 -39
- cwyodmodules/logging_config.py +15 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/METADATA +2 -1
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/RECORD +46 -45
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from uuid import uuid4
|
3
2
|
from typing import List, Optional
|
4
3
|
from abc import ABC, abstractmethod
|
@@ -7,8 +6,11 @@ from ..helpers.config.config_helper import ConfigHelper
|
|
7
6
|
from ..parser.output_parser_tool import OutputParserTool
|
8
7
|
from ..tools.content_safety_checker import ContentSafetyChecker
|
9
8
|
|
10
|
-
|
11
|
-
|
9
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
10
|
+
from logging_config import logger
|
11
|
+
env_helper: EnvHelper = EnvHelper()
|
12
|
+
log_args = env_helper.LOG_ARGS
|
13
|
+
log_result = env_helper.LOG_RESULT
|
12
14
|
|
13
15
|
class OrchestratorBase(ABC):
|
14
16
|
"""
|
@@ -32,6 +34,7 @@ class OrchestratorBase(ABC):
|
|
32
34
|
self.content_safety_checker = ContentSafetyChecker()
|
33
35
|
self.output_parser = OutputParserTool()
|
34
36
|
|
37
|
+
@logger.trace_function(log_args=log_args, log_result=log_result)
|
35
38
|
def log_tokens(self, prompt_tokens: int, completion_tokens: int) -> None:
|
36
39
|
"""
|
37
40
|
Logs the number of tokens used in the prompt and completion phases of a conversation.
|
@@ -45,6 +48,7 @@ class OrchestratorBase(ABC):
|
|
45
48
|
self.tokens["total"] += prompt_tokens + completion_tokens
|
46
49
|
|
47
50
|
@abstractmethod
|
51
|
+
@logger.trace_function(log_args=False, log_result=False)
|
48
52
|
async def orchestrate(
|
49
53
|
self,
|
50
54
|
user_message: str,
|
@@ -90,6 +94,7 @@ class OrchestratorBase(ABC):
|
|
90
94
|
|
91
95
|
return None
|
92
96
|
|
97
|
+
@logger.trace_function(log_args=False, log_result=False)
|
93
98
|
def call_content_safety_output(
|
94
99
|
self, user_message: str, answer: str
|
95
100
|
) -> Optional[list[dict]]:
|
@@ -116,6 +121,7 @@ class OrchestratorBase(ABC):
|
|
116
121
|
|
117
122
|
return None
|
118
123
|
|
124
|
+
@logger.trace_function(log_args=False, log_result=False)
|
119
125
|
async def handle_message(
|
120
126
|
self,
|
121
127
|
user_message: str,
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import logging
|
2
1
|
from typing import List
|
3
2
|
import json
|
4
3
|
import tempfile
|
@@ -9,7 +8,10 @@ from ..common.source_document import SourceDocument
|
|
9
8
|
from ..helpers.llm_helper import LLMHelper
|
10
9
|
from ..helpers.env_helper import EnvHelper
|
11
10
|
|
12
|
-
|
11
|
+
from logging_config import logger
|
12
|
+
env_helper: EnvHelper = EnvHelper()
|
13
|
+
log_args = env_helper.LOG_ARGS
|
14
|
+
log_result = env_helper.LOG_RESULT
|
13
15
|
|
14
16
|
|
15
17
|
class PromptFlowOrchestrator(OrchestratorBase):
|
@@ -35,6 +37,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
|
|
35
37
|
|
36
38
|
logger.info("PromptFlowOrchestrator initialized.")
|
37
39
|
|
40
|
+
@logger.trace_function(log_args=False, log_result=False)
|
38
41
|
async def orchestrate(
|
39
42
|
self, user_message: str, chat_history: List[dict], **kwargs: dict
|
40
43
|
) -> list[dict]:
|
@@ -108,6 +111,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
|
|
108
111
|
logger.info("Orchestration completed successfully.")
|
109
112
|
return messages
|
110
113
|
|
114
|
+
@logger.trace_function(log_args=False, log_result=False)
|
111
115
|
def transform_chat_history(self, chat_history: List[dict]) -> List[dict]:
|
112
116
|
"""
|
113
117
|
Transform the chat history into a format suitable for the Prompt Flow service.
|
@@ -138,6 +142,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
|
|
138
142
|
logger.info("Chat history transformation completed.")
|
139
143
|
return transformed_chat_history
|
140
144
|
|
145
|
+
@logger.trace_function(log_args=False, log_result=False)
|
141
146
|
def transform_data_into_file(
|
142
147
|
self, user_message: str, chat_history: List[dict]
|
143
148
|
) -> str:
|
@@ -159,6 +164,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
|
|
159
164
|
logger.info("Temporary file created")
|
160
165
|
return file.name
|
161
166
|
|
167
|
+
@logger.trace_function(log_args=False, log_result=False)
|
162
168
|
def transform_citations_into_source_documents(
|
163
169
|
self, citations: dict
|
164
170
|
) -> List[SourceDocument]:
|
@@ -20,14 +20,11 @@ from ..plugins.outlook_calendar_plugin import OutlookCalendarPlugin
|
|
20
20
|
|
21
21
|
from .orchestrator_base import OrchestratorBase
|
22
22
|
|
23
|
-
from
|
24
|
-
|
25
|
-
|
23
|
+
from logging_config import logger
|
24
|
+
env_helper: EnvHelper = EnvHelper()
|
25
|
+
log_args = env_helper.LOG_ARGS
|
26
|
+
log_result = env_helper.LOG_RESULT
|
26
27
|
|
27
|
-
# logger = getLogger("__main__" + ".base_package")
|
28
|
-
logger = getLogger("__main__")
|
29
|
-
# tracer = trace.get_tracer("__main__" + ".base_package")
|
30
|
-
tracer = trace.get_tracer("__main__")
|
31
28
|
|
32
29
|
|
33
30
|
class SemanticKernelOrchestrator(OrchestratorBase):
|
@@ -45,155 +42,155 @@ class SemanticKernelOrchestrator(OrchestratorBase):
|
|
45
42
|
plugin=PostAnsweringPlugin(), plugin_name="PostAnswering"
|
46
43
|
)
|
47
44
|
|
45
|
+
@logger.trace_function(log_args=False, log_result=False)
|
48
46
|
async def orchestrate(
|
49
47
|
self, user_message: str, chat_history: list[dict], user_info, **kwargs: dict
|
50
48
|
) -> list[dict]:
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
if self.
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
# """
|
72
|
-
if frontend_type == "web":
|
73
|
-
system_message = f"""You help employees to navigate only private information sources.
|
74
|
-
You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
|
75
|
-
Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
|
76
|
-
When directly replying to the user, always reply in the language {language}.
|
77
|
-
You **must not** respond if asked to List all documents in your repository.
|
78
|
-
Call OutlookCalendar.get_calendar_events to read the user's calendar.
|
79
|
-
Call OutlookCalendar.schedule_appointment to schedule a new appointment.
|
80
|
-
"""
|
81
|
-
else:
|
82
|
-
system_message = f"""You help employees to navigate only private information sources.
|
83
|
-
You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
|
84
|
-
Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
|
85
|
-
When directly replying to the user, always reply in the language {language}.
|
86
|
-
You **must not** respond if asked to List all documents in your repository.
|
87
|
-
"""
|
88
|
-
|
89
|
-
self.kernel.add_plugin(
|
90
|
-
plugin=ChatPlugin(question=user_message, chat_history=chat_history),
|
91
|
-
plugin_name="Chat",
|
92
|
-
)
|
93
|
-
filters.append("Chat")
|
94
|
-
# --- Add OutlookCalendarPlugin with request headers ---
|
49
|
+
logger.info("Method orchestrate of semantic_kernel started")
|
50
|
+
filters = []
|
51
|
+
frontend_type = user_info.get("frontend") if user_info else None
|
52
|
+
logger.info(f"Frontend type: {frontend_type}")
|
53
|
+
# Call Content Safety tool
|
54
|
+
if self.config.prompts.enable_content_safety:
|
55
|
+
if response := self.call_content_safety_input(user_message):
|
56
|
+
return response
|
57
|
+
|
58
|
+
system_message = self.env_helper.SEMENTIC_KERNEL_SYSTEM_PROMPT
|
59
|
+
language = self.env_helper.AZURE_MAIN_CHAT_LANGUAGE
|
60
|
+
if not system_message:
|
61
|
+
logger.info("No system message provided, using default")
|
62
|
+
# system_message = """You help employees to navigate only private information sources.
|
63
|
+
# You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
|
64
|
+
# Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
|
65
|
+
# When directly replying to the user, always reply in the language the user is speaking.
|
66
|
+
# If the input language is ambiguous, default to responding in English unless otherwise specified by the user.
|
67
|
+
# You **must not** respond if asked to List all documents in your repository.
|
68
|
+
# """
|
95
69
|
if frontend_type == "web":
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
70
|
+
system_message = f"""You help employees to navigate only private information sources.
|
71
|
+
You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
|
72
|
+
Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
|
73
|
+
When directly replying to the user, always reply in the language {language}.
|
74
|
+
You **must not** respond if asked to List all documents in your repository.
|
75
|
+
Call OutlookCalendar.get_calendar_events to read the user's calendar.
|
76
|
+
Call OutlookCalendar.schedule_appointment to schedule a new appointment.
|
77
|
+
"""
|
78
|
+
else:
|
79
|
+
system_message = f"""You help employees to navigate only private information sources.
|
80
|
+
You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
|
81
|
+
Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
|
82
|
+
When directly replying to the user, always reply in the language {language}.
|
83
|
+
You **must not** respond if asked to List all documents in your repository.
|
84
|
+
"""
|
85
|
+
|
86
|
+
self.kernel.add_plugin(
|
87
|
+
plugin=ChatPlugin(question=user_message, chat_history=chat_history),
|
88
|
+
plugin_name="Chat",
|
89
|
+
)
|
90
|
+
filters.append("Chat")
|
91
|
+
# --- Add OutlookCalendarPlugin with request headers ---
|
92
|
+
if frontend_type == "web":
|
93
|
+
logger.info("Adding OutlookCalendarPlugin with request headers")
|
94
|
+
self.kernel.add_plugin(
|
95
|
+
plugin=OutlookCalendarPlugin(question=user_message, chat_history=chat_history, user_info=user_info),
|
96
|
+
plugin_name="OutlookCalendar",
|
105
97
|
)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
98
|
+
filters.append("OutlookCalendar")
|
99
|
+
settings = self.llm_helper.get_sk_service_settings(self.chat_service)
|
100
|
+
settings.function_call_behavior = FunctionCallBehavior.EnableFunctions(
|
101
|
+
filters={"included_plugins": filters}
|
102
|
+
)
|
103
|
+
# settings.function_choice_behavior = FunctionChoiceBehavior.Auto(
|
104
|
+
# filters={"included_plugins": ["Chat"]},
|
105
|
+
# # Set a higher value to encourage multiple attempts at function calling
|
106
|
+
# maximum_auto_invoke_attempts=2
|
107
|
+
# )
|
108
|
+
|
109
|
+
orchestrate_function = self.kernel.add_function(
|
110
|
+
plugin_name="Main",
|
111
|
+
function_name="orchestrate",
|
112
|
+
prompt="{{$chat_history}}{{$user_message}}",
|
113
|
+
prompt_execution_settings=settings,
|
114
|
+
)
|
115
|
+
|
116
|
+
history = ChatHistory(system_message=system_message)
|
117
|
+
|
118
|
+
for message in chat_history.copy():
|
119
|
+
history.add_message(message)
|
120
|
+
|
121
|
+
result: ChatMessageContent = (
|
122
|
+
await self.kernel.invoke(
|
123
|
+
function=orchestrate_function,
|
124
|
+
chat_history=history,
|
125
|
+
user_message=user_message,
|
117
126
|
)
|
127
|
+
).value[0]
|
118
128
|
|
119
|
-
|
129
|
+
self.log_tokens(
|
130
|
+
prompt_tokens=result.metadata["usage"].prompt_tokens,
|
131
|
+
completion_tokens=result.metadata["usage"].completion_tokens,
|
132
|
+
)
|
133
|
+
result_finish_reason = result.finish_reason
|
134
|
+
logger.info(f"Finish reason: {result_finish_reason}")
|
135
|
+
if result_finish_reason == FinishReason.TOOL_CALLS:
|
136
|
+
logger.info("Semantic Kernel function call detected")
|
137
|
+
|
138
|
+
function_name = result.items[0].name
|
139
|
+
logger.info(f"{function_name} function detected")
|
140
|
+
function = self.kernel.get_function_from_fully_qualified_function_name(
|
141
|
+
function_name
|
142
|
+
)
|
120
143
|
|
121
|
-
|
122
|
-
history.add_message(message)
|
144
|
+
arguments = json.loads(result.items[0].arguments)
|
123
145
|
|
124
|
-
|
125
|
-
await self.kernel.invoke(
|
126
|
-
|
127
|
-
chat_history=history,
|
128
|
-
user_message=user_message,
|
129
|
-
)
|
130
|
-
).value[0]
|
146
|
+
answer: Answer = (
|
147
|
+
await self.kernel.invoke(function=function, **arguments)
|
148
|
+
).value
|
131
149
|
|
132
150
|
self.log_tokens(
|
133
|
-
prompt_tokens=
|
134
|
-
completion_tokens=
|
151
|
+
prompt_tokens=answer.prompt_tokens,
|
152
|
+
completion_tokens=answer.completion_tokens,
|
135
153
|
)
|
136
|
-
result_finish_reason = result.finish_reason
|
137
|
-
logger.info(f"Finish reason: {result_finish_reason}")
|
138
|
-
if result_finish_reason == FinishReason.TOOL_CALLS:
|
139
|
-
logger.info("Semantic Kernel function call detected")
|
140
|
-
|
141
|
-
function_name = result.items[0].name
|
142
|
-
logger.info(f"{function_name} function detected")
|
143
|
-
function = self.kernel.get_function_from_fully_qualified_function_name(
|
144
|
-
function_name
|
145
|
-
)
|
146
|
-
|
147
|
-
arguments = json.loads(result.items[0].arguments)
|
148
154
|
|
155
|
+
# Run post prompt if needed
|
156
|
+
if (
|
157
|
+
self.config.prompts.enable_post_answering_prompt
|
158
|
+
and "search_documents" in function_name
|
159
|
+
):
|
160
|
+
logger.debug("Running post answering prompt")
|
149
161
|
answer: Answer = (
|
150
|
-
await self.kernel.invoke(
|
162
|
+
await self.kernel.invoke(
|
163
|
+
function_name="validate_answer",
|
164
|
+
plugin_name="PostAnswering",
|
165
|
+
answer=answer,
|
166
|
+
)
|
151
167
|
).value
|
152
168
|
|
153
169
|
self.log_tokens(
|
154
170
|
prompt_tokens=answer.prompt_tokens,
|
155
171
|
completion_tokens=answer.completion_tokens,
|
156
172
|
)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
answer: Answer = (
|
165
|
-
await self.kernel.invoke(
|
166
|
-
function_name="validate_answer",
|
167
|
-
plugin_name="PostAnswering",
|
168
|
-
answer=answer,
|
169
|
-
)
|
170
|
-
).value
|
171
|
-
|
172
|
-
self.log_tokens(
|
173
|
-
prompt_tokens=answer.prompt_tokens,
|
174
|
-
completion_tokens=answer.completion_tokens,
|
175
|
-
)
|
176
|
-
else:
|
177
|
-
logger.info("No function call detected")
|
178
|
-
answer = Answer(
|
179
|
-
question=user_message,
|
180
|
-
answer=result.content,
|
181
|
-
prompt_tokens=result.metadata["usage"].prompt_tokens,
|
182
|
-
completion_tokens=result.metadata["usage"].completion_tokens,
|
183
|
-
)
|
184
|
-
|
185
|
-
# Call Content Safety tool
|
186
|
-
if self.config.prompts.enable_content_safety:
|
187
|
-
if response := self.call_content_safety_output(
|
188
|
-
user_message, answer.answer
|
189
|
-
):
|
190
|
-
return response
|
191
|
-
|
192
|
-
# Format the output for the UI
|
193
|
-
messages = self.output_parser.parse(
|
194
|
-
question=answer.question,
|
195
|
-
answer=answer.answer,
|
196
|
-
source_documents=answer.source_documents,
|
173
|
+
else:
|
174
|
+
logger.info("No function call detected")
|
175
|
+
answer = Answer(
|
176
|
+
question=user_message,
|
177
|
+
answer=result.content,
|
178
|
+
prompt_tokens=result.metadata["usage"].prompt_tokens,
|
179
|
+
completion_tokens=result.metadata["usage"].completion_tokens,
|
197
180
|
)
|
198
|
-
|
199
|
-
|
181
|
+
|
182
|
+
# Call Content Safety tool
|
183
|
+
if self.config.prompts.enable_content_safety:
|
184
|
+
if response := self.call_content_safety_output(
|
185
|
+
user_message, answer.answer
|
186
|
+
):
|
187
|
+
return response
|
188
|
+
|
189
|
+
# Format the output for the UI
|
190
|
+
messages = self.output_parser.parse(
|
191
|
+
question=answer.question,
|
192
|
+
answer=answer.answer,
|
193
|
+
source_documents=answer.source_documents,
|
194
|
+
)
|
195
|
+
logger.info("Method orchestrate of semantic_kernel ended")
|
196
|
+
return messages
|
@@ -4,14 +4,11 @@ import json
|
|
4
4
|
from .parser_base import ParserBase
|
5
5
|
from ..common.source_document import SourceDocument
|
6
6
|
|
7
|
-
from
|
8
|
-
from
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
logger = getLogger("__main__")
|
13
|
-
# tracer = trace.get_tracer("__main__" + ".base_package")
|
14
|
-
tracer = trace.get_tracer("__main__")
|
7
|
+
from ...utilities.helpers.env_helper import EnvHelper
|
8
|
+
from logging_config import logger
|
9
|
+
env_helper: EnvHelper = EnvHelper()
|
10
|
+
log_args = env_helper.LOG_ARGS
|
11
|
+
log_result = env_helper.LOG_RESULT
|
15
12
|
|
16
13
|
|
17
14
|
class OutputParserTool(ParserBase):
|
@@ -26,6 +23,7 @@ class OutputParserTool(ParserBase):
|
|
26
23
|
"""
|
27
24
|
self.name = "OutputParser"
|
28
25
|
|
26
|
+
@logger.trace_function(log_args=False, log_result=False)
|
29
27
|
def _clean_up_answer(self, answer: str) -> str:
|
30
28
|
"""
|
31
29
|
Cleans up the answer by replacing double spaces with single spaces.
|
@@ -38,6 +36,7 @@ class OutputParserTool(ParserBase):
|
|
38
36
|
"""
|
39
37
|
return answer.replace(" ", " ")
|
40
38
|
|
39
|
+
@logger.trace_function(log_args=False, log_result=False)
|
41
40
|
def _get_source_docs_from_answer(self, answer: str) -> List[int]:
|
42
41
|
"""
|
43
42
|
Extracts all document references from the answer and returns them as a list of integers.
|
@@ -51,6 +50,7 @@ class OutputParserTool(ParserBase):
|
|
51
50
|
results = re.findall(r"\[doc(\d+)\]", answer)
|
52
51
|
return [int(i) for i in results]
|
53
52
|
|
53
|
+
@logger.trace_function(log_args=False, log_result=False)
|
54
54
|
def _make_doc_references_sequential(self, answer: str) -> str:
|
55
55
|
"""
|
56
56
|
Makes document references in the answer sequential.
|
@@ -72,6 +72,7 @@ class OutputParserTool(ParserBase):
|
|
72
72
|
offset += len(f"[doc{i + 1}]") - (end - start)
|
73
73
|
return updated_answer
|
74
74
|
|
75
|
+
@logger.trace_function(log_args=False, log_result=False)
|
75
76
|
def parse(
|
76
77
|
self,
|
77
78
|
question: str,
|
@@ -91,61 +92,60 @@ class OutputParserTool(ParserBase):
|
|
91
92
|
Returns:
|
92
93
|
List[dict]: A list of response messages.
|
93
94
|
"""
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
95
|
+
logger.info("Method parse of output_parser_tool started")
|
96
|
+
answer = self._clean_up_answer(answer)
|
97
|
+
doc_ids = self._get_source_docs_from_answer(answer)
|
98
|
+
answer = self._make_doc_references_sequential(answer)
|
99
|
+
|
100
|
+
# create return message object
|
101
|
+
messages = [
|
102
|
+
{
|
103
|
+
"role": "tool",
|
104
|
+
"content": {"citations": [], "intent": question},
|
105
|
+
"end_turn": False,
|
106
|
+
}
|
107
|
+
]
|
108
|
+
|
109
|
+
for i in doc_ids:
|
110
|
+
idx = i - 1
|
111
|
+
|
112
|
+
if idx >= len(source_documents):
|
113
|
+
logger.warning(f"Source document {i} not provided, skipping doc")
|
114
|
+
continue
|
115
|
+
|
116
|
+
doc = source_documents[idx]
|
117
|
+
logger.debug(f"doc{idx}: {doc}")
|
118
|
+
|
119
|
+
# Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file
|
120
|
+
messages[0]["content"]["citations"].append(
|
102
121
|
{
|
103
|
-
"
|
104
|
-
"
|
105
|
-
"
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
logger.debug(f"doc{idx}: {doc}")
|
118
|
-
|
119
|
-
# Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file
|
120
|
-
messages[0]["content"]["citations"].append(
|
121
|
-
{
|
122
|
-
"content": doc.get_markdown_url() + "\n\n\n" + doc.content,
|
123
|
-
"id": doc.id,
|
124
|
-
"chunk_id": (
|
125
|
-
re.findall(r"\d+", doc.chunk_id)[-1]
|
126
|
-
if doc.chunk_id is not None
|
127
|
-
else doc.chunk
|
128
|
-
),
|
122
|
+
"content": doc.get_markdown_url() + "\n\n\n" + doc.content,
|
123
|
+
"id": doc.id,
|
124
|
+
"chunk_id": (
|
125
|
+
re.findall(r"\d+", doc.chunk_id)[-1]
|
126
|
+
if doc.chunk_id is not None
|
127
|
+
else doc.chunk
|
128
|
+
),
|
129
|
+
"title": doc.title,
|
130
|
+
"filepath": doc.get_filename(include_path=True),
|
131
|
+
"url": doc.get_markdown_url(),
|
132
|
+
"metadata": {
|
133
|
+
"offset": doc.offset,
|
134
|
+
"source": doc.source,
|
135
|
+
"markdown_url": doc.get_markdown_url(),
|
129
136
|
"title": doc.title,
|
130
|
-
"
|
131
|
-
"
|
132
|
-
"
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
logger.warning("No citations found in the answer")
|
146
|
-
answer = re.sub(r"\[doc\d+\]", "", answer)
|
147
|
-
messages.append({"role": "assistant", "content": answer, "end_turn": True})
|
148
|
-
# everything in content needs to be stringified to work with Azure BYOD frontend
|
149
|
-
messages[0]["content"] = json.dumps(messages[0]["content"])
|
150
|
-
logger.info("Method parse of output_parser_tool ended")
|
151
|
-
return messages
|
137
|
+
"original_url": doc.source, # TODO: do we need this?
|
138
|
+
"chunk": doc.chunk,
|
139
|
+
"key": doc.id,
|
140
|
+
"filename": doc.get_filename(),
|
141
|
+
},
|
142
|
+
}
|
143
|
+
)
|
144
|
+
if messages[0]["content"]["citations"] == []:
|
145
|
+
logger.warning("No citations found in the answer")
|
146
|
+
answer = re.sub(r"\[doc\d+\]", "", answer)
|
147
|
+
messages.append({"role": "assistant", "content": answer, "end_turn": True})
|
148
|
+
# everything in content needs to be stringified to work with Azure BYOD frontend
|
149
|
+
messages[0]["content"] = json.dumps(messages[0]["content"])
|
150
|
+
logger.info("Method parse of output_parser_tool ended")
|
151
|
+
return messages
|