cwyodmodules 0.3.32__py3-none-any.whl → 0.3.33__py3-none-any.whl

This diff compares the contents of two package versions that have been publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (46)
  1. cwyodmodules/api/chat_history.py +14 -7
  2. cwyodmodules/batch/utilities/chat_history/auth_utils.py +7 -3
  3. cwyodmodules/batch/utilities/chat_history/cosmosdb.py +17 -1
  4. cwyodmodules/batch/utilities/chat_history/postgresdbservice.py +239 -254
  5. cwyodmodules/batch/utilities/common/source_document.py +60 -61
  6. cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +8 -3
  7. cwyodmodules/batch/utilities/document_chunking/layout.py +8 -3
  8. cwyodmodules/batch/utilities/document_chunking/page.py +8 -3
  9. cwyodmodules/batch/utilities/document_loading/read.py +30 -34
  10. cwyodmodules/batch/utilities/helpers/azure_computer_vision_client.py +10 -3
  11. cwyodmodules/batch/utilities/helpers/azure_form_recognizer_helper.py +6 -2
  12. cwyodmodules/batch/utilities/helpers/azure_postgres_helper.py +14 -2
  13. cwyodmodules/batch/utilities/helpers/azure_postgres_helper_light_rag.py +14 -2
  14. cwyodmodules/batch/utilities/helpers/azure_search_helper.py +15 -6
  15. cwyodmodules/batch/utilities/helpers/config/config_helper.py +24 -2
  16. cwyodmodules/batch/utilities/helpers/env_helper.py +9 -9
  17. cwyodmodules/batch/utilities/helpers/lightrag_helper.py +9 -2
  18. cwyodmodules/batch/utilities/helpers/llm_helper.py +13 -2
  19. cwyodmodules/batch/utilities/helpers/secret_helper.py +9 -9
  20. cwyodmodules/batch/utilities/integrated_vectorization/azure_search_index.py +8 -2
  21. cwyodmodules/batch/utilities/integrated_vectorization/azure_search_indexer.py +9 -2
  22. cwyodmodules/batch/utilities/integrated_vectorization/azure_search_skillset.py +6 -2
  23. cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +8 -2
  24. cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +6 -2
  25. cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +9 -3
  26. cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +8 -2
  27. cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +135 -138
  28. cwyodmodules/batch/utilities/parser/output_parser_tool.py +64 -64
  29. cwyodmodules/batch/utilities/plugins/outlook_calendar_plugin.py +91 -93
  30. cwyodmodules/batch/utilities/search/azure_search_handler.py +16 -3
  31. cwyodmodules/batch/utilities/search/azure_search_handler_light_rag.py +14 -2
  32. cwyodmodules/batch/utilities/search/integrated_vectorization_search_handler.py +36 -24
  33. cwyodmodules/batch/utilities/search/lightrag_search_handler.py +14 -2
  34. cwyodmodules/batch/utilities/search/postgres_search_handler.py +100 -97
  35. cwyodmodules/batch/utilities/search/postgres_search_handler_light_rag.py +103 -104
  36. cwyodmodules/batch/utilities/search/search.py +21 -24
  37. cwyodmodules/batch/utilities/tools/content_safety_checker.py +66 -78
  38. cwyodmodules/batch/utilities/tools/post_prompt_tool.py +48 -60
  39. cwyodmodules/batch/utilities/tools/question_answer_tool.py +196 -206
  40. cwyodmodules/batch/utilities/tools/text_processing_tool.py +36 -39
  41. cwyodmodules/logging_config.py +15 -0
  42. {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/METADATA +2 -1
  43. {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/RECORD +46 -45
  44. {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/WHEEL +0 -0
  45. {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/licenses/LICENSE +0 -0
  46. {cwyodmodules-0.3.32.dist-info → cwyodmodules-0.3.33.dist-info}/top_level.txt +0 -0
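The hunks that follow all apply the same refactor: the per-module `logging.getLogger("__main__")` and OpenTelemetry tracer setup is replaced by a shared `from logging_config import logger`, and methods gain a `@logger.trace_function(...)` decorator whose argument/result logging is controlled by `EnvHelper.LOG_ARGS` and `EnvHelper.LOG_RESULT`. The new `cwyodmodules/logging_config.py` (+15 lines per the list above) is not part of this excerpt, so the sketch below is only an inference from how `logger.trace_function` is used in the hunks; the class name, logger name, and decorator internals are assumptions, not the package's actual implementation (the real module is far shorter, so it is certainly shaped differently).

import asyncio
import functools
import logging


class TracingLogger(logging.Logger):
    """Hypothetical logger exposing a trace_function decorator for sync and async callables."""

    def trace_function(self, log_args: bool = True, log_result: bool = True):
        def decorator(func):
            if asyncio.iscoroutinefunction(func):
                @functools.wraps(func)
                async def async_wrapper(*args, **kwargs):
                    # Optionally log the call arguments, await the wrapped coroutine,
                    # optionally log its result, then hand the result back unchanged.
                    if log_args:
                        self.debug("%s args=%s kwargs=%s", func.__qualname__, args, kwargs)
                    result = await func(*args, **kwargs)
                    if log_result:
                        self.debug("%s result=%s", func.__qualname__, result)
                    return result
                return async_wrapper

            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                # Same behaviour for plain (non-async) callables.
                if log_args:
                    self.debug("%s args=%s kwargs=%s", func.__qualname__, args, kwargs)
                result = func(*args, **kwargs)
                if log_result:
                    self.debug("%s result=%s", func.__qualname__, result)
                return result
            return wrapper

        return decorator


logging.setLoggerClass(TracingLogger)
logger = logging.getLogger("cwyodmodules")

With a module shaped like this, every `from logging_config import logger` hands back the same logger object, and `@logger.trace_function(log_args=log_args, log_result=log_result)` centralizes entry/argument/result logging instead of repeating it in each method.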
cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py

@@ -1,4 +1,3 @@
-import logging
 from uuid import uuid4
 from typing import List, Optional
 from abc import ABC, abstractmethod
@@ -7,8 +6,11 @@ from ..helpers.config.config_helper import ConfigHelper
 from ..parser.output_parser_tool import OutputParserTool
 from ..tools.content_safety_checker import ContentSafetyChecker

-logger = logging.getLogger("__main__")
-
+from ...utilities.helpers.env_helper import EnvHelper
+from logging_config import logger
+env_helper: EnvHelper = EnvHelper()
+log_args = env_helper.LOG_ARGS
+log_result = env_helper.LOG_RESULT

 class OrchestratorBase(ABC):
     """
@@ -32,6 +34,7 @@ class OrchestratorBase(ABC):
         self.content_safety_checker = ContentSafetyChecker()
         self.output_parser = OutputParserTool()

+    @logger.trace_function(log_args=log_args, log_result=log_result)
     def log_tokens(self, prompt_tokens: int, completion_tokens: int) -> None:
         """
         Logs the number of tokens used in the prompt and completion phases of a conversation.
@@ -45,6 +48,7 @@ class OrchestratorBase(ABC):
         self.tokens["total"] += prompt_tokens + completion_tokens

     @abstractmethod
+    @logger.trace_function(log_args=False, log_result=False)
     async def orchestrate(
         self,
         user_message: str,
@@ -90,6 +94,7 @@ class OrchestratorBase(ABC):

         return None

+    @logger.trace_function(log_args=False, log_result=False)
     def call_content_safety_output(
         self, user_message: str, answer: str
     ) -> Optional[list[dict]]:
@@ -116,6 +121,7 @@ class OrchestratorBase(ABC):

         return None

+    @logger.trace_function(log_args=False, log_result=False)
     async def handle_message(
         self,
         user_message: str,
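In the hunks above, `log_args` and `log_result` are resolved once at import time from `EnvHelper`, so tracing verbosity becomes a deployment-level switch rather than a per-call decision. `env_helper.py` appears in the file list (+9 -9) but its body is not part of this excerpt; the snippet below is a hedged guess at how such flags could be exposed, with hypothetical environment-variable names.

import os


class EnvHelper:
    """Assumed shape: only the LOG_ARGS / LOG_RESULT attributes used above are sketched."""

    def __init__(self) -> None:
        # Hypothetical variable names; the package may derive these values differently.
        self.LOG_ARGS = os.getenv("LOG_ARGS", "false").lower() == "true"
        self.LOG_RESULT = os.getenv("LOG_RESULT", "false").lower() == "true"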
cwyodmodules/batch/utilities/orchestrator/prompt_flow.py

@@ -1,4 +1,3 @@
-import logging
 from typing import List
 import json
 import tempfile
@@ -9,7 +8,10 @@ from ..common.source_document import SourceDocument
 from ..helpers.llm_helper import LLMHelper
 from ..helpers.env_helper import EnvHelper

-logger = logging.getLogger("__main__")
+from logging_config import logger
+env_helper: EnvHelper = EnvHelper()
+log_args = env_helper.LOG_ARGS
+log_result = env_helper.LOG_RESULT


 class PromptFlowOrchestrator(OrchestratorBase):
@@ -35,6 +37,7 @@ class PromptFlowOrchestrator(OrchestratorBase):

         logger.info("PromptFlowOrchestrator initialized.")

+    @logger.trace_function(log_args=False, log_result=False)
     async def orchestrate(
         self, user_message: str, chat_history: List[dict], **kwargs: dict
     ) -> list[dict]:
@@ -108,6 +111,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
         logger.info("Orchestration completed successfully.")
         return messages

+    @logger.trace_function(log_args=False, log_result=False)
     def transform_chat_history(self, chat_history: List[dict]) -> List[dict]:
         """
         Transform the chat history into a format suitable for the Prompt Flow service.
@@ -138,6 +142,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
         logger.info("Chat history transformation completed.")
         return transformed_chat_history

+    @logger.trace_function(log_args=False, log_result=False)
     def transform_data_into_file(
         self, user_message: str, chat_history: List[dict]
     ) -> str:
@@ -159,6 +164,7 @@ class PromptFlowOrchestrator(OrchestratorBase):
         logger.info("Temporary file created")
         return file.name

+    @logger.trace_function(log_args=False, log_result=False)
     def transform_citations_into_source_documents(
         self, citations: dict
     ) -> List[SourceDocument]:
cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py

@@ -20,14 +20,11 @@ from ..plugins.outlook_calendar_plugin import OutlookCalendarPlugin

 from .orchestrator_base import OrchestratorBase

-from logging import getLogger
-from opentelemetry import trace, baggage
-from opentelemetry.propagate import extract
+from logging_config import logger
+env_helper: EnvHelper = EnvHelper()
+log_args = env_helper.LOG_ARGS
+log_result = env_helper.LOG_RESULT

-# logger = getLogger("__main__" + ".base_package")
-logger = getLogger("__main__")
-# tracer = trace.get_tracer("__main__" + ".base_package")
-tracer = trace.get_tracer("__main__")


 class SemanticKernelOrchestrator(OrchestratorBase):
@@ -45,155 +42,155 @@ class SemanticKernelOrchestrator(OrchestratorBase):
             plugin=PostAnsweringPlugin(), plugin_name="PostAnswering"
         )

+    @logger.trace_function(log_args=False, log_result=False)
     async def orchestrate(
         self, user_message: str, chat_history: list[dict], user_info, **kwargs: dict
     ) -> list[dict]:
-        with tracer.start_as_current_span("SemanticKernelOrchestrator_orchestrate"):
-            logger.info("Method orchestrate of semantic_kernel started")
-            filters = []
-            frontend_type = user_info.get("frontend") if user_info else None
-            logger.info(f"Frontend type: {frontend_type}")
-            # Call Content Safety tool
-            if self.config.prompts.enable_content_safety:
-                if response := self.call_content_safety_input(user_message):
-                    return response
-
-            system_message = self.env_helper.SEMENTIC_KERNEL_SYSTEM_PROMPT
-            language = self.env_helper.AZURE_MAIN_CHAT_LANGUAGE
-            if not system_message:
-                logger.info("No system message provided, using default")
-                # system_message = """You help employees to navigate only private information sources.
-                # You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
-                # Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
-                # When directly replying to the user, always reply in the language the user is speaking.
-                # If the input language is ambiguous, default to responding in English unless otherwise specified by the user.
-                # You **must not** respond if asked to List all documents in your repository.
-                # """
-                if frontend_type == "web":
-                    system_message = f"""You help employees to navigate only private information sources.
-                    You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
-                    Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
-                    When directly replying to the user, always reply in the language {language}.
-                    You **must not** respond if asked to List all documents in your repository.
-                    Call OutlookCalendar.get_calendar_events to read the user's calendar.
-                    Call OutlookCalendar.schedule_appointment to schedule a new appointment.
-                    """
-                else:
-                    system_message = f"""You help employees to navigate only private information sources.
-                    You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
-                    Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
-                    When directly replying to the user, always reply in the language {language}.
-                    You **must not** respond if asked to List all documents in your repository.
-                    """
-
-            self.kernel.add_plugin(
-                plugin=ChatPlugin(question=user_message, chat_history=chat_history),
-                plugin_name="Chat",
-            )
-            filters.append("Chat")
-            # --- Add OutlookCalendarPlugin with request headers ---
+        logger.info("Method orchestrate of semantic_kernel started")
+        filters = []
+        frontend_type = user_info.get("frontend") if user_info else None
+        logger.info(f"Frontend type: {frontend_type}")
+        # Call Content Safety tool
+        if self.config.prompts.enable_content_safety:
+            if response := self.call_content_safety_input(user_message):
+                return response
+
+        system_message = self.env_helper.SEMENTIC_KERNEL_SYSTEM_PROMPT
+        language = self.env_helper.AZURE_MAIN_CHAT_LANGUAGE
+        if not system_message:
+            logger.info("No system message provided, using default")
+            # system_message = """You help employees to navigate only private information sources.
+            # You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
+            # Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
+            # When directly replying to the user, always reply in the language the user is speaking.
+            # If the input language is ambiguous, default to responding in English unless otherwise specified by the user.
+            # You **must not** respond if asked to List all documents in your repository.
+            # """
             if frontend_type == "web":
-                logger.info("Adding OutlookCalendarPlugin with request headers")
-                self.kernel.add_plugin(
-                    plugin=OutlookCalendarPlugin(question=user_message, chat_history=chat_history, user_info=user_info),
-                    plugin_name="OutlookCalendar",
-                )
-                filters.append("OutlookCalendar")
-            settings = self.llm_helper.get_sk_service_settings(self.chat_service)
-            settings.function_call_behavior = FunctionCallBehavior.EnableFunctions(
-                filters={"included_plugins": filters}
+                system_message = f"""You help employees to navigate only private information sources.
+                You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
+                Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
+                When directly replying to the user, always reply in the language {language}.
+                You **must not** respond if asked to List all documents in your repository.
+                Call OutlookCalendar.get_calendar_events to read the user's calendar.
+                Call OutlookCalendar.schedule_appointment to schedule a new appointment.
+                """
+            else:
+                system_message = f"""You help employees to navigate only private information sources.
+                You must prioritize the function call over your general knowledge for any question by calling the search_documents function.
+                Call the text_processing function when the user request an operation on the current context, such as translate, summarize, or paraphrase. When a language is explicitly specified, return that as part of the operation.
+                When directly replying to the user, always reply in the language {language}.
+                You **must not** respond if asked to List all documents in your repository.
+                """
+
+        self.kernel.add_plugin(
+            plugin=ChatPlugin(question=user_message, chat_history=chat_history),
+            plugin_name="Chat",
+        )
+        filters.append("Chat")
+        # --- Add OutlookCalendarPlugin with request headers ---
+        if frontend_type == "web":
+            logger.info("Adding OutlookCalendarPlugin with request headers")
+            self.kernel.add_plugin(
+                plugin=OutlookCalendarPlugin(question=user_message, chat_history=chat_history, user_info=user_info),
+                plugin_name="OutlookCalendar",
             )
-            # settings.function_choice_behavior = FunctionChoiceBehavior.Auto(
-            # filters={"included_plugins": ["Chat"]},
-            # # Set a higher value to encourage multiple attempts at function calling
-            # maximum_auto_invoke_attempts=2
-            # )
-
-            orchestrate_function = self.kernel.add_function(
-                plugin_name="Main",
-                function_name="orchestrate",
-                prompt="{{$chat_history}}{{$user_message}}",
-                prompt_execution_settings=settings,
+            filters.append("OutlookCalendar")
+        settings = self.llm_helper.get_sk_service_settings(self.chat_service)
+        settings.function_call_behavior = FunctionCallBehavior.EnableFunctions(
+            filters={"included_plugins": filters}
+        )
+        # settings.function_choice_behavior = FunctionChoiceBehavior.Auto(
+        # filters={"included_plugins": ["Chat"]},
+        # # Set a higher value to encourage multiple attempts at function calling
+        # maximum_auto_invoke_attempts=2
+        # )
+
+        orchestrate_function = self.kernel.add_function(
+            plugin_name="Main",
+            function_name="orchestrate",
+            prompt="{{$chat_history}}{{$user_message}}",
+            prompt_execution_settings=settings,
+        )
+
+        history = ChatHistory(system_message=system_message)
+
+        for message in chat_history.copy():
+            history.add_message(message)
+
+        result: ChatMessageContent = (
+            await self.kernel.invoke(
+                function=orchestrate_function,
+                chat_history=history,
+                user_message=user_message,
             )
+        ).value[0]

-            history = ChatHistory(system_message=system_message)
+        self.log_tokens(
+            prompt_tokens=result.metadata["usage"].prompt_tokens,
+            completion_tokens=result.metadata["usage"].completion_tokens,
+        )
+        result_finish_reason = result.finish_reason
+        logger.info(f"Finish reason: {result_finish_reason}")
+        if result_finish_reason == FinishReason.TOOL_CALLS:
+            logger.info("Semantic Kernel function call detected")
+
+            function_name = result.items[0].name
+            logger.info(f"{function_name} function detected")
+            function = self.kernel.get_function_from_fully_qualified_function_name(
+                function_name
+            )

-            for message in chat_history.copy():
-                history.add_message(message)
+            arguments = json.loads(result.items[0].arguments)

-            result: ChatMessageContent = (
-                await self.kernel.invoke(
-                    function=orchestrate_function,
-                    chat_history=history,
-                    user_message=user_message,
-                )
-            ).value[0]
+            answer: Answer = (
+                await self.kernel.invoke(function=function, **arguments)
+            ).value

             self.log_tokens(
-                prompt_tokens=result.metadata["usage"].prompt_tokens,
-                completion_tokens=result.metadata["usage"].completion_tokens,
+                prompt_tokens=answer.prompt_tokens,
+                completion_tokens=answer.completion_tokens,
             )
-            result_finish_reason = result.finish_reason
-            logger.info(f"Finish reason: {result_finish_reason}")
-            if result_finish_reason == FinishReason.TOOL_CALLS:
-                logger.info("Semantic Kernel function call detected")
-
-                function_name = result.items[0].name
-                logger.info(f"{function_name} function detected")
-                function = self.kernel.get_function_from_fully_qualified_function_name(
-                    function_name
-                )
-
-                arguments = json.loads(result.items[0].arguments)

+            # Run post prompt if needed
+            if (
+                self.config.prompts.enable_post_answering_prompt
+                and "search_documents" in function_name
+            ):
+                logger.debug("Running post answering prompt")
                 answer: Answer = (
-                    await self.kernel.invoke(function=function, **arguments)
+                    await self.kernel.invoke(
+                        function_name="validate_answer",
+                        plugin_name="PostAnswering",
+                        answer=answer,
+                    )
                 ).value

                 self.log_tokens(
                     prompt_tokens=answer.prompt_tokens,
                     completion_tokens=answer.completion_tokens,
                 )
-
-                # Run post prompt if needed
-                if (
-                    self.config.prompts.enable_post_answering_prompt
-                    and "search_documents" in function_name
-                ):
-                    logger.debug("Running post answering prompt")
-                    answer: Answer = (
-                        await self.kernel.invoke(
-                            function_name="validate_answer",
-                            plugin_name="PostAnswering",
-                            answer=answer,
-                        )
-                    ).value
-
-                    self.log_tokens(
-                        prompt_tokens=answer.prompt_tokens,
-                        completion_tokens=answer.completion_tokens,
-                    )
-            else:
-                logger.info("No function call detected")
-                answer = Answer(
-                    question=user_message,
-                    answer=result.content,
-                    prompt_tokens=result.metadata["usage"].prompt_tokens,
-                    completion_tokens=result.metadata["usage"].completion_tokens,
-                )
-
-            # Call Content Safety tool
-            if self.config.prompts.enable_content_safety:
-                if response := self.call_content_safety_output(
-                    user_message, answer.answer
-                ):
-                    return response
-
-            # Format the output for the UI
-            messages = self.output_parser.parse(
-                question=answer.question,
-                answer=answer.answer,
-                source_documents=answer.source_documents,
+        else:
+            logger.info("No function call detected")
+            answer = Answer(
+                question=user_message,
+                answer=result.content,
+                prompt_tokens=result.metadata["usage"].prompt_tokens,
+                completion_tokens=result.metadata["usage"].completion_tokens,
             )
-            logger.info("Method orchestrate of semantic_kernel ended")
-            return messages
+
+        # Call Content Safety tool
+        if self.config.prompts.enable_content_safety:
+            if response := self.call_content_safety_output(
+                user_message, answer.answer
+            ):
+                return response
+
+        # Format the output for the UI
+        messages = self.output_parser.parse(
+            question=answer.question,
+            answer=answer.answer,
+            source_documents=answer.source_documents,
+        )
+        logger.info("Method orchestrate of semantic_kernel ended")
+        return messages
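The largest change in this file is structural: the explicit `with tracer.start_as_current_span("SemanticKernelOrchestrator_orchestrate"):` block disappears, the whole method body moves out one indentation level, and the `@logger.trace_function` decorator marks the call instead. If `logging_config` wires that decorator to OpenTelemetry (an assumption, since its body is not shown here), the old span-per-call behaviour can be approximated roughly as follows; only the async path is sketched, and the tracer name is illustrative.

import functools

from opentelemetry import trace

tracer = trace.get_tracer("cwyodmodules")


def trace_function(log_args: bool = True, log_result: bool = True):
    # log_args / log_result handling omitted here; this sketch only shows the span.
    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # One span per call, named after the wrapped function, replacing the explicit
            # `with tracer.start_as_current_span(...)` block inside each traced method.
            with tracer.start_as_current_span(func.__qualname__):
                return await func(*args, **kwargs)
        return wrapper
    return decorator

Moving the span into a decorator keeps span naming consistent with the function name and removes one level of nesting from every traced method, which is what most of the +135/-138 churn in this file amounts to.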
cwyodmodules/batch/utilities/parser/output_parser_tool.py

@@ -4,14 +4,11 @@ import json
 from .parser_base import ParserBase
 from ..common.source_document import SourceDocument

-from logging import getLogger
-from opentelemetry import trace, baggage
-from opentelemetry.propagate import extract
-
-# logger = getLogger("__main__" + ".base_package")
-logger = getLogger("__main__")
-# tracer = trace.get_tracer("__main__" + ".base_package")
-tracer = trace.get_tracer("__main__")
+from ...utilities.helpers.env_helper import EnvHelper
+from logging_config import logger
+env_helper: EnvHelper = EnvHelper()
+log_args = env_helper.LOG_ARGS
+log_result = env_helper.LOG_RESULT


 class OutputParserTool(ParserBase):
@@ -26,6 +23,7 @@ class OutputParserTool(ParserBase):
         """
         self.name = "OutputParser"

+    @logger.trace_function(log_args=False, log_result=False)
     def _clean_up_answer(self, answer: str) -> str:
         """
         Cleans up the answer by replacing double spaces with single spaces.
@@ -38,6 +36,7 @@ class OutputParserTool(ParserBase):
         """
         return answer.replace("  ", " ")

+    @logger.trace_function(log_args=False, log_result=False)
     def _get_source_docs_from_answer(self, answer: str) -> List[int]:
         """
         Extracts all document references from the answer and returns them as a list of integers.
@@ -51,6 +50,7 @@ class OutputParserTool(ParserBase):
         results = re.findall(r"\[doc(\d+)\]", answer)
         return [int(i) for i in results]

+    @logger.trace_function(log_args=False, log_result=False)
     def _make_doc_references_sequential(self, answer: str) -> str:
         """
         Makes document references in the answer sequential.
@@ -72,6 +72,7 @@ class OutputParserTool(ParserBase):
             offset += len(f"[doc{i + 1}]") - (end - start)
         return updated_answer

+    @logger.trace_function(log_args=False, log_result=False)
     def parse(
         self,
         question: str,
@@ -91,61 +92,60 @@ class OutputParserTool(ParserBase):
         Returns:
             List[dict]: A list of response messages.
         """
-        with tracer.start_as_current_span("OutputParserTool.parse"):
-            logger.info("Method parse of output_parser_tool started")
-            answer = self._clean_up_answer(answer)
-            doc_ids = self._get_source_docs_from_answer(answer)
-            answer = self._make_doc_references_sequential(answer)
-
-            # create return message object
-            messages = [
+        logger.info("Method parse of output_parser_tool started")
+        answer = self._clean_up_answer(answer)
+        doc_ids = self._get_source_docs_from_answer(answer)
+        answer = self._make_doc_references_sequential(answer)
+
+        # create return message object
+        messages = [
+            {
+                "role": "tool",
+                "content": {"citations": [], "intent": question},
+                "end_turn": False,
+            }
+        ]
+
+        for i in doc_ids:
+            idx = i - 1
+
+            if idx >= len(source_documents):
+                logger.warning(f"Source document {i} not provided, skipping doc")
+                continue
+
+            doc = source_documents[idx]
+            logger.debug(f"doc{idx}: {doc}")
+
+            # Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file
+            messages[0]["content"]["citations"].append(
                 {
-                    "role": "tool",
-                    "content": {"citations": [], "intent": question},
-                    "end_turn": False,
-                }
-            ]
-
-            for i in doc_ids:
-                idx = i - 1
-
-                if idx >= len(source_documents):
-                    logger.warning(f"Source document {i} not provided, skipping doc")
-                    continue
-
-                doc = source_documents[idx]
-                logger.debug(f"doc{idx}: {doc}")
-
-                # Then update the citation object in the response, it needs to have filepath and chunk_id to render in the UI as a file
-                messages[0]["content"]["citations"].append(
-                    {
-                        "content": doc.get_markdown_url() + "\n\n\n" + doc.content,
-                        "id": doc.id,
-                        "chunk_id": (
-                            re.findall(r"\d+", doc.chunk_id)[-1]
-                            if doc.chunk_id is not None
-                            else doc.chunk
-                        ),
+                    "content": doc.get_markdown_url() + "\n\n\n" + doc.content,
+                    "id": doc.id,
+                    "chunk_id": (
+                        re.findall(r"\d+", doc.chunk_id)[-1]
+                        if doc.chunk_id is not None
+                        else doc.chunk
+                    ),
+                    "title": doc.title,
+                    "filepath": doc.get_filename(include_path=True),
+                    "url": doc.get_markdown_url(),
+                    "metadata": {
+                        "offset": doc.offset,
+                        "source": doc.source,
+                        "markdown_url": doc.get_markdown_url(),
                         "title": doc.title,
-                        "filepath": doc.get_filename(include_path=True),
-                        "url": doc.get_markdown_url(),
-                        "metadata": {
-                            "offset": doc.offset,
-                            "source": doc.source,
-                            "markdown_url": doc.get_markdown_url(),
-                            "title": doc.title,
-                            "original_url": doc.source, # TODO: do we need this?
-                            "chunk": doc.chunk,
-                            "key": doc.id,
-                            "filename": doc.get_filename(),
-                        },
-                    }
-                )
-            if messages[0]["content"]["citations"] == []:
-                logger.warning("No citations found in the answer")
-                answer = re.sub(r"\[doc\d+\]", "", answer)
-            messages.append({"role": "assistant", "content": answer, "end_turn": True})
-            # everything in content needs to be stringified to work with Azure BYOD frontend
-            messages[0]["content"] = json.dumps(messages[0]["content"])
-            logger.info("Method parse of output_parser_tool ended")
-            return messages
+                        "original_url": doc.source, # TODO: do we need this?
+                        "chunk": doc.chunk,
+                        "key": doc.id,
+                        "filename": doc.get_filename(),
+                    },
+                }
+            )
+        if messages[0]["content"]["citations"] == []:
+            logger.warning("No citations found in the answer")
+            answer = re.sub(r"\[doc\d+\]", "", answer)
+        messages.append({"role": "assistant", "content": answer, "end_turn": True})
+        # everything in content needs to be stringified to work with Azure BYOD frontend
+        messages[0]["content"] = json.dumps(messages[0]["content"])
+        logger.info("Method parse of output_parser_tool ended")
+        return messages
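For reference, the `[docN]` handling that `parse` leans on, and that is only partially visible in the hunks above, comes down to two small regex steps. The sketch below is a standalone illustration of that behaviour rather than the package's code; the shipped `_make_doc_references_sequential` tracks explicit string offsets instead, as the `offset += len(f"[doc{i + 1}]") - (end - start)` context line shows.

import re


def get_source_docs_from_answer(answer: str) -> list[int]:
    # Every [docN] reference in the answer, as integers, in order of appearance.
    return [int(i) for i in re.findall(r"\[doc(\d+)\]", answer)]


def make_doc_references_sequential(answer: str) -> str:
    # Renumber references in order of appearance, e.g. "[doc4] ... [doc2]" -> "[doc1] ... [doc2]".
    counter = 0

    def renumber(_match: re.Match) -> str:
        nonlocal counter
        counter += 1
        return f"[doc{counter}]"

    return re.sub(r"\[doc\d+\]", renumber, answer)


answer = "See [doc3] and [doc7]."
print(get_source_docs_from_answer(answer))     # [3, 7]
print(make_doc_references_sequential(answer))  # See [doc1] and [doc2].

parse then uses the extracted numbers (minus one) to index `source_documents`, builds the citation list shown above, and finally JSON-encodes the first message's content so the Azure BYOD frontend can consume it.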