llm-ie 1.2.1__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {llm_ie-1.2.1 → llm_ie-1.2.2}/PKG-INFO +1 -1
  2. {llm_ie-1.2.1 → llm_ie-1.2.2}/pyproject.toml +1 -1
  3. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/__init__.py +2 -2
  4. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/chunkers.py +35 -0
  5. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/engines.py +109 -74
  6. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/extractors.py +63 -28
  7. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/prompt_editor.py +4 -4
  8. {llm_ie-1.2.1 → llm_ie-1.2.2}/README.md +0 -0
  9. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
  10. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
  11. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
  12. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
  13. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt +0 -0
  14. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt +0 -0
  15. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +0 -0
  16. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +0 -0
  17. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
  18. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
  19. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt +0 -0
  20. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -0
  21. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/BasicReviewFrameExtractor_prompt_guide.txt +0 -0
  22. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
  23. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/DirectFrameExtractor_prompt_guide.txt +0 -0
  24. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
  25. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -0
  26. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +0 -0
  27. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +0 -0
  28. {llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/data_types.py +0 -0
{llm_ie-1.2.1 → llm_ie-1.2.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 1.2.1
+Version: 1.2.2
 Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
{llm_ie-1.2.1 → llm_ie-1.2.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llm-ie"
-version = "1.2.1"
+version = "1.2.2"
 description = "A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines."
 authors = ["Enshuo (David) Hsu"]
 license = "MIT"
{llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/__init__.py
@@ -1,11 +1,11 @@
 from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
-from .engines import BasicLLMConfig, Qwen3LLMConfig, OpenAIReasoningLLMConfig, LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
+from .engines import BasicLLMConfig, ReasoningLLMConfig, Qwen3LLMConfig, OpenAIReasoningLLMConfig, LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
 from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, AttributeExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
 from .chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker, TextLineUnitChunker, ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
 from .prompt_editor import PromptEditor
 
 __all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
-           "BasicLLMConfig", "Qwen3LLMConfig", "OpenAIReasoningLLMConfig", "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
+           "BasicLLMConfig", "ReasoningLLMConfig", "Qwen3LLMConfig", "OpenAIReasoningLLMConfig", "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
            "DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "AttributeExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
            "UnitChunker", "WholeDocumentUnitChunker", "SentenceUnitChunker", "TextLineUnitChunker", "ContextChunker", "NoContextChunker", "WholeDocumentContextChunker", "SlideWindowContextChunker",
            "PromptEditor"]
{llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/chunkers.py
@@ -41,6 +41,41 @@ class WholeDocumentUnitChunker(UnitChunker):
             text=text
         )]
 
+class SeparatorUnitChunker(UnitChunker):
+    def __init__(self, sep:str):
+        """
+        This class chunks a document by the separator provided.
+
+        Parameters:
+        ----------
+        sep : str
+            a separator string.
+        """
+        super().__init__()
+        if not isinstance(sep, str):
+            raise ValueError("sep must be a string")
+
+        self.sep = sep
+
+    def chunk(self, text:str) -> List[FrameExtractionUnit]:
+        """
+        Parameters:
+        ----------
+        text : str
+            The document text.
+        """
+        paragraphs = text.split(self.sep)
+        paragraph_units = []
+        start = 0
+        for paragraph in paragraphs:
+            end = start + len(paragraph)
+            paragraph_units.append(FrameExtractionUnit(
+                start=start,
+                end=end,
+                text=paragraph
+            ))
+            start = end + len(self.sep)
+        return paragraph_units
 
 class SentenceUnitChunker(UnitChunker):
     from nltk.tokenize.punkt import PunktSentenceTokenizer
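A minimal usage sketch of the new SeparatorUnitChunker follows. Note that the __init__.py hunk above does not re-export it, so it is imported from llm_ie.chunkers directly; the sample text, separator, and the assumption that FrameExtractionUnit exposes start/end/text attributes are illustrative, not confirmed by this diff.

```python
# Sketch only: split a document into paragraph units on a chosen separator.
from llm_ie.chunkers import SeparatorUnitChunker

doc = "Paragraph one.\n\nParagraph two.\n\nParagraph three."
chunker = SeparatorUnitChunker(sep="\n\n")

for unit in chunker.chunk(doc):
    # Each unit keeps character offsets into the original document,
    # so doc[unit.start:unit.end] should reproduce unit.text.
    print(unit.start, unit.end, repr(unit.text))
```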
{llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/engines.py
@@ -33,18 +33,18 @@ class LLMConfig(abc.ABC):
         return NotImplemented
 
     @abc.abstractmethod
-    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[str, None, None]]:
+    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method postprocesses the LLM response after it is generated.
 
         Parameters:
         ----------
-        response : Union[str, Generator[str, None, None]]
-            the LLM response. Can be a string or a generator.
+        response : Union[str, Generator[Dict[str, str], None, None]]
+            the LLM response. Can be a dict or a generator.
 
         Returns:
         -------
-        response : str
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
             the postprocessed LLM response
         """
         return NotImplemented
@@ -77,7 +77,7 @@ class BasicLLMConfig(LLMConfig):
         """
         return messages
 
-    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str, str], None, None]]:
+    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method postprocesses the LLM response after it is generated.
 
@@ -86,12 +86,13 @@ class BasicLLMConfig(LLMConfig):
         response : Union[str, Generator[str, None, None]]
             the LLM response. Can be a string or a generator.
 
-        Returns: Union[str, Generator[Dict[str, str], None, None]]
+        Returns: Union[Dict[str,str], Generator[Dict[str, str], None, None]]
             the postprocessed LLM response.
-            if input is a generator, the output will be a generator {"data": <content>}.
+            If input is a string, the output will be a dict {"response": <response>}.
+            if input is a generator, the output will be a generator {"type": "response", "data": <content>}.
         """
         if isinstance(response, str):
-            return response
+            return {"response": response}
 
         def _process_stream():
             for chunk in response:
@@ -99,23 +100,19 @@ class BasicLLMConfig(LLMConfig):
 
         return _process_stream()
 
-class Qwen3LLMConfig(LLMConfig):
-    def __init__(self, thinking_mode:bool=True, **kwargs):
-        """
-        The Qwen3 LLM configuration for reasoning models.
 
-        Parameters:
-        ----------
-        thinking_mode : bool, Optional
-            if True, a special token "/think" will be placed after each system and user prompt. Otherwise, "/no_think" will be placed.
+class ReasoningLLMConfig(LLMConfig):
+    def __init__(self, thinking_token_start="<think>", thinking_token_end="</think>", **kwargs):
+        """
+        The general LLM configuration for reasoning models.
         """
         super().__init__(**kwargs)
-        self.thinking_mode = thinking_mode
+        self.thinking_token_start = thinking_token_start
+        self.thinking_token_end = thinking_token_end
 
     def preprocess_messages(self, messages:List[Dict[str,str]]) -> List[Dict[str,str]]:
         """
-        Append a special token to the system and user prompts.
-        The token is "/think" if thinking_mode is True, otherwise "/no_think".
+        This method preprocesses the input messages before passing them to the LLM.
 
         Parameters:
         ----------
@@ -127,23 +124,11 @@ class Qwen3LLMConfig(LLMConfig):
         messages : List[Dict[str,str]]
             a list of dict with role and content. role must be one of {"system", "user", "assistant"}
         """
-        thinking_token = "/think" if self.thinking_mode else "/no_think"
-        new_messages = []
-        for message in messages:
-            if message['role'] in ['system', 'user']:
-                new_message = {'role': message['role'], 'content': f"{message['content']} {thinking_token}"}
-            else:
-                new_message = {'role': message['role'], 'content': message['content']}
-
-            new_messages.append(new_message)
-
-        return new_messages
+        return messages
 
-    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str,str], None, None]]:
+    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[Dict[str,str], Generator[Dict[str,str], None, None]]:
         """
-        If input is a generator, tag contents in <think> and </think> as {"type": "reasoning", "data": <content>},
-        and the rest as {"type": "response", "data": <content>}.
-        If input is a string, drop contents in <think> and </think>.
+        This method postprocesses the LLM response after it is generated.
 
         Parameters:
         ----------
@@ -153,11 +138,16 @@ class Qwen3LLMConfig(LLMConfig):
         Returns:
         -------
         response : Union[str, Generator[str, None, None]]
-            the postprocessed LLM response.
+            the postprocessed LLM response as a dict {"reasoning": <reasoning>, "response": <content>}
             if input is a generator, the output will be a generator {"type": <reasoning or response>, "data": <content>}.
         """
         if isinstance(response, str):
-            return re.sub(r"<think>.*?</think>\s*", "", response, flags=re.DOTALL).strip()
+            # get contents between thinking_token_start and thinking_token_end
+            match = re.search(f"{self.thinking_token_start}.*?{self.thinking_token_end}", response, re.DOTALL)
+            reasoning = match.group(0) if match else ""
+            # get response AFTER thinking_token_end
+            response = re.sub(f".*?{self.thinking_token_end}", "", response, flags=re.DOTALL).strip()
+            return {"reasoning": reasoning, "response": response}
 
         if isinstance(response, Generator):
             def _process_stream():
@@ -167,24 +157,66 @@ class Qwen3LLMConfig(LLMConfig):
                     if isinstance(chunk, str):
                         buffer += chunk
                         # switch between reasoning and response
-                        if "<think>" in buffer:
+                        if self.thinking_token_start in buffer:
                             think_flag = True
-                            buffer = buffer.replace("<think>", "")
-                        elif "</think>" in buffer:
+                            buffer = buffer.replace(self.thinking_token_start, "")
+                        elif self.thinking_token_end in buffer:
                             think_flag = False
-                            buffer = buffer.replace("</think>", "")
+                            buffer = buffer.replace(self.thinking_token_end, "")
 
                         # if chunk is in thinking block, tag it as reasoning; else tag it as response
-                        if chunk not in ["<think>", "</think>"]:
+                        if chunk not in [self.thinking_token_start, self.thinking_token_end]:
                             if think_flag:
                                 yield {"type": "reasoning", "data": chunk}
                             else:
                                 yield {"type": "response", "data": chunk}
 
             return _process_stream()
+
+
+class Qwen3LLMConfig(ReasoningLLMConfig):
+    def __init__(self, thinking_mode:bool=True, **kwargs):
+        """
+        The Qwen3 **hybrid thinking** LLM configuration.
+        For Qwen3 thinking 2507, use ReasoningLLMConfig instead; for Qwen3 Instruct, use BasicLLMConfig instead.
+
+        Parameters:
+        ----------
+        thinking_mode : bool, Optional
+            if True, a special token "/think" will be placed after each system and user prompt. Otherwise, "/no_think" will be placed.
+        """
+        super().__init__(**kwargs)
+        self.thinking_mode = thinking_mode
+
+    def preprocess_messages(self, messages:List[Dict[str,str]]) -> List[Dict[str,str]]:
+        """
+        Append a special token to the system and user prompts.
+        The token is "/think" if thinking_mode is True, otherwise "/no_think".
+
+        Parameters:
+        ----------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+
+        Returns:
+        -------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+        """
+        thinking_token = "/think" if self.thinking_mode else "/no_think"
+        new_messages = []
+        for message in messages:
+            if message['role'] in ['system', 'user']:
+                new_message = {'role': message['role'], 'content': f"{message['content']} {thinking_token}"}
+            else:
+                new_message = {'role': message['role'], 'content': message['content']}
 
+            new_messages.append(new_message)
 
-class OpenAIReasoningLLMConfig(LLMConfig):
+        return new_messages
+
+
+class OpenAIReasoningLLMConfig(ReasoningLLMConfig):
     def __init__(self, reasoning_effort:str=None, **kwargs):
         """
         The OpenAI "o" series configuration.
@@ -246,28 +278,6 @@ class OpenAIReasoningLLMConfig(LLMConfig):
 
         return new_messages
 
-    def postprocess_response(self, response:Union[str, Generator[str, None, None]]) -> Union[str, Generator[Dict[str, str], None, None]]:
-        """
-        This method postprocesses the LLM response after it is generated.
-
-        Parameters:
-        ----------
-        response : Union[str, Generator[str, None, None]]
-            the LLM response. Can be a string or a generator.
-
-        Returns: Union[str, Generator[Dict[str, str], None, None]]
-            the postprocessed LLM response.
-            if input is a generator, the output will be a generator {"type": "response", "data": <content>}.
-        """
-        if isinstance(response, str):
-            return response
-
-        def _process_stream():
-            for chunk in response:
-                yield {"type": "response", "data": chunk}
-
-        return _process_stream()
-
 
 class InferenceEngine:
     @abc.abstractmethod
@@ -286,7 +296,7 @@ class InferenceEngine:
 
     @abc.abstractmethod
     def chat(self, messages:List[Dict[str,str]],
-             verbose:bool=False, stream:bool=False) -> Union[str, Generator[Dict[str, str], None, None]]:
+             verbose:bool=False, stream:bool=False) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method inputs chat messages and outputs LLM generated text.
 
@@ -298,6 +308,11 @@ class InferenceEngine:
             if True, LLM generated text will be printed in terminal in real-time.
         stream : bool, Optional
             if True, returns a generator that yields the output in real-time.
+
+        Returns:
+        -------
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
+            a dict {"reasoning": <reasoning>, "response": <response>} or Generator {"type": <reasoning or response>, "data": <content>}
         """
         return NotImplemented
 
@@ -363,7 +378,7 @@ class LlamaCppInferenceEngine(InferenceEngine):
 
         return formatted_params
 
-    def chat(self, messages:List[Dict[str,str]], verbose:bool=False) -> str:
+    def chat(self, messages:List[Dict[str,str]], verbose:bool=False) -> Dict[str,str]:
         """
         This method inputs chat messages and outputs LLM generated text.
 
@@ -436,7 +451,7 @@ class OllamaInferenceEngine(InferenceEngine):
         return formatted_params
 
     def chat(self, messages:List[Dict[str,str]],
-             verbose:bool=False, stream:bool=False) -> Union[str, Generator[Dict[str, str], None, None]]:
+             verbose:bool=False, stream:bool=False) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method inputs chat messages and outputs VLM generated text.
 
@@ -448,6 +463,11 @@ class OllamaInferenceEngine(InferenceEngine):
             if True, VLM generated text will be printed in terminal in real-time.
         stream : bool, Optional
             if True, returns a generator that yields the output in real-time.
+
+        Returns:
+        -------
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
+            a dict {"reasoning": <reasoning>, "response": <response>} or Generator {"type": <reasoning or response>, "data": <content>}
         """
         processed_messages = self.config.preprocess_messages(messages)
 
@@ -497,7 +517,7 @@ class OllamaInferenceEngine(InferenceEngine):
             return self.config.postprocess_response(res)
 
 
-    async def chat_async(self, messages:List[Dict[str,str]]) -> str:
+    async def chat_async(self, messages:List[Dict[str,str]]) -> Dict[str,str]:
         """
         Async version of chat method. Streaming is not supported.
         """
@@ -558,7 +578,7 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
 
 
     def chat(self, messages:List[Dict[str,str]],
-             verbose:bool=False, stream:bool=False) -> Union[str, Generator[Dict[str, str], None, None]]:
+             verbose:bool=False, stream:bool=False) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method inputs chat messages and outputs LLM generated text.
 
@@ -570,6 +590,11 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
             if True, VLM generated text will be printed in terminal in real-time.
         stream : bool, Optional
             if True, returns a generator that yields the output in real-time.
+
+        Returns:
+        -------
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
+            a dict {"reasoning": <reasoning>, "response": <response>} or Generator {"type": <reasoning or response>, "data": <content>}
         """
         processed_messages = self.config.preprocess_messages(messages)
 
@@ -611,7 +636,7 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
         res = response.choices[0].message.content
         return self.config.postprocess_response(res)
 
-    async def chat_async(self, messages:List[Dict[str,str]]) -> str:
+    async def chat_async(self, messages:List[Dict[str,str]]) -> Dict[str,str]:
         """
         Async version of chat method. Streaming is not supported.
         """
@@ -662,7 +687,7 @@ class OpenAIInferenceEngine(InferenceEngine):
 
         return formatted_params
 
-    def chat(self, messages:List[Dict[str,str]], verbose:bool=False, stream:bool=False) -> Union[str, Generator[Dict[str, str], None, None]]:
+    def chat(self, messages:List[Dict[str,str]], verbose:bool=False, stream:bool=False) -> Union[Dict[str, str], Generator[Dict[str, str], None, None]]:
         """
         This method inputs chat messages and outputs LLM generated text.
 
@@ -674,6 +699,11 @@ class OpenAIInferenceEngine(InferenceEngine):
             if True, VLM generated text will be printed in terminal in real-time.
         stream : bool, Optional
             if True, returns a generator that yields the output in real-time.
+
+        Returns:
+        -------
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
+            a dict {"reasoning": <reasoning>, "response": <response>} or Generator {"type": <reasoning or response>, "data": <content>}
         """
         processed_messages = self.config.preprocess_messages(messages)
 
@@ -723,7 +753,7 @@ class OpenAIInferenceEngine(InferenceEngine):
         return self.config.postprocess_response(res)
 
 
-    async def chat_async(self, messages:List[Dict[str,str]]) -> str:
+    async def chat_async(self, messages:List[Dict[str,str]]) -> Dict[str,str]:
         """
         Async version of chat method. Streaming is not supported.
         """
@@ -813,7 +843,7 @@ class LiteLLMInferenceEngine(InferenceEngine):
 
         return formatted_params
 
-    def chat(self, messages:List[Dict[str,str]], verbose:bool=False, stream:bool=False) -> Union[str, Generator[Dict[str, str], None, None]]:
+    def chat(self, messages:List[Dict[str,str]], verbose:bool=False, stream:bool=False) -> Union[Dict[str,str], Generator[Dict[str, str], None, None]]:
         """
         This method inputs chat messages and outputs LLM generated text.
 
@@ -825,6 +855,11 @@ class LiteLLMInferenceEngine(InferenceEngine):
             if True, VLM generated text will be printed in terminal in real-time.
         stream : bool, Optional
            if True, returns a generator that yields the output in real-time.
+
+        Returns:
+        -------
+        response : Union[Dict[str,str], Generator[Dict[str, str], None, None]]
+            a dict {"reasoning": <reasoning>, "response": <response>} or Generator {"type": <reasoning or response>, "data": <content>}
         """
         processed_messages = self.config.preprocess_messages(messages)
 
@@ -877,7 +912,7 @@ class LiteLLMInferenceEngine(InferenceEngine):
         res = response.choices[0].message.content
         return self.config.postprocess_response(res)
 
-    async def chat_async(self, messages:List[Dict[str,str]]) -> str:
+    async def chat_async(self, messages:List[Dict[str,str]]) -> Dict[str,str]:
         """
         Async version of chat method. Streaming is not supported.
         """
{llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/extractors.py
@@ -489,7 +489,10 @@ class DirectFrameExtractor(FrameExtractor):
             )
 
             if return_messages_log:
-                messages.append({"role": "assistant", "content": gen_text})
+                message = {"role": "assistant", "content": gen_text["response"]}
+                if "reasoning" in gen_text:
+                    message["reasoning"] = gen_text["reasoning"]
+                messages.append(message)
                 messages_log.append(messages)
 
             # add to output
@@ -497,7 +500,7 @@ class DirectFrameExtractor(FrameExtractor):
                 start=unit.start,
                 end=unit.end,
                 text=unit.text,
-                gen_text=gen_text)
+                gen_text=gen_text["response"])
             output.append(result)
 
         if return_messages_log:
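The extractor now stores only the "response" part of the engine output and keeps any reasoning on the logged assistant turn. Purely for illustration, one messages_log entry is expected to look roughly like this when a reasoning-capable config is used (field values are made up):

```python
# Illustrative shape of a single messages_log entry after this change.
messages_log_entry = [
    {"role": "system", "content": "..."},
    {"role": "user", "content": "..."},
    # The assistant turn carries the cleaned answer in "content" and, when the
    # model emitted a thinking block, the raw reasoning under "reasoning".
    {"role": "assistant", "content": "<extracted frames>", "reasoning": "<think>...</think>"},
]
```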
@@ -581,7 +584,8 @@ class DirectFrameExtractor(FrameExtractor):
             )
             for chunk in response_stream:
                 yield chunk
-                current_gen_text += chunk
+                if chunk["type"] == "response":
+                    current_gen_text += chunk["data"]
 
             # Store the result for this unit
             result_for_unit = FrameExtractionUnitResult(
@@ -679,7 +683,11 @@ class DirectFrameExtractor(FrameExtractor):
                 gen_text = await self.inference_engine.chat_async(
                     messages=messages
                 )
-                return {"original_index": original_index, "unit": unit, "gen_text": gen_text, "messages": messages}
+
+                out = {"original_index": original_index, "unit": unit, "gen_text": gen_text["response"], "messages": messages}
+                if "reasoning" in gen_text:
+                    out["reasoning"] = gen_text["reasoning"]
+                return out
 
         # Create and gather tasks
         tasks = []
@@ -713,7 +721,10 @@ class DirectFrameExtractor(FrameExtractor):
 
             # Append to messages log if requested
             if return_messages_log:
-                final_messages = result_data["messages"] + [{"role": "assistant", "content": gen_text}]
+                message = {"role": "assistant", "content": gen_text}
+                if "reasoning" in result_data:
+                    message["reasoning"] = result_data["reasoning"]
+                final_messages = result_data["messages"] + [message]
                 messages_log.append(final_messages)
 
         if return_messages_log:
@@ -975,15 +986,11 @@ class ReviewFrameExtractor(DirectFrameExtractor):
                 stream=False
             )
 
-            if return_messages_log:
-                messages.append({"role": "assistant", "content": initial})
-                messages_log.append(messages)
-
             # <--- Review step --->
             if verbose:
                 print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
 
-            messages.append({'role': 'assistant', 'content': initial})
+            messages.append({'role': 'assistant', 'content': initial["response"]})
             messages.append({'role': 'user', 'content': self.review_prompt})
 
             review = self.inference_engine.chat(
@@ -994,12 +1001,18 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
             # Output
             if self.review_mode == "revision":
-                gen_text = review
+                gen_text = review["response"]
             elif self.review_mode == "addition":
-                gen_text = initial + '\n' + review
+                gen_text = initial["response"] + '\n' + review["response"]
 
             if return_messages_log:
-                messages.append({"role": "assistant", "content": review})
+                if "reasoning" in initial:
+                    messages[-2]["reasoning"] = initial["reasoning"]
+
+                message = {"role": "assistant", "content": review["response"]}
+                if "reasoning" in review:
+                    message["reasoning"] = review["reasoning"]
+                messages.append(message)
                 messages_log.append(messages)
 
             # add to output
@@ -1192,7 +1205,10 @@ class ReviewFrameExtractor(DirectFrameExtractor):
                     messages=messages
                 )
                 # Return initial generation result along with the messages used and the unit
-                return {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text, "initial_messages": messages}
+                out = {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text["response"], "initial_messages": messages}
+                if "reasoning" in gen_text:
+                    out["reasoning"] = gen_text["reasoning"]
+                return out
 
         # Create and gather initial generation tasks
         initial_tasks = [
@@ -1218,28 +1234,35 @@ class ReviewFrameExtractor(DirectFrameExtractor):
                 {'role': 'user', 'content': self.review_prompt}
             ]
             # Store data needed for review task
+            if "reasoning" in result_data:
+                message = {'role': 'assistant', 'content': initial_gen_text, "reasoning": result_data["reasoning"]}
+            else:
+                message = {'role': 'assistant', 'content': initial_gen_text}
+
             review_tasks_input.append({
                 "unit": result_data["unit"],
                 "initial_gen_text": initial_gen_text,
                 "messages": review_messages,
                 "original_index": result_data["original_index"],
-                "full_initial_log": initial_messages + [{'role': 'assistant', 'content': initial_gen_text}] if return_messages_log else None # Log up to initial generation
+                "full_initial_log": initial_messages + [message] + [{'role': 'user', 'content': self.review_prompt}] if return_messages_log else None
             })
 
 
         async def review_semaphore_helper(task_data: Dict, **kwrs):
             messages = task_data["messages"]
-            original_index = task_data["original_index"]
 
             async with semaphore:
                 review_gen_text = await self.inference_engine.chat_async(
                     messages=messages
                 )
                 # Combine initial and review results
-                task_data["review_gen_text"] = review_gen_text
+                task_data["review_gen_text"] = review_gen_text["response"]
                 if return_messages_log:
                     # Log for the review call itself
-                    task_data["full_review_log"] = messages + [{'role': 'assistant', 'content': review_gen_text}]
+                    message = {'role': 'assistant', 'content': review_gen_text["response"]}
+                    if "reasoning" in review_gen_text:
+                        message["reasoning"] = review_gen_text["reasoning"]
+                    task_data["full_review_log"] = task_data["full_initial_log"] + [message]
                 return task_data # Return the augmented dictionary
 
         # Create and gather review tasks
@@ -1283,7 +1306,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
             # Append full conversation log if requested
             if return_messages_log:
-                full_log_for_unit = result_data.get("full_initial_log", []) + [{'role': 'user', 'content': self.review_prompt}] + [{'role': 'assistant', 'content': review_gen}]
+                full_log_for_unit = result_data["full_review_log"]
                 messages_log.append(full_log_for_unit)
 
         if return_messages_log:
@@ -1541,15 +1564,18 @@ class AttributeExtractor(Extractor):
 
             print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 
-        get_text = self.inference_engine.chat(
+        gen_text = self.inference_engine.chat(
            messages=messages,
            verbose=verbose,
            stream=False
        )
        if return_messages_log:
-            messages.append({"role": "assistant", "content": get_text})
+            message = {"role": "assistant", "content": gen_text["response"]}
+            if "reasoning" in gen_text:
+                message["reasoning"] = gen_text["reasoning"]
+            messages.append(message)
 
-        attribute_list = self._extract_json(gen_text=get_text)
+        attribute_list = self._extract_json(gen_text=gen_text["response"])
        if isinstance(attribute_list, list) and len(attribute_list) > 0:
            attributes = attribute_list[0]
            if return_messages_log:
@@ -1658,9 +1684,12 @@ class AttributeExtractor(Extractor):
                 gen_text = await self.inference_engine.chat_async(messages=messages)
 
                 if return_messages_log:
-                    messages.append({"role": "assistant", "content": gen_text})
+                    message = {"role": "assistant", "content": gen_text["response"]}
+                    if "reasoning" in gen_text:
+                        message["reasoning"] = gen_text["reasoning"]
+                    messages.append(message)
 
-                attribute_list = self._extract_json(gen_text=gen_text)
+                attribute_list = self._extract_json(gen_text=gen_text["response"])
                 attributes = attribute_list[0] if isinstance(attribute_list, list) and len(attribute_list) > 0 else {}
                 return {"frame": frame, "attributes": attributes, "messages": messages}
 
@@ -1824,12 +1853,15 @@ class RelationExtractor(Extractor):
                 messages=task_payload['messages'],
                 verbose=verbose
             )
-            relation = self._post_process_result(gen_text, task_payload)
+            relation = self._post_process_result(gen_text["response"], task_payload)
             if relation:
                 relations.append(relation)
 
             if return_messages_log:
-                task_payload['messages'].append({"role": "assistant", "content": gen_text})
+                message = {"role": "assistant", "content": gen_text["response"]}
+                if "reasoning" in gen_text:
+                    message["reasoning"] = gen_text["reasoning"]
+                task_payload['messages'].append(message)
                 messages_log.append(task_payload['messages'])
 
         return (relations, messages_log) if return_messages_log else relations
@@ -1853,12 +1885,15 @@ class RelationExtractor(Extractor):
         results = await asyncio.gather(*tasks)
 
         for gen_text, task_payload in results:
-            relation = self._post_process_result(gen_text, task_payload)
+            relation = self._post_process_result(gen_text["response"], task_payload)
             if relation:
                 relations.append(relation)
 
             if return_messages_log:
-                task_payload['messages'].append({"role": "assistant", "content": gen_text})
+                message = {"role": "assistant", "content": gen_text["response"]}
+                if "reasoning" in gen_text:
+                    message["reasoning"] = gen_text["reasoning"]
+                task_payload['messages'].append(message)
                 messages_log.append(task_payload['messages'])
 
         return (relations, messages_log) if return_messages_log else relations
{llm_ie-1.2.1 → llm_ie-1.2.2}/src/llm_ie/prompt_editor.py
@@ -85,7 +85,7 @@ class PromptEditor:
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)
-        return res
+        return res["response"]
 
     def comment(self, draft:str) -> str:
         """
@@ -101,7 +101,7 @@ class PromptEditor:
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)
-        return res
+        return res["response"]
 
     def clear_messages(self):
         """
@@ -175,7 +175,7 @@ class PromptEditor:
             self.messages.append({"role": "user", "content": user_input})
             print(f"{Fore.BLUE}Assistant: {Style.RESET_ALL}", end="")
             response = self.inference_engine.chat(self.messages, verbose=True)
-            self.messages.append({"role": "assistant", "content": response})
+            self.messages.append({"role": "assistant", "content": response["response"]})
 
 
     def _IPython_chat(self):
@@ -229,7 +229,7 @@ class PromptEditor:
             # Get assistant's response and append it to conversation
             print("Assistant: ", end="")
             response = self.inference_engine.chat(self.messages, verbose=True)
-            self.messages.append({"role": "assistant", "content": response})
+            self.messages.append({"role": "assistant", "content": response["response"]})
 
             # Display the assistant's response
             with output_area:
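These hunks only unwrap the new dict returned by chat(), so PromptEditor callers should see no behavioral change. A brief sketch, assuming `editor` (a configured PromptEditor) and `draft` already exist:

```python
# comment() appears in the diff context above and still returns a plain string:
# the "response" field is extracted internally rather than the raw chat() dict.
feedback = editor.comment(draft)
assert isinstance(feedback, str)
```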