llm-ie 0.4.4__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {llm_ie-0.4.4 → llm_ie-0.4.5}/PKG-INFO +39 -5
  2. {llm_ie-0.4.4 → llm_ie-0.4.5}/README.md +38 -4
  3. {llm_ie-0.4.4 → llm_ie-0.4.5}/pyproject.toml +1 -1
  4. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt +4 -4
  5. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +4 -4
  6. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +4 -4
  7. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/engines.py +119 -34
  8. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/extractors.py +177 -119
  9. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/__init__.py +0 -0
  10. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
  11. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
  12. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
  13. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
  14. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +0 -0
  15. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +0 -0
  16. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
  17. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
  18. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -0
  19. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
  20. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
  21. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -0
  22. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/data_types.py +0 -0
  23. {llm_ie-0.4.4 → llm_ie-0.4.5}/src/llm_ie/prompt_editor.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llm-ie
- Version: 0.4.4
+ Version: 0.4.5
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
  License: MIT
  Author: Enshuo (David) Hsu
@@ -41,6 +41,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
  - Support for LiteLLM.
  - [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
  - [v0.4.3](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.3) (Feb 7, 2025): Added Azure OpenAI support.
+ - [v0.4.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.5) (Feb 16, 2025):
+   - Added an option to adjust the number of context sentences in sentence-based extractors.
+   - Added support for OpenAI reasoning models ("o" series).
+
 
  ## Table of Contents
  - [Overview](#overview)
@@ -340,6 +344,14 @@ from llm_ie.engines import OpenAIInferenceEngine
  inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
  ```
 
+ For reasoning models ("o" series), use the `reasoning_model=True` flag. When set, `max_completion_tokens` is used instead of `max_tokens`, and `temperature` is ignored.
+
+ ```python
+ from llm_ie.engines import OpenAIInferenceEngine
+
+ inference_engine = OpenAIInferenceEngine(model="o1-mini", reasoning_model=True)
+ ```
+
  #### <img src=doc_asset/readme_img/Azure_icon.png width=32 /> Azure OpenAI API
  In bash, save the endpoint name and API key to environment variables `AZURE_OPENAI_ENDPOINT` and `AZURE_OPENAI_API_KEY`.
  ```
@@ -356,6 +368,14 @@ from llm_ie.engines import AzureOpenAIInferenceEngine
  inference_engine = AzureOpenAIInferenceEngine(model="gpt-4o-mini")
  ```
 
+ For reasoning models ("o" series), use the `reasoning_model=True` flag. When set, `max_completion_tokens` is used instead of `max_tokens`, and `temperature` is ignored.
+
+ ```python
+ from llm_ie.engines import AzureOpenAIInferenceEngine
+
+ inference_engine = AzureOpenAIInferenceEngine(model="o1-mini", reasoning_model=True)
+ ```
+
  #### 🤗 huggingface_hub
  The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.
 
@@ -783,7 +803,7 @@ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", str
 
  The ```SentenceFrameExtractor``` instructs the LLM to extract sentence by sentence. This ensures the accuracy of frame spans and prevents the LLM from overlooking sections or sentences. Empirically, this extractor yields better recall than the ```BasicFrameExtractor``` on complex tasks.
 
- For concurrent extraction (recommended), the `async/ await` feature is used to speed up inferencing. The `concurrent_batch_size` sets the batch size of sentences to be processed in cocurrent.
+ For concurrent extraction (recommended), the `async/await` feature is used to speed up inference. `concurrent_batch_size` sets the number of sentences processed concurrently.
 
  ```python
  from llm_ie.extractors import SentenceFrameExtractor
@@ -792,15 +812,29 @@ extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
  frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, concurrent=True, concurrent_batch_size=32)
  ```
 
- The ```multi_turn``` parameter specifies multi-turn conversation for prompting. If True, sentences and LLM outputs will be appended to the input message and carry-over. If False, only the current sentence is prompted. For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting can better utilize the KV caching and results in faster inferencing. But for vLLM with [Automatic Prefix Caching (APC)](https://docs.vllm.ai/en/latest/automatic_prefix_caching/apc.html), multi-turn conversation is not necessary.
+ The `context_sentences` parameter sets the number of sentences before and after the sentence of interest that are provided as additional context. When `context_sentences=2`, the 2 sentences before and the 2 sentences after are included in the user prompt as context. When `context_sentences="all"`, the entire document is included as context. When `context_sentences=0`, no context is provided and the LLM extracts from the current sentence of interest only.
 
  ```python
  from llm_ie.extractors import SentenceFrameExtractor
 
- extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
- frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", multi_turn=False, case_sensitive=False, fuzzy_match=True, stream=True)
+ extractor = SentenceFrameExtractor(inference_engine=inference_engine,
+                                    prompt_template=prompt_temp,
+                                    context_sentences=2)
+ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, stream=True)
  ```
 
+ For the sentence:
+
+ *The patient has a history of hypertension, hyperlipidemia, and Type 2 diabetes mellitus.*
+
+ The context is "previous sentence 2", "previous sentence 1", "the sentence of interest", "following sentence 1", "following sentence 2":
+
+ *Emily Brown, MD (Cardiology), Dr. Michael Green, MD (Pulmonology)*
+
+ *#### Reason for Admission*
+ *John Doe, a 49-year-old male, was admitted to the hospital with complaints of chest pain, shortness of breath, and dizziness. The patient has a history of hypertension, hyperlipidemia, and Type 2 diabetes mellitus. #### History of Present Illness*
+ *The patient reported that the chest pain started two days prior to admission. The pain was described as a pressure-like sensation in the central chest, radiating to the left arm and jaw.*
+
  </details>
 
  <details>
@@ -24,6 +24,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
  - Support for LiteLLM.
  - [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
  - [v0.4.3](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.3) (Feb 7, 2025): Added Azure OpenAI support.
+ - [v0.4.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.5) (Feb 16, 2025):
+   - Added an option to adjust the number of context sentences in sentence-based extractors.
+   - Added support for OpenAI reasoning models ("o" series).
+
 
  ## Table of Contents
  - [Overview](#overview)
@@ -323,6 +327,14 @@ from llm_ie.engines import OpenAIInferenceEngine
  inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
  ```
 
+ For reasoning models ("o" series), use the `reasoning_model=True` flag. When set, `max_completion_tokens` is used instead of `max_tokens`, and `temperature` is ignored.
+
+ ```python
+ from llm_ie.engines import OpenAIInferenceEngine
+
+ inference_engine = OpenAIInferenceEngine(model="o1-mini", reasoning_model=True)
+ ```
+
  #### <img src=doc_asset/readme_img/Azure_icon.png width=32 /> Azure OpenAI API
  In bash, save the endpoint name and API key to environment variables `AZURE_OPENAI_ENDPOINT` and `AZURE_OPENAI_API_KEY`.
  ```
@@ -339,6 +351,14 @@ from llm_ie.engines import AzureOpenAIInferenceEngine
  inference_engine = AzureOpenAIInferenceEngine(model="gpt-4o-mini")
  ```
 
+ For reasoning models ("o" series), use the `reasoning_model=True` flag. When set, `max_completion_tokens` is used instead of `max_tokens`, and `temperature` is ignored.
+
+ ```python
+ from llm_ie.engines import AzureOpenAIInferenceEngine
+
+ inference_engine = AzureOpenAIInferenceEngine(model="o1-mini", reasoning_model=True)
+ ```
+
  #### 🤗 huggingface_hub
  The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.
 
@@ -766,7 +786,7 @@ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", str
 
  The ```SentenceFrameExtractor``` instructs the LLM to extract sentence by sentence. This ensures the accuracy of frame spans and prevents the LLM from overlooking sections or sentences. Empirically, this extractor yields better recall than the ```BasicFrameExtractor``` on complex tasks.
 
- For concurrent extraction (recommended), the `async/ await` feature is used to speed up inferencing. The `concurrent_batch_size` sets the batch size of sentences to be processed in cocurrent.
+ For concurrent extraction (recommended), the `async/await` feature is used to speed up inference. `concurrent_batch_size` sets the number of sentences processed concurrently.
 
  ```python
  from llm_ie.extractors import SentenceFrameExtractor
@@ -775,15 +795,29 @@ extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
  frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, concurrent=True, concurrent_batch_size=32)
  ```
 
- The ```multi_turn``` parameter specifies multi-turn conversation for prompting. If True, sentences and LLM outputs will be appended to the input message and carry-over. If False, only the current sentence is prompted. For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting can better utilize the KV caching and results in faster inferencing. But for vLLM with [Automatic Prefix Caching (APC)](https://docs.vllm.ai/en/latest/automatic_prefix_caching/apc.html), multi-turn conversation is not necessary.
+ The `context_sentences` parameter sets the number of sentences before and after the sentence of interest that are provided as additional context. When `context_sentences=2`, the 2 sentences before and the 2 sentences after are included in the user prompt as context. When `context_sentences="all"`, the entire document is included as context. When `context_sentences=0`, no context is provided and the LLM extracts from the current sentence of interest only.
 
  ```python
  from llm_ie.extractors import SentenceFrameExtractor
 
- extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
- frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", multi_turn=False, case_sensitive=False, fuzzy_match=True, stream=True)
+ extractor = SentenceFrameExtractor(inference_engine=inference_engine,
+                                    prompt_template=prompt_temp,
+                                    context_sentences=2)
+ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, stream=True)
  ```
 
+ For the sentence:
+
+ *The patient has a history of hypertension, hyperlipidemia, and Type 2 diabetes mellitus.*
+
+ The context is "previous sentence 2", "previous sentence 1", "the sentence of interest", "following sentence 1", "following sentence 2":
+
+ *Emily Brown, MD (Cardiology), Dr. Michael Green, MD (Pulmonology)*
+
+ *#### Reason for Admission*
+ *John Doe, a 49-year-old male, was admitted to the hospital with complaints of chest pain, shortness of breath, and dizziness. The patient has a history of hypertension, hyperlipidemia, and Type 2 diabetes mellitus. #### History of Present Illness*
+ *The patient reported that the chest pain started two days prior to admission. The pain was described as a pressure-like sensation in the central chest, radiating to the left arm and jaw.*
+
  </details>
 
  <details>
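For reference, a minimal sketch of the other two `context_sentences` settings described in the README text above; this snippet is not part of the packaged files, and it assumes `inference_engine`, `prompt_temp`, and `text` are defined as in the earlier README examples.

```python
from llm_ie.extractors import SentenceFrameExtractor

# Entire document as context (the "all" setting)
extractor_all = SentenceFrameExtractor(inference_engine=inference_engine,
                                       prompt_template=prompt_temp,
                                       context_sentences="all")

# No additional context: the LLM sees only the sentence of interest
extractor_none = SentenceFrameExtractor(inference_engine=inference_engine,
                                        prompt_template=prompt_temp,
                                        context_sentences=0)

frames = extractor_none.extract_frames(text_content=text, entity_key="Diagnosis",
                                       case_sensitive=False, fuzzy_match=True)
```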
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "llm-ie"
- version = "0.4.4"
+ version = "0.4.5"
  description = "An LLM-powered tool that transforms everyday language into robust information extraction pipelines."
  authors = ["Enshuo (David) Hsu"]
  license = "MIT"
@@ -61,8 +61,8 @@ Example 1 (single entity type with attributes):
  If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
 
  # Input placeholder
- Below is the Adverse reactions section for your reference. I will feed you with sentences from it one by one.
- {{input}}
+ Below is the Adverse reactions section:
+ "{{input}}"
 
 
  Example 2 (multiple entity types):
@@ -121,7 +121,7 @@ Example 2 (multiple entity types):
  </Outputs>
 
  # Input placeholder
- Below is the medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
 
 
@@ -213,5 +213,5 @@ Example 3 (multiple entity types with corresponding attributes):
  </Outputs>
 
  # Input placeholder
- Below is the entire medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
@@ -46,8 +46,8 @@ Example 1 (single entity type with attributes):
  If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
 
  # Input placeholder
- Below is the Adverse reactions section for your reference. I will feed you with sentences from it one by one.
- {{input}}
+ Below is the Adverse reactions section:
+ "{{input}}"
 
 
  Example 2 (multiple entity types):
@@ -81,7 +81,7 @@ Example 2 (multiple entity types):
 
 
  # Input placeholder
- Below is the medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
 
 
@@ -141,5 +141,5 @@ Example 3 (multiple entity types with corresponding attributes):
 
 
  # Input placeholder
- Below is the entire medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
@@ -46,8 +46,8 @@ Example 1 (single entity type with attributes):
  If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
 
  # Input placeholder
- Below is the Adverse reactions section for your reference. I will feed you with sentences from it one by one.
- {{input}}
+ Below is the Adverse reactions section:
+ "{{input}}"
 
 
  Example 2 (multiple entity types):
@@ -81,7 +81,7 @@ Example 2 (multiple entity types):
 
 
  # Input placeholder
- Below is the medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
 
 
@@ -141,5 +141,5 @@ Example 3 (multiple entity types with corresponding attributes):
 
 
  # Input placeholder
- Below is the entire medical note for your reference. I will feed you with sentences from it one by one.
+ Below is the medical note:
  "{{input}}"
@@ -1,4 +1,5 @@
  import abc
+ import warnings
  import importlib
  from typing import List, Dict, Union
 
@@ -242,7 +243,7 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
 
 
  class OpenAIInferenceEngine(InferenceEngine):
-     def __init__(self, model:str, **kwrs):
+     def __init__(self, model:str, reasoning_model:bool=False, **kwrs):
          """
          The OpenAI API inference engine. Supports OpenAI models and OpenAI compatible servers:
          - vLLM OpenAI compatible server (https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html)
@@ -254,6 +255,8 @@ class OpenAIInferenceEngine(InferenceEngine):
          ----------
          model_name : str
              model name as described in https://platform.openai.com/docs/models
+         reasoning_model : bool, Optional
+             indicator for OpenAI reasoning models ("o" series).
          """
          if importlib.util.find_spec("openai") is None:
              raise ImportError("OpenAI Python API library not found. Please install OpenAI (```pip install openai```).")
@@ -262,6 +265,7 @@ class OpenAIInferenceEngine(InferenceEngine):
          self.client = OpenAI(**kwrs)
          self.async_client = AsyncOpenAI(**kwrs)
          self.model = model
+         self.reasoning_model = reasoning_model
 
      def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
          """
@@ -278,14 +282,27 @@ class OpenAIInferenceEngine(InferenceEngine):
          stream : bool, Optional
              if True, LLM generated text will be printed in terminal in real-time.
          """
-         response = self.client.chat.completions.create(
-             model=self.model,
-             messages=messages,
-             max_tokens=max_new_tokens,
-             temperature=temperature,
-             stream=stream,
-             **kwrs
-         )
+         if self.reasoning_model:
+             if temperature != 0.0:
+                 warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
+
+             response = self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_completion_tokens=max_new_tokens,
+                 stream=stream,
+                 **kwrs
+             )
+
+         else:
+             response = self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_tokens=max_new_tokens,
+                 temperature=temperature,
+                 stream=stream,
+                 **kwrs
+             )
 
          if stream:
              res = ''
@@ -294,8 +311,17 @@ class OpenAIInferenceEngine(InferenceEngine):
                  if chunk.choices[0].delta.content is not None:
                      res += chunk.choices[0].delta.content
                      print(chunk.choices[0].delta.content, end="", flush=True)
+                 if chunk.choices[0].finish_reason == "length":
+                     warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+                     if self.reasoning_model:
+                         warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
              return res
 
+         if response.choices[0].finish_reason == "length":
+             warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+             if self.reasoning_model:
+                 warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
          return response.choices[0].message.content
 
 
@@ -303,20 +329,37 @@ class OpenAIInferenceEngine(InferenceEngine):
          """
          Async version of chat method. Streaming is not supported.
          """
-         response = await self.async_client.chat.completions.create(
-             model=self.model,
-             messages=messages,
-             max_tokens=max_new_tokens,
-             temperature=temperature,
-             stream=False,
-             **kwrs
-         )
+         if self.reasoning_model:
+             if temperature != 0.0:
+                 warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
+
+             response = await self.async_client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_completion_tokens=max_new_tokens,
+                 stream=False,
+                 **kwrs
+             )
+         else:
+             response = await self.async_client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_tokens=max_new_tokens,
+                 temperature=temperature,
+                 stream=False,
+                 **kwrs
+             )
 
+         if response.choices[0].finish_reason == "length":
+             warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+             if self.reasoning_model:
+                 warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
          return response.choices[0].message.content
 
 
  class AzureOpenAIInferenceEngine(InferenceEngine):
-     def __init__(self, model:str, api_version:str, **kwrs):
+     def __init__(self, model:str, api_version:str, reasoning_model:bool=False, **kwrs):
          """
          The Azure OpenAI API inference engine.
          For parameters and documentation, refer to
@@ -329,6 +372,8 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
              model name as described in https://platform.openai.com/docs/models
          api_version : str
              the Azure OpenAI API version
+         reasoning_model : bool, Optional
+             indicator for OpenAI reasoning models ("o" series).
          """
          if importlib.util.find_spec("openai") is None:
              raise ImportError("OpenAI Python API library not found. Please install OpenAI (```pip install openai```).")
@@ -340,6 +385,7 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
                                          **kwrs)
          self.async_client = AsyncAzureOpenAI(api_version=self.api_version,
                                               **kwrs)
+         self.reasoning_model = reasoning_model
 
      def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
          """
@@ -356,14 +402,27 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
          stream : bool, Optional
              if True, LLM generated text will be printed in terminal in real-time.
          """
-         response = self.client.chat.completions.create(
-             model=self.model,
-             messages=messages,
-             max_tokens=max_new_tokens,
-             temperature=temperature,
-             stream=stream,
-             **kwrs
-         )
+         if self.reasoning_model:
+             if temperature != 0.0:
+                 warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
+
+             response = self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_completion_tokens=max_new_tokens,
+                 stream=stream,
+                 **kwrs
+             )
+
+         else:
+             response = self.client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_tokens=max_new_tokens,
+                 temperature=temperature,
+                 stream=stream,
+                 **kwrs
+             )
 
          if stream:
              res = ''
@@ -372,8 +431,17 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
                  if chunk.choices[0].delta.content is not None:
                      res += chunk.choices[0].delta.content
                      print(chunk.choices[0].delta.content, end="", flush=True)
+                 if chunk.choices[0].finish_reason == "length":
+                     warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+                     if self.reasoning_model:
+                         warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
              return res
 
+         if response.choices[0].finish_reason == "length":
+             warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+             if self.reasoning_model:
+                 warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
          return response.choices[0].message.content
 
 
@@ -381,15 +449,32 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
          """
          Async version of chat method. Streaming is not supported.
          """
-         response = await self.async_client.chat.completions.create(
-             model=self.model,
-             messages=messages,
-             max_tokens=max_new_tokens,
-             temperature=temperature,
-             stream=False,
-             **kwrs
-         )
+         if self.reasoning_model:
+             if temperature != 0.0:
+                 warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
+
+             response = await self.async_client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_completion_tokens=max_new_tokens,
+                 stream=False,
+                 **kwrs
+             )
+         else:
+             response = await self.async_client.chat.completions.create(
+                 model=self.model,
+                 messages=messages,
+                 max_tokens=max_new_tokens,
+                 temperature=temperature,
+                 stream=False,
+                 **kwrs
+             )
 
+         if response.choices[0].finish_reason == "length":
+             warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+             if self.reasoning_model:
+                 warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
          return response.choices[0].message.content
 
 
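The engine changes above branch on `reasoning_model` so that the token budget is passed as `max_completion_tokens` (and `temperature` is dropped) for the "o" series. Below is a minimal standalone sketch of that argument-selection pattern; `build_completion_kwargs` is a hypothetical helper written for illustration, not part of the package.

```python
from typing import Any, Dict, List

def build_completion_kwargs(model: str, messages: List[Dict[str, str]],
                            max_new_tokens: int, temperature: float,
                            reasoning_model: bool) -> Dict[str, Any]:
    """Select chat-completion arguments the way the hunks above do."""
    kwargs: Dict[str, Any] = {"model": model, "messages": messages}
    if reasoning_model:
        # "o" series: the token budget covers reasoning tokens plus output tokens
        kwargs["max_completion_tokens"] = max_new_tokens
    else:
        kwargs["max_tokens"] = max_new_tokens
        kwargs["temperature"] = temperature
    return kwargs

# The same call site then works for both model families
print(build_completion_kwargs("o1-mini", [{"role": "user", "content": "hi"}],
                              max_new_tokens=512, temperature=0.0, reasoning_model=True))
```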
@@ -1,6 +1,5 @@
  import abc
  import re
- import copy
  import json
  import json_repair
  import inspect
@@ -13,7 +12,6 @@ from typing import Set, List, Dict, Tuple, Union, Callable
  from llm_ie.data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
  from llm_ie.engines import InferenceEngine
  from colorama import Fore, Style
- from nltk.tokenize import RegexpTokenizer
 
 
  class Extractor:
@@ -139,6 +137,7 @@ class Extractor:
 
 
  class FrameExtractor(Extractor):
+     from nltk.tokenize import RegexpTokenizer
      def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
          """
          This is the abstract class for frame extraction.
@@ -157,7 +156,8 @@ class FrameExtractor(Extractor):
                           prompt_template=prompt_template,
                           system_prompt=system_prompt,
                           **kwrs)
-         self.tokenizer = RegexpTokenizer(r'\w+|[^\w\s]')
+
+         self.tokenizer = self.RegexpTokenizer(r'\w+|[^\w\s]')
 
 
      def _jaccard_score(self, s1:Set[str], s2:Set[str]) -> float:
@@ -569,7 +569,8 @@ class ReviewFrameExtractor(BasicFrameExtractor):
 
  class SentenceFrameExtractor(FrameExtractor):
      from nltk.tokenize.punkt import PunktSentenceTokenizer
-     def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+     def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
+                  context_sentences:Union[str, int]="all", **kwrs):
          """
          This class performs sentence-by-sentence information extraction.
          The process is as follows:
@@ -590,10 +591,26 @@ class SentenceFrameExtractor(FrameExtractor):
              prompt template with "{{<placeholder name>}}" placeholder.
          system_prompt : str, Optional
              system prompt.
+         context_sentences : Union[str, int], Optional
+             number of sentences before and after the given sentence to provide additional context.
+             if "all", the full text will be provided in the prompt as context.
+             if 0, no additional context will be provided.
+                 This is good for tasks that do not require context beyond the given sentence.
+             if > 0, the number of sentences before and after the given sentence to provide as context.
+                 This is good for tasks that require context beyond the given sentence.
          """
          super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
                           system_prompt=system_prompt, **kwrs)
 
+         if not isinstance(context_sentences, int) and context_sentences != "all":
+             raise ValueError('context_sentences must be an integer (>= 0) or "all".')
+
+         if isinstance(context_sentences, int) and context_sentences < 0:
+             raise ValueError("context_sentences must be a non-negative integer.")
+
+         self.context_sentences = context_sentences
+
+
      def _get_sentences(self, text:str) -> List[Dict[str,str]]:
          """
          This method sentence-tokenizes the input text into a list of sentences
@@ -614,9 +631,24 @@ class SentenceFrameExtractor(FrameExtractor):
                                 "end": end})
          return sentences
 
+
+     def _get_context_sentences(self, text_content, i:int, sentences:List[Dict[str, str]], document_key:str=None) -> str:
+         """
+         This function returns the context sentences for the current sentence of interest (i).
+         """
+         if self.context_sentences == "all":
+             context = text_content if isinstance(text_content, str) else text_content[document_key]
+         elif self.context_sentences == 0:
+             context = ""
+         else:
+             start = max(0, i - self.context_sentences)
+             end = min(i + 1 + self.context_sentences, len(sentences))
+             context = " ".join([s['sentence_text'] for s in sentences[start:end]])
+         return context
+
 
      def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
-                 document_key:str=None, multi_turn:bool=False, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
+                 document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
          """
          This method inputs a text and outputs a list of outputs per sentence.
 
@@ -631,12 +663,6 @@ class SentenceFrameExtractor(FrameExtractor):
          document_key : str, Optional
              specify the key in text_content where document text is.
              If text_content is str, this parameter will be ignored.
-         multi_turn : bool, Optional
-             multi-turn conversation prompting.
-             If True, sentences and LLM outputs will be appended to the input message and carry-over.
-             If False, only the current sentence is prompted.
-             For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
-             can better utilize the KV caching.
          temperature : float, Optional
              the temperature for token sampling.
          stream : bool, Optional
@@ -654,19 +680,32 @@ class SentenceFrameExtractor(FrameExtractor):
              if document_key is None:
                  raise ValueError("document_key must be provided when text_content is dict.")
              sentences = self._get_sentences(text_content[document_key])
-         # construct chat messages
-         messages = []
-         if self.system_prompt:
-             messages.append({'role': 'system', 'content': self.system_prompt})
-
-         messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
-         messages.append({'role': 'assistant', 'content': 'Sure, please start with the first sentence.'})
 
          # generate sentence by sentence
-         for sent in sentences:
-             messages.append({'role': 'user', 'content': sent['sentence_text']})
+         for i, sent in enumerate(sentences):
+             # construct chat messages
+             messages = []
+             if self.system_prompt:
+                 messages.append({'role': 'system', 'content': self.system_prompt})
+
+             context = self._get_context_sentences(text_content, i, sentences, document_key)
+
+             if self.context_sentences == 0:
+                 # no context, just place sentence of interest
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
+             else:
+                 # insert context
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                 # simulate conversation
+                 messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
+                 # place sentence of interest
+                 messages.append({'role': 'user', 'content': sent['sentence_text']})
+
              if stream:
-                 print(f"\n\n{Fore.GREEN}Sentence: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
+                 print(f"\n\n{Fore.GREEN}Sentence {i}:{Style.RESET_ALL}\n{sent['sentence_text']}\n")
+                 if isinstance(self.context_sentences, int) and self.context_sentences > 0:
+                     print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
+
                  print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 
              gen_text = self.inference_engine.chat(
@@ -676,19 +715,13 @@ class SentenceFrameExtractor(FrameExtractor):
                  stream=stream,
                  **kwrs
              )
-
-             if multi_turn:
-                 # update chat messages with LLM outputs
-                 messages.append({'role': 'assistant', 'content': gen_text})
-             else:
-                 # delete sentence so that message is reset
-                 del messages[-1]
 
              # add to output
              output.append({'sentence_start': sent['start'],
                             'sentence_end': sent['end'],
                             'sentence_text': sent['sentence_text'],
                             'gen_text': gen_text})
+
          return output
 
 
@@ -726,21 +759,31 @@ class SentenceFrameExtractor(FrameExtractor):
              if document_key is None:
                  raise ValueError("document_key must be provided when text_content is dict.")
              sentences = self._get_sentences(text_content[document_key])
-         # construct chat messages
-         base_messages = []
-         if self.system_prompt:
-             base_messages.append({'role': 'system', 'content': self.system_prompt})
-
-         base_messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
-         base_messages.append({'role': 'assistant', 'content': 'Sure, please start with the first sentence.'})
 
          # generate sentence by sentence
          tasks = []
          for i in range(0, len(sentences), concurrent_batch_size):
              batch = sentences[i:i + concurrent_batch_size]
-             for sent in batch:
-                 messages = copy.deepcopy(base_messages)
-                 messages.append({'role': 'user', 'content': sent['sentence_text']})
+             for j, sent in enumerate(batch):
+                 # construct chat messages
+                 messages = []
+                 if self.system_prompt:
+                     messages.append({'role': 'system', 'content': self.system_prompt})
+
+                 context = self._get_context_sentences(text_content, i + j, sentences, document_key)
+
+                 if self.context_sentences == 0:
+                     # no context, just place sentence of interest
+                     messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
+                 else:
+                     # insert context
+                     messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                     # simulate conversation
+                     messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
+                     # place sentence of interest
+                     messages.append({'role': 'user', 'content': sent['sentence_text']})
+
+                 # add to tasks
                  task = asyncio.create_task(
                      self.inference_engine.chat_async(
                          messages=messages,
@@ -764,10 +807,10 @@ class SentenceFrameExtractor(FrameExtractor):
 
 
      def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=512,
-                        document_key:str=None, multi_turn:bool=False, temperature:float=0.0, stream:bool=False,
-                        concurrent:bool=False, concurrent_batch_size:int=32,
-                        case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
-                        **kwrs) -> List[LLMInformationExtractionFrame]:
+                        document_key:str=None, temperature:float=0.0, stream:bool=False,
+                        concurrent:bool=False, concurrent_batch_size:int=32,
+                        case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
+                        **kwrs) -> List[LLMInformationExtractionFrame]:
          """
          This method inputs a text and outputs a list of LLMInformationExtractionFrame
          It uses the extract() method and post-processes outputs into frames.
@@ -785,12 +828,6 @@ class SentenceFrameExtractor(FrameExtractor):
          document_key : str, Optional
              specify the key in text_content where document text is.
              If text_content is str, this parameter will be ignored.
-         multi_turn : bool, Optional
-             multi-turn conversation prompting.
-             If True, sentences and LLM outputs will be appended to the input message and carry-over.
-             If False, only the current sentence is prompted.
-             For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
-             can better utilize the KV caching.
          temperature : float, Optional
              the temperature for token sampling.
          stream : bool, Optional
@@ -815,8 +852,6 @@ class SentenceFrameExtractor(FrameExtractor):
          if concurrent:
              if stream:
                  warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
-             if multi_turn:
-                 warnings.warn("multi_turn=True is not supported in concurrent mode.", RuntimeWarning)
 
              nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
              llm_output_sentences = asyncio.run(self.extract_async(text_content=text_content,
@@ -830,7 +865,6 @@ class SentenceFrameExtractor(FrameExtractor):
              llm_output_sentences = self.extract(text_content=text_content,
                                                  max_new_tokens=max_new_tokens,
                                                  document_key=document_key,
-                                                 multi_turn=multi_turn,
                                                  temperature=temperature,
                                                  stream=stream,
                                                  **kwrs)
@@ -866,7 +900,8 @@
 
  class SentenceReviewFrameExtractor(SentenceFrameExtractor):
      def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
-                  review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
+                  review_mode:str, review_prompt:str=None, system_prompt:str=None,
+                  context_sentences:Union[str, int]="all", **kwrs):
          """
          This class adds a review step after the SentenceFrameExtractor.
          For each sentence, the review process asks LLM to review its output and:
@@ -888,9 +923,16 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
              addition mode only asks LLM to add new frames, while revision mode asks LLM to regenerate.
          system_prompt : str, Optional
              system prompt.
+         context_sentences : Union[str, int], Optional
+             number of sentences before and after the given sentence to provide additional context.
+             if "all", the full text will be provided in the prompt as context.
+             if 0, no additional context will be provided.
+                 This is good for tasks that do not require context beyond the given sentence.
+             if > 0, the number of sentences before and after the given sentence to provide as context.
+                 This is good for tasks that require context beyond the given sentence.
          """
          super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
-                          system_prompt=system_prompt, **kwrs)
+                          system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
 
          if review_mode not in {"addition", "revision"}:
              raise ValueError('review_mode must be one of {"addition", "revision"}.')
@@ -908,7 +950,7 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
 
 
      def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
-                 document_key:str=None, multi_turn:bool=False, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
+                 document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
          """
          This method inputs a text and outputs a list of outputs per sentence.
 
@@ -923,12 +965,6 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
          document_key : str, Optional
              specify the key in text_content where document text is.
              If text_content is str, this parameter will be ignored.
-         multi_turn : bool, Optional
-             multi-turn conversation prompting.
-             If True, sentences and LLM outputs will be appended to the input message and carry-over.
-             If False, only the current sentence is prompted.
-             For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
-             can better utilize the KV caching.
          temperature : float, Optional
              the temperature for token sampling.
          stream : bool, Optional
@@ -946,19 +982,31 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
              if document_key is None:
                  raise ValueError("document_key must be provided when text_content is dict.")
              sentences = self._get_sentences(text_content[document_key])
-         # construct chat messages
-         messages = []
-         if self.system_prompt:
-             messages.append({'role': 'system', 'content': self.system_prompt})
+
+         # generate sentence by sentence
+         for i, sent in enumerate(sentences):
+             # construct chat messages
+             messages = []
+             if self.system_prompt:
+                 messages.append({'role': 'system', 'content': self.system_prompt})
 
-         messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
-         messages.append({'role': 'assistant', 'content': 'Sure, please start with the first sentence.'})
+             context = self._get_context_sentences(text_content, i, sentences, document_key)
+
+             if self.context_sentences == 0:
+                 # no context, just place sentence of interest
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
+             else:
+                 # insert context
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                 # simulate conversation
+                 messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
+                 # place sentence of interest
+                 messages.append({'role': 'user', 'content': sent['sentence_text']})
 
-         # generate sentence by sentence
-         for sent in sentences:
-             messages.append({'role': 'user', 'content': sent['sentence_text']})
              if stream:
-                 print(f"\n\n{Fore.GREEN}Sentence: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
+                 print(f"\n\n{Fore.GREEN}Sentence {i}: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
+                 if isinstance(self.context_sentences, int) and self.context_sentences > 0:
+                     print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
                  print(f"{Fore.BLUE}Initial Output:{Style.RESET_ALL}")
 
              initial = self.inference_engine.chat(
@@ -988,13 +1036,6 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
                  gen_text = review
              elif self.review_mode == "addition":
                  gen_text = initial + '\n' + review
-
-             if multi_turn:
-                 # update chat messages with LLM outputs
-                 messages.append({'role': 'assistant', 'content': review})
-             else:
-                 # delete sentence and review so that message is reset
-                 del messages[-3:]
 
              # add to output
              output.append({'sentence_start': sent['start'],
@@ -1040,24 +1081,33 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
              if document_key is None:
                  raise ValueError("document_key must be provided when text_content is dict.")
              sentences = self._get_sentences(text_content[document_key])
-         # construct chat messages
-         base_messages = []
-         if self.system_prompt:
-             base_messages.append({'role': 'system', 'content': self.system_prompt})
-
-         base_messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
-         base_messages.append({'role': 'assistant', 'content': 'Sure, please start with the first sentence.'})
 
          # generate initial outputs sentence by sentence
-         initials = []
          tasks = []
-         message_list = []
+         messages_list = []
          for i in range(0, len(sentences), concurrent_batch_size):
              batch = sentences[i:i + concurrent_batch_size]
-             for sent in batch:
-                 messages = copy.deepcopy(base_messages)
-                 messages.append({'role': 'user', 'content': sent['sentence_text']})
-                 message_list.append(messages)
+             for j, sent in enumerate(batch):
+                 # construct chat messages
+                 messages = []
+                 if self.system_prompt:
+                     messages.append({'role': 'system', 'content': self.system_prompt})
+
+                 context = self._get_context_sentences(text_content, i + j, sentences, document_key)
+
+                 if self.context_sentences == 0:
+                     # no context, just place sentence of interest
+                     messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
+                 else:
+                     # insert context
+                     messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                     # simulate conversation
+                     messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
+                     # place sentence of interest
+                     messages.append({'role': 'user', 'content': sent['sentence_text']})
+
+                 messages_list.append(messages)
+
                  task = asyncio.create_task(
                      self.inference_engine.chat_async(
                          messages=messages,
@@ -1071,15 +1121,15 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
              # Wait until the batch is done, collect results and move on to next batch
              responses = await asyncio.gather(*tasks)
          # Collect initials
-         for gen_text, sent, message in zip(responses, sentences, message_list):
+         initials = []
+         for gen_text, sent, messages in zip(responses, sentences, messages_list):
              initials.append({'sentence_start': sent['start'],
                               'sentence_end': sent['end'],
                               'sentence_text': sent['sentence_text'],
                               'gen_text': gen_text,
-                              'messages': message})
-
+                              'messages': messages})
+
          # Review
-         reviews = []
          tasks = []
          for i in range(0, len(initials), concurrent_batch_size):
              batch = initials[i:i + concurrent_batch_size]
@@ -1101,6 +1151,7 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
              responses = await asyncio.gather(*tasks)
 
          # Collect reviews
+         reviews = []
          for gen_text, sent in zip(responses, sentences):
              reviews.append({'sentence_start': sent['start'],
                              'sentence_end': sent['end'],
@@ -1123,7 +1174,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
 
  class SentenceCoTFrameExtractor(SentenceFrameExtractor):
      from nltk.tokenize.punkt import PunktSentenceTokenizer
-     def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+     def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
+                  context_sentences:Union[str, int]="all", **kwrs):
          """
          This class performs sentence-based Chain-of-thoughts (CoT) information extraction.
          A simulated chat follows this process:
@@ -1145,13 +1197,20 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
              prompt template with "{{<placeholder name>}}" placeholder.
          system_prompt : str, Optional
              system prompt.
+         context_sentences : Union[str, int], Optional
+             number of sentences before and after the given sentence to provide additional context.
+             if "all", the full text will be provided in the prompt as context.
+             if 0, no additional context will be provided.
+                 This is good for tasks that do not require context beyond the given sentence.
+             if > 0, the number of sentences before and after the given sentence to provide as context.
+                 This is good for tasks that require context beyond the given sentence.
          """
          super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
-                          system_prompt=system_prompt, **kwrs)
+                          system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
 
 
      def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
-                 document_key:str=None, multi_turn:bool=False, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
+                 document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
          """
          This method inputs a text and outputs a list of outputs per sentence.
 
@@ -1166,12 +1225,6 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
          document_key : str, Optional
              specify the key in text_content where document text is.
              If text_content is str, this parameter will be ignored.
-         multi_turn : bool, Optional
-             multi-turn conversation prompting.
-             If True, sentences and LLM outputs will be appended to the input message and carry-over.
-             If False, only the current sentence is prompted.
-             For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
-             can better utilize the KV caching.
          temperature : float, Optional
              the temperature for token sampling.
          stream : bool, Optional
@@ -1187,19 +1240,31 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
              sentences = self._get_sentences(text_content)
          elif isinstance(text_content, dict):
              sentences = self._get_sentences(text_content[document_key])
-         # construct chat messages
-         messages = []
-         if self.system_prompt:
-             messages.append({'role': 'system', 'content': self.system_prompt})
-
-         messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
-         messages.append({'role': 'assistant', 'content': 'Sure, please start with the first sentence.'})
 
          # generate sentence by sentence
-         for sent in sentences:
-             messages.append({'role': 'user', 'content': sent['sentence_text']})
+         for i, sent in enumerate(sentences):
+             # construct chat messages
+             messages = []
+             if self.system_prompt:
+                 messages.append({'role': 'system', 'content': self.system_prompt})
+
+             context = self._get_context_sentences(text_content, i, sentences, document_key)
+
+             if self.context_sentences == 0:
+                 # no context, just place sentence of interest
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
+             else:
+                 # insert context
+                 messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                 # simulate conversation
+                 messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
+                 # place sentence of interest
+                 messages.append({'role': 'user', 'content': sent['sentence_text']})
+
              if stream:
                  print(f"\n\n{Fore.GREEN}Sentence: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
+                 if isinstance(self.context_sentences, int) and self.context_sentences > 0:
+                     print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
                  print(f"{Fore.BLUE}CoT:{Style.RESET_ALL}")
 
              gen_text = self.inference_engine.chat(
@@ -1209,13 +1274,6 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
                  stream=stream,
                  **kwrs
              )
-
-             if multi_turn:
-                 # update chat messages with LLM outputs
-                 messages.append({'role': 'assistant', 'content': gen_text})
-             else:
-                 # delete sentence so that message is reset
-                 del messages[-1]
 
              # add to output
              output.append({'sentence_start': sent['start'],
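The `_get_context_sentences` helper added above selects a symmetric window of sentences around the sentence of interest. Below is a self-contained sketch of the same windowing arithmetic; `context_window` is a hypothetical function using plain strings instead of the package's sentence dictionaries.

```python
from typing import List

def context_window(sentences: List[str], i: int, n: int) -> str:
    """Return up to n sentences on each side of sentence i, joined as one context string."""
    start = max(0, i - n)                 # clamp at the start of the document
    end = min(i + 1 + n, len(sentences))  # clamp at the end of the document
    return " ".join(sentences[start:end])

sents = ["S0.", "S1.", "S2.", "S3.", "S4."]
print(context_window(sents, i=2, n=2))  # "S0. S1. S2. S3. S4."
print(context_window(sents, i=0, n=1))  # "S0. S1."
```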