llm-ie 0.4.1__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {llm_ie-0.4.1 → llm_ie-0.4.2}/PKG-INFO +1 -1
  2. {llm_ie-0.4.1 → llm_ie-0.4.2}/pyproject.toml +1 -1
  3. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/data_types.py +1 -1
  4. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/extractors.py +20 -6
  5. {llm_ie-0.4.1 → llm_ie-0.4.2}/README.md +0 -0
  6. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/__init__.py +0 -0
  7. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
  8. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
  9. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
  10. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
  11. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +0 -0
  12. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +0 -0
  13. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
  14. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
  15. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -0
  16. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
  17. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
  18. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -0
  19. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt +0 -0
  20. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +0 -0
  21. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +0 -0
  22. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/engines.py +0 -0
  23. {llm_ie-0.4.1 → llm_ie-0.4.2}/src/llm_ie/prompt_editor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "llm-ie"
3
- version = "0.4.1"
3
+ version = "0.4.2"
4
4
  description = "An LLM-powered tool that transforms everyday language into robust information extraction pipelines."
5
5
  authors = ["Enshuo (David) Hsu"]
6
6
  license = "MIT"
@@ -204,7 +204,7 @@ class LLMInformationExtractionDocument:
204
204
  # Add frame
205
205
  frame_clone = frame.copy()
206
206
  if create_id:
207
- frame_clone.doc_id = f"{self.doc_id}_{len(self.frames)}"
207
+ frame_clone.frame_id = str(len(self.frames))
208
208
 
209
209
  self.frames.append(frame_clone)
210
210
  return True
@@ -59,7 +59,7 @@ class Extractor:
59
59
  text_content : Union[str, Dict[str,str]]
60
60
  the input text content to put in prompt template.
61
61
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
62
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
62
+ If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}. All values must be str.
63
63
 
64
64
  Returns : str
65
65
  a user prompt.
@@ -73,6 +73,10 @@ class Extractor:
73
73
  prompt = pattern.sub(text, self.prompt_template)
74
74
 
75
75
  elif isinstance(text_content, dict):
76
+ # Check if all values are str
77
+ if not all([isinstance(v, str) for v in text_content.values()]):
78
+ raise ValueError("All values in text_content must be str.")
79
+ # Check if all keys are in the prompt template
76
80
  placeholders = pattern.findall(self.prompt_template)
77
81
  if len(placeholders) != len(text_content):
78
82
  raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
@@ -422,6 +426,13 @@ class BasicFrameExtractor(FrameExtractor):
422
426
  Return : str
423
427
  a list of frames.
424
428
  """
429
+ if isinstance(text_content, str):
430
+ text = text_content
431
+ elif isinstance(text_content, dict):
432
+ if document_key is None:
433
+ raise ValueError("document_key must be provided when text_content is dict.")
434
+ text = text_content[document_key]
435
+
425
436
  frame_list = []
426
437
  gen_text = self.extract(text_content=text_content,
427
438
  max_new_tokens=max_new_tokens,
@@ -435,11 +446,6 @@ class BasicFrameExtractor(FrameExtractor):
435
446
  entity_json.append(entity)
436
447
  else:
437
448
  warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
438
-
439
- if isinstance(text_content, str):
440
- text = text_content
441
- elif isinstance(text_content, dict):
442
- text = text_content[document_key]
443
449
 
444
450
  spans = self._find_entity_spans(text=text,
445
451
  entities=[e[entity_key] for e in entity_json],
@@ -645,6 +651,8 @@ class SentenceFrameExtractor(FrameExtractor):
645
651
  if isinstance(text_content, str):
646
652
  sentences = self._get_sentences(text_content)
647
653
  elif isinstance(text_content, dict):
654
+ if document_key is None:
655
+ raise ValueError("document_key must be provided when text_content is dict.")
648
656
  sentences = self._get_sentences(text_content[document_key])
649
657
  # construct chat messages
650
658
  messages = []
@@ -715,6 +723,8 @@ class SentenceFrameExtractor(FrameExtractor):
715
723
  if isinstance(text_content, str):
716
724
  sentences = self._get_sentences(text_content)
717
725
  elif isinstance(text_content, dict):
726
+ if document_key is None:
727
+ raise ValueError("document_key must be provided when text_content is dict.")
718
728
  sentences = self._get_sentences(text_content[document_key])
719
729
  # construct chat messages
720
730
  base_messages = []
@@ -933,6 +943,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
933
943
  if isinstance(text_content, str):
934
944
  sentences = self._get_sentences(text_content)
935
945
  elif isinstance(text_content, dict):
946
+ if document_key is None:
947
+ raise ValueError("document_key must be provided when text_content is dict.")
936
948
  sentences = self._get_sentences(text_content[document_key])
937
949
  # construct chat messages
938
950
  messages = []
@@ -1025,6 +1037,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
1025
1037
  if isinstance(text_content, str):
1026
1038
  sentences = self._get_sentences(text_content)
1027
1039
  elif isinstance(text_content, dict):
1040
+ if document_key is None:
1041
+ raise ValueError("document_key must be provided when text_content is dict.")
1028
1042
  sentences = self._get_sentences(text_content[document_key])
1029
1043
  # construct chat messages
1030
1044
  base_messages = []
File without changes
File without changes
File without changes