PyPI - llm-ie - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

llm-ie 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

llm_ie/extractors.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import abc
 import re
 import json
+import json_repair
 import inspect
 import importlib.resources
 import warnings
@@ -117,7 +118,12 @@ class Extractor:
                 dict_obj = json.loads(dict_str)
                 out.append(dict_obj)
             except json.JSONDecodeError:
-                warnings.warn(f'Post-processing failed:\n{dict_str}', RuntimeWarning)
+                dict_obj = json_repair.repair_json(dict_str, skip_json_loads=True, return_objects=True)
+                if dict_obj:
+                    warnings.warn(f'JSONDecodeError detected, fixed with repair_json:\n{dict_str}', RuntimeWarning)
+                    out.append(dict_obj)
+                else:
+                    warnings.warn(f'JSONDecodeError could not be fixed:\n{dict_str}', RuntimeWarning)
         return out
@@ -181,19 +187,22 @@ class FrameExtractor(Extractor):
         Returns : Tuple[Tuple[int, int], float]
             a tuple of 2-tuple span and Jaccard score.
         """
+        if not text or not pattern:
+            return None, 0
         text_tokens, text_spans = self._get_word_tokens(text)
         pattern_tokens, _ = self._get_word_tokens(pattern)
         pattern_tokens_set = set(pattern_tokens)
         window_size = len(pattern_tokens)
-        window_size_min = int(window_size * (1 - buffer_size))
-        window_size_max = int(window_size * (1 + buffer_size))
+        window_size_min = max(1, int(window_size * (1 - buffer_size)))
+        window_size_max = int(window_size * (1 + buffer_size)) + 1
         closest_substring_span = None
         best_score = 0
         for i in range(len(text_tokens) - window_size_max):
             for w in range(window_size_min, window_size_max):
                 sub_str_tokens = text_tokens[i:i + w]
-                if sub_str_tokens[0] == pattern_tokens[0]:
+                if len(sub_str_tokens) > 0 and sub_str_tokens[0] == pattern_tokens[0]:
                     score = self._jaccard_score(set(sub_str_tokens), pattern_tokens_set)
                     if score > best_score:
                         best_score = score
@@ -238,7 +247,7 @@ class FrameExtractor(Extractor):
             # Exact match
             match = re.search(re.escape(entity), text)
-            if match:
+            if match and entity:
                 start, end = match.span()
                 entity_spans.append((start, end))
                 # Replace the found entity with spaces to avoid finding the same instance again
@@ -246,7 +255,7 @@ class FrameExtractor(Extractor):
             # Fuzzy match
             elif fuzzy_match:
                 closest_substring_span, best_score = self._get_closest_substring(text, entity, buffer_size=fuzzy_buffer_size)
-                if best_score >= fuzzy_score_cutoff and closest_substring_span:
+                if closest_substring_span and best_score >= fuzzy_score_cutoff:
                     entity_spans.append(closest_substring_span)
                     # Replace the found entity with spaces to avoid finding the same instance again
                     text = text[:closest_substring_span[0]] + ' ' * (closest_substring_span[1] - closest_substring_span[0]) + text[closest_substring_span[1]:]

{llm_ie-0.3.3.dist-info → llm_ie-0.3.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 0.3.3
+Version: 0.3.5
 Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: colorama (>=0.4.6,<0.5.0)
+Requires-Dist: json_repair (>=0.30,<0.31)
 Requires-Dist: nltk (>=3.8,<4.0)
 Description-Content-Type: text/markdown

{llm_ie-0.3.3.dist-info → llm_ie-0.3.5.dist-info}/RECORD RENAMED Viewed

@@ -16,8 +16,8 @@ llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=oKH_QeD
 llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
 llm_ie/data_types.py,sha256=hPz3WOeAzfn2QKmb0CxHmRdQWZQ4G9zq8U-RJBVFdYk,14329
 llm_ie/engines.py,sha256=PTYs7s_iCPmI-yFUCVCPY_cMGS77ma2VGoz4rdNkODI,9308
-llm_ie/extractors.py,sha256=yBdIcevjMfwto85Jb0KkRMN-AjIMk92fD5yWB3Qm8MY,64408
+llm_ie/extractors.py,sha256=WM-9ZmhGYCYzXLPiShfF42grezjRz42JbgXwueixZRI,64870
 llm_ie/prompt_editor.py,sha256=Xc5ZHsEnM8-YYITokIsM6BVsf2Ec_8ajJDaldPf-P8U,8577
-llm_ie-0.3.3.dist-info/METADATA,sha256=CeTsMNtWhEWCvOqHWSXu0KqOgDp3kMwN2WtBF4N-4zE,41266
-llm_ie-0.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-llm_ie-0.3.3.dist-info/RECORD,,
+llm_ie-0.3.5.dist-info/METADATA,sha256=y5nABgPeeMPEkZ58WoOBR3sgFrKOE_mF4fqaHK59K1w,41308
+llm_ie-0.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llm_ie-0.3.5.dist-info/RECORD,,

{llm_ie-0.3.3.dist-info → llm_ie-0.3.5.dist-info}/WHEEL RENAMED Viewed

File without changes

llm-ie 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

llm-ie 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl