llm-ie 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py CHANGED
@@ -181,19 +181,22 @@ class FrameExtractor(Extractor):
181
181
  Returns : Tuple[Tuple[int, int], float]
182
182
  a tuple of 2-tuple span and Jaccard score.
183
183
  """
184
+ if not text or not pattern:
185
+ return None, 0
186
+
184
187
  text_tokens, text_spans = self._get_word_tokens(text)
185
188
  pattern_tokens, _ = self._get_word_tokens(pattern)
186
189
  pattern_tokens_set = set(pattern_tokens)
187
190
  window_size = len(pattern_tokens)
188
- window_size_min = int(window_size * (1 - buffer_size))
189
- window_size_max = int(window_size * (1 + buffer_size))
191
+ window_size_min = max(1, int(window_size * (1 - buffer_size)))
192
+ window_size_max = int(window_size * (1 + buffer_size)) + 1
190
193
  closest_substring_span = None
191
194
  best_score = 0
192
195
 
193
196
  for i in range(len(text_tokens) - window_size_max):
194
197
  for w in range(window_size_min, window_size_max):
195
198
  sub_str_tokens = text_tokens[i:i + w]
196
- if sub_str_tokens[0] == pattern_tokens[0]:
199
+ if len(sub_str_tokens) > 0 and sub_str_tokens[0] == pattern_tokens[0]:
197
200
  score = self._jaccard_score(set(sub_str_tokens), pattern_tokens_set)
198
201
  if score > best_score:
199
202
  best_score = score
@@ -238,7 +241,7 @@ class FrameExtractor(Extractor):
238
241
 
239
242
  # Exact match
240
243
  match = re.search(re.escape(entity), text)
241
- if match:
244
+ if match and entity:
242
245
  start, end = match.span()
243
246
  entity_spans.append((start, end))
244
247
  # Replace the found entity with spaces to avoid finding the same instance again
@@ -246,7 +249,7 @@ class FrameExtractor(Extractor):
246
249
  # Fuzzy match
247
250
  elif fuzzy_match:
248
251
  closest_substring_span, best_score = self._get_closest_substring(text, entity, buffer_size=fuzzy_buffer_size)
249
- if best_score >= fuzzy_score_cutoff and closest_substring_span:
252
+ if closest_substring_span and best_score >= fuzzy_score_cutoff:
250
253
  entity_spans.append(closest_substring_span)
251
254
  # Replace the found entity with spaces to avoid finding the same instance again
252
255
  text = text[:closest_substring_span[0]] + ' ' * (closest_substring_span[1] - closest_substring_span[0]) + text[closest_substring_span[1]:]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -16,8 +16,8 @@ llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=oKH_QeD
16
16
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
17
17
  llm_ie/data_types.py,sha256=hPz3WOeAzfn2QKmb0CxHmRdQWZQ4G9zq8U-RJBVFdYk,14329
18
18
  llm_ie/engines.py,sha256=PTYs7s_iCPmI-yFUCVCPY_cMGS77ma2VGoz4rdNkODI,9308
19
- llm_ie/extractors.py,sha256=yBdIcevjMfwto85Jb0KkRMN-AjIMk92fD5yWB3Qm8MY,64408
19
+ llm_ie/extractors.py,sha256=j9L9USybJBmYZM4RAjGO6DR4StYBgzhqCN6nEFZZQVQ,64523
20
20
  llm_ie/prompt_editor.py,sha256=Xc5ZHsEnM8-YYITokIsM6BVsf2Ec_8ajJDaldPf-P8U,8577
21
- llm_ie-0.3.3.dist-info/METADATA,sha256=CeTsMNtWhEWCvOqHWSXu0KqOgDp3kMwN2WtBF4N-4zE,41266
22
- llm_ie-0.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
23
- llm_ie-0.3.3.dist-info/RECORD,,
21
+ llm_ie-0.3.4.dist-info/METADATA,sha256=-5Tf9TCWczCVOsdMavkBZ-KnYPGnbNrV1rsU-pMHfPA,41266
22
+ llm_ie-0.3.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
23
+ llm_ie-0.3.4.dist-info/RECORD,,
File without changes