kodexa 7.4.413092627931__tar.gz → 7.4.413159458494__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/PKG-INFO +1 -1
  2. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/dataclasses/__init__.py +46 -76
  3. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/pyproject.toml +1 -1
  4. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/LICENSE +0 -0
  5. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/README.md +0 -0
  6. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/__init__.py +0 -0
  7. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/assistant/__init__.py +0 -0
  8. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/assistant/assistant.py +0 -0
  9. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/connectors/__init__.py +0 -0
  10. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/connectors/connectors.py +0 -0
  11. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/dataclasses/templates/llm_data_class.j2 +0 -0
  12. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/__init__.py +0 -0
  13. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/base.py +0 -0
  14. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/entities/__init__.py +0 -0
  15. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/entities/check_response.py +0 -0
  16. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/entities/product.py +0 -0
  17. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/entities/product_group.py +0 -0
  18. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/entities/product_subscription.py +0 -0
  19. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/model.py +0 -0
  20. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/objects.py +0 -0
  21. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/persistence.py +0 -0
  22. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/model/utils.py +0 -0
  23. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/pipeline/__init__.py +0 -0
  24. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/pipeline/pipeline.py +0 -0
  25. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/platform/__init__.py +0 -0
  26. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/platform/client.py +0 -0
  27. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/platform/interaction.py +0 -0
  28. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/platform/kodexa.py +0 -0
  29. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/__init__.py +0 -0
  30. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/ast.py +0 -0
  31. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/core.py +0 -0
  32. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/lexrules.py +0 -0
  33. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/lextab.py +0 -0
  34. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/lextab.pyi +0 -0
  35. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/parserules.py +0 -0
  36. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/parserules.pyi +0 -0
  37. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/parsetab.py +0 -0
  38. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/selectors/parsetab.pyi +0 -0
  39. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/spatial/__init__.py +0 -0
  40. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/spatial/azure_models.py +0 -0
  41. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/spatial/bbox_common.py +0 -0
  42. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/spatial/table_form_common.py +0 -0
  43. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/steps/__init__.py +0 -0
  44. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/steps/common.py +0 -0
  45. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/testing/__init__.py +0 -0
  46. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/testing/test_components.py +0 -0
  47. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/testing/test_utils.py +0 -0
  48. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/training/__init__.py +0 -0
  49. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/training/train_utils.py +0 -0
  50. {kodexa-7.4.413092627931 → kodexa-7.4.413159458494}/kodexa/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.4.413092627931
3
+ Version: 7.4.413159458494
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -54,21 +54,23 @@ class LLMDataAttribute(BaseModel):
54
54
  self.node_uuid_list = source.node_uuid_list
55
55
  self.page_number = source.page_number
56
56
 
57
- def create_exception(
58
- self,
59
- exception_type_id: str,
60
- exception_type: str,
61
- normalized_text: str,
62
- message: str,
63
- exception_detail: str,
64
- ):
65
- content_exception = ContentException(
66
- exception_type=exception_type,
67
- exception_detail=exception_detail,
68
- message=message,
69
- tag_uuid=self.tag_uuid,
70
- )
71
- self.exceptions.append(content_exception)
57
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
58
+ # Lets make sure we add all the content exceptions
59
+ if self.exceptions is not None:
60
+ for exception in self.exceptions:
61
+ # We have two types of exception, one in the API and one in the
62
+ # document
63
+ from kodexa.model import ContentException as KodexaContentException
64
+ internal_exception = KodexaContentException(
65
+ tag=exception.tag,
66
+ exception_type=exception.exception_type,
67
+ message=exception.message,
68
+ exception_details=exception.exception_details,
69
+ severity=exception.severity,
70
+ group_uuid=self.group_uuid,
71
+ tag_uuid=self.tag_uuid,
72
+ )
73
+ document.doc.add_exception(internal_exception)
72
74
 
73
75
  def to_dict(self, taxonomy: Taxonomy) -> dict:
74
76
  """Convert attribute to JSON with normalized value"""
@@ -111,6 +113,24 @@ class LLMDataObject(BaseModel):
111
113
  class Config:
112
114
  arbitrary_types_allowed = True
113
115
 
116
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
117
+ # Lets make sure we add all the content exceptions
118
+ if self.exceptions is not None:
119
+ for exception in self.exceptions:
120
+ # We have two types of exception, one in the API and one in the
121
+ # document
122
+ from kodexa.model import ContentException as KodexaContentException
123
+ internal_exception = KodexaContentException(
124
+ tag=exception.tag,
125
+ exception_type=exception.exception_type,
126
+ message=exception.message,
127
+ exception_details=exception.exception_details,
128
+ severity=exception.severity,
129
+ group_uuid=self.group_uuid,
130
+ tag_uuid=self.tag_uuid,
131
+ )
132
+ document.doc.add_exception(internal_exception)
133
+
114
134
  def get_all_review_pages(self):
115
135
  """
116
136
  Returns a list of unique page numbers that would be included in the review.
@@ -118,7 +138,7 @@ class LLMDataObject(BaseModel):
118
138
  :return: list of unique page numbers
119
139
  """
120
140
  pages = set()
121
- for field in self.__fields__:
141
+ for field in self.model_fields:
122
142
  pages.update(self._get_field_pages(field))
123
143
  return sorted(list(pages))
124
144
 
@@ -169,7 +189,7 @@ class LLMDataObject(BaseModel):
169
189
  def to_dict(self, taxonomy: Taxonomy) -> dict:
170
190
  """Convert data object to JSON using normalized values and taxon paths"""
171
191
  result = {}
172
- for field in self.__fields__:
192
+ for field in self.model_fields:
173
193
  value = getattr(self, field)
174
194
 
175
195
  if isinstance(value, list) and len(value) > 0:
@@ -177,7 +197,7 @@ class LLMDataObject(BaseModel):
177
197
  # We need to find the first field of the object that is a LLMDataAttribute
178
198
  # and use that to derive the taxon path of the LLMDataObject
179
199
  data_attribute = None
180
- for child_field in value[0].__fields__:
200
+ for child_field in value[0].model_fields:
181
201
  child_attr = getattr(value[0], child_field)
182
202
  if isinstance(child_attr, LLMDataAttribute):
183
203
  data_attribute = child_attr
@@ -205,7 +225,7 @@ class LLMDataObject(BaseModel):
205
225
  :return: dict of this data object and children for the specified page
206
226
  """
207
227
  review = {}
208
- for field in self.__fields__:
228
+ for field in self.model_fields:
209
229
  review_field = self._build_review(field, page_number)
210
230
  if review_field:
211
231
  review[field] = review_field
@@ -234,26 +254,6 @@ class LLMDataObject(BaseModel):
234
254
 
235
255
  return None
236
256
 
237
- def create_exception(
238
- self,
239
- exception_type_id: str,
240
- exception_type: str,
241
- message: str,
242
- exception_detail: str,
243
- severity: str = "ERROR",
244
- ):
245
- content_exception = ContentException(
246
- exception_type=exception_type,
247
- exception_details=exception_detail,
248
- message=message,
249
- group_uuid=self.group_uuid,
250
- severity=severity,
251
- )
252
- if self.exceptions is None:
253
- self.exceptions = []
254
-
255
- self.exceptions.append(content_exception)
256
-
257
257
  def apply_labels(
258
258
  self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
259
259
  assistant: Optional["Assistant"] = None
@@ -275,24 +275,11 @@ class LLMDataObject(BaseModel):
275
275
  """
276
276
 
277
277
  # Lets make sure we add all the content exceptions
278
- if self.exceptions is not None:
279
- for exception in self.exceptions:
280
- # We have two types of exception, one in the API and one in the
281
- # document
282
- from kodexa.model import ContentException as KodexaContentException
283
- internal_exception = KodexaContentException(
284
- exception_type=exception.exception_type,
285
- message=exception.message,
286
- exception_details=exception.exception_details,
287
- severity=exception.severity,
288
- group_uuid=exception.group_uuid,
289
- tag_uuid=exception.tag_uuid,
290
- )
291
- document.doc.add_exception(internal_exception)
278
+ self.process_exceptions(document)
292
279
 
293
280
  # Let's go through this data object and find all the attributes that have a value
294
281
  # then we will apply the labels to the document
295
- for field in self.__fields__:
282
+ for field in self.model_fields:
296
283
  logger.info(f"Processing field {field}")
297
284
  value = getattr(self, field)
298
285
 
@@ -311,8 +298,6 @@ class LLMDataObject(BaseModel):
311
298
  # We need to add the label to the document for this attribute
312
299
 
313
300
  tag = value.taxon_path
314
-
315
- # TODO need to work out why we are missing them?
316
301
  logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
317
302
  if value.node_uuid_list is None:
318
303
  value.node_uuid_list = value.line_ids
@@ -361,31 +346,16 @@ class LLMDataObject(BaseModel):
361
346
  current_value.append(new_tag)
362
347
  node.remove_feature("tag", tag)
363
348
  node.add_feature("tag", tag, current_value, single=False)
364
- # try:
365
- # if value.data_type == 'Derived':
366
- # logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
367
- # nearby_node = find_nearby_word_to_tag(node, tag)
368
- # nearby_node.tag(
369
- # tag_to_apply=tag,
370
- # value=value.normalized_text,
371
- # tag_uuid=tag_uuid,
372
- # cell_index=self.cell_index,
373
- # selector="//word",
374
- # confidence=-1,
375
- # group_uuid=self.group_uuid,
376
- # parent_group_uuid=parent_group_uuid,
377
- # owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
378
- # )
379
- # else:
380
- # logger.info(f"Node already has tag {tag} - Skipping.")
381
- # except:
382
- # logger.error(f"Error tagging nearby node with tag {tag}")
383
349
 
384
350
  logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
351
+
352
+ # Lets make sure we add all the content exceptions
353
+ self.process_exceptions(document)
354
+
385
355
  if isinstance(value, LLMDataObject):
386
356
  # We need to apply the labels to the document for this object
387
357
  value.apply_labels(document, parent_group_uuid=self.group_uuid)
388
- # logger.info(f"Applied labels to data object {value.group_uuid}")
358
+ logger.info(f"Applied labels to data object {value.group_uuid}")
389
359
 
390
360
 
391
361
  def find_nearby_word_to_tag(node, tag):
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "kodexa"
3
- version = "7.4.413092627931"
3
+ version = "7.4.413159458494"
4
4
  description = "Python SDK for the Kodexa Platform"
5
5
  authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
6
6
  readme = "README.md"