kodexa 7.4.413092627931__py3-none-any.whl → 7.4.413159458494__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -54,21 +54,23 @@ class LLMDataAttribute(BaseModel):
54
54
  self.node_uuid_list = source.node_uuid_list
55
55
  self.page_number = source.page_number
56
56
 
57
- def create_exception(
58
- self,
59
- exception_type_id: str,
60
- exception_type: str,
61
- normalized_text: str,
62
- message: str,
63
- exception_detail: str,
64
- ):
65
- content_exception = ContentException(
66
- exception_type=exception_type,
67
- exception_detail=exception_detail,
68
- message=message,
69
- tag_uuid=self.tag_uuid,
70
- )
71
- self.exceptions.append(content_exception)
57
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
58
+ # Lets make sure we add all the content exceptions
59
+ if self.exceptions is not None:
60
+ for exception in self.exceptions:
61
+ # We have two types of exception, one in the API and one in the
62
+ # document
63
+ from kodexa.model import ContentException as KodexaContentException
64
+ internal_exception = KodexaContentException(
65
+ tag=exception.tag,
66
+ exception_type=exception.exception_type,
67
+ message=exception.message,
68
+ exception_details=exception.exception_details,
69
+ severity=exception.severity,
70
+ group_uuid=self.group_uuid,
71
+ tag_uuid=self.tag_uuid,
72
+ )
73
+ document.doc.add_exception(internal_exception)
72
74
 
73
75
  def to_dict(self, taxonomy: Taxonomy) -> dict:
74
76
  """Convert attribute to JSON with normalized value"""
@@ -111,6 +113,24 @@ class LLMDataObject(BaseModel):
111
113
  class Config:
112
114
  arbitrary_types_allowed = True
113
115
 
116
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
117
+ # Lets make sure we add all the content exceptions
118
+ if self.exceptions is not None:
119
+ for exception in self.exceptions:
120
+ # We have two types of exception, one in the API and one in the
121
+ # document
122
+ from kodexa.model import ContentException as KodexaContentException
123
+ internal_exception = KodexaContentException(
124
+ tag=exception.tag,
125
+ exception_type=exception.exception_type,
126
+ message=exception.message,
127
+ exception_details=exception.exception_details,
128
+ severity=exception.severity,
129
+ group_uuid=self.group_uuid,
130
+ tag_uuid=self.tag_uuid,
131
+ )
132
+ document.doc.add_exception(internal_exception)
133
+
114
134
  def get_all_review_pages(self):
115
135
  """
116
136
  Returns a list of unique page numbers that would be included in the review.
@@ -118,7 +138,7 @@ class LLMDataObject(BaseModel):
118
138
  :return: list of unique page numbers
119
139
  """
120
140
  pages = set()
121
- for field in self.__fields__:
141
+ for field in self.model_fields:
122
142
  pages.update(self._get_field_pages(field))
123
143
  return sorted(list(pages))
124
144
 
@@ -169,7 +189,7 @@ class LLMDataObject(BaseModel):
169
189
  def to_dict(self, taxonomy: Taxonomy) -> dict:
170
190
  """Convert data object to JSON using normalized values and taxon paths"""
171
191
  result = {}
172
- for field in self.__fields__:
192
+ for field in self.model_fields:
173
193
  value = getattr(self, field)
174
194
 
175
195
  if isinstance(value, list) and len(value) > 0:
@@ -177,7 +197,7 @@ class LLMDataObject(BaseModel):
177
197
  # We need to find the first field of the object that is a LLMDataAttribute
178
198
  # and use that to derive the taxon path of the LLMDataObject
179
199
  data_attribute = None
180
- for child_field in value[0].__fields__:
200
+ for child_field in value[0].model_fields:
181
201
  child_attr = getattr(value[0], child_field)
182
202
  if isinstance(child_attr, LLMDataAttribute):
183
203
  data_attribute = child_attr
@@ -205,7 +225,7 @@ class LLMDataObject(BaseModel):
205
225
  :return: dict of this data object and children for the specified page
206
226
  """
207
227
  review = {}
208
- for field in self.__fields__:
228
+ for field in self.model_fields:
209
229
  review_field = self._build_review(field, page_number)
210
230
  if review_field:
211
231
  review[field] = review_field
@@ -234,26 +254,6 @@ class LLMDataObject(BaseModel):
234
254
 
235
255
  return None
236
256
 
237
- def create_exception(
238
- self,
239
- exception_type_id: str,
240
- exception_type: str,
241
- message: str,
242
- exception_detail: str,
243
- severity: str = "ERROR",
244
- ):
245
- content_exception = ContentException(
246
- exception_type=exception_type,
247
- exception_details=exception_detail,
248
- message=message,
249
- group_uuid=self.group_uuid,
250
- severity=severity,
251
- )
252
- if self.exceptions is None:
253
- self.exceptions = []
254
-
255
- self.exceptions.append(content_exception)
256
-
257
257
  def apply_labels(
258
258
  self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
259
259
  assistant: Optional["Assistant"] = None
@@ -275,24 +275,11 @@ class LLMDataObject(BaseModel):
275
275
  """
276
276
 
277
277
  # Lets make sure we add all the content exceptions
278
- if self.exceptions is not None:
279
- for exception in self.exceptions:
280
- # We have two types of exception, one in the API and one in the
281
- # document
282
- from kodexa.model import ContentException as KodexaContentException
283
- internal_exception = KodexaContentException(
284
- exception_type=exception.exception_type,
285
- message=exception.message,
286
- exception_details=exception.exception_details,
287
- severity=exception.severity,
288
- group_uuid=exception.group_uuid,
289
- tag_uuid=exception.tag_uuid,
290
- )
291
- document.doc.add_exception(internal_exception)
278
+ self.process_exceptions(document)
292
279
 
293
280
  # Let's go through this data object and find all the attributes that have a value
294
281
  # then we will apply the labels to the document
295
- for field in self.__fields__:
282
+ for field in self.model_fields:
296
283
  logger.info(f"Processing field {field}")
297
284
  value = getattr(self, field)
298
285
 
@@ -311,8 +298,6 @@ class LLMDataObject(BaseModel):
311
298
  # We need to add the label to the document for this attribute
312
299
 
313
300
  tag = value.taxon_path
314
-
315
- # TODO need to work out why we are missing them?
316
301
  logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
317
302
  if value.node_uuid_list is None:
318
303
  value.node_uuid_list = value.line_ids
@@ -361,31 +346,16 @@ class LLMDataObject(BaseModel):
361
346
  current_value.append(new_tag)
362
347
  node.remove_feature("tag", tag)
363
348
  node.add_feature("tag", tag, current_value, single=False)
364
- # try:
365
- # if value.data_type == 'Derived':
366
- # logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
367
- # nearby_node = find_nearby_word_to_tag(node, tag)
368
- # nearby_node.tag(
369
- # tag_to_apply=tag,
370
- # value=value.normalized_text,
371
- # tag_uuid=tag_uuid,
372
- # cell_index=self.cell_index,
373
- # selector="//word",
374
- # confidence=-1,
375
- # group_uuid=self.group_uuid,
376
- # parent_group_uuid=parent_group_uuid,
377
- # owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
378
- # )
379
- # else:
380
- # logger.info(f"Node already has tag {tag} - Skipping.")
381
- # except:
382
- # logger.error(f"Error tagging nearby node with tag {tag}")
383
349
 
384
350
  logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
351
+
352
+ # Lets make sure we add all the content exceptions
353
+ self.process_exceptions(document)
354
+
385
355
  if isinstance(value, LLMDataObject):
386
356
  # We need to apply the labels to the document for this object
387
357
  value.apply_labels(document, parent_group_uuid=self.group_uuid)
388
- # logger.info(f"Applied labels to data object {value.group_uuid}")
358
+ logger.info(f"Applied labels to data object {value.group_uuid}")
389
359
 
390
360
 
391
361
  def find_nearby_word_to_tag(node, tag):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.4.413092627931
3
+ Version: 7.4.413159458494
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -3,7 +3,7 @@ kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,
3
3
  kodexa/assistant/assistant.py,sha256=5KFdbqFSLIZJyDRyZdpcfr448fT-CW4JhYu9A6B9DGY,14663
4
4
  kodexa/connectors/__init__.py,sha256=WF6G_MUeU32TlKSUKkpNoNX7dq8iBPliFMep4E8BmZc,328
5
5
  kodexa/connectors/connectors.py,sha256=FpUZDkSyHld2b9eYRuVOWzaFtuGoaRuPXXicJB7THbc,10413
6
- kodexa/dataclasses/__init__.py,sha256=l1bpG3wyGEwq9SpdD9uVFuIep1A9x0si1eqxCmyWH68,20641
6
+ kodexa/dataclasses/__init__.py,sha256=CHMNsOamWA3gY5203gn8Ef5q1fgcczMtWKEvNjIOzPs,19486
7
7
  kodexa/dataclasses/templates/llm_data_class.j2,sha256=YWjStW136chV_59JM3AYis3i-0jdrqDvLXsISUW9zDU,660
8
8
  kodexa/model/__init__.py,sha256=rtLXYJBxB-rnukhslN9rlqoB3--1H3253HyHGbD_Gc8,796
9
9
  kodexa/model/base.py,sha256=CaZK8nMhT1LdCpt4aLhebJGcorjq9qRID1FjnXnP14M,521
@@ -44,7 +44,7 @@ kodexa/testing/test_utils.py,sha256=v44p__gE7ia67W7WeHN2HBFCWSCUrCZt7G4xBNCmwf8,
44
44
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
45
45
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
47
- kodexa-7.4.413092627931.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
48
- kodexa-7.4.413092627931.dist-info/METADATA,sha256=GFaqjOkldJymEr9odh1g5d0XbTcrx91KocpbuWPcqO0,3528
49
- kodexa-7.4.413092627931.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
50
- kodexa-7.4.413092627931.dist-info/RECORD,,
47
+ kodexa-7.4.413159458494.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
48
+ kodexa-7.4.413159458494.dist-info/METADATA,sha256=cGd2yP7RRGfQviKkswuRnvAKwyvcGX-tZ_tb_kEN-5c,3528
49
+ kodexa-7.4.413159458494.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
50
+ kodexa-7.4.413159458494.dist-info/RECORD,,