kodexa 7.4.413043039367__py3-none-any.whl → 7.4.413159458494__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +46 -76
- kodexa/model/objects.py +2 -0
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/METADATA +1 -1
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/RECORD +6 -6
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/LICENSE +0 -0
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/WHEEL +0 -0
kodexa/dataclasses/__init__.py
CHANGED
@@ -54,21 +54,23 @@ class LLMDataAttribute(BaseModel):
|
|
54
54
|
self.node_uuid_list = source.node_uuid_list
|
55
55
|
self.page_number = source.page_number
|
56
56
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
57
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
58
|
+
# Lets make sure we add all the content exceptions
|
59
|
+
if self.exceptions is not None:
|
60
|
+
for exception in self.exceptions:
|
61
|
+
# We have two types of exception, one in the API and one in the
|
62
|
+
# document
|
63
|
+
from kodexa.model import ContentException as KodexaContentException
|
64
|
+
internal_exception = KodexaContentException(
|
65
|
+
tag=exception.tag,
|
66
|
+
exception_type=exception.exception_type,
|
67
|
+
message=exception.message,
|
68
|
+
exception_details=exception.exception_details,
|
69
|
+
severity=exception.severity,
|
70
|
+
group_uuid=self.group_uuid,
|
71
|
+
tag_uuid=self.tag_uuid,
|
72
|
+
)
|
73
|
+
document.doc.add_exception(internal_exception)
|
72
74
|
|
73
75
|
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
74
76
|
"""Convert attribute to JSON with normalized value"""
|
@@ -111,6 +113,24 @@ class LLMDataObject(BaseModel):
|
|
111
113
|
class Config:
|
112
114
|
arbitrary_types_allowed = True
|
113
115
|
|
116
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
117
|
+
# Lets make sure we add all the content exceptions
|
118
|
+
if self.exceptions is not None:
|
119
|
+
for exception in self.exceptions:
|
120
|
+
# We have two types of exception, one in the API and one in the
|
121
|
+
# document
|
122
|
+
from kodexa.model import ContentException as KodexaContentException
|
123
|
+
internal_exception = KodexaContentException(
|
124
|
+
tag=exception.tag,
|
125
|
+
exception_type=exception.exception_type,
|
126
|
+
message=exception.message,
|
127
|
+
exception_details=exception.exception_details,
|
128
|
+
severity=exception.severity,
|
129
|
+
group_uuid=self.group_uuid,
|
130
|
+
tag_uuid=self.tag_uuid,
|
131
|
+
)
|
132
|
+
document.doc.add_exception(internal_exception)
|
133
|
+
|
114
134
|
def get_all_review_pages(self):
|
115
135
|
"""
|
116
136
|
Returns a list of unique page numbers that would be included in the review.
|
@@ -118,7 +138,7 @@ class LLMDataObject(BaseModel):
|
|
118
138
|
:return: list of unique page numbers
|
119
139
|
"""
|
120
140
|
pages = set()
|
121
|
-
for field in self.
|
141
|
+
for field in self.model_fields:
|
122
142
|
pages.update(self._get_field_pages(field))
|
123
143
|
return sorted(list(pages))
|
124
144
|
|
@@ -169,7 +189,7 @@ class LLMDataObject(BaseModel):
|
|
169
189
|
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
170
190
|
"""Convert data object to JSON using normalized values and taxon paths"""
|
171
191
|
result = {}
|
172
|
-
for field in self.
|
192
|
+
for field in self.model_fields:
|
173
193
|
value = getattr(self, field)
|
174
194
|
|
175
195
|
if isinstance(value, list) and len(value) > 0:
|
@@ -177,7 +197,7 @@ class LLMDataObject(BaseModel):
|
|
177
197
|
# We need to find the first field of the object that is a LLMDataAttribute
|
178
198
|
# and use that to derive the taxon path of the LLMDataObject
|
179
199
|
data_attribute = None
|
180
|
-
for child_field in value[0].
|
200
|
+
for child_field in value[0].model_fields:
|
181
201
|
child_attr = getattr(value[0], child_field)
|
182
202
|
if isinstance(child_attr, LLMDataAttribute):
|
183
203
|
data_attribute = child_attr
|
@@ -205,7 +225,7 @@ class LLMDataObject(BaseModel):
|
|
205
225
|
:return: dict of this data object and children for the specified page
|
206
226
|
"""
|
207
227
|
review = {}
|
208
|
-
for field in self.
|
228
|
+
for field in self.model_fields:
|
209
229
|
review_field = self._build_review(field, page_number)
|
210
230
|
if review_field:
|
211
231
|
review[field] = review_field
|
@@ -234,26 +254,6 @@ class LLMDataObject(BaseModel):
|
|
234
254
|
|
235
255
|
return None
|
236
256
|
|
237
|
-
def create_exception(
|
238
|
-
self,
|
239
|
-
exception_type_id: str,
|
240
|
-
exception_type: str,
|
241
|
-
message: str,
|
242
|
-
exception_detail: str,
|
243
|
-
severity: str = "ERROR",
|
244
|
-
):
|
245
|
-
content_exception = ContentException(
|
246
|
-
exception_type=exception_type,
|
247
|
-
exception_details=exception_detail,
|
248
|
-
message=message,
|
249
|
-
group_uuid=self.group_uuid,
|
250
|
-
severity=severity,
|
251
|
-
)
|
252
|
-
if self.exceptions is None:
|
253
|
-
self.exceptions = []
|
254
|
-
|
255
|
-
self.exceptions.append(content_exception)
|
256
|
-
|
257
257
|
def apply_labels(
|
258
258
|
self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
|
259
259
|
assistant: Optional["Assistant"] = None
|
@@ -275,24 +275,11 @@ class LLMDataObject(BaseModel):
|
|
275
275
|
"""
|
276
276
|
|
277
277
|
# Lets make sure we add all the content exceptions
|
278
|
-
|
279
|
-
for exception in self.exceptions:
|
280
|
-
# We have two types of exception, one in the API and one in the
|
281
|
-
# document
|
282
|
-
from kodexa.model import ContentException as KodexaContentException
|
283
|
-
internal_exception = KodexaContentException(
|
284
|
-
exception_type=exception.exception_type,
|
285
|
-
message=exception.message,
|
286
|
-
exception_details=exception.exception_details,
|
287
|
-
severity=exception.severity,
|
288
|
-
group_uuid=exception.group_uuid,
|
289
|
-
tag_uuid=exception.tag_uuid,
|
290
|
-
)
|
291
|
-
document.doc.add_exception(internal_exception)
|
278
|
+
self.process_exceptions(document)
|
292
279
|
|
293
280
|
# Let's go through this data object and find all the attributes that have a value
|
294
281
|
# then we will apply the labels to the document
|
295
|
-
for field in self.
|
282
|
+
for field in self.model_fields:
|
296
283
|
logger.info(f"Processing field {field}")
|
297
284
|
value = getattr(self, field)
|
298
285
|
|
@@ -311,8 +298,6 @@ class LLMDataObject(BaseModel):
|
|
311
298
|
# We need to add the label to the document for this attribute
|
312
299
|
|
313
300
|
tag = value.taxon_path
|
314
|
-
|
315
|
-
# TODO need to work out why we are missing them?
|
316
301
|
logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
|
317
302
|
if value.node_uuid_list is None:
|
318
303
|
value.node_uuid_list = value.line_ids
|
@@ -361,31 +346,16 @@ class LLMDataObject(BaseModel):
|
|
361
346
|
current_value.append(new_tag)
|
362
347
|
node.remove_feature("tag", tag)
|
363
348
|
node.add_feature("tag", tag, current_value, single=False)
|
364
|
-
# try:
|
365
|
-
# if value.data_type == 'Derived':
|
366
|
-
# logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
|
367
|
-
# nearby_node = find_nearby_word_to_tag(node, tag)
|
368
|
-
# nearby_node.tag(
|
369
|
-
# tag_to_apply=tag,
|
370
|
-
# value=value.normalized_text,
|
371
|
-
# tag_uuid=tag_uuid,
|
372
|
-
# cell_index=self.cell_index,
|
373
|
-
# selector="//word",
|
374
|
-
# confidence=-1,
|
375
|
-
# group_uuid=self.group_uuid,
|
376
|
-
# parent_group_uuid=parent_group_uuid,
|
377
|
-
# owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
|
378
|
-
# )
|
379
|
-
# else:
|
380
|
-
# logger.info(f"Node already has tag {tag} - Skipping.")
|
381
|
-
# except:
|
382
|
-
# logger.error(f"Error tagging nearby node with tag {tag}")
|
383
349
|
|
384
350
|
logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
|
351
|
+
|
352
|
+
# Lets make sure we add all the content exceptions
|
353
|
+
self.process_exceptions(document)
|
354
|
+
|
385
355
|
if isinstance(value, LLMDataObject):
|
386
356
|
# We need to apply the labels to the document for this object
|
387
357
|
value.apply_labels(document, parent_group_uuid=self.group_uuid)
|
388
|
-
|
358
|
+
logger.info(f"Applied labels to data object {value.group_uuid}")
|
389
359
|
|
390
360
|
|
391
361
|
def find_nearby_word_to_tag(node, tag):
|
kodexa/model/objects.py
CHANGED
@@ -823,6 +823,8 @@ class SelectionOption(BaseModel):
|
|
823
823
|
id: Optional[str] = None
|
824
824
|
description: Optional[str] = None
|
825
825
|
lexical_relations: Optional[List[LexicalRelation]] = Field(default_factory=list, alias="lexicalRelations")
|
826
|
+
is_conditional: Optional[bool] = Field(None, alias="isConditional")
|
827
|
+
conditional_formula: Optional[str] = Field(None, alias="conditionalFormula")
|
826
828
|
|
827
829
|
|
828
830
|
class SlugBasedMetadata1(BaseModel):
|
@@ -3,7 +3,7 @@ kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,
|
|
3
3
|
kodexa/assistant/assistant.py,sha256=5KFdbqFSLIZJyDRyZdpcfr448fT-CW4JhYu9A6B9DGY,14663
|
4
4
|
kodexa/connectors/__init__.py,sha256=WF6G_MUeU32TlKSUKkpNoNX7dq8iBPliFMep4E8BmZc,328
|
5
5
|
kodexa/connectors/connectors.py,sha256=FpUZDkSyHld2b9eYRuVOWzaFtuGoaRuPXXicJB7THbc,10413
|
6
|
-
kodexa/dataclasses/__init__.py,sha256=
|
6
|
+
kodexa/dataclasses/__init__.py,sha256=CHMNsOamWA3gY5203gn8Ef5q1fgcczMtWKEvNjIOzPs,19486
|
7
7
|
kodexa/dataclasses/templates/llm_data_class.j2,sha256=YWjStW136chV_59JM3AYis3i-0jdrqDvLXsISUW9zDU,660
|
8
8
|
kodexa/model/__init__.py,sha256=rtLXYJBxB-rnukhslN9rlqoB3--1H3253HyHGbD_Gc8,796
|
9
9
|
kodexa/model/base.py,sha256=CaZK8nMhT1LdCpt4aLhebJGcorjq9qRID1FjnXnP14M,521
|
@@ -13,7 +13,7 @@ kodexa/model/entities/product.py,sha256=StUhTEeLXmc05cj6XnZppQfeJsqCPbX1jdhsysHH
|
|
13
13
|
kodexa/model/entities/product_group.py,sha256=540fRGyUf34h1BzAN1DiWu6rGgvaj3xDFhZ2k-RvSFY,3617
|
14
14
|
kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
|
15
15
|
kodexa/model/model.py,sha256=qh1YUew3UgtjU0t4fAwSXYYuzQjXTOZWZkafyFp_w8M,118801
|
16
|
-
kodexa/model/objects.py,sha256=
|
16
|
+
kodexa/model/objects.py,sha256=2YlJJwk8uc_9hAzwT_sjNJciPOHTojmDYo8ML2skQhw,185848
|
17
17
|
kodexa/model/persistence.py,sha256=HX_uIkGs8bqHwqyE5wB2qMlGIG5ZnjuTu7xMdvKhEzA,72033
|
18
18
|
kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
|
19
19
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
@@ -44,7 +44,7 @@ kodexa/testing/test_utils.py,sha256=v44p__gE7ia67W7WeHN2HBFCWSCUrCZt7G4xBNCmwf8,
|
|
44
44
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
45
45
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
46
|
kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
|
47
|
-
kodexa-7.4.
|
48
|
-
kodexa-7.4.
|
49
|
-
kodexa-7.4.
|
50
|
-
kodexa-7.4.
|
47
|
+
kodexa-7.4.413159458494.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
48
|
+
kodexa-7.4.413159458494.dist-info/METADATA,sha256=cGd2yP7RRGfQviKkswuRnvAKwyvcGX-tZ_tb_kEN-5c,3528
|
49
|
+
kodexa-7.4.413159458494.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
50
|
+
kodexa-7.4.413159458494.dist-info/RECORD,,
|
File without changes
|
File without changes
|