kodexa 7.4.413043039367__py3-none-any.whl → 7.4.413159458494__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- kodexa/dataclasses/__init__.py +46 -76
- kodexa/model/objects.py +2 -0
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/METADATA +1 -1
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/RECORD +6 -6
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/LICENSE +0 -0
- {kodexa-7.4.413043039367.dist-info → kodexa-7.4.413159458494.dist-info}/WHEEL +0 -0
kodexa/dataclasses/__init__.py
CHANGED
@@ -54,21 +54,23 @@ class LLMDataAttribute(BaseModel):
|
|
54
54
|
self.node_uuid_list = source.node_uuid_list
|
55
55
|
self.page_number = source.page_number
|
56
56
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
57
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
58
|
+
# Lets make sure we add all the content exceptions
|
59
|
+
if self.exceptions is not None:
|
60
|
+
for exception in self.exceptions:
|
61
|
+
# We have two types of exception, one in the API and one in the
|
62
|
+
# document
|
63
|
+
from kodexa.model import ContentException as KodexaContentException
|
64
|
+
internal_exception = KodexaContentException(
|
65
|
+
tag=exception.tag,
|
66
|
+
exception_type=exception.exception_type,
|
67
|
+
message=exception.message,
|
68
|
+
exception_details=exception.exception_details,
|
69
|
+
severity=exception.severity,
|
70
|
+
group_uuid=self.group_uuid,
|
71
|
+
tag_uuid=self.tag_uuid,
|
72
|
+
)
|
73
|
+
document.doc.add_exception(internal_exception)
|
72
74
|
|
73
75
|
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
74
76
|
"""Convert attribute to JSON with normalized value"""
|
@@ -111,6 +113,24 @@ class LLMDataObject(BaseModel):
|
|
111
113
|
class Config:
|
112
114
|
arbitrary_types_allowed = True
|
113
115
|
|
116
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
117
|
+
# Lets make sure we add all the content exceptions
|
118
|
+
if self.exceptions is not None:
|
119
|
+
for exception in self.exceptions:
|
120
|
+
# We have two types of exception, one in the API and one in the
|
121
|
+
# document
|
122
|
+
from kodexa.model import ContentException as KodexaContentException
|
123
|
+
internal_exception = KodexaContentException(
|
124
|
+
tag=exception.tag,
|
125
|
+
exception_type=exception.exception_type,
|
126
|
+
message=exception.message,
|
127
|
+
exception_details=exception.exception_details,
|
128
|
+
severity=exception.severity,
|
129
|
+
group_uuid=self.group_uuid,
|
130
|
+
tag_uuid=self.tag_uuid,
|
131
|
+
)
|
132
|
+
document.doc.add_exception(internal_exception)
|
133
|
+
|
114
134
|
def get_all_review_pages(self):
|
115
135
|
"""
|
116
136
|
Returns a list of unique page numbers that would be included in the review.
|
@@ -118,7 +138,7 @@ class LLMDataObject(BaseModel):
|
|
118
138
|
:return: list of unique page numbers
|
119
139
|
"""
|
120
140
|
pages = set()
|
121
|
-
for field in self.
|
141
|
+
for field in self.model_fields:
|
122
142
|
pages.update(self._get_field_pages(field))
|
123
143
|
return sorted(list(pages))
|
124
144
|
|
@@ -169,7 +189,7 @@ class LLMDataObject(BaseModel):
|
|
169
189
|
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
170
190
|
"""Convert data object to JSON using normalized values and taxon paths"""
|
171
191
|
result = {}
|
172
|
-
for field in self.
|
192
|
+
for field in self.model_fields:
|
173
193
|
value = getattr(self, field)
|
174
194
|
|
175
195
|
if isinstance(value, list) and len(value) > 0:
|
@@ -177,7 +197,7 @@ class LLMDataObject(BaseModel):
|
|
177
197
|
# We need to find the first field of the object that is a LLMDataAttribute
|
178
198
|
# and use that to derive the taxon path of the LLMDataObject
|
179
199
|
data_attribute = None
|
180
|
-
for child_field in value[0].
|
200
|
+
for child_field in value[0].model_fields:
|
181
201
|
child_attr = getattr(value[0], child_field)
|
182
202
|
if isinstance(child_attr, LLMDataAttribute):
|
183
203
|
data_attribute = child_attr
|
@@ -205,7 +225,7 @@ class LLMDataObject(BaseModel):
|
|
205
225
|
:return: dict of this data object and children for the specified page
|
206
226
|
"""
|
207
227
|
review = {}
|
208
|
-
for field in self.
|
228
|
+
for field in self.model_fields:
|
209
229
|
review_field = self._build_review(field, page_number)
|
210
230
|
if review_field:
|
211
231
|
review[field] = review_field
|
@@ -234,26 +254,6 @@ class LLMDataObject(BaseModel):
|
|
234
254
|
|
235
255
|
return None
|
236
256
|
|
237
|
-
def create_exception(
|
238
|
-
self,
|
239
|
-
exception_type_id: str,
|
240
|
-
exception_type: str,
|
241
|
-
message: str,
|
242
|
-
exception_detail: str,
|
243
|
-
severity: str = "ERROR",
|
244
|
-
):
|
245
|
-
content_exception = ContentException(
|
246
|
-
exception_type=exception_type,
|
247
|
-
exception_details=exception_detail,
|
248
|
-
message=message,
|
249
|
-
group_uuid=self.group_uuid,
|
250
|
-
severity=severity,
|
251
|
-
)
|
252
|
-
if self.exceptions is None:
|
253
|
-
self.exceptions = []
|
254
|
-
|
255
|
-
self.exceptions.append(content_exception)
|
256
|
-
|
257
257
|
def apply_labels(
|
258
258
|
self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
|
259
259
|
assistant: Optional["Assistant"] = None
|
@@ -275,24 +275,11 @@ class LLMDataObject(BaseModel):
|
|
275
275
|
"""
|
276
276
|
|
277
277
|
# Lets make sure we add all the content exceptions
|
278
|
-
|
279
|
-
for exception in self.exceptions:
|
280
|
-
# We have two types of exception, one in the API and one in the
|
281
|
-
# document
|
282
|
-
from kodexa.model import ContentException as KodexaContentException
|
283
|
-
internal_exception = KodexaContentException(
|
284
|
-
exception_type=exception.exception_type,
|
285
|
-
message=exception.message,
|
286
|
-
exception_details=exception.exception_details,
|
287
|
-
severity=exception.severity,
|
288
|
-
group_uuid=exception.group_uuid,
|
289
|
-
tag_uuid=exception.tag_uuid,
|
290
|
-
)
|
291
|
-
document.doc.add_exception(internal_exception)
|
278
|
+
self.process_exceptions(document)
|
292
279
|
|
293
280
|
# Let's go through this data object and find all the attributes that have a value
|
294
281
|
# then we will apply the labels to the document
|
295
|
-
for field in self.
|
282
|
+
for field in self.model_fields:
|
296
283
|
logger.info(f"Processing field {field}")
|
297
284
|
value = getattr(self, field)
|
298
285
|
|
@@ -311,8 +298,6 @@ class LLMDataObject(BaseModel):
|
|
311
298
|
# We need to add the label to the document for this attribute
|
312
299
|
|
313
300
|
tag = value.taxon_path
|
314
|
-
|
315
|
-
# TODO need to work out why we are missing them?
|
316
301
|
logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
|
317
302
|
if value.node_uuid_list is None:
|
318
303
|
value.node_uuid_list = value.line_ids
|
@@ -361,31 +346,16 @@ class LLMDataObject(BaseModel):
|
|
361
346
|
current_value.append(new_tag)
|
362
347
|
node.remove_feature("tag", tag)
|
363
348
|
node.add_feature("tag", tag, current_value, single=False)
|
364
|
-
# try:
|
365
|
-
# if value.data_type == 'Derived':
|
366
|
-
# logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
|
367
|
-
# nearby_node = find_nearby_word_to_tag(node, tag)
|
368
|
-
# nearby_node.tag(
|
369
|
-
# tag_to_apply=tag,
|
370
|
-
# value=value.normalized_text,
|
371
|
-
# tag_uuid=tag_uuid,
|
372
|
-
# cell_index=self.cell_index,
|
373
|
-
# selector="//word",
|
374
|
-
# confidence=-1,
|
375
|
-
# group_uuid=self.group_uuid,
|
376
|
-
# parent_group_uuid=parent_group_uuid,
|
377
|
-
# owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
|
378
|
-
# )
|
379
|
-
# else:
|
380
|
-
# logger.info(f"Node already has tag {tag} - Skipping.")
|
381
|
-
# except:
|
382
|
-
# logger.error(f"Error tagging nearby node with tag {tag}")
|
383
349
|
|
384
350
|
logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
|
351
|
+
|
352
|
+
# Lets make sure we add all the content exceptions
|
353
|
+
self.process_exceptions(document)
|
354
|
+
|
385
355
|
if isinstance(value, LLMDataObject):
|
386
356
|
# We need to apply the labels to the document for this object
|
387
357
|
value.apply_labels(document, parent_group_uuid=self.group_uuid)
|
388
|
-
|
358
|
+
logger.info(f"Applied labels to data object {value.group_uuid}")
|
389
359
|
|
390
360
|
|
391
361
|
def find_nearby_word_to_tag(node, tag):
|
kodexa/model/objects.py
CHANGED
@@ -823,6 +823,8 @@ class SelectionOption(BaseModel):
|
|
823
823
|
id: Optional[str] = None
|
824
824
|
description: Optional[str] = None
|
825
825
|
lexical_relations: Optional[List[LexicalRelation]] = Field(default_factory=list, alias="lexicalRelations")
|
826
|
+
is_conditional: Optional[bool] = Field(None, alias="isConditional")
|
827
|
+
conditional_formula: Optional[str] = Field(None, alias="conditionalFormula")
|
826
828
|
|
827
829
|
|
828
830
|
class SlugBasedMetadata1(BaseModel):
|
@@ -3,7 +3,7 @@ kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,
|
|
3
3
|
kodexa/assistant/assistant.py,sha256=5KFdbqFSLIZJyDRyZdpcfr448fT-CW4JhYu9A6B9DGY,14663
|
4
4
|
kodexa/connectors/__init__.py,sha256=WF6G_MUeU32TlKSUKkpNoNX7dq8iBPliFMep4E8BmZc,328
|
5
5
|
kodexa/connectors/connectors.py,sha256=FpUZDkSyHld2b9eYRuVOWzaFtuGoaRuPXXicJB7THbc,10413
|
6
|
-
kodexa/dataclasses/__init__.py,sha256=
|
6
|
+
kodexa/dataclasses/__init__.py,sha256=CHMNsOamWA3gY5203gn8Ef5q1fgcczMtWKEvNjIOzPs,19486
|
7
7
|
kodexa/dataclasses/templates/llm_data_class.j2,sha256=YWjStW136chV_59JM3AYis3i-0jdrqDvLXsISUW9zDU,660
|
8
8
|
kodexa/model/__init__.py,sha256=rtLXYJBxB-rnukhslN9rlqoB3--1H3253HyHGbD_Gc8,796
|
9
9
|
kodexa/model/base.py,sha256=CaZK8nMhT1LdCpt4aLhebJGcorjq9qRID1FjnXnP14M,521
|
@@ -13,7 +13,7 @@ kodexa/model/entities/product.py,sha256=StUhTEeLXmc05cj6XnZppQfeJsqCPbX1jdhsysHH
|
|
13
13
|
kodexa/model/entities/product_group.py,sha256=540fRGyUf34h1BzAN1DiWu6rGgvaj3xDFhZ2k-RvSFY,3617
|
14
14
|
kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
|
15
15
|
kodexa/model/model.py,sha256=qh1YUew3UgtjU0t4fAwSXYYuzQjXTOZWZkafyFp_w8M,118801
|
16
|
-
kodexa/model/objects.py,sha256=
|
16
|
+
kodexa/model/objects.py,sha256=2YlJJwk8uc_9hAzwT_sjNJciPOHTojmDYo8ML2skQhw,185848
|
17
17
|
kodexa/model/persistence.py,sha256=HX_uIkGs8bqHwqyE5wB2qMlGIG5ZnjuTu7xMdvKhEzA,72033
|
18
18
|
kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
|
19
19
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
@@ -44,7 +44,7 @@ kodexa/testing/test_utils.py,sha256=v44p__gE7ia67W7WeHN2HBFCWSCUrCZt7G4xBNCmwf8,
|
|
44
44
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
45
45
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
46
|
kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
|
47
|
-
kodexa-7.4.
|
48
|
-
kodexa-7.4.
|
49
|
-
kodexa-7.4.
|
50
|
-
kodexa-7.4.
|
47
|
+
kodexa-7.4.413159458494.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
48
|
+
kodexa-7.4.413159458494.dist-info/METADATA,sha256=cGd2yP7RRGfQviKkswuRnvAKwyvcGX-tZ_tb_kEN-5c,3528
|
49
|
+
kodexa-7.4.413159458494.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
50
|
+
kodexa-7.4.413159458494.dist-info/RECORD,,
|
File without changes
|
File without changes
|