kodexa 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +85 -74
- kodexa/model/entities/product.py +59 -6
- kodexa/model/entities/product_group.py +126 -0
- kodexa/model/model.py +7 -4
- kodexa/model/objects.py +155 -40
- kodexa/model/persistence.py +87 -27
- kodexa/pipeline/pipeline.py +6 -4
- kodexa/platform/client.py +227 -14
- {kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/METADATA +1 -1
- {kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/RECORD +12 -11
- {kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/LICENSE +0 -0
- {kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/WHEEL +0 -0
kodexa/dataclasses/__init__.py
CHANGED
@@ -54,21 +54,35 @@ class LLMDataAttribute(BaseModel):
|
|
54
54
|
self.node_uuid_list = source.node_uuid_list
|
55
55
|
self.page_number = source.page_number
|
56
56
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
57
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
58
|
+
# Lets make sure we add all the content exceptions
|
59
|
+
if self.exceptions is not None:
|
60
|
+
for exception in self.exceptions:
|
61
|
+
# We have two types of exception, one in the API and one in the
|
62
|
+
# document
|
63
|
+
from kodexa.model import ContentException as KodexaContentException
|
64
|
+
internal_exception = KodexaContentException(
|
65
|
+
tag=exception.tag,
|
66
|
+
exception_type=exception.exception_type,
|
67
|
+
message=exception.message,
|
68
|
+
exception_details=exception.exception_details,
|
69
|
+
severity=exception.severity,
|
70
|
+
group_uuid=self.group_uuid,
|
71
|
+
tag_uuid=self.tag_uuid,
|
72
|
+
)
|
73
|
+
document.doc.add_exception(internal_exception)
|
74
|
+
|
75
|
+
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
76
|
+
"""Convert attribute to JSON with normalized value"""
|
77
|
+
|
78
|
+
target_taxon = taxonomy.get_taxon_by_path(self.taxon_path)
|
79
|
+
if target_taxon is None:
|
80
|
+
return {}
|
81
|
+
|
82
|
+
taxon_external_name = target_taxon.external_name
|
83
|
+
return {
|
84
|
+
taxon_external_name: self.normalized_text if self.normalized_text else self.value
|
85
|
+
}
|
72
86
|
|
73
87
|
|
74
88
|
class LLMDataObject(BaseModel):
|
@@ -99,6 +113,24 @@ class LLMDataObject(BaseModel):
|
|
99
113
|
class Config:
|
100
114
|
arbitrary_types_allowed = True
|
101
115
|
|
116
|
+
def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
|
117
|
+
# Lets make sure we add all the content exceptions
|
118
|
+
if self.exceptions is not None:
|
119
|
+
for exception in self.exceptions:
|
120
|
+
# We have two types of exception, one in the API and one in the
|
121
|
+
# document
|
122
|
+
from kodexa.model import ContentException as KodexaContentException
|
123
|
+
internal_exception = KodexaContentException(
|
124
|
+
tag=exception.tag,
|
125
|
+
exception_type=exception.exception_type,
|
126
|
+
message=exception.message,
|
127
|
+
exception_details=exception.exception_details,
|
128
|
+
severity=exception.severity,
|
129
|
+
group_uuid=self.group_uuid,
|
130
|
+
tag_uuid=self.tag_uuid,
|
131
|
+
)
|
132
|
+
document.doc.add_exception(internal_exception)
|
133
|
+
|
102
134
|
def get_all_review_pages(self):
|
103
135
|
"""
|
104
136
|
Returns a list of unique page numbers that would be included in the review.
|
@@ -106,7 +138,7 @@ class LLMDataObject(BaseModel):
|
|
106
138
|
:return: list of unique page numbers
|
107
139
|
"""
|
108
140
|
pages = set()
|
109
|
-
for field in self.
|
141
|
+
for field in self.model_fields:
|
110
142
|
pages.update(self._get_field_pages(field))
|
111
143
|
return sorted(list(pages))
|
112
144
|
|
@@ -154,6 +186,35 @@ class LLMDataObject(BaseModel):
|
|
154
186
|
if 'normalized_text' in field_data:
|
155
187
|
attr.normalized_text = field_data['normalized_text']
|
156
188
|
|
189
|
+
def to_dict(self, taxonomy: Taxonomy) -> dict:
|
190
|
+
"""Convert data object to JSON using normalized values and taxon paths"""
|
191
|
+
result = {}
|
192
|
+
for field in self.model_fields:
|
193
|
+
value = getattr(self, field)
|
194
|
+
|
195
|
+
if isinstance(value, list) and len(value) > 0:
|
196
|
+
if isinstance(value[0], LLMDataObject):
|
197
|
+
# We need to find the first field of the object that is a LLMDataAttribute
|
198
|
+
# and use that to derive the taxon path of the LLMDataObject
|
199
|
+
data_attribute = None
|
200
|
+
for child_field in value[0].model_fields:
|
201
|
+
child_attr = getattr(value[0], child_field)
|
202
|
+
if isinstance(child_attr, LLMDataAttribute):
|
203
|
+
data_attribute = child_attr
|
204
|
+
break
|
205
|
+
if data_attribute is not None:
|
206
|
+
taxon_path = data_attribute.taxon_path.rsplit('/', 1)[0]
|
207
|
+
target_taxon = taxonomy.get_taxon_by_path(taxon_path)
|
208
|
+
if target_taxon is not None:
|
209
|
+
result[target_taxon.external_name] = [item.to_dict(taxonomy) for item in value if isinstance(item, (LLMDataObject, LLMDataAttribute))]
|
210
|
+
elif isinstance(value, LLMDataAttribute):
|
211
|
+
result.update(value.to_dict(taxonomy))
|
212
|
+
elif isinstance(value, LLMDataObject):
|
213
|
+
target_taxon = taxonomy.get_taxon_by_path(value.taxon_path)
|
214
|
+
if target_taxon is not None:
|
215
|
+
result[target_taxon.external_name] = value.to_dict(taxonomy)
|
216
|
+
return result
|
217
|
+
|
157
218
|
def to_review(self, page_number=None):
|
158
219
|
"""
|
159
220
|
Build a representation of the data object and its data attributes that is a dict that includes the
|
@@ -164,7 +225,7 @@ class LLMDataObject(BaseModel):
|
|
164
225
|
:return: dict of this data object and children for the specified page
|
165
226
|
"""
|
166
227
|
review = {}
|
167
|
-
for field in self.
|
228
|
+
for field in self.model_fields:
|
168
229
|
review_field = self._build_review(field, page_number)
|
169
230
|
if review_field:
|
170
231
|
review[field] = review_field
|
@@ -193,26 +254,6 @@ class LLMDataObject(BaseModel):
|
|
193
254
|
|
194
255
|
return None
|
195
256
|
|
196
|
-
def create_exception(
|
197
|
-
self,
|
198
|
-
exception_type_id: str,
|
199
|
-
exception_type: str,
|
200
|
-
message: str,
|
201
|
-
exception_detail: str,
|
202
|
-
severity: str = "ERROR",
|
203
|
-
):
|
204
|
-
content_exception = ContentException(
|
205
|
-
exception_type=exception_type,
|
206
|
-
exception_details=exception_detail,
|
207
|
-
message=message,
|
208
|
-
group_uuid=self.group_uuid,
|
209
|
-
severity=severity,
|
210
|
-
)
|
211
|
-
if self.exceptions is None:
|
212
|
-
self.exceptions = []
|
213
|
-
|
214
|
-
self.exceptions.append(content_exception)
|
215
|
-
|
216
257
|
def apply_labels(
|
217
258
|
self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
|
218
259
|
assistant: Optional["Assistant"] = None
|
@@ -234,24 +275,11 @@ class LLMDataObject(BaseModel):
|
|
234
275
|
"""
|
235
276
|
|
236
277
|
# Lets make sure we add all the content exceptions
|
237
|
-
|
238
|
-
for exception in self.exceptions:
|
239
|
-
# We have two types of exception, one in the API and one in the
|
240
|
-
# document
|
241
|
-
from kodexa.model import ContentException as KodexaContentException
|
242
|
-
internal_exception = KodexaContentException(
|
243
|
-
exception_type=exception.exception_type,
|
244
|
-
message=exception.message,
|
245
|
-
exception_details=exception.exception_details,
|
246
|
-
severity=exception.severity,
|
247
|
-
group_uuid=exception.group_uuid,
|
248
|
-
tag_uuid=exception.tag_uuid,
|
249
|
-
)
|
250
|
-
document.doc.add_exception(internal_exception)
|
278
|
+
self.process_exceptions(document)
|
251
279
|
|
252
280
|
# Let's go through this data object and find all the attributes that have a value
|
253
281
|
# then we will apply the labels to the document
|
254
|
-
for field in self.
|
282
|
+
for field in self.model_fields:
|
255
283
|
logger.info(f"Processing field {field}")
|
256
284
|
value = getattr(self, field)
|
257
285
|
|
@@ -270,8 +298,6 @@ class LLMDataObject(BaseModel):
|
|
270
298
|
# We need to add the label to the document for this attribute
|
271
299
|
|
272
300
|
tag = value.taxon_path
|
273
|
-
|
274
|
-
# TODO need to work out why we are missing them?
|
275
301
|
logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
|
276
302
|
if value.node_uuid_list is None:
|
277
303
|
value.node_uuid_list = value.line_ids
|
@@ -320,31 +346,16 @@ class LLMDataObject(BaseModel):
|
|
320
346
|
current_value.append(new_tag)
|
321
347
|
node.remove_feature("tag", tag)
|
322
348
|
node.add_feature("tag", tag, current_value, single=False)
|
323
|
-
# try:
|
324
|
-
# if value.data_type == 'Derived':
|
325
|
-
# logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
|
326
|
-
# nearby_node = find_nearby_word_to_tag(node, tag)
|
327
|
-
# nearby_node.tag(
|
328
|
-
# tag_to_apply=tag,
|
329
|
-
# value=value.normalized_text,
|
330
|
-
# tag_uuid=tag_uuid,
|
331
|
-
# cell_index=self.cell_index,
|
332
|
-
# selector="//word",
|
333
|
-
# confidence=-1,
|
334
|
-
# group_uuid=self.group_uuid,
|
335
|
-
# parent_group_uuid=parent_group_uuid,
|
336
|
-
# owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
|
337
|
-
# )
|
338
|
-
# else:
|
339
|
-
# logger.info(f"Node already has tag {tag} - Skipping.")
|
340
|
-
# except:
|
341
|
-
# logger.error(f"Error tagging nearby node with tag {tag}")
|
342
349
|
|
343
350
|
logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
|
351
|
+
|
352
|
+
# Lets make sure we add all the content exceptions
|
353
|
+
self.process_exceptions(document)
|
354
|
+
|
344
355
|
if isinstance(value, LLMDataObject):
|
345
356
|
# We need to apply the labels to the document for this object
|
346
357
|
value.apply_labels(document, parent_group_uuid=self.group_uuid)
|
347
|
-
|
358
|
+
logger.info(f"Applied labels to data object {value.group_uuid}")
|
348
359
|
|
349
360
|
|
350
361
|
def find_nearby_word_to_tag(node, tag):
|
kodexa/model/entities/product.py
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
-
from
|
1
|
+
from decimal import Decimal
|
2
|
+
from typing import Optional, List, Set
|
2
3
|
|
3
4
|
from pydantic import BaseModel, ConfigDict, Field
|
5
|
+
|
4
6
|
from kodexa.model.base import StandardDateTime
|
5
7
|
from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
|
8
|
+
from .product_group import ProductGroup
|
9
|
+
from ..objects import ProjectTemplate
|
6
10
|
|
7
11
|
|
8
|
-
class
|
12
|
+
class ProjectTemplateMetadata(BaseModel):
|
9
13
|
"""
|
10
|
-
|
14
|
+
A project template metadata entity
|
11
15
|
"""
|
12
16
|
model_config = ConfigDict(
|
13
17
|
populate_by_name=True,
|
@@ -15,11 +19,41 @@ class Product(BaseModel):
|
|
15
19
|
arbitrary_types_allowed=True,
|
16
20
|
protected_namespaces=("model_config",),
|
17
21
|
)
|
22
|
+
|
23
|
+
id: str
|
24
|
+
|
25
|
+
|
26
|
+
class ProductProjectTemplate(BaseModel):
|
18
27
|
"""
|
19
|
-
A product
|
28
|
+
A product project template entity representing the relationship between products and project templates
|
20
29
|
"""
|
30
|
+
model_config = ConfigDict(
|
31
|
+
populate_by_name=True,
|
32
|
+
use_enum_values=True,
|
33
|
+
arbitrary_types_allowed=True,
|
34
|
+
protected_namespaces=("model_config",),
|
35
|
+
)
|
36
|
+
|
37
|
+
id: Optional[str] = None
|
38
|
+
uuid: Optional[str] = None
|
39
|
+
change_sequence: Optional[int] = Field(None, alias="changeSequence")
|
40
|
+
created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
|
41
|
+
updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
|
42
|
+
display_order: Optional[int] = Field(None, alias="displayOrder")
|
43
|
+
project_template_metadata: Optional[ProjectTemplateMetadata] = Field(None, alias="projectTemplateMetadata")
|
21
44
|
|
22
45
|
|
46
|
+
class Product(BaseModel):
|
47
|
+
"""
|
48
|
+
A product entity representing a product in the Kodexa platform
|
49
|
+
"""
|
50
|
+
model_config = ConfigDict(
|
51
|
+
populate_by_name=True,
|
52
|
+
use_enum_values=True,
|
53
|
+
arbitrary_types_allowed=True,
|
54
|
+
protected_namespaces=("model_config",),
|
55
|
+
)
|
56
|
+
|
23
57
|
id: Optional[str] = None
|
24
58
|
uuid: Optional[str] = None
|
25
59
|
change_sequence: Optional[int] = Field(None, alias="changeSequence")
|
@@ -28,6 +62,26 @@ class Product(BaseModel):
|
|
28
62
|
name: str
|
29
63
|
description: Optional[str] = None
|
30
64
|
overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
|
65
|
+
product_group: ProductGroup = Field(..., alias="productGroup")
|
66
|
+
parent: Optional['Product'] = None
|
67
|
+
image_url: Optional[str] = Field(None, alias="imageUrl")
|
68
|
+
price_id: Optional[str] = Field(None, alias="priceId")
|
69
|
+
price: Optional[Decimal] = None
|
70
|
+
number_of_credits: Optional[int] = Field(None, alias="numberOfCredits")
|
71
|
+
price_suffix: Optional[str] = Field(None, alias="priceSuffix")
|
72
|
+
has_quantity: bool = Field(False, alias="hasQuantity")
|
73
|
+
active: bool = True
|
74
|
+
order: Optional[int] = None
|
75
|
+
promoted: Optional[bool] = None
|
76
|
+
project_templates: Optional[Set[ProjectTemplate]] = Field(None, alias="projectTemplates")
|
77
|
+
search_text: Optional[str] = None
|
78
|
+
|
79
|
+
def update_search_text(self):
|
80
|
+
"""Updates the search text for the product"""
|
81
|
+
if self.product_group:
|
82
|
+
self.search_text = f"{self.name.lower()} {self.product_group.name.lower()}"
|
83
|
+
else:
|
84
|
+
self.search_text = self.name.lower()
|
31
85
|
|
32
86
|
|
33
87
|
class ProductEndpoint(Product, EntityEndpoint):
|
@@ -53,7 +107,7 @@ class ProductEndpoint(Product, EntityEndpoint):
|
|
53
107
|
|
54
108
|
class PageProduct(BaseModel):
|
55
109
|
"""
|
56
|
-
|
110
|
+
Represents a paginated list of products
|
57
111
|
"""
|
58
112
|
model_config = ConfigDict(
|
59
113
|
populate_by_name=True,
|
@@ -66,7 +120,6 @@ class PageProduct(BaseModel):
|
|
66
120
|
size: Optional[int] = None
|
67
121
|
content: Optional[List[Product]] = None
|
68
122
|
number: Optional[int] = None
|
69
|
-
|
70
123
|
number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
|
71
124
|
first: Optional[bool] = None
|
72
125
|
last: Optional[bool] = None
|
@@ -0,0 +1,126 @@
|
|
1
|
+
from typing import Optional, List
|
2
|
+
|
3
|
+
from pydantic import BaseModel, ConfigDict, Field
|
4
|
+
from kodexa.model.base import StandardDateTime
|
5
|
+
from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
|
6
|
+
|
7
|
+
|
8
|
+
class ProductGroup(BaseModel):
|
9
|
+
"""
|
10
|
+
|
11
|
+
"""
|
12
|
+
model_config = ConfigDict(
|
13
|
+
populate_by_name=True,
|
14
|
+
use_enum_values=True,
|
15
|
+
arbitrary_types_allowed=True,
|
16
|
+
protected_namespaces=("model_config",),
|
17
|
+
)
|
18
|
+
"""
|
19
|
+
A product group
|
20
|
+
"""
|
21
|
+
|
22
|
+
|
23
|
+
id: Optional[str] = None
|
24
|
+
uuid: Optional[str] = None
|
25
|
+
change_sequence: Optional[int] = Field(None, alias="changeSequence")
|
26
|
+
created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
|
27
|
+
updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
|
28
|
+
name: str
|
29
|
+
description: Optional[str] = None
|
30
|
+
overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
|
31
|
+
|
32
|
+
|
33
|
+
class ProductGroupEndpoint(ProductGroup, EntityEndpoint):
|
34
|
+
"""Handles the endpoint for a product group
|
35
|
+
|
36
|
+
This class is a combination of DataException and EntityEndpoint. It is used
|
37
|
+
to manage the endpoint for data exceptions.
|
38
|
+
|
39
|
+
Methods:
|
40
|
+
get_type: Returns the type of the endpoint.
|
41
|
+
"""
|
42
|
+
|
43
|
+
def get_type(self) -> str:
|
44
|
+
"""Gets the type of the endpoint.
|
45
|
+
|
46
|
+
This method returns the type of the endpoint which is "exceptions".
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
str: The type of the endpoint.
|
50
|
+
"""
|
51
|
+
return "product-groups"
|
52
|
+
|
53
|
+
|
54
|
+
class PageProductGroup(BaseModel):
|
55
|
+
"""
|
56
|
+
|
57
|
+
"""
|
58
|
+
model_config = ConfigDict(
|
59
|
+
populate_by_name=True,
|
60
|
+
use_enum_values=True,
|
61
|
+
arbitrary_types_allowed=True,
|
62
|
+
protected_namespaces=("model_config",),
|
63
|
+
)
|
64
|
+
total_pages: Optional[int] = Field(None, alias="totalPages")
|
65
|
+
total_elements: Optional[int] = Field(None, alias="totalElements")
|
66
|
+
size: Optional[int] = None
|
67
|
+
content: Optional[List[ProductGroup]] = None
|
68
|
+
number: Optional[int] = None
|
69
|
+
|
70
|
+
number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
|
71
|
+
first: Optional[bool] = None
|
72
|
+
last: Optional[bool] = None
|
73
|
+
empty: Optional[bool] = None
|
74
|
+
|
75
|
+
|
76
|
+
class PageProductGroupEndpoint(PageProductGroup, PageEndpoint):
|
77
|
+
def get_type(self) -> Optional[str]:
|
78
|
+
return "product-group"
|
79
|
+
|
80
|
+
|
81
|
+
class ProductGroupsEndpoint(EntitiesEndpoint):
|
82
|
+
"""Represents the product groups endpoint
|
83
|
+
|
84
|
+
This class is used to represent the product groups endpoint in the system.
|
85
|
+
|
86
|
+
Attributes:
|
87
|
+
object_dict: A dictionary containing the object data.
|
88
|
+
"""
|
89
|
+
|
90
|
+
"""Represents a assistants endpoint"""
|
91
|
+
|
92
|
+
def get_type(self) -> str:
|
93
|
+
"""Get the type of the endpoint
|
94
|
+
|
95
|
+
This method is used to get the type of the endpoint.
|
96
|
+
|
97
|
+
Returns:
|
98
|
+
str: The type of the endpoint.
|
99
|
+
"""
|
100
|
+
return "product-groups"
|
101
|
+
|
102
|
+
def get_instance_class(self, object_dict=None):
|
103
|
+
"""Get the instance class of the endpoint
|
104
|
+
|
105
|
+
This method is used to get the instance class of the endpoint.
|
106
|
+
|
107
|
+
Args:
|
108
|
+
object_dict (dict, optional): A dictionary containing the object data.
|
109
|
+
|
110
|
+
Returns:
|
111
|
+
AssistantEndpoint: The instance class of the endpoint.
|
112
|
+
"""
|
113
|
+
return ProductGroupEndpoint
|
114
|
+
|
115
|
+
def get_page_class(self, object_dict=None):
|
116
|
+
"""Get the page class of the endpoint
|
117
|
+
|
118
|
+
This method is used to get the page class of the endpoint.
|
119
|
+
|
120
|
+
Args:
|
121
|
+
object_dict (dict, optional): A dictionary containing the object data.
|
122
|
+
|
123
|
+
Returns:
|
124
|
+
PageAssistantEndpoint: The page class of the endpoint.
|
125
|
+
"""
|
126
|
+
return PageProductGroupEndpoint
|
kodexa/model/model.py
CHANGED
@@ -2443,11 +2443,14 @@ class Document(object):
|
|
2443
2443
|
def get_exceptions(self) -> List[ContentException]:
|
2444
2444
|
return self._persistence_layer.get_exceptions()
|
2445
2445
|
|
2446
|
-
def get_external_data(self) -> dict:
|
2447
|
-
return self._persistence_layer.get_external_data()
|
2446
|
+
def get_external_data(self, key="default") -> dict:
|
2447
|
+
return self._persistence_layer.get_external_data(key)
|
2448
2448
|
|
2449
|
-
def
|
2450
|
-
return self._persistence_layer.
|
2449
|
+
def get_external_data_keys(self) -> list[str]:
|
2450
|
+
return self._persistence_layer.get_external_data_keys()
|
2451
|
+
|
2452
|
+
def set_external_data(self, external_data:dict, key="default"):
|
2453
|
+
return self._persistence_layer.set_external_data(external_data, key)
|
2451
2454
|
|
2452
2455
|
def get_steps(self) -> list[ProcessingStep]:
|
2453
2456
|
return self._persistence_layer.get_steps()
|