kodexa 7.0.12399109365__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,21 +54,35 @@ class LLMDataAttribute(BaseModel):
54
54
  self.node_uuid_list = source.node_uuid_list
55
55
  self.page_number = source.page_number
56
56
 
57
- def create_exception(
58
- self,
59
- exception_type_id: str,
60
- exception_type: str,
61
- normalized_text: str,
62
- message: str,
63
- exception_detail: str,
64
- ):
65
- content_exception = ContentException(
66
- exception_type=exception_type,
67
- exception_detail=exception_detail,
68
- message=message,
69
- tag_uuid=self.tag_uuid,
70
- )
71
- self.exceptions.append(content_exception)
57
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
58
+ # Let's make sure we add all the content exceptions
59
+ if self.exceptions is not None:
60
+ for exception in self.exceptions:
61
+ # We have two types of exception, one in the API and one in the
62
+ # document
63
+ from kodexa.model import ContentException as KodexaContentException
64
+ internal_exception = KodexaContentException(
65
+ tag=exception.tag,
66
+ exception_type=exception.exception_type,
67
+ message=exception.message,
68
+ exception_details=exception.exception_details,
69
+ severity=exception.severity,
70
+ group_uuid=self.group_uuid,
71
+ tag_uuid=self.tag_uuid,
72
+ )
73
+ document.doc.add_exception(internal_exception)
74
+
75
+ def to_dict(self, taxonomy: Taxonomy) -> dict:
76
+ """Convert attribute to a dict mapping the taxon's external name to its normalized text (or raw value if none)"""
77
+
78
+ target_taxon = taxonomy.get_taxon_by_path(self.taxon_path)
79
+ if target_taxon is None:
80
+ return {}
81
+
82
+ taxon_external_name = target_taxon.external_name
83
+ return {
84
+ taxon_external_name: self.normalized_text if self.normalized_text else self.value
85
+ }
72
86
 
73
87
 
74
88
  class LLMDataObject(BaseModel):
@@ -99,6 +113,24 @@ class LLMDataObject(BaseModel):
99
113
  class Config:
100
114
  arbitrary_types_allowed = True
101
115
 
116
+ def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
117
+ # Let's make sure we add all the content exceptions
118
+ if self.exceptions is not None:
119
+ for exception in self.exceptions:
120
+ # We have two types of exception, one in the API and one in the
121
+ # document
122
+ from kodexa.model import ContentException as KodexaContentException
123
+ internal_exception = KodexaContentException(
124
+ tag=exception.tag,
125
+ exception_type=exception.exception_type,
126
+ message=exception.message,
127
+ exception_details=exception.exception_details,
128
+ severity=exception.severity,
129
+ group_uuid=self.group_uuid,
130
+ tag_uuid=self.tag_uuid,
131
+ )
132
+ document.doc.add_exception(internal_exception)
133
+
102
134
  def get_all_review_pages(self):
103
135
  """
104
136
  Returns a list of unique page numbers that would be included in the review.
@@ -106,7 +138,7 @@ class LLMDataObject(BaseModel):
106
138
  :return: list of unique page numbers
107
139
  """
108
140
  pages = set()
109
- for field in self.__fields__:
141
+ for field in self.model_fields:
110
142
  pages.update(self._get_field_pages(field))
111
143
  return sorted(list(pages))
112
144
 
@@ -154,6 +186,35 @@ class LLMDataObject(BaseModel):
154
186
  if 'normalized_text' in field_data:
155
187
  attr.normalized_text = field_data['normalized_text']
156
188
 
189
+ def to_dict(self, taxonomy: Taxonomy) -> dict:
190
+ """Convert data object to a dict keyed by taxon external names, using normalized values"""
191
+ result = {}
192
+ for field in self.model_fields:
193
+ value = getattr(self, field)
194
+
195
+ if isinstance(value, list) and len(value) > 0:
196
+ if isinstance(value[0], LLMDataObject):
197
+ # We need to find the first field of the object that is an LLMDataAttribute
198
+ # and use that to derive the taxon path of the LLMDataObject
199
+ data_attribute = None
200
+ for child_field in value[0].model_fields:
201
+ child_attr = getattr(value[0], child_field)
202
+ if isinstance(child_attr, LLMDataAttribute):
203
+ data_attribute = child_attr
204
+ break
205
+ if data_attribute is not None:
206
+ taxon_path = data_attribute.taxon_path.rsplit('/', 1)[0]
207
+ target_taxon = taxonomy.get_taxon_by_path(taxon_path)
208
+ if target_taxon is not None:
209
+ result[target_taxon.external_name] = [item.to_dict(taxonomy) for item in value if isinstance(item, (LLMDataObject, LLMDataAttribute))]
210
+ elif isinstance(value, LLMDataAttribute):
211
+ result.update(value.to_dict(taxonomy))
212
+ elif isinstance(value, LLMDataObject):
213
+ target_taxon = taxonomy.get_taxon_by_path(value.taxon_path)
214
+ if target_taxon is not None:
215
+ result[target_taxon.external_name] = value.to_dict(taxonomy)
216
+ return result
217
+
157
218
  def to_review(self, page_number=None):
158
219
  """
159
220
  Build a representation of the data object and its data attributes that is a dict that includes the
@@ -164,7 +225,7 @@ class LLMDataObject(BaseModel):
164
225
  :return: dict of this data object and children for the specified page
165
226
  """
166
227
  review = {}
167
- for field in self.__fields__:
228
+ for field in self.model_fields:
168
229
  review_field = self._build_review(field, page_number)
169
230
  if review_field:
170
231
  review[field] = review_field
@@ -193,26 +254,6 @@ class LLMDataObject(BaseModel):
193
254
 
194
255
  return None
195
256
 
196
- def create_exception(
197
- self,
198
- exception_type_id: str,
199
- exception_type: str,
200
- message: str,
201
- exception_detail: str,
202
- severity: str = "ERROR",
203
- ):
204
- content_exception = ContentException(
205
- exception_type=exception_type,
206
- exception_details=exception_detail,
207
- message=message,
208
- group_uuid=self.group_uuid,
209
- severity=severity,
210
- )
211
- if self.exceptions is None:
212
- self.exceptions = []
213
-
214
- self.exceptions.append(content_exception)
215
-
216
257
  def apply_labels(
217
258
  self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
218
259
  assistant: Optional["Assistant"] = None
@@ -234,24 +275,11 @@ class LLMDataObject(BaseModel):
234
275
  """
235
276
 
236
277
  # Let's make sure we add all the content exceptions
237
- if self.exceptions is not None:
238
- for exception in self.exceptions:
239
- # We have two types of exception, one in the API and one in the
240
- # document
241
- from kodexa.model import ContentException as KodexaContentException
242
- internal_exception = KodexaContentException(
243
- exception_type=exception.exception_type,
244
- message=exception.message,
245
- exception_details=exception.exception_details,
246
- severity=exception.severity,
247
- group_uuid=exception.group_uuid,
248
- tag_uuid=exception.tag_uuid,
249
- )
250
- document.doc.add_exception(internal_exception)
278
+ self.process_exceptions(document)
251
279
 
252
280
  # Let's go through this data object and find all the attributes that have a value
253
281
  # then we will apply the labels to the document
254
- for field in self.__fields__:
282
+ for field in self.model_fields:
255
283
  logger.info(f"Processing field {field}")
256
284
  value = getattr(self, field)
257
285
 
@@ -270,8 +298,6 @@ class LLMDataObject(BaseModel):
270
298
  # We need to add the label to the document for this attribute
271
299
 
272
300
  tag = value.taxon_path
273
-
274
- # TODO need to work out why we are missing them?
275
301
  logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
276
302
  if value.node_uuid_list is None:
277
303
  value.node_uuid_list = value.line_ids
@@ -320,31 +346,16 @@ class LLMDataObject(BaseModel):
320
346
  current_value.append(new_tag)
321
347
  node.remove_feature("tag", tag)
322
348
  node.add_feature("tag", tag, current_value, single=False)
323
- # try:
324
- # if value.data_type == 'Derived':
325
- # logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
326
- # nearby_node = find_nearby_word_to_tag(node, tag)
327
- # nearby_node.tag(
328
- # tag_to_apply=tag,
329
- # value=value.normalized_text,
330
- # tag_uuid=tag_uuid,
331
- # cell_index=self.cell_index,
332
- # selector="//word",
333
- # confidence=-1,
334
- # group_uuid=self.group_uuid,
335
- # parent_group_uuid=parent_group_uuid,
336
- # owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
337
- # )
338
- # else:
339
- # logger.info(f"Node already has tag {tag} - Skipping.")
340
- # except:
341
- # logger.error(f"Error tagging nearby node with tag {tag}")
342
349
 
343
350
  logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
351
+
352
+ # Let's make sure we add all the content exceptions
353
+ self.process_exceptions(document)
354
+
344
355
  if isinstance(value, LLMDataObject):
345
356
  # We need to apply the labels to the document for this object
346
357
  value.apply_labels(document, parent_group_uuid=self.group_uuid)
347
- # logger.info(f"Applied labels to data object {value.group_uuid}")
358
+ logger.info(f"Applied labels to data object {value.group_uuid}")
348
359
 
349
360
 
350
361
  def find_nearby_word_to_tag(node, tag):
@@ -1,13 +1,17 @@
1
- from typing import Optional, List
1
+ from decimal import Decimal
2
+ from typing import Optional, List, Set
2
3
 
3
4
  from pydantic import BaseModel, ConfigDict, Field
5
+
4
6
  from kodexa.model.base import StandardDateTime
5
7
  from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
8
+ from .product_group import ProductGroup
9
+ from ..objects import ProjectTemplate
6
10
 
7
11
 
8
- class Product(BaseModel):
12
+ class ProjectTemplateMetadata(BaseModel):
9
13
  """
10
-
14
+ A project template metadata entity
11
15
  """
12
16
  model_config = ConfigDict(
13
17
  populate_by_name=True,
@@ -15,11 +19,41 @@ class Product(BaseModel):
15
19
  arbitrary_types_allowed=True,
16
20
  protected_namespaces=("model_config",),
17
21
  )
22
+
23
+ id: str
24
+
25
+
26
+ class ProductProjectTemplate(BaseModel):
18
27
  """
19
- A product
28
+ A product project template entity representing the relationship between products and project templates
20
29
  """
30
+ model_config = ConfigDict(
31
+ populate_by_name=True,
32
+ use_enum_values=True,
33
+ arbitrary_types_allowed=True,
34
+ protected_namespaces=("model_config",),
35
+ )
36
+
37
+ id: Optional[str] = None
38
+ uuid: Optional[str] = None
39
+ change_sequence: Optional[int] = Field(None, alias="changeSequence")
40
+ created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
41
+ updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
42
+ display_order: Optional[int] = Field(None, alias="displayOrder")
43
+ project_template_metadata: Optional[ProjectTemplateMetadata] = Field(None, alias="projectTemplateMetadata")
21
44
 
22
45
 
46
+ class Product(BaseModel):
47
+ """
48
+ A product entity representing a product in the Kodexa platform
49
+ """
50
+ model_config = ConfigDict(
51
+ populate_by_name=True,
52
+ use_enum_values=True,
53
+ arbitrary_types_allowed=True,
54
+ protected_namespaces=("model_config",),
55
+ )
56
+
23
57
  id: Optional[str] = None
24
58
  uuid: Optional[str] = None
25
59
  change_sequence: Optional[int] = Field(None, alias="changeSequence")
@@ -28,6 +62,26 @@ class Product(BaseModel):
28
62
  name: str
29
63
  description: Optional[str] = None
30
64
  overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
65
+ product_group: ProductGroup = Field(..., alias="productGroup")
66
+ parent: Optional['Product'] = None
67
+ image_url: Optional[str] = Field(None, alias="imageUrl")
68
+ price_id: Optional[str] = Field(None, alias="priceId")
69
+ price: Optional[Decimal] = None
70
+ number_of_credits: Optional[int] = Field(None, alias="numberOfCredits")
71
+ price_suffix: Optional[str] = Field(None, alias="priceSuffix")
72
+ has_quantity: bool = Field(False, alias="hasQuantity")
73
+ active: bool = True
74
+ order: Optional[int] = None
75
+ promoted: Optional[bool] = None
76
+ project_templates: Optional[Set[ProjectTemplate]] = Field(None, alias="projectTemplates")
77
+ search_text: Optional[str] = None
78
+
79
+ def update_search_text(self):
80
+ """Updates the search text for the product"""
81
+ if self.product_group:
82
+ self.search_text = f"{self.name.lower()} {self.product_group.name.lower()}"
83
+ else:
84
+ self.search_text = self.name.lower()
31
85
 
32
86
 
33
87
  class ProductEndpoint(Product, EntityEndpoint):
@@ -53,7 +107,7 @@ class ProductEndpoint(Product, EntityEndpoint):
53
107
 
54
108
  class PageProduct(BaseModel):
55
109
  """
56
-
110
+ Represents a paginated list of products
57
111
  """
58
112
  model_config = ConfigDict(
59
113
  populate_by_name=True,
@@ -66,7 +120,6 @@ class PageProduct(BaseModel):
66
120
  size: Optional[int] = None
67
121
  content: Optional[List[Product]] = None
68
122
  number: Optional[int] = None
69
-
70
123
  number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
71
124
  first: Optional[bool] = None
72
125
  last: Optional[bool] = None
@@ -0,0 +1,126 @@
1
+ from typing import Optional, List
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+ from kodexa.model.base import StandardDateTime
5
+ from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
6
+
7
+
8
+ class ProductGroup(BaseModel):
9
+ """
10
+
11
+ """
12
+ model_config = ConfigDict(
13
+ populate_by_name=True,
14
+ use_enum_values=True,
15
+ arbitrary_types_allowed=True,
16
+ protected_namespaces=("model_config",),
17
+ )
18
+ """
19
+ A product group
20
+ """
21
+
22
+
23
+ id: Optional[str] = None
24
+ uuid: Optional[str] = None
25
+ change_sequence: Optional[int] = Field(None, alias="changeSequence")
26
+ created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
27
+ updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
28
+ name: str
29
+ description: Optional[str] = None
30
+ overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
31
+
32
+
33
+ class ProductGroupEndpoint(ProductGroup, EntityEndpoint):
34
+ """Handles the endpoint for a product group
35
+
36
+ This class is a combination of ProductGroup and EntityEndpoint. It is used
37
+ to manage the endpoint for product groups.
38
+
39
+ Methods:
40
+ get_type: Returns the type of the endpoint.
41
+ """
42
+
43
+ def get_type(self) -> str:
44
+ """Gets the type of the endpoint.
45
+
46
+ This method returns the type of the endpoint which is "product-groups".
47
+
48
+ Returns:
49
+ str: The type of the endpoint.
50
+ """
51
+ return "product-groups"
52
+
53
+
54
+ class PageProductGroup(BaseModel):
55
+ """
56
+
57
+ """
58
+ model_config = ConfigDict(
59
+ populate_by_name=True,
60
+ use_enum_values=True,
61
+ arbitrary_types_allowed=True,
62
+ protected_namespaces=("model_config",),
63
+ )
64
+ total_pages: Optional[int] = Field(None, alias="totalPages")
65
+ total_elements: Optional[int] = Field(None, alias="totalElements")
66
+ size: Optional[int] = None
67
+ content: Optional[List[ProductGroup]] = None
68
+ number: Optional[int] = None
69
+
70
+ number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
71
+ first: Optional[bool] = None
72
+ last: Optional[bool] = None
73
+ empty: Optional[bool] = None
74
+
75
+
76
+ class PageProductGroupEndpoint(PageProductGroup, PageEndpoint):
77
+ def get_type(self) -> Optional[str]:
78
+ return "product-group"
79
+
80
+
81
+ class ProductGroupsEndpoint(EntitiesEndpoint):
82
+ """Represents the product groups endpoint
83
+
84
+ This class is used to represent the product groups endpoint in the system.
85
+
86
+ Attributes:
87
+ object_dict: A dictionary containing the object data.
88
+ """
89
+
90
+ """Represents a product groups endpoint"""
91
+
92
+ def get_type(self) -> str:
93
+ """Get the type of the endpoint
94
+
95
+ This method is used to get the type of the endpoint.
96
+
97
+ Returns:
98
+ str: The type of the endpoint.
99
+ """
100
+ return "product-groups"
101
+
102
+ def get_instance_class(self, object_dict=None):
103
+ """Get the instance class of the endpoint
104
+
105
+ This method is used to get the instance class of the endpoint.
106
+
107
+ Args:
108
+ object_dict (dict, optional): A dictionary containing the object data.
109
+
110
+ Returns:
111
+ ProductGroupEndpoint: The instance class of the endpoint.
112
+ """
113
+ return ProductGroupEndpoint
114
+
115
+ def get_page_class(self, object_dict=None):
116
+ """Get the page class of the endpoint
117
+
118
+ This method is used to get the page class of the endpoint.
119
+
120
+ Args:
121
+ object_dict (dict, optional): A dictionary containing the object data.
122
+
123
+ Returns:
124
+ PageProductGroupEndpoint: The page class of the endpoint.
125
+ """
126
+ return PageProductGroupEndpoint
kodexa/model/model.py CHANGED
@@ -2443,11 +2443,14 @@ class Document(object):
2443
2443
  def get_exceptions(self) -> List[ContentException]:
2444
2444
  return self._persistence_layer.get_exceptions()
2445
2445
 
2446
- def get_external_data(self) -> dict:
2447
- return self._persistence_layer.get_external_data()
2446
+ def get_external_data(self, key="default") -> dict:
2447
+ return self._persistence_layer.get_external_data(key)
2448
2448
 
2449
- def set_external_data(self, external_data:dict):
2450
- return self._persistence_layer.set_external_data(external_data)
2449
+ def get_external_data_keys(self) -> list[str]:
2450
+ return self._persistence_layer.get_external_data_keys()
2451
+
2452
+ def set_external_data(self, external_data:dict, key="default"):
2453
+ return self._persistence_layer.set_external_data(external_data, key)
2451
2454
 
2452
2455
  def get_steps(self) -> list[ProcessingStep]:
2453
2456
  return self._persistence_layer.get_steps()