kodexa 7.0.10599563193__tar.gz → 7.0.10665853973__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/PKG-INFO +1 -1
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/dataclasses/__init__.py +121 -6
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/pyproject.toml +1 -1
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/LICENSE +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/README.md +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/assistant/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/assistant/assistant.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/connectors/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/connectors/connectors.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/dataclasses/llm_data_class.j2 +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/base.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/entities/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/entities/check_response.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/entities/product.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/entities/product_subscription.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/model.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/objects.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/persistence.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/pipeline/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/pipeline/pipeline.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/platform/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/platform/client.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/platform/interaction.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/platform/kodexa.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/ast.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/core.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/lexrules.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/lextab.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/lextab.pyi +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/parserules.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/parserules.pyi +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/parsetab.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/selectors/parsetab.pyi +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/spatial/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/spatial/azure_models.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/spatial/bbox_common.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/spatial/table_form_common.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/steps/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/steps/common.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/testing/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/testing/test_components.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/testing/test_utils.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/training/__init__.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/training/train_utils.py +0 -0
- {kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/utils/__init__.py +0 -0
@@ -1,16 +1,16 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
3
|
import uuid
|
4
|
+
from importlib.machinery import SourceFileLoader
|
4
5
|
from typing import Optional, List
|
5
6
|
|
6
7
|
import jinja2
|
7
|
-
from pydantic import BaseModel
|
8
|
-
|
9
8
|
from kodexa import ContentNode
|
10
|
-
from kodexa.model.model import Tag
|
9
|
+
from kodexa.model.model import Tag, Document
|
11
10
|
from kodexa.model.objects import ContentException, Taxon, Taxonomy, Assistant
|
12
|
-
from
|
13
|
-
|
11
|
+
from pydantic import BaseModel
|
12
|
+
|
13
|
+
from kodexa.utils import snake_to_camel, to_snake, taxon_to_property_name, taxon_to_class_name, taxon_to_group_path
|
14
14
|
|
15
15
|
logger = logging.getLogger()
|
16
16
|
|
@@ -30,6 +30,7 @@ class LLMDataAttribute(BaseModel):
|
|
30
30
|
normalized_text: Optional[str] = None
|
31
31
|
node_uuid_list: Optional[List[int]] = None
|
32
32
|
tag_uuid: Optional[str] = None
|
33
|
+
page_number: Optional[int] = None
|
33
34
|
exceptions: Optional[list[ContentException]] = None
|
34
35
|
|
35
36
|
def create_exception(
|
@@ -88,6 +89,100 @@ class LLMDataObject(BaseModel):
|
|
88
89
|
else:
|
89
90
|
self.group_uuid = group_uuid
|
90
91
|
|
92
|
+
def get_all_review_pages(self):
|
93
|
+
"""
|
94
|
+
Returns a list of unique page numbers that would be included in the review.
|
95
|
+
|
96
|
+
:return: list of unique page numbers
|
97
|
+
"""
|
98
|
+
pages = set()
|
99
|
+
for field in self.__fields__:
|
100
|
+
pages.update(self._get_field_pages(field))
|
101
|
+
return sorted(list(pages))
|
102
|
+
|
103
|
+
def _get_field_pages(self, field):
|
104
|
+
if isinstance(getattr(self, field), list):
|
105
|
+
pages = set()
|
106
|
+
for item in getattr(self, field):
|
107
|
+
|
108
|
+
if isinstance(item, LLMDataObject):
|
109
|
+
pages.update(item.get_all_review_pages())
|
110
|
+
return pages
|
111
|
+
elif isinstance(getattr(self, field), LLMDataAttribute):
|
112
|
+
if getattr(self, field).value != getattr(self, field).normalized_text:
|
113
|
+
return {getattr(self, field).page_number}
|
114
|
+
elif isinstance(getattr(self, field), LLMDataObject):
|
115
|
+
return getattr(self, field).get_all_review_pages()
|
116
|
+
return set()
|
117
|
+
|
118
|
+
def update_from_review(self, review_dict):
|
119
|
+
"""
|
120
|
+
Update the node UUIDs and value based on the provided review dictionary.
|
121
|
+
|
122
|
+
:param review_dict: A dictionary containing the updated review information
|
123
|
+
"""
|
124
|
+
for field, field_data in review_dict.items():
|
125
|
+
self._update_field_review(field, field_data)
|
126
|
+
|
127
|
+
def _update_field_review(self, field, field_data):
|
128
|
+
if isinstance(field_data, list):
|
129
|
+
for i, item_data in enumerate(field_data):
|
130
|
+
if i < len(getattr(self, field)):
|
131
|
+
getattr(self, field)[i].update_from_review(item_data)
|
132
|
+
elif isinstance(field_data, dict):
|
133
|
+
if isinstance(getattr(self, field), LLMDataAttribute):
|
134
|
+
self._update_data_attribute(field, field_data)
|
135
|
+
elif isinstance(getattr(self, field), LLMDataObject):
|
136
|
+
getattr(self, field).update_from_review(field_data)
|
137
|
+
|
138
|
+
def _update_data_attribute(self, field, field_data):
|
139
|
+
attr = getattr(self, field)
|
140
|
+
if 'value' in field_data:
|
141
|
+
attr.value = field_data['value']
|
142
|
+
if 'node_uuids' in field_data:
|
143
|
+
attr.node_uuid_list = field_data['node_uuids']
|
144
|
+
if 'normalized_text' in field_data:
|
145
|
+
attr.normalized_text = field_data['normalized_text']
|
146
|
+
|
147
|
+
def to_review(self, page_number=None):
|
148
|
+
"""
|
149
|
+
Build a representation of the data object and its data attributes that is a dict that includes the
|
150
|
+
value, normalized text and node UUIDs so we can use this to review mismatched value/normalized
|
151
|
+
with the LLM for a specific page number.
|
152
|
+
|
153
|
+
:param page_number: Optional page number to filter the review items
|
154
|
+
:return: dict of this data object and children for the specified page
|
155
|
+
"""
|
156
|
+
review = {}
|
157
|
+
for field in self.__fields__:
|
158
|
+
review_field = self._build_review(field, page_number)
|
159
|
+
if review_field:
|
160
|
+
review[field] = review_field
|
161
|
+
return review
|
162
|
+
|
163
|
+
def _build_review(self, field, page_number=None):
|
164
|
+
if isinstance(getattr(self, field), list):
|
165
|
+
review_field = []
|
166
|
+
for item in getattr(self, field):
|
167
|
+
if isinstance(item, LLMDataObject):
|
168
|
+
new_review = item.to_review(page_number)
|
169
|
+
if new_review:
|
170
|
+
review_field.append(new_review)
|
171
|
+
return review_field if review_field else None
|
172
|
+
elif isinstance(getattr(self, field), LLMDataAttribute):
|
173
|
+
if getattr(self, field).value != getattr(self, field).normalized_text:
|
174
|
+
if page_number is None or getattr(self, field).page_number == page_number:
|
175
|
+
return {
|
176
|
+
"value": getattr(self, field).value,
|
177
|
+
"normalized_text": getattr(self, field).normalized_text,
|
178
|
+
"node_uuids": getattr(self, field).node_uuid_list,
|
179
|
+
"page_number": getattr(self, field).page_number,
|
180
|
+
}
|
181
|
+
elif isinstance(getattr(self, field), LLMDataObject):
|
182
|
+
return getattr(self, field).to_review(page_number)
|
183
|
+
|
184
|
+
return None
|
185
|
+
|
91
186
|
def create_exception(
|
92
187
|
self,
|
93
188
|
exception_type_id: str,
|
@@ -215,6 +310,25 @@ class LLMDataObject(BaseModel):
|
|
215
310
|
current_value.append(new_tag)
|
216
311
|
node.remove_feature("tag", tag)
|
217
312
|
node.add_feature("tag", tag, current_value, single=False)
|
313
|
+
# try:
|
314
|
+
# if value.data_type == 'Derived':
|
315
|
+
# logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
|
316
|
+
# nearby_node = find_nearby_word_to_tag(node, tag)
|
317
|
+
# nearby_node.tag(
|
318
|
+
# tag_to_apply=tag,
|
319
|
+
# value=value.normalized_text,
|
320
|
+
# tag_uuid=tag_uuid,
|
321
|
+
# cell_index=self.cell_index,
|
322
|
+
# selector="//word",
|
323
|
+
# confidence=-1,
|
324
|
+
# group_uuid=self.group_uuid,
|
325
|
+
# parent_group_uuid=parent_group_uuid,
|
326
|
+
# owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
|
327
|
+
# )
|
328
|
+
# else:
|
329
|
+
# logger.info(f"Node already has tag {tag} - Skipping.")
|
330
|
+
# except:
|
331
|
+
# logger.error(f"Error tagging nearby node with tag {tag}")
|
218
332
|
|
219
333
|
logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
|
220
334
|
if isinstance(value, LLMDataObject):
|
@@ -249,7 +363,8 @@ def get_template_env():
|
|
249
363
|
Returns:
|
250
364
|
|
251
365
|
"""
|
252
|
-
|
366
|
+
cli_path = os.path.dirname(os.path.abspath(__file__))
|
367
|
+
package_location = os.path.join(cli_path, "templates")
|
253
368
|
template_loader = jinja2.FileSystemLoader([os.getcwd(), package_location])
|
254
369
|
env = jinja2.Environment(loader=template_loader, autoescape=True)
|
255
370
|
env.globals["snake_to_camel"] = snake_to_camel
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "kodexa"
|
3
|
-
version = "7.0.10599563193"
|
3
|
+
version = "7.0.10665853973"
|
4
4
|
description = "Python SDK for the Kodexa Platform"
|
5
5
|
authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
|
6
6
|
readme = "README.md"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{kodexa-7.0.10599563193 → kodexa-7.0.10665853973}/kodexa/model/entities/product_subscription.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|