kodexa 7.0.10217227753__tar.gz → 7.0.10350737552__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/PKG-INFO +1 -1
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/model.py +62 -1
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/persistence.py +113 -1
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/pyproject.toml +1 -1
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/LICENSE +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/README.md +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/assistant/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/assistant/assistant.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/connectors/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/connectors/connectors.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/base.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/entities/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/entities/check_response.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/entities/product.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/entities/product_subscription.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/objects.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/pipeline/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/pipeline/pipeline.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/platform/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/platform/client.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/platform/interaction.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/platform/kodexa.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/ast.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/core.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/lexrules.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/lextab.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/lextab.pyi +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/parserules.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/parserules.pyi +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/parsetab.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/selectors/parsetab.pyi +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/spatial/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/spatial/azure_models.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/spatial/bbox_common.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/spatial/table_form_common.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/steps/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/steps/common.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/testing/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/testing/test_components.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/testing/test_utils.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/training/__init__.py +0 -0
- {kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/training/train_utils.py +0 -0
@@ -12,7 +12,7 @@ from typing import Any, List, Optional
|
|
12
12
|
from addict import Dict
|
13
13
|
import deepdiff
|
14
14
|
import msgpack
|
15
|
-
from pydantic import BaseModel, ConfigDict
|
15
|
+
from pydantic import BaseModel, ConfigDict, Field
|
16
16
|
|
17
17
|
from kodexa.model.objects import ContentObject, FeatureSet
|
18
18
|
|
@@ -2369,6 +2369,55 @@ class FeatureSetDiff:
|
|
2369
2369
|
return node
|
2370
2370
|
|
2371
2371
|
|
2372
|
+
class ProcessingStep(BaseModel):
|
2373
|
+
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
2374
|
+
name: str
|
2375
|
+
metadata: dict = Field(default_factory=lambda: {})
|
2376
|
+
children: List['ProcessingStep'] = Field(default_factory=list)
|
2377
|
+
parents: List['ProcessingStep'] = Field(default_factory=list)
|
2378
|
+
|
2379
|
+
def add_child(self, child_step: 'ProcessingStep'):
|
2380
|
+
self.children.append(child_step)
|
2381
|
+
child_step.parents.append(self)
|
2382
|
+
|
2383
|
+
@staticmethod
|
2384
|
+
def merge_with(*other_steps: 'ProcessingStep') -> 'ProcessingStep':
|
2385
|
+
merged_step = ProcessingStep(name=f"Merged({', '.join(step.name for step in other_steps)})")
|
2386
|
+
for step in other_steps:
|
2387
|
+
step.children.append(merged_step)
|
2388
|
+
merged_step.parents.append(step)
|
2389
|
+
return merged_step
|
2390
|
+
|
2391
|
+
class Config:
|
2392
|
+
arbitrary_types_allowed = True
|
2393
|
+
json_encoders = {
|
2394
|
+
'ProcessingStep': lambda step: step.to_dict()
|
2395
|
+
}
|
2396
|
+
|
2397
|
+
def to_dict(self, seen=None):
|
2398
|
+
if seen is None:
|
2399
|
+
seen = set()
|
2400
|
+
|
2401
|
+
# Avoid circular references by skipping already seen objects
|
2402
|
+
if self.id in seen:
|
2403
|
+
return {'id': self.id, 'name': self.name}
|
2404
|
+
|
2405
|
+
seen.add(self.id)
|
2406
|
+
|
2407
|
+
return {
|
2408
|
+
'id': self.id,
|
2409
|
+
'name': self.name,
|
2410
|
+
'children': [child.to_dict(seen) for child in self.children],
|
2411
|
+
'parents': [{'id': parent.id, 'name': parent.name} for parent in self.parents], # or parent.to_dict(seen) if full structure is needed
|
2412
|
+
}
|
2413
|
+
|
2414
|
+
def to_json(self):
|
2415
|
+
return json.dumps(self.to_dict())
|
2416
|
+
|
2417
|
+
def __repr__(self):
|
2418
|
+
return f"Step(id={self.id}, name={self.name})"
|
2419
|
+
|
2420
|
+
|
2372
2421
|
class Document(object):
|
2373
2422
|
"""A Document is a collection of metadata and a set of content nodes."""
|
2374
2423
|
|
@@ -2384,6 +2433,18 @@ class Document(object):
|
|
2384
2433
|
def get_exceptions(self) -> List[ContentException]:
|
2385
2434
|
return self._persistence_layer.get_exceptions()
|
2386
2435
|
|
2436
|
+
def get_external_data(self) -> dict:
|
2437
|
+
return self._persistence_layer.get_external_data()
|
2438
|
+
|
2439
|
+
def set_external_data(self, external_data:dict):
|
2440
|
+
return self._persistence_layer.set_external_data(external_data)
|
2441
|
+
|
2442
|
+
def get_steps(self) -> list[ProcessingStep]:
|
2443
|
+
return self._persistence_layer.get_steps()
|
2444
|
+
|
2445
|
+
def set_steps(self, steps: list[ProcessingStep]):
|
2446
|
+
self._persistence_layer.set_steps(steps)
|
2447
|
+
|
2387
2448
|
def replace_exceptions(self, exceptions: List[ContentException]):
|
2388
2449
|
self._persistence_layer.replace_exceptions(exceptions)
|
2389
2450
|
|
@@ -13,7 +13,7 @@ from kodexa.model.model import (
|
|
13
13
|
DocumentMetadata,
|
14
14
|
ContentFeature,
|
15
15
|
ContentException,
|
16
|
-
ModelInsight,
|
16
|
+
ModelInsight, ProcessingStep,
|
17
17
|
)
|
18
18
|
|
19
19
|
logger = logging.getLogger()
|
@@ -1122,6 +1122,90 @@ class SqliteDocumentPersistence(object):
|
|
1122
1122
|
|
1123
1123
|
return content_nodes
|
1124
1124
|
|
1125
|
+
def __ensure_ed_table_exists(self):
|
1126
|
+
"""
|
1127
|
+
Ensure the 'ed' table exists in the database.
|
1128
|
+
Creates the table if it does not exist.
|
1129
|
+
"""
|
1130
|
+
self.cursor.execute("""
|
1131
|
+
CREATE TABLE IF NOT EXISTS ed (
|
1132
|
+
obj BLOB
|
1133
|
+
)
|
1134
|
+
""")
|
1135
|
+
|
1136
|
+
# Check if the table has any rows, if not, insert an initial empty row
|
1137
|
+
result = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()
|
1138
|
+
if result[0] == 0:
|
1139
|
+
self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb({}))])
|
1140
|
+
|
1141
|
+
def set_external_data(self, external_data: dict):
|
1142
|
+
"""
|
1143
|
+
Sets the external data for the document.
|
1144
|
+
|
1145
|
+
Args:
|
1146
|
+
external_data (dict): The external data to store, must be JSON serializable.
|
1147
|
+
"""
|
1148
|
+
self.__ensure_ed_table_exists()
|
1149
|
+
serialized_data = sqlite3.Binary(msgpack.packb(external_data))
|
1150
|
+
self.cursor.execute("UPDATE ed SET obj = ? WHERE rowid = 1", [serialized_data])
|
1151
|
+
self.connection.commit()
|
1152
|
+
|
1153
|
+
def get_external_data(self) -> dict:
|
1154
|
+
"""
|
1155
|
+
Gets the external data associated with this document.
|
1156
|
+
|
1157
|
+
Returns:
|
1158
|
+
dict: The external data stored in the ed table.
|
1159
|
+
"""
|
1160
|
+
self.__ensure_ed_table_exists()
|
1161
|
+
result = self.cursor.execute("SELECT obj FROM ed WHERE rowid = 1").fetchone()
|
1162
|
+
if result and result[0]:
|
1163
|
+
return msgpack.unpackb(result[0])
|
1164
|
+
return {}
|
1165
|
+
|
1166
|
+
def __ensure_steps_table_exists(self):
|
1167
|
+
"""
|
1168
|
+
Ensure the 'steps' table exists in the database.
|
1169
|
+
Creates the table if it does not exist.
|
1170
|
+
"""
|
1171
|
+
self.cursor.execute("""
|
1172
|
+
CREATE TABLE IF NOT EXISTS steps (
|
1173
|
+
obj BLOB
|
1174
|
+
)
|
1175
|
+
""")
|
1176
|
+
|
1177
|
+
# Check if the table has any rows, if not, insert an initial empty row
|
1178
|
+
result = self.cursor.execute("SELECT COUNT(*) FROM steps").fetchone()
|
1179
|
+
if result[0] == 0:
|
1180
|
+
self.cursor.execute("INSERT INTO steps (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb([]))])
|
1181
|
+
|
1182
|
+
def set_steps(self, steps: List[ProcessingStep]):
|
1183
|
+
"""
|
1184
|
+
Sets the processing steps for the document.
|
1185
|
+
|
1186
|
+
Args:
|
1187
|
+
steps (List[ProcessingStep]): A list of ProcessingStep objects to store.
|
1188
|
+
"""
|
1189
|
+
self.__ensure_steps_table_exists()
|
1190
|
+
serialized_steps = [step.to_dict() for step in steps]
|
1191
|
+
packed_data = sqlite3.Binary(msgpack.packb(serialized_steps))
|
1192
|
+
self.cursor.execute("UPDATE steps SET obj = ? WHERE rowid = 1", [packed_data])
|
1193
|
+
self.connection.commit()
|
1194
|
+
|
1195
|
+
def get_steps(self) -> List[ProcessingStep]:
|
1196
|
+
"""
|
1197
|
+
Gets the processing steps associated with this document.
|
1198
|
+
|
1199
|
+
Returns:
|
1200
|
+
List[ProcessingStep]: A list of ProcessingStep objects.
|
1201
|
+
"""
|
1202
|
+
self.__ensure_steps_table_exists()
|
1203
|
+
result = self.cursor.execute("SELECT obj FROM steps WHERE rowid = 1").fetchone()
|
1204
|
+
if result and result[0]:
|
1205
|
+
unpacked_data = msgpack.unpackb(result[0])
|
1206
|
+
return [ProcessingStep(**step) for step in unpacked_data]
|
1207
|
+
return []
|
1208
|
+
|
1125
1209
|
|
1126
1210
|
class SimpleObjectCache(object):
|
1127
1211
|
"""
|
@@ -1260,6 +1344,34 @@ class PersistenceManager(object):
|
|
1260
1344
|
document, filename, delete_on_close, inmemory=inmemory
|
1261
1345
|
)
|
1262
1346
|
|
1347
|
+
def get_steps(self) -> list[ProcessingStep]:
|
1348
|
+
"""
|
1349
|
+
Gets the processing steps for this document
|
1350
|
+
|
1351
|
+
:return:
|
1352
|
+
"""
|
1353
|
+
return self._underlying_persistence.get_steps()
|
1354
|
+
|
1355
|
+
def set_steps(self, steps: list[ProcessingStep]):
|
1356
|
+
self._underlying_persistence.set_steps(steps)
|
1357
|
+
|
1358
|
+
def get_external_data(self) -> dict:
|
1359
|
+
"""
|
1360
|
+
Gets the external data object associated with this document
|
1361
|
+
|
1362
|
+
:return: dict of the external data
|
1363
|
+
"""
|
1364
|
+
return self._underlying_persistence.get_external_data()
|
1365
|
+
|
1366
|
+
def set_external_data(self, external_data:dict):
|
1367
|
+
"""
|
1368
|
+
Sets the external data for this document
|
1369
|
+
|
1370
|
+
:param external_data: dict representing the external data, must be JSON serializable
|
1371
|
+
:return:
|
1372
|
+
"""
|
1373
|
+
self._underlying_persistence.set_external_data(external_data)
|
1374
|
+
|
1263
1375
|
def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
|
1264
1376
|
"""
|
1265
1377
|
Retrieves all nodes of a given type from the underlying persistence layer.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "kodexa"
|
3
|
-
version = "7.0.10217227753"
|
3
|
+
version = "7.0.010350737552"
|
4
4
|
description = "Python SDK for the Kodexa Platform"
|
5
5
|
authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
|
6
6
|
readme = "README.md"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{kodexa-7.0.10217227753 → kodexa-7.0.10350737552}/kodexa/model/entities/product_subscription.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|