kodexa 7.0.10217227753__py3-none-any.whl → 7.0.10350737552__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodexa/model/model.py CHANGED
@@ -12,7 +12,7 @@ from typing import Any, List, Optional
12
12
  from addict import Dict
13
13
  import deepdiff
14
14
  import msgpack
15
- from pydantic import BaseModel, ConfigDict
15
+ from pydantic import BaseModel, ConfigDict, Field
16
16
 
17
17
  from kodexa.model.objects import ContentObject, FeatureSet
18
18
 
@@ -2369,6 +2369,55 @@ class FeatureSetDiff:
2369
2369
  return node
2370
2370
 
2371
2371
 
2372
class ProcessingStep(BaseModel):
    """A named step in a graph of processing steps.

    Steps are linked in both directions: ``children`` lists the steps attached
    beneath this one and ``parents`` lists the steps this one is attached to.
    Because those links form cycles, use :meth:`to_dict` / :meth:`to_json`
    to serialize a step; they guard against revisiting a step.
    """

    # pydantic v2 style configuration. The previous nested ``class Config``
    # also declared a ``json_encoders`` entry keyed by the *string*
    # 'ProcessingStep'; encoder keys are matched against types, so that entry
    # could never apply and has been dropped.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Unique identifier; a fresh UUID4 string is generated when none is given.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # Display name of the step.
    name: str
    # Free-form metadata attached to the step.
    metadata: dict = Field(default_factory=dict)
    # Steps attached beneath this one.
    children: List['ProcessingStep'] = Field(default_factory=list)
    # Steps this one is attached to.
    parents: List['ProcessingStep'] = Field(default_factory=list)

    def add_child(self, child_step: 'ProcessingStep'):
        """Link ``child_step`` beneath this step, updating both sides of the link.

        Args:
            child_step: The step to append to ``self.children``; ``self`` is
                appended to its ``parents``.
        """
        self.children.append(child_step)
        child_step.parents.append(self)

    @staticmethod
    def merge_with(*other_steps: 'ProcessingStep') -> 'ProcessingStep':
        """Create a new step that is a child of every step in ``other_steps``.

        The new step is named ``Merged(<name>, <name>, ...)`` from the names of
        the given steps, in order.

        Args:
            *other_steps: The steps to merge.

        Returns:
            The newly created merged step, already linked to each input step.
        """
        merged_step = ProcessingStep(name=f"Merged({', '.join(step.name for step in other_steps)})")
        for step in other_steps:
            step.children.append(merged_step)
            merged_step.parents.append(step)
        return merged_step

    def to_dict(self, seen=None):
        """Convert this step and its descendants into plain dicts.

        Args:
            seen: Set of step ids already emitted during this traversal.
                Callers normally leave this as ``None``; it is threaded through
                the recursion.

        Returns:
            A dict with ``id``, ``name``, ``metadata``, ``children`` (expanded
            recursively) and ``parents`` (as ``id``/``name`` stubs). A step that
            was already emitted in this traversal is returned as an
            ``id``/``name`` stub only.
        """
        if seen is None:
            seen = set()

        # Avoid circular references by skipping already seen objects
        if self.id in seen:
            return {'id': self.id, 'name': self.name}

        seen.add(self.id)

        return {
            'id': self.id,
            'name': self.name,
            # Previously omitted, which lost metadata whenever steps were
            # stored via to_dict() and rebuilt from the stored dicts.
            'metadata': dict(self.metadata),
            'children': [child.to_dict(seen) for child in self.children],
            # Parents are emitted as stubs only; expanding them would walk
            # back up the graph.
            'parents': [{'id': parent.id, 'name': parent.name} for parent in self.parents],
        }

    def to_json(self):
        """Return :meth:`to_dict` encoded as a JSON string."""
        return json.dumps(self.to_dict())

    def __repr__(self):
        # Deliberately short: only id and name are shown, so the parent/child
        # links are never walked when printing a step.
        return f"Step(id={self.id}, name={self.name})"
2419
+
2420
+
2372
2421
  class Document(object):
2373
2422
  """A Document is a collection of metadata and a set of content nodes."""
2374
2423
 
@@ -2384,6 +2433,18 @@ class Document(object):
2384
2433
  def get_exceptions(self) -> List[ContentException]:
2385
2434
  return self._persistence_layer.get_exceptions()
2386
2435
 
2436
def get_external_data(self) -> dict:
    """Return the external data reported by this document's persistence layer."""
    persistence = self._persistence_layer
    return persistence.get_external_data()
2438
+
2439
def set_external_data(self, external_data:dict):
    """Pass ``external_data`` to the persistence layer and return its result."""
    persistence = self._persistence_layer
    outcome = persistence.set_external_data(external_data)
    return outcome
2441
+
2442
def get_steps(self) -> list[ProcessingStep]:
    """Return the processing steps reported by this document's persistence layer."""
    persistence = self._persistence_layer
    return persistence.get_steps()
2444
+
2445
def set_steps(self, steps: list[ProcessingStep]):
    """Pass ``steps`` to this document's persistence layer for storage."""
    persistence = self._persistence_layer
    persistence.set_steps(steps)
2447
+
2387
2448
  def replace_exceptions(self, exceptions: List[ContentException]):
2388
2449
  self._persistence_layer.replace_exceptions(exceptions)
2389
2450
 
@@ -13,7 +13,7 @@ from kodexa.model.model import (
13
13
  DocumentMetadata,
14
14
  ContentFeature,
15
15
  ContentException,
16
- ModelInsight,
16
+ ModelInsight, ProcessingStep,
17
17
  )
18
18
 
19
19
  logger = logging.getLogger()
@@ -1122,6 +1122,90 @@ class SqliteDocumentPersistence(object):
1122
1122
 
1123
1123
  return content_nodes
1124
1124
 
1125
def __ensure_ed_table_exists(self):
    """
    Make sure the 'ed' table is present and holds at least one row.

    The table is created when missing. If it contains no rows, a single row
    holding a msgpack-encoded empty dict is inserted.
    """
    self.cursor.execute("""
        CREATE TABLE IF NOT EXISTS ed (
            obj BLOB
        )
    """)

    # Nothing more to do when the table already has a row
    row_count = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()[0]
    if row_count != 0:
        return

    # Seed the table with an empty dict
    empty_blob = sqlite3.Binary(msgpack.packb({}))
    self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [empty_blob])
1140
+
1141
def set_external_data(self, external_data: dict):
    """
    Store the external data for the document.

    Args:
        external_data (dict): The data to store. It is packed with msgpack
            and written to the row with rowid 1 of the 'ed' table.
    """
    self.__ensure_ed_table_exists()

    blob = sqlite3.Binary(msgpack.packb(external_data))
    update_sql = "UPDATE ed SET obj = ? WHERE rowid = 1"
    self.cursor.execute(update_sql, [blob])

    self.connection.commit()
1152
+
1153
def get_external_data(self) -> dict:
    """
    Read the external data stored for this document.

    Returns:
        dict: The unpacked contents of the 'ed' row with rowid 1, or an
        empty dict when that row is missing or its value is empty.
    """
    self.__ensure_ed_table_exists()

    row = self.cursor.execute("SELECT obj FROM ed WHERE rowid = 1").fetchone()
    if not row or not row[0]:
        return {}

    return msgpack.unpackb(row[0])
1165
+
1166
def __ensure_steps_table_exists(self):
    """
    Make sure the 'steps' table is present and holds at least one row.

    The table is created when missing. If it contains no rows, a single row
    holding a msgpack-encoded empty list is inserted.
    """
    self.cursor.execute("""
        CREATE TABLE IF NOT EXISTS steps (
            obj BLOB
        )
    """)

    # Nothing more to do when the table already has a row
    row_count = self.cursor.execute("SELECT COUNT(*) FROM steps").fetchone()[0]
    if row_count != 0:
        return

    # Seed the table with an empty list
    empty_blob = sqlite3.Binary(msgpack.packb([]))
    self.cursor.execute("INSERT INTO steps (obj) VALUES (?)", [empty_blob])
1181
+
1182
def set_steps(self, steps: List[ProcessingStep]):
    """
    Store the processing steps for the document.

    Args:
        steps (List[ProcessingStep]): The steps to store. Each one is
            converted with ``to_dict()`` and the resulting list is packed
            with msgpack into the row with rowid 1 of the 'steps' table.
    """
    self.__ensure_steps_table_exists()

    step_dicts = []
    for step in steps:
        step_dicts.append(step.to_dict())

    blob = sqlite3.Binary(msgpack.packb(step_dicts))
    update_sql = "UPDATE steps SET obj = ? WHERE rowid = 1"
    self.cursor.execute(update_sql, [blob])

    self.connection.commit()
1194
+
1195
def get_steps(self) -> List[ProcessingStep]:
    """
    Read the processing steps stored for this document.

    Returns:
        List[ProcessingStep]: One ProcessingStep built from each stored
        entry, or an empty list when the 'steps' row with rowid 1 is missing
        or its value is empty.
    """
    self.__ensure_steps_table_exists()

    row = self.cursor.execute("SELECT obj FROM steps WHERE rowid = 1").fetchone()
    if not row or not row[0]:
        return []

    stored_steps = msgpack.unpackb(row[0])
    return [ProcessingStep(**fields) for fields in stored_steps]
1208
+
1125
1209
 
1126
1210
  class SimpleObjectCache(object):
1127
1211
  """
@@ -1260,6 +1344,34 @@ class PersistenceManager(object):
1260
1344
  document, filename, delete_on_close, inmemory=inmemory
1261
1345
  )
1262
1346
 
1347
def get_steps(self) -> list[ProcessingStep]:
    """
    Fetch the processing steps for this document from the underlying persistence.

    :return: the steps returned by the underlying persistence
    """
    backend = self._underlying_persistence
    return backend.get_steps()
1354
+
1355
def set_steps(self, steps: list[ProcessingStep]):
    """
    Pass the processing steps for this document to the underlying persistence.

    :param steps: the steps to store
    :return:
    """
    backend = self._underlying_persistence
    backend.set_steps(steps)
1357
+
1358
def get_external_data(self) -> dict:
    """
    Fetch the external data object for this document from the underlying persistence.

    :return: dict of the external data
    """
    backend = self._underlying_persistence
    return backend.get_external_data()
1365
+
1366
def set_external_data(self, external_data:dict):
    """
    Pass the external data for this document to the underlying persistence.

    :param external_data: dict representing the external data
    :return:
    """
    backend = self._underlying_persistence
    backend.set_external_data(external_data)
1374
+
1263
1375
  def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
1264
1376
  """
1265
1377
  Retrieves all nodes of a given type from the underlying persistence layer.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.0.10217227753
3
+ Version: 7.0.10350737552
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -9,9 +9,9 @@ kodexa/model/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
9
9
  kodexa/model/entities/check_response.py,sha256=eqBHxO6G2OAziL3p9bHGI-oiPkAG82H6Choc8wyvtM4,3949
10
10
  kodexa/model/entities/product.py,sha256=ZDpHuBE_9FJ-klnkyBvTfPwYOqBkM1wraZMtHqNA8FQ,3526
11
11
  kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
12
- kodexa/model/model.py,sha256=BURhrOEVTTlKkDJho5CEFeLR9Dyq157mztlvbAdL1d4,115769
12
+ kodexa/model/model.py,sha256=v9dUIWXtzRXFBg3UI2zpPql1oUP2bQpey5fKLRXjhsA,117857
13
13
  kodexa/model/objects.py,sha256=C4ERIaB0Avuf2FHIvEsyyO_HAjvDVXW_CB6-EEEhUtA,176751
14
- kodexa/model/persistence.py,sha256=sx5FwTSsWMdAZpAs0-6PqyULHkQyNQClApUKJZ-ly8M,62032
14
+ kodexa/model/persistence.py,sha256=XJ4mi1dG1E1cOwbNjj9E9-_6kXsqKDjxVEJDAfI4rjo,66052
15
15
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
16
16
  kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
17
17
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
@@ -39,7 +39,7 @@ kodexa/testing/test_components.py,sha256=g5lP-GY0nTHuH5cIEw45vIejEeBaWkPKQGHL36j
39
39
  kodexa/testing/test_utils.py,sha256=DrLCkHxdb6AbZ-X3WmTMbQmnVIm55VEBL8MjtUK9POs,14021
40
40
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
41
41
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- kodexa-7.0.10217227753.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
43
- kodexa-7.0.10217227753.dist-info/METADATA,sha256=RqtKO6MqfpRacsb3dpza0ok4EfaSn5Qg5c4AzHsXCZs,3533
44
- kodexa-7.0.10217227753.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
45
- kodexa-7.0.10217227753.dist-info/RECORD,,
42
+ kodexa-7.0.10350737552.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
43
+ kodexa-7.0.10350737552.dist-info/METADATA,sha256=SFvyMuqsVbNSBZ7tal88dBHT4temLjiySjugrvxE2Bs,3533
44
+ kodexa-7.0.10350737552.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
45
+ kodexa-7.0.10350737552.dist-info/RECORD,,