kodexa 7.0.12399109365__tar.gz → 7.4.412416252968__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/PKG-INFO +1 -1
  2. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/model.py +7 -4
  3. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/persistence.py +96 -34
  4. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/pyproject.toml +2 -2
  5. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/LICENSE +0 -0
  6. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/README.md +0 -0
  7. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/__init__.py +0 -0
  8. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/assistant/__init__.py +0 -0
  9. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/assistant/assistant.py +0 -0
  10. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/connectors/__init__.py +0 -0
  11. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/connectors/connectors.py +0 -0
  12. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/dataclasses/__init__.py +0 -0
  13. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/dataclasses/templates/llm_data_class.j2 +0 -0
  14. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/__init__.py +0 -0
  15. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/base.py +0 -0
  16. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/__init__.py +0 -0
  17. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/check_response.py +0 -0
  18. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/product.py +0 -0
  19. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/product_subscription.py +0 -0
  20. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/objects.py +0 -0
  21. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/utils.py +0 -0
  22. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/pipeline/__init__.py +0 -0
  23. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/pipeline/pipeline.py +0 -0
  24. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/__init__.py +0 -0
  25. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/client.py +0 -0
  26. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/interaction.py +0 -0
  27. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/kodexa.py +0 -0
  28. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/__init__.py +0 -0
  29. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/ast.py +0 -0
  30. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/core.py +0 -0
  31. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lexrules.py +0 -0
  32. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lextab.py +0 -0
  33. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lextab.pyi +0 -0
  34. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parserules.py +0 -0
  35. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parserules.pyi +0 -0
  36. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parsetab.py +0 -0
  37. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parsetab.pyi +0 -0
  38. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/__init__.py +0 -0
  39. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/azure_models.py +0 -0
  40. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/bbox_common.py +0 -0
  41. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/table_form_common.py +0 -0
  42. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/steps/__init__.py +0 -0
  43. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/steps/common.py +0 -0
  44. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/__init__.py +0 -0
  45. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/test_components.py +0 -0
  46. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/test_utils.py +0 -0
  47. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/training/__init__.py +0 -0
  48. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/training/train_utils.py +0 -0
  49. {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.0.12399109365
3
+ Version: 7.4.412416252968
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -2443,11 +2443,14 @@ class Document(object):
2443
2443
  def get_exceptions(self) -> List[ContentException]:
2444
2444
  return self._persistence_layer.get_exceptions()
2445
2445
 
2446
- def get_external_data(self) -> dict:
2447
- return self._persistence_layer.get_external_data()
2446
+ def get_external_data(self, key="default") -> dict:
2447
+ return self._persistence_layer.get_external_data(key)
2448
2448
 
2449
- def set_external_data(self, external_data:dict):
2450
- return self._persistence_layer.set_external_data(external_data)
2449
+ def get_external_data_keys(self) -> list[str]:
2450
+ return self._persistence_layer.get_external_data_keys()
2451
+
2452
+ def set_external_data(self, external_data:dict, key="default"):
2453
+ return self._persistence_layer.set_external_data(external_data, key)
2451
2454
 
2452
2455
  def get_steps(self) -> list[ProcessingStep]:
2453
2456
  return self._persistence_layer.get_steps()
@@ -111,9 +111,10 @@ class SqliteDocumentPersistence(object):
111
111
 
112
112
  self.cursor = self.connection.cursor()
113
113
  self.cursor.execute("PRAGMA journal_mode=OFF")
114
- self.cursor.execute("pragma temp_store = memory")
115
- self.cursor.execute("pragma mmap_size = 30000000000")
116
- self.cursor.execute("pragma cache_size = 10000") # Set the cache size to 10,000 pages
114
+ self.cursor.execute("PRAGMA temp_store=MEMORY")
115
+ self.cursor.execute("PRAGMA mmap_size=30000000000")
116
+ self.cursor.execute("PRAGMA cache_size=10000")
117
+ self.cursor.execute("PRAGMA page_size=4096")
117
118
 
118
119
  try:
119
120
  # We need to populate node_type_id_by_name
@@ -865,10 +866,11 @@ class SqliteDocumentPersistence(object):
865
866
  self.connection.commit()
866
867
  self.cursor.execute("VACUUM")
867
868
  self.cursor = self.connection.cursor()
868
- self.cursor.execute("pragma journal_mode=OFF")
869
- self.cursor.execute("pragma temp_store = memory")
870
- self.cursor.execute("pragma mmap_size = 30000000000")
871
- self.cursor.execute("pragma cache_size = 10000") # Set the cache size to 10,000 pages
869
+ self.cursor.execute("PRAGMA journal_mode=OFF")
870
+ self.cursor.execute("PRAGMA temp_store=MEMORY")
871
+ self.cursor.execute("PRAGMA mmap_size=30000000000")
872
+ self.cursor.execute("PRAGMA cache_size=10000")
873
+ self.cursor.execute("PRAGMA page_size=4096")
872
874
 
873
875
  def dump_in_memory_db_to_file(self):
874
876
  # Connect to a new or existing database file
@@ -1154,22 +1156,6 @@ class SqliteDocumentPersistence(object):
1154
1156
 
1155
1157
  return content_nodes
1156
1158
 
1157
- def __ensure_ed_table_exists(self):
1158
- """
1159
- Ensure the 'ed' table exists in the database.
1160
- Creates the table if it does not exist.
1161
- """
1162
- self.cursor.execute("""
1163
- CREATE TABLE IF NOT EXISTS ed (
1164
- obj BLOB
1165
- )
1166
- """)
1167
-
1168
- # Check if the table has any rows, if not, insert an initial empty row
1169
- result = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()
1170
- if result[0] == 0:
1171
- self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb({}))])
1172
-
1173
1159
  def __ensure_validations_table_exists(self):
1174
1160
  """
1175
1161
  Ensure the 'validations' table exists in the database.
@@ -1211,31 +1197,98 @@ class SqliteDocumentPersistence(object):
1211
1197
  return [DocumentTaxonValidation.model_validate(v) for v in msgpack.unpackb(result[0])]
1212
1198
  return []
1213
1199
 
1214
- def set_external_data(self, external_data: dict):
1200
+ def set_external_data(self, external_data: dict, key: str = "default"):
1215
1201
  """
1216
- Sets the external data for the document.
1202
+ Sets the external data for the document for a specific key.
1217
1203
 
1218
1204
  Args:
1219
1205
  external_data (dict): The external data to store, must be JSON serializable.
1206
+ key (str): The key to store the data under, defaults to "default"
1220
1207
  """
1221
1208
  self.__ensure_ed_table_exists()
1222
1209
  serialized_data = sqlite3.Binary(msgpack.packb(external_data))
1223
- self.cursor.execute("UPDATE ed SET obj = ? WHERE rowid = 1", [serialized_data])
1210
+ self.cursor.execute("DELETE FROM ed WHERE key = ?", [key])
1211
+ self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)", [key, serialized_data])
1224
1212
  self.connection.commit()
1225
1213
 
1226
- def get_external_data(self) -> dict:
1214
+ def get_external_data(self, key: str = "default") -> dict:
1227
1215
  """
1228
- Gets the external data associated with this document.
1216
+ Gets the external data associated with this document for a specific key.
1217
+
1218
+ Args:
1219
+ key (str): The key to retrieve data for, defaults to "default"
1229
1220
 
1230
1221
  Returns:
1231
- dict: The external data stored in the ed table.
1222
+ dict: The external data stored in the ed table for the given key.
1232
1223
  """
1233
1224
  self.__ensure_ed_table_exists()
1234
- result = self.cursor.execute("SELECT obj FROM ed WHERE rowid = 1").fetchone()
1225
+ result = self.cursor.execute("SELECT obj FROM ed WHERE key = ?", [key]).fetchone()
1235
1226
  if result and result[0]:
1236
1227
  return msgpack.unpackb(result[0])
1237
1228
  return {}
1238
1229
 
1230
+ def get_external_data_keys(self) -> List[str]:
1231
+ """
1232
+ Gets all keys under which external data is stored.
1233
+
1234
+ Returns:
1235
+ List[str]: A list of all keys that have external data stored.
1236
+ """
1237
+ self.__ensure_ed_table_exists()
1238
+ results = self.cursor.execute("SELECT key FROM ed").fetchall()
1239
+ return [row[0] for row in results]
1240
+
1241
+ def __ensure_ed_table_exists(self):
1242
+ """
1243
+ Ensure the 'ed' table exists in the database.
1244
+ Creates the table if it does not exist.
1245
+ """
1246
+ # First check if the old table exists and has id column
1247
+ old_table = self.cursor.execute("""
1248
+ SELECT name FROM sqlite_master
1249
+ WHERE type='table' AND name='ed'
1250
+ """).fetchone()
1251
+
1252
+ if old_table:
1253
+ # Check if table has id column
1254
+ table_info = self.cursor.execute("PRAGMA table_info(ed)").fetchall()
1255
+ has_id_column = any(col[1] == 'id' for col in table_info)
1256
+
1257
+ if has_id_column:
1258
+ # Get the old data and drop the table
1259
+ data = self.cursor.execute("SELECT obj FROM ed").fetchone()
1260
+ self.cursor.execute("DROP TABLE ed")
1261
+
1262
+ # Create new table with key column
1263
+ self.cursor.execute("""
1264
+ CREATE TABLE ed (
1265
+ key TEXT PRIMARY KEY,
1266
+ obj BLOB
1267
+ )
1268
+ """)
1269
+
1270
+ # If there was data in the old table, insert it with default key
1271
+ if data:
1272
+ self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
1273
+ ["default", data[0]])
1274
+ else:
1275
+ # Table exists but doesn't need migration - do nothing
1276
+ return
1277
+ else:
1278
+ # Create new table if it doesn't exist
1279
+ self.cursor.execute("""
1280
+ CREATE TABLE IF NOT EXISTS ed (
1281
+ key TEXT PRIMARY KEY,
1282
+ obj BLOB
1283
+ )
1284
+ """)
1285
+
1286
+ # Check if default key exists, if not insert empty data
1287
+ result = self.cursor.execute("SELECT COUNT(*) FROM ed WHERE key = 'default'").fetchone()
1288
+ if result[0] == 0:
1289
+ self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
1290
+ ["default", sqlite3.Binary(msgpack.packb({}))])
1291
+
1239
1292
  def __ensure_steps_table_exists(self):
1240
1293
  """
1241
1294
  Ensure the 'steps' table exists in the database.
@@ -1434,22 +1487,31 @@ class PersistenceManager(object):
1434
1487
  def get_validations(self) -> list[DocumentTaxonValidation]:
1435
1488
  return self._underlying_persistence.get_validations()
1436
1489
 
1437
- def get_external_data(self) -> dict:
1490
+ def get_external_data(self, key="default") -> dict:
1438
1491
  """
1439
1492
  Gets the external data object associated with this document
1440
1493
 
1441
1494
  :return: dict of the external data
1442
1495
  """
1443
- return self._underlying_persistence.get_external_data()
1496
+ return self._underlying_persistence.get_external_data(key)
1497
+
1498
+ def get_external_data_keys(self) -> List[str]:
1499
+ """
1500
+ Gets all keys under which external data is stored.
1501
+
1502
+ Returns:
1503
+ List[str]: A list of all keys that have external data stored.
1504
+ """
1505
+ return self._underlying_persistence.get_external_data_keys()
1444
1506
 
1445
- def set_external_data(self, external_data:dict):
1507
+ def set_external_data(self, external_data:dict, key="default"):
1446
1508
  """
1447
1509
  Sets the external data for this document
1448
1510
 
1449
1511
  :param external_data: dict representing the external data, must be JSON serializable
1450
1512
  :return:
1451
1513
  """
1452
- self._underlying_persistence.set_external_data(external_data)
1514
+ self._underlying_persistence.set_external_data(external_data, key)
1453
1515
 
1454
1516
  def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
1455
1517
  """
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "kodexa"
3
- version = "7.0.012399109365"
3
+ version = "7.4.412416252968"
4
4
  description = "Python SDK for the Kodexa Platform"
5
5
  authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
6
6
  readme = "README.md"
@@ -14,7 +14,7 @@ classifiers = [
14
14
  'Intended Audience :: Developers',
15
15
  'Topic :: Software Development :: Libraries',
16
16
  '''License :: OSI Approved :: Apache Software License''', # noqa
17
- 'Programming Language :: Python :: 3.10',
17
+ 'Programming Language :: Python :: 3.11',
18
18
  ]
19
19
 
20
20
  [tool.poetry.dependencies]