kodexa 7.0.12399109365__tar.gz → 7.4.412416252968__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/PKG-INFO +1 -1
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/model.py +7 -4
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/persistence.py +96 -34
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/pyproject.toml +2 -2
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/LICENSE +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/README.md +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/assistant/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/assistant/assistant.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/connectors/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/connectors/connectors.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/dataclasses/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/dataclasses/templates/llm_data_class.j2 +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/base.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/check_response.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/product.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/product_subscription.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/objects.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/utils.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/pipeline/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/pipeline/pipeline.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/client.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/interaction.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/platform/kodexa.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/ast.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/core.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lexrules.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lextab.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/lextab.pyi +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parserules.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parserules.pyi +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parsetab.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/selectors/parsetab.pyi +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/azure_models.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/bbox_common.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/spatial/table_form_common.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/steps/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/steps/common.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/test_components.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/testing/test_utils.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/training/__init__.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/training/train_utils.py +0 -0
- {kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/utils/__init__.py +0 -0
@@ -2443,11 +2443,14 @@ class Document(object):
|
|
2443
2443
|
def get_exceptions(self) -> List[ContentException]:
|
2444
2444
|
return self._persistence_layer.get_exceptions()
|
2445
2445
|
|
2446
|
-
def get_external_data(self) -> dict:
|
2447
|
-
return self._persistence_layer.get_external_data()
|
2446
|
+
def get_external_data(self, key="default") -> dict:
|
2447
|
+
return self._persistence_layer.get_external_data(key)
|
2448
2448
|
|
2449
|
-
def
|
2450
|
-
return self._persistence_layer.
|
2449
|
+
def get_external_data_keys(self) -> list[str]:
|
2450
|
+
return self._persistence_layer.get_external_data_keys()
|
2451
|
+
|
2452
|
+
def set_external_data(self, external_data:dict, key="default"):
|
2453
|
+
return self._persistence_layer.set_external_data(external_data, key)
|
2451
2454
|
|
2452
2455
|
def get_steps(self) -> list[ProcessingStep]:
|
2453
2456
|
return self._persistence_layer.get_steps()
|
@@ -111,9 +111,10 @@ class SqliteDocumentPersistence(object):
|
|
111
111
|
|
112
112
|
self.cursor = self.connection.cursor()
|
113
113
|
self.cursor.execute("PRAGMA journal_mode=OFF")
|
114
|
-
self.cursor.execute("
|
115
|
-
self.cursor.execute("
|
116
|
-
self.cursor.execute("
|
114
|
+
self.cursor.execute("PRAGMA temp_store=MEMORY")
|
115
|
+
self.cursor.execute("PRAGMA mmap_size=30000000000")
|
116
|
+
self.cursor.execute("PRAGMA cache_size=10000")
|
117
|
+
self.cursor.execute("PRAGMA page_size=4096")
|
117
118
|
|
118
119
|
try:
|
119
120
|
# We need to populate node_type_id_by_name
|
@@ -865,10 +866,11 @@ class SqliteDocumentPersistence(object):
|
|
865
866
|
self.connection.commit()
|
866
867
|
self.cursor.execute("VACUUM")
|
867
868
|
self.cursor = self.connection.cursor()
|
868
|
-
self.cursor.execute("
|
869
|
-
self.cursor.execute("
|
870
|
-
self.cursor.execute("
|
871
|
-
self.cursor.execute("
|
869
|
+
self.cursor.execute("PRAGMA journal_mode=OFF")
|
870
|
+
self.cursor.execute("PRAGMA temp_store=MEMORY")
|
871
|
+
self.cursor.execute("PRAGMA mmap_size=30000000000")
|
872
|
+
self.cursor.execute("PRAGMA cache_size=10000")
|
873
|
+
self.cursor.execute("PRAGMA page_size=4096")
|
872
874
|
|
873
875
|
def dump_in_memory_db_to_file(self):
|
874
876
|
# Connect to a new or existing database file
|
@@ -1154,22 +1156,6 @@ class SqliteDocumentPersistence(object):
|
|
1154
1156
|
|
1155
1157
|
return content_nodes
|
1156
1158
|
|
1157
|
-
def __ensure_ed_table_exists(self):
|
1158
|
-
"""
|
1159
|
-
Ensure the 'ed' table exists in the database.
|
1160
|
-
Creates the table if it does not exist.
|
1161
|
-
"""
|
1162
|
-
self.cursor.execute("""
|
1163
|
-
CREATE TABLE IF NOT EXISTS ed (
|
1164
|
-
obj BLOB
|
1165
|
-
)
|
1166
|
-
""")
|
1167
|
-
|
1168
|
-
# Check if the table has any rows, if not, insert an initial empty row
|
1169
|
-
result = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()
|
1170
|
-
if result[0] == 0:
|
1171
|
-
self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb({}))])
|
1172
|
-
|
1173
1159
|
def __ensure_validations_table_exists(self):
|
1174
1160
|
"""
|
1175
1161
|
Ensure the 'validations' table exists in the database.
|
@@ -1211,31 +1197,98 @@ class SqliteDocumentPersistence(object):
|
|
1211
1197
|
return [DocumentTaxonValidation.model_validate(v) for v in msgpack.unpackb(result[0])]
|
1212
1198
|
return []
|
1213
1199
|
|
1214
|
-
def set_external_data(self, external_data: dict):
|
1200
|
+
def set_external_data(self, external_data: dict, key: str = "default"):
|
1215
1201
|
"""
|
1216
|
-
Sets the external data for the document.
|
1202
|
+
Sets the external data for the document for a specific key.
|
1217
1203
|
|
1218
1204
|
Args:
|
1219
1205
|
external_data (dict): The external data to store, must be JSON serializable.
|
1206
|
+
key (str): The key to store the data under, defaults to "default"
|
1220
1207
|
"""
|
1221
1208
|
self.__ensure_ed_table_exists()
|
1222
1209
|
serialized_data = sqlite3.Binary(msgpack.packb(external_data))
|
1223
|
-
self.cursor.execute("
|
1210
|
+
self.cursor.execute("DELETE FROM ed WHERE key = ?", [key])
|
1211
|
+
self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)", [key, serialized_data])
|
1224
1212
|
self.connection.commit()
|
1225
1213
|
|
1226
|
-
def get_external_data(self) -> dict:
|
1214
|
+
def get_external_data(self, key: str = "default") -> dict:
|
1227
1215
|
"""
|
1228
|
-
Gets the external data associated with this document.
|
1216
|
+
Gets the external data associated with this document for a specific key.
|
1217
|
+
|
1218
|
+
Args:
|
1219
|
+
key (str): The key to retrieve data for, defaults to "default"
|
1229
1220
|
|
1230
1221
|
Returns:
|
1231
|
-
dict: The external data stored in the ed table.
|
1222
|
+
dict: The external data stored in the ed table for the given key.
|
1232
1223
|
"""
|
1233
1224
|
self.__ensure_ed_table_exists()
|
1234
|
-
result = self.cursor.execute("SELECT obj FROM ed WHERE
|
1225
|
+
result = self.cursor.execute("SELECT obj FROM ed WHERE key = ?", [key]).fetchone()
|
1235
1226
|
if result and result[0]:
|
1236
1227
|
return msgpack.unpackb(result[0])
|
1237
1228
|
return {}
|
1238
1229
|
|
1230
|
+
def get_external_data_keys(self) -> List[str]:
|
1231
|
+
"""
|
1232
|
+
Gets all keys under which external data is stored.
|
1233
|
+
|
1234
|
+
Returns:
|
1235
|
+
List[str]: A list of all keys that have external data stored.
|
1236
|
+
"""
|
1237
|
+
self.__ensure_ed_table_exists()
|
1238
|
+
results = self.cursor.execute("SELECT key FROM ed").fetchall()
|
1239
|
+
return [row[0] for row in results]
|
1240
|
+
|
1241
|
+
def __ensure_ed_table_exists(self):
|
1242
|
+
"""
|
1243
|
+
Ensure the 'ed' table exists in the database.
|
1244
|
+
Creates the table if it does not exist.
|
1245
|
+
"""
|
1246
|
+
# First check if the old table exists and has id column
|
1247
|
+
old_table = self.cursor.execute("""
|
1248
|
+
SELECT name FROM sqlite_master
|
1249
|
+
WHERE type='table' AND name='ed'
|
1250
|
+
""").fetchone()
|
1251
|
+
|
1252
|
+
if old_table:
|
1253
|
+
# Check if table has id column
|
1254
|
+
table_info = self.cursor.execute("PRAGMA table_info(ed)").fetchall()
|
1255
|
+
has_id_column = any(col[1] == 'id' for col in table_info)
|
1256
|
+
|
1257
|
+
if has_id_column:
|
1258
|
+
# Get the old data and drop the table
|
1259
|
+
data = self.cursor.execute("SELECT obj FROM ed").fetchone()
|
1260
|
+
self.cursor.execute("DROP TABLE ed")
|
1261
|
+
|
1262
|
+
# Create new table with key column
|
1263
|
+
self.cursor.execute("""
|
1264
|
+
CREATE TABLE ed (
|
1265
|
+
key TEXT PRIMARY KEY,
|
1266
|
+
obj BLOB
|
1267
|
+
)
|
1268
|
+
""")
|
1269
|
+
|
1270
|
+
# If there was data in the old table, insert it with default key
|
1271
|
+
if data:
|
1272
|
+
self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
|
1273
|
+
["default", data[0]])
|
1274
|
+
else:
|
1275
|
+
# Table exists but doesn't need migration - do nothing
|
1276
|
+
return
|
1277
|
+
else:
|
1278
|
+
# Create new table if it doesn't exist
|
1279
|
+
self.cursor.execute("""
|
1280
|
+
CREATE TABLE IF NOT EXISTS ed (
|
1281
|
+
key TEXT PRIMARY KEY,
|
1282
|
+
obj BLOB
|
1283
|
+
)
|
1284
|
+
""")
|
1285
|
+
|
1286
|
+
# Check if default key exists, if not insert empty data
|
1287
|
+
result = self.cursor.execute("SELECT COUNT(*) FROM ed WHERE key = 'default'").fetchone()
|
1288
|
+
if result[0] == 0:
|
1289
|
+
self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
|
1290
|
+
["default", sqlite3.Binary(msgpack.packb({}))])
|
1291
|
+
|
1239
1292
|
def __ensure_steps_table_exists(self):
|
1240
1293
|
"""
|
1241
1294
|
Ensure the 'steps' table exists in the database.
|
@@ -1434,22 +1487,31 @@ class PersistenceManager(object):
|
|
1434
1487
|
def get_validations(self) -> list[DocumentTaxonValidation]:
|
1435
1488
|
return self._underlying_persistence.get_validations()
|
1436
1489
|
|
1437
|
-
def get_external_data(self) -> dict:
|
1490
|
+
def get_external_data(self, key="default") -> dict:
|
1438
1491
|
"""
|
1439
1492
|
Gets the external data object associated with this document
|
1440
1493
|
|
1441
1494
|
:return: dict of the external data
|
1442
1495
|
"""
|
1443
|
-
return self._underlying_persistence.get_external_data()
|
1496
|
+
return self._underlying_persistence.get_external_data(key)
|
1497
|
+
|
1498
|
+
def get_external_data_keys(self) -> List[str]:
|
1499
|
+
"""
|
1500
|
+
Gets all keys under which external data is stored.
|
1501
|
+
|
1502
|
+
Returns:
|
1503
|
+
List[str]: A list of all keys that have external data stored.
|
1504
|
+
"""
|
1505
|
+
return self._underlying_persistence.get_external_data_keys()
|
1444
1506
|
|
1445
|
-
def set_external_data(self, external_data:dict):
|
1507
|
+
def set_external_data(self, external_data:dict, key="default"):
|
1446
1508
|
"""
|
1447
1509
|
Sets the external data for this document
|
1448
1510
|
|
1449
1511
|
:param external_data: dict representing the external data, must be JSON serializable
|
1450
1512
|
:return:
|
1451
1513
|
"""
|
1452
|
-
self._underlying_persistence.set_external_data(external_data)
|
1514
|
+
self._underlying_persistence.set_external_data(external_data, key)
|
1453
1515
|
|
1454
1516
|
def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
|
1455
1517
|
"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "kodexa"
|
3
|
-
version = "7.
|
3
|
+
version = "7.4.412416252968"
|
4
4
|
description = "Python SDK for the Kodexa Platform"
|
5
5
|
authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
|
6
6
|
readme = "README.md"
|
@@ -14,7 +14,7 @@ classifiers = [
|
|
14
14
|
'Intended Audience :: Developers',
|
15
15
|
'Topic :: Software Development :: Libraries',
|
16
16
|
'''License :: OSI Approved :: Apache Software License''', # noqa
|
17
|
-
'Programming Language :: Python :: 3.
|
17
|
+
'Programming Language :: Python :: 3.11',
|
18
18
|
]
|
19
19
|
|
20
20
|
[tool.poetry.dependencies]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/dataclasses/templates/llm_data_class.j2
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{kodexa-7.0.12399109365 → kodexa-7.4.412416252968}/kodexa/model/entities/product_subscription.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|