kodexa 7.0.12397101638__py3-none-any.whl → 7.0.12399073274__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/model/model.py +1 -1
- kodexa/model/persistence.py +40 -13
- kodexa/testing/test_utils.py +18 -18
- {kodexa-7.0.12397101638.dist-info → kodexa-7.0.12399073274.dist-info}/METADATA +1 -1
- {kodexa-7.0.12397101638.dist-info → kodexa-7.0.12399073274.dist-info}/RECORD +7 -7
- {kodexa-7.0.12397101638.dist-info → kodexa-7.0.12399073274.dist-info}/LICENSE +0 -0
- {kodexa-7.0.12397101638.dist-info → kodexa-7.0.12399073274.dist-info}/WHEEL +0 -0
kodexa/model/model.py
CHANGED
@@ -815,8 +815,8 @@ class ContentNode(object):
|
|
815
815
|
from kodexa.selectors.ast import SelectorContext
|
816
816
|
|
817
817
|
context = SelectorContext(self.document, first_only=first_only)
|
818
|
-
parsed_selector = parse(selector)
|
819
818
|
self.document.get_persistence().flush_cache()
|
819
|
+
parsed_selector = parse(selector)
|
820
820
|
return parsed_selector.resolve(self, variables, context)
|
821
821
|
|
822
822
|
def get_all_content(self, separator=" ", strip=True):
|
kodexa/model/persistence.py
CHANGED
@@ -3,6 +3,7 @@ import logging
|
|
3
3
|
import pathlib
|
4
4
|
import sqlite3
|
5
5
|
import tempfile
|
6
|
+
import time
|
6
7
|
import uuid
|
7
8
|
from typing import List, Optional
|
8
9
|
|
@@ -42,6 +43,22 @@ FEATURE_TYPE_LOOKUP = "select id from f_type where name = ?"
|
|
42
43
|
METADATA_INSERT = "insert into metadata(id,metadata) values (1,?)"
|
43
44
|
METADATA_DELETE = "delete from metadata where id=1"
|
44
45
|
|
46
|
+
# Configuration constants
|
47
|
+
CACHE_SIZE = 10000 # Number of nodes to cache
|
48
|
+
BATCH_SIZE = 1000 # Size of batches for bulk operations
|
49
|
+
SLOW_QUERY_THRESHOLD = 1.0 # Seconds
|
50
|
+
MAX_CONNECTIONS = 5 # Maximum number of database connections
|
51
|
+
|
52
|
+
def monitor_performance(func):
|
53
|
+
"""Performance monitoring decorator"""
|
54
|
+
def wrapper(*args, **kwargs):
|
55
|
+
start_time = time.time()
|
56
|
+
result = func(*args, **kwargs)
|
57
|
+
duration = time.time() - start_time
|
58
|
+
if duration > SLOW_QUERY_THRESHOLD:
|
59
|
+
logger.warning(f"Slow operation detected: {func.__name__}, duration: {duration}s")
|
60
|
+
return result
|
61
|
+
return wrapper
|
45
62
|
|
46
63
|
class SqliteDocumentPersistence(object):
|
47
64
|
"""
|
@@ -57,7 +74,7 @@ class SqliteDocumentPersistence(object):
|
|
57
74
|
The Sqlite persistence engine to support large scale documents (part of the V4 Kodexa Document Architecture)
|
58
75
|
"""
|
59
76
|
|
60
|
-
def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False):
|
77
|
+
def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False, persistence_manager=None):
|
61
78
|
self.document = document
|
62
79
|
|
63
80
|
self.node_types = {}
|
@@ -138,6 +155,7 @@ class SqliteDocumentPersistence(object):
|
|
138
155
|
|
139
156
|
return mem_conn
|
140
157
|
|
158
|
+
@monitor_performance
|
141
159
|
def get_all_tags(self):
|
142
160
|
"""
|
143
161
|
Retrieves all tags from the document.
|
@@ -153,6 +171,7 @@ class SqliteDocumentPersistence(object):
|
|
153
171
|
|
154
172
|
return features
|
155
173
|
|
174
|
+
@monitor_performance
|
156
175
|
def update_features(self, node):
|
157
176
|
"""
|
158
177
|
Updates the features of a given node in the document.
|
@@ -188,6 +207,7 @@ class SqliteDocumentPersistence(object):
|
|
188
207
|
self.cursor.execute("DELETE FROM ft where cn_id=?", [node.uuid])
|
189
208
|
self.cursor.executemany(FEATURE_INSERT, all_features)
|
190
209
|
|
210
|
+
@monitor_performance
|
191
211
|
def update_node(self, node):
|
192
212
|
"""
|
193
213
|
Updates a given node in the document.
|
@@ -200,6 +220,7 @@ class SqliteDocumentPersistence(object):
|
|
200
220
|
[node.index, node._parent_uuid, node.uuid],
|
201
221
|
)
|
202
222
|
|
223
|
+
@monitor_performance
|
203
224
|
def get_content_nodes(self, node_type, parent_node: ContentNode, include_children):
|
204
225
|
"""
|
205
226
|
Retrieves content nodes from the document based on the given parameters.
|
@@ -213,8 +234,8 @@ class SqliteDocumentPersistence(object):
|
|
213
234
|
list: A list of content nodes that match the given parameters.
|
214
235
|
"""
|
215
236
|
nodes = []
|
216
|
-
|
217
|
-
|
237
|
+
if not self.connection.in_transaction:
|
238
|
+
self.cursor.execute("BEGIN TRANSACTION")
|
218
239
|
if include_children:
|
219
240
|
if node_type == "*":
|
220
241
|
query = """
|
@@ -284,6 +305,7 @@ class SqliteDocumentPersistence(object):
|
|
284
305
|
],
|
285
306
|
).fetchall()
|
286
307
|
except StopIteration:
|
308
|
+
self.connection.commit()
|
287
309
|
return []
|
288
310
|
else:
|
289
311
|
query = "select id, pid, nt, idx from cn where pid=? and nt=? order by idx"
|
@@ -300,11 +322,14 @@ class SqliteDocumentPersistence(object):
|
|
300
322
|
],
|
301
323
|
).fetchall()
|
302
324
|
except StopIteration:
|
325
|
+
self.connection.commit()
|
303
326
|
return []
|
304
327
|
|
305
328
|
for raw_node in list(results):
|
306
329
|
nodes.append(self.__build_node(raw_node))
|
307
330
|
|
331
|
+
self.connection.commit()
|
332
|
+
|
308
333
|
return nodes
|
309
334
|
|
310
335
|
def initialize(self):
|
@@ -326,6 +351,7 @@ class SqliteDocumentPersistence(object):
|
|
326
351
|
self.cursor.close()
|
327
352
|
self.connection.close()
|
328
353
|
|
354
|
+
@monitor_performance
|
329
355
|
def get_max_feature_id(self):
|
330
356
|
"""
|
331
357
|
Retrieves the maximum feature id from the document.
|
@@ -396,6 +422,7 @@ class SqliteDocumentPersistence(object):
|
|
396
422
|
|
397
423
|
self.__update_metadata()
|
398
424
|
|
425
|
+
@monitor_performance
|
399
426
|
def content_node_count(self):
|
400
427
|
"""
|
401
428
|
Counts the number of content nodes in the document.
|
@@ -405,6 +432,7 @@ class SqliteDocumentPersistence(object):
|
|
405
432
|
"""
|
406
433
|
self.cursor.execute("select * from cn").fetchall()
|
407
434
|
|
435
|
+
@monitor_performance
|
408
436
|
def get_feature_type_id(self, feature):
|
409
437
|
"""
|
410
438
|
Retrieves the id of a given feature.
|
@@ -466,6 +494,7 @@ class SqliteDocumentPersistence(object):
|
|
466
494
|
|
467
495
|
return result[0]
|
468
496
|
|
497
|
+
@monitor_performance
|
469
498
|
def __insert_node(self, node: ContentNode, parent, execute=True):
|
470
499
|
"""
|
471
500
|
Inserts a node into the document.
|
@@ -1385,7 +1414,7 @@ class PersistenceManager(object):
|
|
1385
1414
|
self.node_parent_cache = {}
|
1386
1415
|
|
1387
1416
|
self._underlying_persistence = SqliteDocumentPersistence(
|
1388
|
-
document, filename, delete_on_close, inmemory=inmemory
|
1417
|
+
document, filename, delete_on_close, inmemory=inmemory, persistence_manager=self
|
1389
1418
|
)
|
1390
1419
|
|
1391
1420
|
def get_steps(self) -> list[ProcessingStep]:
|
@@ -1523,7 +1552,6 @@ class PersistenceManager(object):
|
|
1523
1552
|
Returns:
|
1524
1553
|
List[Node]: A list of nodes tagged with the specified tag.
|
1525
1554
|
"""
|
1526
|
-
self.flush_cache()
|
1527
1555
|
return self._underlying_persistence.get_tagged_nodes(tag, tag_uuid)
|
1528
1556
|
|
1529
1557
|
def get_all_tagged_nodes(self):
|
@@ -1533,7 +1561,6 @@ class PersistenceManager(object):
|
|
1533
1561
|
Returns:
|
1534
1562
|
List[Node]: A list of all tagged nodes.
|
1535
1563
|
"""
|
1536
|
-
self.flush_cache()
|
1537
1564
|
return self._underlying_persistence.get_all_tagged_nodes()
|
1538
1565
|
|
1539
1566
|
def initialize(self):
|
@@ -1565,6 +1592,7 @@ class PersistenceManager(object):
|
|
1565
1592
|
"""
|
1566
1593
|
self._underlying_persistence.close()
|
1567
1594
|
|
1595
|
+
@monitor_performance
|
1568
1596
|
def flush_cache(self):
|
1569
1597
|
"""
|
1570
1598
|
Flushes the cache by merging it with the underlying persistence layer.
|
@@ -1574,11 +1602,14 @@ class PersistenceManager(object):
|
|
1574
1602
|
all_content_parts = []
|
1575
1603
|
all_features = []
|
1576
1604
|
node_id_with_features = []
|
1577
|
-
|
1578
|
-
logger.debug("Merging cache to persistence")
|
1579
1605
|
dirty_nodes = self.node_cache.get_dirty_objs()
|
1580
1606
|
|
1607
|
+
if len(dirty_nodes) == 0:
|
1608
|
+
return
|
1609
|
+
|
1581
1610
|
logger.debug(f"Identified {len(dirty_nodes)} nodes to update")
|
1611
|
+
if not self._underlying_persistence.connection.in_transaction:
|
1612
|
+
self._underlying_persistence.connection.execute("BEGIN TRANSACTION")
|
1582
1613
|
|
1583
1614
|
next_feature_id = self._underlying_persistence.get_max_feature_id()
|
1584
1615
|
for node in dirty_nodes:
|
@@ -1618,7 +1649,6 @@ class PersistenceManager(object):
|
|
1618
1649
|
|
1619
1650
|
self.node_cache.undirty(node)
|
1620
1651
|
|
1621
|
-
logger.debug(f"Writing {len(all_node_ids)} nodes")
|
1622
1652
|
self._underlying_persistence.cursor.executemany(
|
1623
1653
|
"DELETE FROM cn where id=?", all_node_ids
|
1624
1654
|
)
|
@@ -1631,14 +1661,11 @@ class PersistenceManager(object):
|
|
1631
1661
|
self._underlying_persistence.cursor.executemany(
|
1632
1662
|
"DELETE FROM cnp where cn_id=?", all_node_ids
|
1633
1663
|
)
|
1634
|
-
logger.debug(f"Writing {len(all_content_parts)} content parts")
|
1635
|
-
|
1636
1664
|
self._underlying_persistence.cursor.executemany(
|
1637
1665
|
CONTENT_NODE_PART_INSERT, all_content_parts
|
1638
1666
|
)
|
1639
|
-
|
1640
|
-
logger.debug(f"Writing {len(all_features)} features")
|
1641
1667
|
self._underlying_persistence.cursor.executemany(FEATURE_INSERT, all_features)
|
1668
|
+
self._underlying_persistence.connection.commit()
|
1642
1669
|
|
1643
1670
|
def get_content_nodes(self, node_type, parent_node, include_children):
|
1644
1671
|
"""
|
kodexa/testing/test_utils.py
CHANGED
@@ -333,27 +333,27 @@ class ExtensionPackUtil:
|
|
333
333
|
if options is None:
|
334
334
|
options = {}
|
335
335
|
|
336
|
-
for service in self.kodexa_metadata
|
337
|
-
if service
|
336
|
+
for service in self.kodexa_metadata["services"]:
|
337
|
+
if service["type"] == "action" and service["slug"] == action_slug:
|
338
338
|
# TODO We need to validate all the options
|
339
339
|
|
340
|
-
if len(service
|
340
|
+
if len(service["metadata"]["options"]) > 0:
|
341
341
|
option_names = []
|
342
|
-
for option in service
|
343
|
-
option_names.append(option
|
344
|
-
if option
|
345
|
-
options[option
|
346
|
-
if option
|
342
|
+
for option in service["metadata"]["options"]:
|
343
|
+
option_names.append(option["name"])
|
344
|
+
if option["name"] not in options and "default" in option and option["default"] is not None:
|
345
|
+
options[option["name"]] = option["default"]
|
346
|
+
if option["required"] and option["name"] not in options:
|
347
347
|
raise OptionException(
|
348
|
-
f"Missing required option {option
|
348
|
+
f"Missing required option {option['name']}"
|
349
349
|
)
|
350
350
|
|
351
351
|
for option_name in options.keys():
|
352
352
|
if option_name not in option_names:
|
353
353
|
# We need to determine if this is actually a group
|
354
354
|
is_group = False
|
355
|
-
for check_option in service
|
356
|
-
if check_option["group"] is not None:
|
355
|
+
for check_option in service["metadata"]["options"]:
|
356
|
+
if "group" in check_option and check_option["group"] is not None:
|
357
357
|
if check_option["group"]["name"] == option_name:
|
358
358
|
is_group = True
|
359
359
|
|
@@ -363,8 +363,8 @@ class ExtensionPackUtil:
|
|
363
363
|
)
|
364
364
|
|
365
365
|
# We need to create and return our action
|
366
|
-
module = importlib.import_module(service
|
367
|
-
klass = getattr(module, service
|
366
|
+
module = importlib.import_module(service["step"]["package"])
|
367
|
+
klass = getattr(module, service["step"]["class"])
|
368
368
|
new_instance = klass(**options)
|
369
369
|
|
370
370
|
# Since we will be using to access metadata we will need to
|
@@ -421,15 +421,15 @@ class ExtensionPackUtil:
|
|
421
421
|
if options is None:
|
422
422
|
options = {}
|
423
423
|
|
424
|
-
for service in self.kodexa_metadata
|
425
|
-
if service
|
424
|
+
for service in self.kodexa_metadata["services"]:
|
425
|
+
if service["type"] == "assistant" and service["slug"] == assistant_slug:
|
426
426
|
# TODO We need to validate all the options
|
427
427
|
|
428
428
|
# We need to create and return our action
|
429
429
|
|
430
|
-
logger.info(f"Creating new assistant {service
|
431
|
-
module = importlib.import_module(service
|
432
|
-
klass = getattr(module, service
|
430
|
+
logger.info(f"Creating new assistant {service['assistant']}")
|
431
|
+
module = importlib.import_module(service["assistant"]["package"])
|
432
|
+
klass = getattr(module, service["assistant"]["class"])
|
433
433
|
return klass(**options)
|
434
434
|
|
435
435
|
raise Exception("Unable to find the assistant " + assistant_slug)
|
@@ -11,9 +11,9 @@ kodexa/model/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
11
11
|
kodexa/model/entities/check_response.py,sha256=eqBHxO6G2OAziL3p9bHGI-oiPkAG82H6Choc8wyvtM4,3949
|
12
12
|
kodexa/model/entities/product.py,sha256=ZDpHuBE_9FJ-klnkyBvTfPwYOqBkM1wraZMtHqNA8FQ,3526
|
13
13
|
kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
|
14
|
-
kodexa/model/model.py,sha256=
|
14
|
+
kodexa/model/model.py,sha256=Bjy1n6_9kQLLch-wCUDpT31sTctE2kCGdi4cb1ptZN4,118647
|
15
15
|
kodexa/model/objects.py,sha256=4Oyjs6omlHfwziAK1m2tFk4jSnzN7lFdXACog07ed1c,185124
|
16
|
-
kodexa/model/persistence.py,sha256=
|
16
|
+
kodexa/model/persistence.py,sha256=Kai1M51TM56G0_0ol7OIVf40cnJXzpZ3NHAa20uh5Co,69554
|
17
17
|
kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
|
18
18
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
19
19
|
kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
|
@@ -39,11 +39,11 @@ kodexa/steps/__init__.py,sha256=qpmAtYePTv7G-HzUBx087bA3kq-PPGcFJf4_Z5P--0k,179
|
|
39
39
|
kodexa/steps/common.py,sha256=fGEuKxcztcqrYFpXbu_OYkxh42yR9s5mkszmtkJhnQ8,10428
|
40
40
|
kodexa/testing/__init__.py,sha256=P8W-SOnWsi48asfnQV06iyHrzZAzuX69j9oYwBvgp5s,323
|
41
41
|
kodexa/testing/test_components.py,sha256=g5lP-GY0nTHuH5cIEw45vIejEeBaWkPKQGHL36jejBQ,1052
|
42
|
-
kodexa/testing/test_utils.py,sha256=
|
42
|
+
kodexa/testing/test_utils.py,sha256=v44p__gE7ia67W7WeHN2HBFCWSCUrCZt7G4xBNCmwf8,14154
|
43
43
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
44
44
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
45
|
kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
|
46
|
-
kodexa-7.0.
|
47
|
-
kodexa-7.0.
|
48
|
-
kodexa-7.0.
|
49
|
-
kodexa-7.0.
|
46
|
+
kodexa-7.0.12399073274.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
47
|
+
kodexa-7.0.12399073274.dist-info/METADATA,sha256=FHfSkW8bgm7lc-G9sT-rLxxLJfSaJP2-6ZaDGz6v1sE,3527
|
48
|
+
kodexa-7.0.12399073274.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
49
|
+
kodexa-7.0.12399073274.dist-info/RECORD,,
|
File without changes
|
File without changes
|