kodexa 7.0.12397101638__tar.gz → 7.0.12399109365__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/PKG-INFO +1 -1
  2. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/persistence.py +40 -14
  3. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/testing/test_utils.py +18 -18
  4. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/pyproject.toml +1 -1
  5. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/LICENSE +0 -0
  6. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/README.md +0 -0
  7. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/__init__.py +0 -0
  8. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/assistant/__init__.py +0 -0
  9. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/assistant/assistant.py +0 -0
  10. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/connectors/__init__.py +0 -0
  11. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/connectors/connectors.py +0 -0
  12. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/dataclasses/__init__.py +0 -0
  13. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/dataclasses/templates/llm_data_class.j2 +0 -0
  14. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/__init__.py +0 -0
  15. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/base.py +0 -0
  16. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/entities/__init__.py +0 -0
  17. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/entities/check_response.py +0 -0
  18. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/entities/product.py +0 -0
  19. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/entities/product_subscription.py +0 -0
  20. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/model.py +1 -1
  21. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/objects.py +0 -0
  22. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/model/utils.py +0 -0
  23. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/pipeline/__init__.py +0 -0
  24. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/pipeline/pipeline.py +0 -0
  25. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/platform/__init__.py +0 -0
  26. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/platform/client.py +0 -0
  27. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/platform/interaction.py +0 -0
  28. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/platform/kodexa.py +0 -0
  29. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/__init__.py +0 -0
  30. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/ast.py +0 -0
  31. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/core.py +0 -0
  32. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/lexrules.py +0 -0
  33. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/lextab.py +0 -0
  34. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/lextab.pyi +0 -0
  35. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/parserules.py +0 -0
  36. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/parserules.pyi +0 -0
  37. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/parsetab.py +0 -0
  38. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/selectors/parsetab.pyi +0 -0
  39. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/spatial/__init__.py +0 -0
  40. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/spatial/azure_models.py +0 -0
  41. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/spatial/bbox_common.py +0 -0
  42. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/spatial/table_form_common.py +0 -0
  43. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/steps/__init__.py +0 -0
  44. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/steps/common.py +0 -0
  45. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/testing/__init__.py +0 -0
  46. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/testing/test_components.py +0 -0
  47. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/training/__init__.py +0 -0
  48. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/training/train_utils.py +0 -0
  49. {kodexa-7.0.12397101638 → kodexa-7.0.12399109365}/kodexa/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.0.12397101638
3
+ Version: 7.0.12399109365
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -3,6 +3,7 @@ import logging
3
3
  import pathlib
4
4
  import sqlite3
5
5
  import tempfile
6
+ import time
6
7
  import uuid
7
8
  from typing import List, Optional
8
9
 
@@ -42,6 +43,22 @@ FEATURE_TYPE_LOOKUP = "select id from f_type where name = ?"
42
43
  METADATA_INSERT = "insert into metadata(id,metadata) values (1,?)"
43
44
  METADATA_DELETE = "delete from metadata where id=1"
44
45
 
46
+ # Configuration constants
47
+ CACHE_SIZE = 10000 # Number of nodes to cache
48
+ BATCH_SIZE = 1000 # Size of batches for bulk operations
49
+ SLOW_QUERY_THRESHOLD = 1.0 # Seconds
50
+ MAX_CONNECTIONS = 5 # Maximum number of database connections
51
+
52
+ def monitor_performance(func):
53
+ """Performance monitoring decorator"""
54
+ def wrapper(*args, **kwargs):
55
+ start_time = time.time()
56
+ result = func(*args, **kwargs)
57
+ duration = time.time() - start_time
58
+ if duration > SLOW_QUERY_THRESHOLD:
59
+ logger.warning(f"Slow operation detected: {func.__name__}, duration: {duration}s")
60
+ return result
61
+ return wrapper
45
62
 
46
63
  class SqliteDocumentPersistence(object):
47
64
  """
@@ -57,7 +74,7 @@ class SqliteDocumentPersistence(object):
57
74
  The Sqlite persistence engine to support large scale documents (part of the V4 Kodexa Document Architecture)
58
75
  """
59
76
 
60
- def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False):
77
+ def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False, persistence_manager=None):
61
78
  self.document = document
62
79
 
63
80
  self.node_types = {}
@@ -138,6 +155,7 @@ class SqliteDocumentPersistence(object):
138
155
 
139
156
  return mem_conn
140
157
 
158
+ @monitor_performance
141
159
  def get_all_tags(self):
142
160
  """
143
161
  Retrieves all tags from the document.
@@ -153,6 +171,7 @@ class SqliteDocumentPersistence(object):
153
171
 
154
172
  return features
155
173
 
174
+ @monitor_performance
156
175
  def update_features(self, node):
157
176
  """
158
177
  Updates the features of a given node in the document.
@@ -188,6 +207,7 @@ class SqliteDocumentPersistence(object):
188
207
  self.cursor.execute("DELETE FROM ft where cn_id=?", [node.uuid])
189
208
  self.cursor.executemany(FEATURE_INSERT, all_features)
190
209
 
210
+ @monitor_performance
191
211
  def update_node(self, node):
192
212
  """
193
213
  Updates a given node in the document.
@@ -200,6 +220,7 @@ class SqliteDocumentPersistence(object):
200
220
  [node.index, node._parent_uuid, node.uuid],
201
221
  )
202
222
 
223
+ @monitor_performance
203
224
  def get_content_nodes(self, node_type, parent_node: ContentNode, include_children):
204
225
  """
205
226
  Retrieves content nodes from the document based on the given parameters.
@@ -213,8 +234,8 @@ class SqliteDocumentPersistence(object):
213
234
  list: A list of content nodes that match the given parameters.
214
235
  """
215
236
  nodes = []
216
-
217
- results = []
237
+ if not self.connection.in_transaction:
238
+ self.cursor.execute("BEGIN TRANSACTION")
218
239
  if include_children:
219
240
  if node_type == "*":
220
241
  query = """
@@ -284,6 +305,7 @@ class SqliteDocumentPersistence(object):
284
305
  ],
285
306
  ).fetchall()
286
307
  except StopIteration:
308
+ self.connection.commit()
287
309
  return []
288
310
  else:
289
311
  query = "select id, pid, nt, idx from cn where pid=? and nt=? order by idx"
@@ -300,11 +322,14 @@ class SqliteDocumentPersistence(object):
300
322
  ],
301
323
  ).fetchall()
302
324
  except StopIteration:
325
+ self.connection.commit()
303
326
  return []
304
327
 
305
328
  for raw_node in list(results):
306
329
  nodes.append(self.__build_node(raw_node))
307
330
 
331
+ self.connection.commit()
332
+
308
333
  return nodes
309
334
 
310
335
  def initialize(self):
@@ -326,6 +351,7 @@ class SqliteDocumentPersistence(object):
326
351
  self.cursor.close()
327
352
  self.connection.close()
328
353
 
354
+ @monitor_performance
329
355
  def get_max_feature_id(self):
330
356
  """
331
357
  Retrieves the maximum feature id from the document.
@@ -396,6 +422,7 @@ class SqliteDocumentPersistence(object):
396
422
 
397
423
  self.__update_metadata()
398
424
 
425
+ @monitor_performance
399
426
  def content_node_count(self):
400
427
  """
401
428
  Counts the number of content nodes in the document.
@@ -405,6 +432,7 @@ class SqliteDocumentPersistence(object):
405
432
  """
406
433
  self.cursor.execute("select * from cn").fetchall()
407
434
 
435
+ @monitor_performance
408
436
  def get_feature_type_id(self, feature):
409
437
  """
410
438
  Retrieves the id of a given feature.
@@ -466,6 +494,7 @@ class SqliteDocumentPersistence(object):
466
494
 
467
495
  return result[0]
468
496
 
497
+ @monitor_performance
469
498
  def __insert_node(self, node: ContentNode, parent, execute=True):
470
499
  """
471
500
  Inserts a node into the document.
@@ -1385,7 +1414,7 @@ class PersistenceManager(object):
1385
1414
  self.node_parent_cache = {}
1386
1415
 
1387
1416
  self._underlying_persistence = SqliteDocumentPersistence(
1388
- document, filename, delete_on_close, inmemory=inmemory
1417
+ document, filename, delete_on_close, inmemory=inmemory, persistence_manager=self
1389
1418
  )
1390
1419
 
1391
1420
  def get_steps(self) -> list[ProcessingStep]:
@@ -1523,7 +1552,6 @@ class PersistenceManager(object):
1523
1552
  Returns:
1524
1553
  List[Node]: A list of nodes tagged with the specified tag.
1525
1554
  """
1526
- self.flush_cache()
1527
1555
  return self._underlying_persistence.get_tagged_nodes(tag, tag_uuid)
1528
1556
 
1529
1557
  def get_all_tagged_nodes(self):
@@ -1533,7 +1561,6 @@ class PersistenceManager(object):
1533
1561
  Returns:
1534
1562
  List[Node]: A list of all tagged nodes.
1535
1563
  """
1536
- self.flush_cache()
1537
1564
  return self._underlying_persistence.get_all_tagged_nodes()
1538
1565
 
1539
1566
  def initialize(self):
@@ -1565,6 +1592,7 @@ class PersistenceManager(object):
1565
1592
  """
1566
1593
  self._underlying_persistence.close()
1567
1594
 
1595
+ @monitor_performance
1568
1596
  def flush_cache(self):
1569
1597
  """
1570
1598
  Flushes the cache by merging it with the underlying persistence layer.
@@ -1574,11 +1602,13 @@ class PersistenceManager(object):
1574
1602
  all_content_parts = []
1575
1603
  all_features = []
1576
1604
  node_id_with_features = []
1577
-
1578
- logger.debug("Merging cache to persistence")
1579
1605
  dirty_nodes = self.node_cache.get_dirty_objs()
1580
1606
 
1581
- logger.debug(f"Identified {len(dirty_nodes)} nodes to update")
1607
+ if len(dirty_nodes) == 0:
1608
+ return
1609
+
1610
+ if not self._underlying_persistence.connection.in_transaction:
1611
+ self._underlying_persistence.connection.execute("BEGIN TRANSACTION")
1582
1612
 
1583
1613
  next_feature_id = self._underlying_persistence.get_max_feature_id()
1584
1614
  for node in dirty_nodes:
@@ -1618,7 +1648,6 @@ class PersistenceManager(object):
1618
1648
 
1619
1649
  self.node_cache.undirty(node)
1620
1650
 
1621
- logger.debug(f"Writing {len(all_node_ids)} nodes")
1622
1651
  self._underlying_persistence.cursor.executemany(
1623
1652
  "DELETE FROM cn where id=?", all_node_ids
1624
1653
  )
@@ -1631,14 +1660,11 @@ class PersistenceManager(object):
1631
1660
  self._underlying_persistence.cursor.executemany(
1632
1661
  "DELETE FROM cnp where cn_id=?", all_node_ids
1633
1662
  )
1634
- logger.debug(f"Writing {len(all_content_parts)} content parts")
1635
-
1636
1663
  self._underlying_persistence.cursor.executemany(
1637
1664
  CONTENT_NODE_PART_INSERT, all_content_parts
1638
1665
  )
1639
-
1640
- logger.debug(f"Writing {len(all_features)} features")
1641
1666
  self._underlying_persistence.cursor.executemany(FEATURE_INSERT, all_features)
1667
+ self._underlying_persistence.connection.commit()
1642
1668
 
1643
1669
  def get_content_nodes(self, node_type, parent_node, include_children):
1644
1670
  """
@@ -333,27 +333,27 @@ class ExtensionPackUtil:
333
333
  if options is None:
334
334
  options = {}
335
335
 
336
- for service in self.kodexa_metadata.services:
337
- if service.type == "action" and service.slug == action_slug:
336
+ for service in self.kodexa_metadata["services"]:
337
+ if service["type"] == "action" and service["slug"] == action_slug:
338
338
  # TODO We need to validate all the options
339
339
 
340
- if len(service.metadata.options) > 0:
340
+ if len(service["metadata"]["options"]) > 0:
341
341
  option_names = []
342
- for option in service.metadata.options:
343
- option_names.append(option.name)
344
- if option.name not in options and option.default is not None:
345
- options[option.name] = option.default
346
- if option.required and option.name not in options:
342
+ for option in service["metadata"]["options"]:
343
+ option_names.append(option["name"])
344
+ if option["name"] not in options and "default" in option and option["default"] is not None:
345
+ options[option["name"]] = option["default"]
346
+ if option["required"] and option["name"] not in options:
347
347
  raise OptionException(
348
- f"Missing required option {option.name}"
348
+ f"Missing required option {option['name']}"
349
349
  )
350
350
 
351
351
  for option_name in options.keys():
352
352
  if option_name not in option_names:
353
353
  # We need to determine if this is actually a group
354
354
  is_group = False
355
- for check_option in service.metadata.options:
356
- if check_option["group"] is not None:
355
+ for check_option in service["metadata"]["options"]:
356
+ if "group" in check_option and check_option["group"] is not None:
357
357
  if check_option["group"]["name"] == option_name:
358
358
  is_group = True
359
359
 
@@ -363,8 +363,8 @@ class ExtensionPackUtil:
363
363
  )
364
364
 
365
365
  # We need to create and return our action
366
- module = importlib.import_module(service.step.package)
367
- klass = getattr(module, service.step["class"])
366
+ module = importlib.import_module(service["step"]["package"])
367
+ klass = getattr(module, service["step"]["class"])
368
368
  new_instance = klass(**options)
369
369
 
370
370
  # Since we will be using to access metadata we will need to
@@ -421,15 +421,15 @@ class ExtensionPackUtil:
421
421
  if options is None:
422
422
  options = {}
423
423
 
424
- for service in self.kodexa_metadata.services:
425
- if service.type == "assistant" and service.slug == assistant_slug:
424
+ for service in self.kodexa_metadata["services"]:
425
+ if service["type"] == "assistant" and service["slug"] == assistant_slug:
426
426
  # TODO We need to validate all the options
427
427
 
428
428
  # We need to create and return our action
429
429
 
430
- logger.info(f"Creating new assistant {service.assistant}")
431
- module = importlib.import_module(service.assistant.package)
432
- klass = getattr(module, service.assistant["class"])
430
+ logger.info(f"Creating new assistant {service['assistant']}")
431
+ module = importlib.import_module(service["assistant"]["package"])
432
+ klass = getattr(module, service["assistant"]["class"])
433
433
  return klass(**options)
434
434
 
435
435
  raise Exception("Unable to find the assistant " + assistant_slug)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "kodexa"
3
- version = "7.0.012397101638"
3
+ version = "7.0.012399109365"
4
4
  description = "Python SDK for the Kodexa Platform"
5
5
  authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
6
6
  readme = "README.md"
@@ -815,8 +815,8 @@ class ContentNode(object):
815
815
  from kodexa.selectors.ast import SelectorContext
816
816
 
817
817
  context = SelectorContext(self.document, first_only=first_only)
818
- parsed_selector = parse(selector)
819
818
  self.document.get_persistence().flush_cache()
819
+ parsed_selector = parse(selector)
820
820
  return parsed_selector.resolve(self, variables, context)
821
821
 
822
822
  def get_all_content(self, separator=" ", strip=True):