kodexa 7.0.1a8003211616__py3-none-any.whl → 7.0.1a9196667375__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,6 +96,14 @@ class SqliteDocumentPersistence(object):
96
96
  self.cursor.execute("pragma temp_store = memory")
97
97
  self.cursor.execute("pragma mmap_size = 30000000000")
98
98
 
99
+ try:
100
+ # We need to populate node_type_id_by_name
101
+ for n_type in self.cursor.execute("select id,name from n_type"):
102
+ self.node_types[n_type[0]] = n_type[1]
103
+ self.node_type_id_by_name[n_type[1]] = n_type[0]
104
+ except:
105
+ pass
106
+
99
107
  def create_in_memory_database(self, disk_db_path: str):
100
108
  # Connect to the in-memory database
101
109
  mem_conn = sqlite3.connect(':memory:')
@@ -106,15 +114,21 @@ class SqliteDocumentPersistence(object):
106
114
  disk_cursor = disk_conn.cursor()
107
115
 
108
116
  # Load the contents of the disk database into memory
109
- disk_cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
117
+ disk_cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='table';")
110
118
  tables = disk_cursor.fetchall()
111
- for table in tables:
112
- table_name = table[0]
119
+ for table_name, create_table_sql in tables:
120
+ if "sqlite" in table_name:
121
+ continue
122
+
123
+ # Create the table structure in the in-memory database
124
+ mem_cursor.execute(create_table_sql)
125
+
126
+ # Populate the table with data from the disk database
113
127
  disk_cursor.execute(f"SELECT * FROM {table_name}")
114
128
  rows = disk_cursor.fetchall()
115
- mem_cursor.execute(f"CREATE TABLE {table_name} AS SELECT * FROM {table_name}", [])
116
129
  for row in rows:
117
- mem_cursor.execute(f"INSERT INTO {table_name} VALUES ({', '.join('?' * len(row))})", row)
130
+ placeholders = ', '.join('?' * len(row))
131
+ mem_cursor.execute(f"INSERT INTO {table_name} VALUES ({placeholders})", row)
118
132
 
119
133
  # Commit changes and close disk connection
120
134
  mem_conn.commit()
@@ -131,7 +145,7 @@ class SqliteDocumentPersistence(object):
131
145
  """
132
146
  features = []
133
147
  for feature in self.cursor.execute(
134
- "select name from f_type where name like 'tag:%'"
148
+ "select name from f_type where name like 'tag:%'"
135
149
  ).fetchall():
136
150
  features.append(feature[0].split(":")[1])
137
151
 
@@ -730,8 +744,8 @@ class SqliteDocumentPersistence(object):
730
744
  # We need to get the child nodes
731
745
  children = []
732
746
  for child_node in self.cursor.execute(
733
- "select id, pid, nt, idx from cn where pid = ? order by idx",
734
- [content_node.uuid],
747
+ "select id, pid, nt, idx from cn where pid = ? order by idx",
748
+ [content_node.uuid],
735
749
  ).fetchall():
736
750
  children.append(self.__build_node(child_node))
737
751
  return children
@@ -750,8 +764,8 @@ class SqliteDocumentPersistence(object):
750
764
  # We need to get the child nodes
751
765
  children = []
752
766
  for child_node in self.cursor.execute(
753
- "select id, pid, nt, idx from cn where pid = ? order by idx",
754
- [content_node.uuid],
767
+ "select id, pid, nt, idx from cn where pid = ? order by idx",
768
+ [content_node.uuid],
755
769
  ).fetchall():
756
770
  children.append(child_node[0])
757
771
  return children
@@ -827,7 +841,7 @@ class SqliteDocumentPersistence(object):
827
841
  def dump_in_memory_db_to_file(self):
828
842
  # Connect to a new or existing database file
829
843
  disk_conn = sqlite3.connect(self.current_filename)
830
-
844
+
831
845
  # Use the backup API to copy the in-memory database to the disk file
832
846
  with disk_conn:
833
847
  self.connection.backup(disk_conn)
@@ -843,10 +857,10 @@ class SqliteDocumentPersistence(object):
843
857
  bytes: The document as bytes.
844
858
  """
845
859
  self.sync()
846
-
860
+
847
861
  if self.inmemory:
848
862
  self.dump_in_memory_db_to_file()
849
-
863
+
850
864
  with open(self.current_filename, "rb") as f:
851
865
  return f.read()
852
866
 
@@ -864,8 +878,8 @@ class SqliteDocumentPersistence(object):
864
878
 
865
879
  features = []
866
880
  for feature in self.cursor.execute(
867
- "select id, cn_id, f_type, binary_value, single from ft where cn_id = ?",
868
- [node.uuid],
881
+ "select id, cn_id, f_type, binary_value, single from ft where cn_id = ?",
882
+ [node.uuid],
869
883
  ).fetchall():
870
884
  feature_type_name = self.feature_type_names[feature[2]]
871
885
  single = feature[4] == 1
@@ -914,26 +928,27 @@ class SqliteDocumentPersistence(object):
914
928
  def get_all_node_ids(node):
915
929
  """
916
930
  This function recursively traverses a node tree, collecting the ids of all non-virtual nodes.
917
-
918
- Args:
919
- node (Node): The root node to start the traversal from.
920
-
921
- Returns:
922
- list: A list of ids of all non-virtual nodes in the tree.
923
931
  """
924
932
  all_node_ids = []
925
933
  if not node.virtual:
926
- all_node_ids.append([node.uuid])
934
+ all_node_ids.append(node.uuid) # Append the uuid directly, not as a list
927
935
  for child in node.get_children():
928
936
  all_node_ids.extend(get_all_node_ids(child))
929
-
930
937
  return all_node_ids
931
938
 
932
939
  all_child_ids = get_all_node_ids(node)
933
-
934
- self.cursor.executemany("delete from cnp where cn_id=?", all_child_ids)
935
- self.cursor.executemany("delete from cn where id=?", all_child_ids)
936
- self.cursor.executemany("delete from ft where cn_id=?", all_child_ids)
940
+ parameter_tuples = [(id,) for id in all_child_ids] # Prepare the parameters as tuples
941
+
942
+ # Assuming `self.cursor` is part of a larger transaction management system
943
+ try:
944
+ self.cursor.executemany("delete from cnp where cn_id=?", parameter_tuples)
945
+ self.cursor.executemany("delete from cn where id=?", parameter_tuples)
946
+ self.cursor.executemany("delete from ft where cn_id=?", parameter_tuples)
947
+ self.connection.commit() # Commit the transaction if part of one
948
+ return all_child_ids
949
+ except Exception as e:
950
+ self.connection.rollback() # Rollback in case of error
951
+ logger.error(f"An error occurred: {e}")
937
952
 
938
953
  def remove_all_features(self, node):
939
954
  """
@@ -1087,6 +1102,26 @@ class SqliteDocumentPersistence(object):
1087
1102
 
1088
1103
  return content_nodes
1089
1104
 
1105
+ def get_nodes_by_type(self, node_type):
1106
+ """
1107
+ Retrieves nodes of a given type from the document.
1108
+
1109
+ Args:
1110
+ node_type (str): The type of the nodes to be retrieved.
1111
+
1112
+ Returns:
1113
+ list: A list of nodes of the given type.
1114
+ """
1115
+ content_nodes = []
1116
+
1117
+ node_type_id = self.node_type_id_by_name.get(node_type)
1118
+
1119
+ query = "select id, pid, nt, idx from cn where nt = ? order by idx"
1120
+ for content_node in self.cursor.execute(query, [node_type_id]).fetchall():
1121
+ content_nodes.append(self.__build_node(content_node))
1122
+
1123
+ return content_nodes
1124
+
1090
1125
 
1091
1126
  class SimpleObjectCache(object):
1092
1127
  """
@@ -1139,14 +1174,14 @@ class SimpleObjectCache(object):
1139
1174
  self.objs[obj.uuid] = obj
1140
1175
  self.dirty_objs.add(obj.uuid)
1141
1176
 
1142
- def remove_obj(self, obj):
1177
+ def remove_obj(self, obj: ContentNode):
1143
1178
  """
1144
1179
  Remove an object from the cache.
1145
1180
 
1146
1181
  Args:
1147
1182
  obj (object): The object to remove.
1148
1183
  """
1149
- if obj.uuid in self.objs:
1184
+ if obj and obj.uuid in self.objs:
1150
1185
  self.objs.pop(obj.uuid)
1151
1186
  if obj.uuid in self.dirty_objs:
1152
1187
  self.dirty_objs.remove(obj.uuid)
@@ -1225,6 +1260,18 @@ class PersistenceManager(object):
1225
1260
  document, filename, delete_on_close, inmemory=inmemory
1226
1261
  )
1227
1262
 
1263
+ def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
1264
+ """
1265
+ Retrieves all nodes of a given type from the underlying persistence layer.
1266
+
1267
+ Args:
1268
+ node_type (str): The type of the nodes to be retrieved.
1269
+
1270
+ Returns:
1271
+ List[ContentNode]: A list of all nodes of the given type.
1272
+ """
1273
+ return self._underlying_persistence.get_nodes_by_type(node_type)
1274
+
1228
1275
  def get_node_by_uuid(self, uuid: int) -> ContentNode:
1229
1276
  """
1230
1277
  Retrieves a node by its uuid.
@@ -1473,6 +1520,14 @@ class PersistenceManager(object):
1473
1520
  if node.index is None:
1474
1521
  node.index = 0
1475
1522
 
1523
+ # Check if the node exists in the DB
1524
+ if node.uuid is None:
1525
+ node.uuid = self.node_cache.next_id
1526
+ self.node_cache.next_id += 1
1527
+
1528
+ if self._underlying_persistence.get_node(node.uuid) is None:
1529
+ self._underlying_persistence.add_content_node(node, parent)
1530
+
1476
1531
  if parent:
1477
1532
  node._parent_uuid = parent.uuid
1478
1533
  self.node_cache.add_obj(parent)
@@ -1486,8 +1541,8 @@ class PersistenceManager(object):
1486
1541
  update_child_cache = True
1487
1542
 
1488
1543
  if (
1489
- node.uuid in self.node_parent_cache
1490
- and node._parent_uuid != self.node_parent_cache[node.uuid]
1544
+ node.uuid in self.node_parent_cache
1545
+ and node._parent_uuid != self.node_parent_cache[node.uuid]
1491
1546
  ):
1492
1547
  # Remove from the old parent
1493
1548
  self.child_id_cache[self.node_parent_cache[node.uuid]].remove(node.uuid)
@@ -1505,8 +1560,8 @@ class PersistenceManager(object):
1505
1560
  self.child_id_cache[node._parent_uuid].add(node.uuid)
1506
1561
  current_children = self.child_cache[node._parent_uuid]
1507
1562
  if (
1508
- len(current_children) == 0
1509
- or node.index >= current_children[-1].index
1563
+ len(current_children) == 0
1564
+ or node.index >= current_children[-1].index
1510
1565
  ):
1511
1566
  self.child_cache[node._parent_uuid].append(node)
1512
1567
  else:
@@ -1568,7 +1623,14 @@ class PersistenceManager(object):
1568
1623
  self.content_parts_cache.pop(node.uuid, None)
1569
1624
  self.feature_cache.pop(node.uuid, None)
1570
1625
 
1571
- self._underlying_persistence.remove_content_node(node)
1626
+ all_ids = self._underlying_persistence.remove_content_node(node)
1627
+
1628
+ # remove all the ids from the cache
1629
+ for id in all_ids:
1630
+ tmp_node = self.node_cache.get_obj(id)
1631
+ if tmp_node is not None:
1632
+ self.node_cache.remove_obj(tmp_node)
1633
+ self.node_cache.dirty_objs.remove(id) if id in self.node_cache.dirty_objs else None
1572
1634
 
1573
1635
  def get_children(self, node):
1574
1636
  """
@@ -132,16 +132,17 @@ class PipelineContext:
132
132
  self.cancellation_handler = cancellation_handler
133
133
 
134
134
  def update_status(
135
- self, status_message: str, status_full_message: Optional[str] = None
135
+ self, status_message: str, progress: Optional[int] = None, progress_max: Optional[int] = None
136
136
  ):
137
137
  """Updates the status of the pipeline.
138
138
 
139
139
  Args:
140
140
  status_message (str): The status message.
141
- status_full_message (str, optional): The full status message. Defaults to None.
141
+ progress (int, optional): The progress of the pipeline. Defaults to None.
142
+ progress_max (int, optional): The maximum progress of the pipeline. Defaults to None.
142
143
  """
143
144
  if self.status_handler is not None:
144
- self.status_handler(status_message, status_full_message)
145
+ self.status_handler(status_message, progress, progress_max)
145
146
 
146
147
  def is_cancelled(self) -> bool:
147
148
  """Checks if the pipeline is cancelled.
@@ -233,14 +234,21 @@ class PipelineStep:
233
234
  """
234
235
 
235
236
  def __init__(
236
- self, step, name=None, options=None, attach_source=False, step_type="ACTION"
237
+ self, step, name=None, options=None, attach_source=False, step_type="ACTION", conditional=None
237
238
  ):
238
239
  if options is None:
239
- options = {}
240
+ try:
241
+ if 'options' in step:
242
+ options = step['options']
243
+ else:
244
+ options = {}
245
+ except:
246
+ options = {}
240
247
  self.step = step
241
248
  self.name = name
242
249
  self.options = options
243
250
  self.step_type = step_type
251
+ self.conditional = conditional
244
252
 
245
253
  if str(type(self.step)) == "<class 'type'>":
246
254
  logger.info(f"Adding new step class {step.__name__} to pipeline")
@@ -253,7 +261,10 @@ class PipelineStep:
253
261
  from kodexa import RemoteStep
254
262
 
255
263
  self.step = RemoteStep(
256
- step, step_type=step_type, options=options, attach_source=attach_source
264
+ step, step_type=step_type,
265
+ options=options,
266
+ attach_source=attach_source,
267
+ conditional=conditional
257
268
  )
258
269
  else:
259
270
  logger.info(f"Adding new step {type(step)} to pipeline")
@@ -273,7 +284,7 @@ class PipelineStep:
273
284
  "You can not yet deploy a pipeline with a class instance style step"
274
285
  )
275
286
  if isinstance(self.step, str):
276
- return {"ref": self.step, "options": self.options}
287
+ return {"ref": self.step, "options": self.options, "conditional": self.conditional}
277
288
  if callable(self.step):
278
289
  metadata = {
279
290
  "function": self.step.__name__,
@@ -284,6 +295,8 @@ class PipelineStep:
284
295
 
285
296
  metadata["name"] = self.name
286
297
  metadata["stepType"] = self.step_type
298
+ metadata["options"] = self.options
299
+ metadata["conditional"] = self.conditional
287
300
  return metadata
288
301
  except AttributeError as e:
289
302
  raise Exception("All steps must implement to_dict() for deployment", e)
@@ -478,7 +491,7 @@ class Pipeline:
478
491
  return self
479
492
 
480
493
  def add_step(
481
- self, step, name=None, options=None, attach_source=False, step_type="ACTION"
494
+ self, step, name=None, options=None, attach_source=False, step_type="ACTION", conditional=None
482
495
  ):
483
496
  """Add the given step to the current pipeline.
484
497
 
@@ -488,12 +501,18 @@ class Pipeline:
488
501
  options (optional): Options to be passed to the step if it is a simplified remote action. Defaults to None.
489
502
  attach_source (bool, optional): If step is simplified remote action this determines if we need to add the source. Defaults to False.
490
503
  step_type (str, optional): The type of step to add, can either be an ACTION or MODEL. Defaults to 'ACTION'.
491
-
504
+ conditional (str, optional): The conditional to use for the step. Defaults to None.
492
505
  Returns:
493
506
  Pipeline: The instance of the pipeline.
494
507
  """
495
508
  if options is None:
496
- options = {}
509
+ try:
510
+ if 'options' in step:
511
+ options = step['options']
512
+ else:
513
+ options = {}
514
+ except:
515
+ options = {}
497
516
  self.steps.append(
498
517
  PipelineStep(
499
518
  step=step,
@@ -501,6 +520,7 @@ class Pipeline:
501
520
  options=options,
502
521
  attach_source=attach_source,
503
522
  step_type=step_type,
523
+ conditional=conditional
504
524
  )
505
525
  )
506
526