exonware-xwnode 0.0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. exonware/__init__.py +14 -0
  2. exonware/xwnode/__init__.py +127 -0
  3. exonware/xwnode/base.py +676 -0
  4. exonware/xwnode/config.py +178 -0
  5. exonware/xwnode/contracts.py +730 -0
  6. exonware/xwnode/errors.py +503 -0
  7. exonware/xwnode/facade.py +460 -0
  8. exonware/xwnode/strategies/__init__.py +158 -0
  9. exonware/xwnode/strategies/advisor.py +463 -0
  10. exonware/xwnode/strategies/edges/__init__.py +32 -0
  11. exonware/xwnode/strategies/edges/adj_list.py +227 -0
  12. exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
  13. exonware/xwnode/strategies/edges/base.py +169 -0
  14. exonware/xwnode/strategies/flyweight.py +328 -0
  15. exonware/xwnode/strategies/impls/__init__.py +13 -0
  16. exonware/xwnode/strategies/impls/_base_edge.py +403 -0
  17. exonware/xwnode/strategies/impls/_base_node.py +307 -0
  18. exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
  19. exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
  20. exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
  21. exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
  22. exonware/xwnode/strategies/impls/edge_coo.py +533 -0
  23. exonware/xwnode/strategies/impls/edge_csc.py +447 -0
  24. exonware/xwnode/strategies/impls/edge_csr.py +492 -0
  25. exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
  26. exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
  27. exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
  28. exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
  29. exonware/xwnode/strategies/impls/edge_octree.py +574 -0
  30. exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
  31. exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
  32. exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
  33. exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
  34. exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
  35. exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
  36. exonware/xwnode/strategies/manager.py +775 -0
  37. exonware/xwnode/strategies/metrics.py +538 -0
  38. exonware/xwnode/strategies/migration.py +432 -0
  39. exonware/xwnode/strategies/nodes/__init__.py +50 -0
  40. exonware/xwnode/strategies/nodes/_base_node.py +307 -0
  41. exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
  42. exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
  43. exonware/xwnode/strategies/nodes/array_list.py +209 -0
  44. exonware/xwnode/strategies/nodes/base.py +247 -0
  45. exonware/xwnode/strategies/nodes/deque.py +200 -0
  46. exonware/xwnode/strategies/nodes/hash_map.py +135 -0
  47. exonware/xwnode/strategies/nodes/heap.py +307 -0
  48. exonware/xwnode/strategies/nodes/linked_list.py +232 -0
  49. exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
  50. exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
  51. exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
  52. exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
  53. exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
  54. exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
  55. exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
  56. exonware/xwnode/strategies/nodes/node_btree.py +357 -0
  57. exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
  58. exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
  59. exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
  60. exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
  61. exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
  62. exonware/xwnode/strategies/nodes/node_heap.py +191 -0
  63. exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
  64. exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
  65. exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
  66. exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
  67. exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
  68. exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
  69. exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
  70. exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
  71. exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
  72. exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
  73. exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
  74. exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
  75. exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
  76. exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
  77. exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
  78. exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
  79. exonware/xwnode/strategies/nodes/node_treap.py +387 -0
  80. exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
  81. exonware/xwnode/strategies/nodes/node_trie.py +252 -0
  82. exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
  83. exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
  84. exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
  85. exonware/xwnode/strategies/nodes/queue.py +161 -0
  86. exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
  87. exonware/xwnode/strategies/nodes/stack.py +152 -0
  88. exonware/xwnode/strategies/nodes/trie.py +274 -0
  89. exonware/xwnode/strategies/nodes/union_find.py +283 -0
  90. exonware/xwnode/strategies/pattern_detector.py +603 -0
  91. exonware/xwnode/strategies/performance_monitor.py +487 -0
  92. exonware/xwnode/strategies/queries/__init__.py +24 -0
  93. exonware/xwnode/strategies/queries/base.py +236 -0
  94. exonware/xwnode/strategies/queries/cql.py +201 -0
  95. exonware/xwnode/strategies/queries/cypher.py +181 -0
  96. exonware/xwnode/strategies/queries/datalog.py +70 -0
  97. exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
  98. exonware/xwnode/strategies/queries/eql.py +70 -0
  99. exonware/xwnode/strategies/queries/flux.py +70 -0
  100. exonware/xwnode/strategies/queries/gql.py +70 -0
  101. exonware/xwnode/strategies/queries/graphql.py +240 -0
  102. exonware/xwnode/strategies/queries/gremlin.py +181 -0
  103. exonware/xwnode/strategies/queries/hiveql.py +214 -0
  104. exonware/xwnode/strategies/queries/hql.py +70 -0
  105. exonware/xwnode/strategies/queries/jmespath.py +219 -0
  106. exonware/xwnode/strategies/queries/jq.py +66 -0
  107. exonware/xwnode/strategies/queries/json_query.py +66 -0
  108. exonware/xwnode/strategies/queries/jsoniq.py +248 -0
  109. exonware/xwnode/strategies/queries/kql.py +70 -0
  110. exonware/xwnode/strategies/queries/linq.py +238 -0
  111. exonware/xwnode/strategies/queries/logql.py +70 -0
  112. exonware/xwnode/strategies/queries/mql.py +68 -0
  113. exonware/xwnode/strategies/queries/n1ql.py +210 -0
  114. exonware/xwnode/strategies/queries/partiql.py +70 -0
  115. exonware/xwnode/strategies/queries/pig.py +215 -0
  116. exonware/xwnode/strategies/queries/promql.py +70 -0
  117. exonware/xwnode/strategies/queries/sparql.py +220 -0
  118. exonware/xwnode/strategies/queries/sql.py +275 -0
  119. exonware/xwnode/strategies/queries/xml_query.py +66 -0
  120. exonware/xwnode/strategies/queries/xpath.py +223 -0
  121. exonware/xwnode/strategies/queries/xquery.py +258 -0
  122. exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
  123. exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
  124. exonware/xwnode/strategies/registry.py +604 -0
  125. exonware/xwnode/strategies/simple.py +273 -0
  126. exonware/xwnode/strategies/utils.py +532 -0
  127. exonware/xwnode/types.py +912 -0
  128. exonware/xwnode/version.py +78 -0
  129. exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
  130. exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
  131. exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
  132. exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,655 @@
1
+ """
2
+ Edge Property Store Strategy Implementation
3
+
4
+ This module implements the EDGE_PROPERTY_STORE strategy for columnar
5
+ edge attribute storage with efficient analytical queries.
6
+ """
7
+
8
+ from typing import Any, Iterator, List, Dict, Set, Optional, Tuple, Union
9
+ from collections import defaultdict
10
+ import statistics
11
+ from ._base_edge import aEdgeStrategy
12
+ from ...types import EdgeMode, EdgeTrait
13
+
14
+
15
class PropertyColumn:
    """Columnar storage for a single edge property.

    ``values`` holds one cell per edge in insertion order and
    ``null_bitmap`` runs parallel to it, holding ``True`` wherever the cell
    is ``None``. The set of distinct non-null values is maintained on every
    write; min/max are recomputed lazily when statistics are requested.

    ``data_type`` is only recorded and reported (via its ``__name__``);
    values are neither coerced nor validated against it.
    """

    def __init__(self, name: str, data_type: type = object):
        self.name = name
        self.data_type = data_type
        self.values: List[Any] = []
        self.null_bitmap: List[bool] = []  # True if value is null

        # Column statistics
        self._min_value = None
        self._max_value = None
        self._unique_values: Set[Any] = set()
        self._stats_dirty = True

    def _forget_if_absent(self, value: Any) -> None:
        """Drop *value* from the distinct-value set once no cell holds it."""
        if value is not None and value not in self.values:
            self._unique_values.discard(value)

    def append(self, value: Any) -> None:
        """Append value to column.

        Raises:
            TypeError: if a non-null *value* is unhashable. The column is
                left untouched in that case.
        """
        if value is not None:
            # Register first: set.add() is the only step that can fail, so
            # doing it before the list appends keeps the column consistent.
            self._unique_values.add(value)

        self.values.append(value)
        self.null_bitmap.append(value is None)
        self._stats_dirty = True

    def set_value(self, index: int, value: Any) -> None:
        """Set value at specific index.

        Out-of-range indices (including negative ones) are ignored.

        Raises:
            TypeError: if a non-null *value* is unhashable. The column is
                left untouched in that case.
        """
        if not 0 <= index < len(self.values):
            return

        old_value = self.values[index]

        if value is not None:
            self._unique_values.add(value)

        self.values[index] = value
        self.null_bitmap[index] = value is None

        # Only forget the overwritten value when no other cell still holds
        # it; this also covers re-writing a cell with the value it had.
        self._forget_if_absent(old_value)
        self._stats_dirty = True

    def get_value(self, index: int) -> Any:
        """Get value at specific index, or None when index is out of range."""
        if 0 <= index < len(self.values):
            return self.values[index]
        return None

    def remove_at_index(self, index: int) -> None:
        """Remove value at specific index; later cells shift down by one.

        Out-of-range indices (including negative ones) are ignored.
        """
        if not 0 <= index < len(self.values):
            return

        old_value = self.values[index]
        del self.values[index]
        del self.null_bitmap[index]

        self._forget_if_absent(old_value)
        self._stats_dirty = True

    def _update_statistics(self) -> None:
        """Recompute cached min/max if the column changed since last time."""
        if not self._stats_dirty:
            return

        non_null_values = [v for v in self.values if v is not None]

        try:
            self._min_value = min(non_null_values)
            self._max_value = max(non_null_values)
        except (TypeError, ValueError):
            # ValueError: no non-null values at all.
            # TypeError: values are not mutually comparable.
            self._min_value = None
            self._max_value = None

        self._stats_dirty = False

    def get_statistics(self) -> Dict[str, Any]:
        """Get column statistics.

        Returns:
            A dict with name, data type name, total / non-null / null
            counts, null percentage, distinct-value count and min/max.
            When every non-null value is an int or float, 'mean', 'median',
            'std_dev' and 'variance' are added as well.
        """
        self._update_statistics()

        non_null_values = [v for v in self.values if v is not None]
        total_count = len(self.values)
        null_count = sum(self.null_bitmap)

        stats = {
            'name': self.name,
            'data_type': self.data_type.__name__,
            'total_count': total_count,
            'non_null_count': len(non_null_values),
            'null_count': null_count,
            # max(1, ...) avoids dividing by zero on an empty column
            'null_percentage': (null_count / max(1, total_count)) * 100,
            'unique_count': len(self._unique_values),
            'min_value': self._min_value,
            'max_value': self._max_value
        }

        # Add numeric statistics if applicable
        if non_null_values and all(isinstance(v, (int, float)) for v in non_null_values):
            # stdev/variance need at least two data points
            has_spread = len(non_null_values) > 1
            try:
                stats.update({
                    'mean': statistics.mean(non_null_values),
                    'median': statistics.median(non_null_values),
                    'std_dev': statistics.stdev(non_null_values) if has_spread else 0,
                    'variance': statistics.variance(non_null_values) if has_spread else 0
                })
            except (statistics.StatisticsError, ValueError):
                pass  # best effort: report the basic statistics only

        return stats

    def filter_indices(self, predicate: callable) -> List[int]:
        """Get indices where predicate is true.

        The predicate is called with every cell, including None cells.
        """
        return [i for i, value in enumerate(self.values) if predicate(value)]

    def __len__(self) -> int:
        """Get number of values in column."""
        return len(self.values)
144
+
145
+
146
class xEdgePropertyStoreStrategy(aEdgeStrategy):
    """
    Edge Property Store strategy for columnar edge attributes.

    Efficiently stores and queries edge properties in columnar format
    for analytical workloads and complex property-based filtering.

    Edges live in three parallel lists (source, target, edge id) and every
    property is a PropertyColumn of the same length, so position ``i``
    across all of them describes one edge. Parallel edges between the same
    pair of vertices are allowed; (source, target) lookups that do not name
    an edge id operate on the first such edge.
    """

    def __init__(self, traits: EdgeTrait = EdgeTrait.NONE, **options):
        """Initialize the Edge Property Store strategy.

        Recognized options: 'enable_compression' (default True) and
        'batch_size' (default 1000). Within this class both are only stored
        and reported back through the statistics / info accessors.
        """
        super().__init__(EdgeMode.EDGE_PROPERTY_STORE, traits, **options)

        self.enable_compression = options.get('enable_compression', True)
        self.default_batch_size = options.get('batch_size', 1000)

        # Core edge storage (parallel lists, one entry per edge)
        self._source_vertices: List[str] = []  # Source vertex names
        self._target_vertices: List[str] = []  # Target vertex names
        self._edge_ids: List[str] = []  # Edge identifiers

        # Columnar property storage
        self._property_columns: Dict[str, PropertyColumn] = {}

        # Standard edge properties columns
        self._init_standard_columns()

        # Vertex management
        self._vertices: Set[str] = set()
        self._edge_count = 0
        self._next_edge_id = 0

        # Indices for fast lookups
        self._edge_index: Dict[Tuple[str, str], List[int]] = defaultdict(list)  # (source, target) -> [positions]
        self._vertex_out_edges: Dict[str, List[int]] = defaultdict(list)  # vertex -> [edge_positions]
        self._vertex_in_edges: Dict[str, List[int]] = defaultdict(list)  # vertex -> [edge_positions]

    def get_supported_traits(self) -> EdgeTrait:
        """Get the traits supported by the edge property store strategy."""
        return (EdgeTrait.COLUMNAR | EdgeTrait.MULTI | EdgeTrait.COMPRESSED)

    def _init_standard_columns(self) -> None:
        """Initialize standard edge property columns."""
        for column_name, data_type in (
            ('weight', float),
            ('timestamp', float),
            ('label', str),
            ('category', str),
        ):
            self._property_columns[column_name] = PropertyColumn(column_name, data_type)

    def _generate_edge_id(self) -> str:
        """Generate unique edge ID of the form 'edge_<n>' (n starts at 1)."""
        self._next_edge_id += 1
        return f"edge_{self._next_edge_id}"

    def _add_to_indices(self, position: int, source: str, target: str) -> None:
        """Add edge to lookup indices and register both endpoints."""
        self._edge_index[(source, target)].append(position)
        self._vertex_out_edges[source].append(position)
        self._vertex_in_edges[target].append(position)
        self._vertices.add(source)
        self._vertices.add(target)

    def _remove_from_indices(self, position: int) -> None:
        """Remove edge from lookup indices.

        Must run while the core lists still contain the edge, because the
        endpoints are read from them. Every indexed position greater than
        *position* is shifted down by one to match the deletion the caller
        performs afterwards. Out-of-range positions are ignored.
        """
        if not 0 <= position < len(self._source_vertices):
            return

        source = self._source_vertices[position]
        target = self._target_vertices[position]
        edge_key = (source, target)

        # Remove from indices
        self._edge_index[edge_key].remove(position)
        if not self._edge_index[edge_key]:
            del self._edge_index[edge_key]

        self._vertex_out_edges[source].remove(position)
        self._vertex_in_edges[target].remove(position)

        # Update positions in indices (shift down)
        for index in (self._edge_index, self._vertex_out_edges, self._vertex_in_edges):
            for positions in index.values():
                for i, pos in enumerate(positions):
                    if pos > position:
                        positions[i] = pos - 1

    def _ensure_property_column(self, property_name: str, data_type: type = object) -> None:
        """Ensure property column exists, backfilled with None for existing edges."""
        if property_name in self._property_columns:
            return

        column = PropertyColumn(property_name, data_type)
        for _ in range(self._edge_count):
            column.append(None)

        self._property_columns[property_name] = column

    # ============================================================================
    # CORE EDGE OPERATIONS
    # ============================================================================

    def add_edge(self, source: str, target: str, **properties) -> str:
        """Add edge with properties to columnar store.

        Args:
            source: Source vertex name.
            target: Target vertex name.
            **properties: Property values for the edge. The optional
                'edge_id' key supplies the identifier; when missing or
                None a fresh one is generated. Unknown property names
                create new columns whose data type is taken from the value.

        Returns:
            The identifier of the new edge.

        Raises:
            TypeError: if a property value is unhashable. Nothing is stored
                in that case.
        """
        edge_id = properties.pop('edge_id', None)
        if edge_id is None:
            # Generated lazily so a caller-supplied id does not burn a number.
            edge_id = self._generate_edge_id()

        # PropertyColumn keeps distinct values in a set, so an unhashable
        # value would fail midway through the column appends and leave the
        # lists at different lengths. Fail before anything is mutated.
        for value in properties.values():
            if value is not None:
                hash(value)

        # Add to core storage
        position = len(self._source_vertices)
        self._source_vertices.append(source)
        self._target_vertices.append(target)
        self._edge_ids.append(edge_id)

        # Add to indices
        self._add_to_indices(position, source, target)

        # Create columns for properties seen for the first time
        for prop_name, value in properties.items():
            data_type = type(value) if value is not None else object
            self._ensure_property_column(prop_name, data_type)

        # One cell per column; properties not supplied are stored as None
        for column_name, column in self._property_columns.items():
            column.append(properties.get(column_name))

        self._edge_count += 1
        return edge_id

    def remove_edge(self, source: str, target: str, edge_id: Optional[str] = None) -> bool:
        """Remove edge from property store.

        Args:
            source: Source vertex name.
            target: Target vertex name.
            edge_id: Identifier of the parallel edge to remove. When None,
                the first edge between the two vertices is removed.

        Returns:
            True if an edge was removed; False if there is no edge between
            the vertices or none of them carries *edge_id*.

        The endpoints stay registered as vertices.
        """
        positions = self._edge_index.get((source, target))
        if not positions:
            return False

        if edge_id is None:
            position_to_remove = positions[0]
        else:
            position_to_remove = next(
                (pos for pos in positions if self._edge_ids[pos] == edge_id),
                None,
            )
            if position_to_remove is None:
                # A specific edge was requested; do not remove another one.
                return False

        # Indices first: they read the endpoints from the core lists.
        self._remove_from_indices(position_to_remove)

        del self._source_vertices[position_to_remove]
        del self._target_vertices[position_to_remove]
        del self._edge_ids[position_to_remove]

        # Remove from all property columns
        for column in self._property_columns.values():
            column.remove_at_index(position_to_remove)

        self._edge_count -= 1
        return True

    def has_edge(self, source: str, target: str) -> bool:
        """Check if edge exists."""
        return (source, target) in self._edge_index

    def get_edge_data(self, source: str, target: str) -> Optional[Dict[str, Any]]:
        """Get edge data with all properties.

        Returns:
            A dict with 'source', 'target', 'edge_id', 'position' and one
            key per property column for the first matching edge, or None
            if no such edge exists.
        """
        positions = self._edge_index.get((source, target))
        if not positions:
            return None

        # Return data for first matching edge
        position = positions[0]
        edge_data = {
            'source': source,
            'target': target,
            'edge_id': self._edge_ids[position],
            'position': position
        }

        # Add all properties
        for prop_name, column in self._property_columns.items():
            edge_data[prop_name] = column.get_value(position)

        return edge_data

    def neighbors(self, vertex: str, direction: str = 'out') -> Iterator[str]:
        """Get neighbors of vertex.

        *direction* is 'out', 'in' or 'both'; any other value yields
        nothing. Each neighbor is yielded once.
        """
        seen = set()

        if direction in ('out', 'both'):
            for pos in self._vertex_out_edges.get(vertex, []):
                target = self._target_vertices[pos]
                if target not in seen:
                    seen.add(target)
                    yield target

        if direction in ('in', 'both'):
            for pos in self._vertex_in_edges.get(vertex, []):
                source = self._source_vertices[pos]
                if source not in seen:
                    seen.add(source)
                    yield source

    def degree(self, vertex: str, direction: str = 'out') -> int:
        """Get degree of vertex.

        'out' and 'in' count edges (parallel edges count individually).
        Any other direction is treated as 'both' and returns the number of
        distinct neighbors across both directions.
        """
        if direction == 'out':
            return len(self._vertex_out_edges.get(vertex, []))
        if direction == 'in':
            return len(self._vertex_in_edges.get(vertex, []))

        # both
        out_neighbors = {self._target_vertices[pos] for pos in self._vertex_out_edges.get(vertex, [])}
        in_neighbors = {self._source_vertices[pos] for pos in self._vertex_in_edges.get(vertex, [])}
        return len(out_neighbors | in_neighbors)

    def edges(self, data: bool = False) -> Iterator[tuple]:
        """Get all edges.

        Yields (source, target) tuples, or (source, target, data_dict) when
        *data* is true; data_dict holds 'edge_id' plus every property.
        """
        for i, (source, target) in enumerate(zip(self._source_vertices, self._target_vertices)):
            if not data:
                yield (source, target)
                continue

            edge_data = {'edge_id': self._edge_ids[i]}
            for prop_name, column in self._property_columns.items():
                edge_data[prop_name] = column.get_value(i)
            yield (source, target, edge_data)

    def vertices(self) -> Iterator[str]:
        """Get all vertices."""
        return iter(self._vertices)

    def __len__(self) -> int:
        """Get number of edges."""
        return self._edge_count

    def vertex_count(self) -> int:
        """Get number of vertices."""
        return len(self._vertices)

    def clear(self) -> None:
        """Clear all data.

        Property columns stay defined but are emptied, and edge id
        generation restarts from the beginning.
        """
        self._source_vertices.clear()
        self._target_vertices.clear()
        self._edge_ids.clear()

        for column in self._property_columns.values():
            column.values.clear()
            column.null_bitmap.clear()
            column._unique_values.clear()
            column._stats_dirty = True

        self._vertices.clear()
        self._edge_index.clear()
        self._vertex_out_edges.clear()
        self._vertex_in_edges.clear()

        self._edge_count = 0
        self._next_edge_id = 0

    def add_vertex(self, vertex: str) -> None:
        """Add vertex to graph."""
        self._vertices.add(vertex)

    def remove_vertex(self, vertex: str) -> bool:
        """Remove vertex and all its edges.

        Returns:
            False if the vertex is unknown, True otherwise.
        """
        if vertex not in self._vertices:
            return False

        # Snapshot the incident edges first; removal mutates the lists.
        edges_to_remove = [
            (source, target, edge_id)
            for source, target, edge_id in zip(
                self._source_vertices, self._target_vertices, self._edge_ids
            )
            if source == vertex or target == vertex
        ]

        # Later positions first, so fewer remaining entries have to shift.
        for source, target, edge_id in reversed(edges_to_remove):
            self.remove_edge(source, target, edge_id)

        # Drop the now-empty adjacency entries along with the vertex itself.
        self._vertex_out_edges.pop(vertex, None)
        self._vertex_in_edges.pop(vertex, None)
        self._vertices.discard(vertex)
        return True

    # ============================================================================
    # COLUMNAR ANALYTICS OPERATIONS
    # ============================================================================

    def add_property_column(self, column_name: str, data_type: type = object, default_value: Any = None) -> None:
        """Add new property column, filled with *default_value* for existing edges.

        Does nothing if the column already exists.
        """
        if column_name in self._property_columns:
            return

        column = PropertyColumn(column_name, data_type)

        # Backfill with default values
        for _ in range(self._edge_count):
            column.append(default_value)

        self._property_columns[column_name] = column

    def remove_property_column(self, column_name: str) -> bool:
        """Remove property column; return whether it existed."""
        if column_name not in self._property_columns:
            return False
        del self._property_columns[column_name]
        return True

    def get_property_columns(self) -> List[str]:
        """Get list of all property column names."""
        return list(self._property_columns)

    def get_column_data(self, column_name: str) -> Optional[List[Any]]:
        """Get a copy of all values from a specific column, or None if unknown."""
        column = self._property_columns.get(column_name)
        if column is None:
            return None
        return column.values.copy()

    def set_edge_property(self, source: str, target: str, property_name: str, value: Any) -> bool:
        """Set property value for specific edge.

        Acts on the first edge between the two vertices and creates the
        column if needed. Returns False when no such edge exists.
        """
        positions = self._edge_index.get((source, target))
        if not positions:
            return False

        # Ensure column exists
        self._ensure_property_column(property_name, type(value) if value is not None else object)

        # Set value for first matching edge
        self._property_columns[property_name].set_value(positions[0], value)
        return True

    def get_edge_property(self, source: str, target: str, property_name: str) -> Any:
        """Get property value for specific edge.

        Returns None when the edge or the column does not exist, as well
        as when the stored value is None.
        """
        positions = self._edge_index.get((source, target))
        if not positions or property_name not in self._property_columns:
            return None

        return self._property_columns[property_name].get_value(positions[0])

    def filter_edges_by_property(self, property_name: str, predicate: callable) -> List[Tuple[str, str, Dict[str, Any]]]:
        """Filter edges by property values.

        *predicate* is called with each cell of the column (None cells
        included). Returns (source, target, data_dict) for every edge it
        accepts, or an empty list if the column does not exist.
        """
        column = self._property_columns.get(property_name)
        if column is None:
            return []

        result = []
        for index in column.filter_indices(predicate):
            edge_data = {'edge_id': self._edge_ids[index]}
            for prop_name, prop_column in self._property_columns.items():
                edge_data[prop_name] = prop_column.get_value(index)

            result.append((self._source_vertices[index], self._target_vertices[index], edge_data))

        return result

    def aggregate_property(self, property_name: str, operation: str = 'count') -> Any:
        """Aggregate property values across all edges.

        Supported operations: 'count', 'unique', 'min', 'max', 'sum',
        'avg' / 'mean' and 'median'. None values are ignored.

        Returns:
            The aggregate, or None when the column is unknown, holds no
            non-null value, the operation is unknown, the values are not
            all int/float for a numeric operation, or they cannot be
            ordered for 'min' / 'max'.
        """
        column = self._property_columns.get(property_name)
        if column is None:
            return None

        non_null_values = [v for v in column.values if v is not None]
        if not non_null_values:
            return None

        if operation == 'count':
            return len(non_null_values)
        if operation == 'unique':
            return len(set(non_null_values))

        if operation in ('min', 'max'):
            pick = min if operation == 'min' else max
            try:
                return pick(non_null_values)
            except TypeError:
                # Mixed, non-comparable values: no meaningful extreme.
                return None

        numeric_operations = {
            'sum': sum,
            'avg': statistics.mean,
            'mean': statistics.mean,
            'median': statistics.median,
        }
        aggregate = numeric_operations.get(operation)
        if aggregate is None:
            return None
        if not all(isinstance(v, (int, float)) for v in non_null_values):
            return None
        return aggregate(non_null_values)

    def group_by_property(self, property_name: str) -> Dict[Any, List[int]]:
        """Group edge indices by property values.

        None is a group key of its own. Returns an empty dict if the
        column does not exist.
        """
        column = self._property_columns.get(property_name)
        if column is None:
            return {}

        groups = defaultdict(list)
        for i, value in enumerate(column.values):
            groups[value].append(i)

        return dict(groups)

    def get_property_statistics(self, property_name: str) -> Optional[Dict[str, Any]]:
        """Get statistics for a specific property column, or None if unknown."""
        column = self._property_columns.get(property_name)
        if column is None:
            return None
        return column.get_statistics()

    def get_all_statistics(self) -> Dict[str, Dict[str, Any]]:
        """Get statistics for all property columns."""
        return {name: column.get_statistics() for name, column in self._property_columns.items()}

    def export_to_dataframe_format(self) -> Dict[str, List[Any]]:
        """Export data in DataFrame-compatible format (column name -> list copy)."""
        data = {
            'source': self._source_vertices.copy(),
            'target': self._target_vertices.copy(),
            'edge_id': self._edge_ids.copy()
        }

        for prop_name, column in self._property_columns.items():
            data[prop_name] = column.values.copy()

        return data

    def get_schema(self) -> Dict[str, str]:
        """Get schema information for all columns (column name -> type name)."""
        schema = {
            'source': 'str',
            'target': 'str',
            'edge_id': 'str'
        }

        for prop_name, column in self._property_columns.items():
            schema[prop_name] = column.data_type.__name__

        return schema

    def get_comprehensive_statistics(self) -> Dict[str, Any]:
        """Get comprehensive statistics about the property store."""
        column_count = len(self._property_columns)

        return {
            'vertices': len(self._vertices),
            'edges': self._edge_count,
            'property_columns': column_count,
            'column_names': list(self._property_columns.keys()),
            'total_cells': self._edge_count * column_count,
            'memory_overhead': column_count * 100,  # Estimated
            'column_statistics': self.get_all_statistics(),
            'enable_compression': self.enable_compression,
            'batch_size': self.default_batch_size
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        # NOTE(review): the 'remove_edge' figure below looks optimistic;
        # removal shifts positions across every index entry and deletes
        # from every column list. Confirm before relying on it.
        return {
            'strategy': 'EDGE_PROPERTY_STORE',
            'backend': 'Columnar storage with property indices',
            'enable_compression': self.enable_compression,
            'batch_size': self.default_batch_size,
            'property_columns': len(self._property_columns),
            'complexity': {
                'add_edge': 'O(p)',  # p = number of properties
                'remove_edge': 'O(p + degree)',
                'property_filter': 'O(e)',  # e = number of edges
                'property_aggregate': 'O(e)',
                'group_by': 'O(e)',
                'space': 'O(e * p)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics."""
        stats = self.get_comprehensive_statistics()
        column_names = stats['column_names']
        column_count = len(self._property_columns)

        return {
            'vertices': stats['vertices'],
            'edges': stats['edges'],
            'property_columns': stats['property_columns'],
            'total_cells': stats['total_cells'],
            # Show at most five names, with an ellipsis when truncated
            'column_names': ', '.join(column_names[:5]) + ('...' if len(column_names) > 5 else ''),
            'avg_properties_per_edge': f"{column_count:.1f}",
            'memory_usage': f"{stats['edges'] * column_count * 8 + stats['memory_overhead']} bytes (estimated)"
        }