exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,655 @@
|
|
1
|
+
"""
|
2
|
+
Edge Property Store Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the EDGE_PROPERTY_STORE strategy for columnar
|
5
|
+
edge attribute storage with efficient analytical queries.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Set, Optional, Tuple, Union
|
9
|
+
from collections import defaultdict
|
10
|
+
import statistics
|
11
|
+
from ._base_edge import aEdgeStrategy
|
12
|
+
from ...types import EdgeMode, EdgeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class PropertyColumn:
    """Columnar storage for a single edge property.

    ``values`` holds one entry per edge in insertion order and
    ``null_bitmap`` runs parallel to it, holding ``True`` wherever the
    stored value is ``None``.  The set of distinct non-null values and the
    min/max are tracked as column statistics; min/max are recomputed lazily
    whenever the column has been modified.
    """

    def __init__(self, name: str, data_type: type = object):
        """Create an empty column called *name* declared to hold *data_type*."""
        self.name = name
        self.data_type = data_type
        self.values: List[Any] = []
        self.null_bitmap: List[bool] = []  # True if value is null

        # Column statistics
        self._min_value = None
        self._max_value = None
        self._unique_values: Set[Any] = set()
        self._stats_dirty = True

    def append(self, value: Any) -> None:
        """Append *value* (``None`` is recorded as a null) to the column."""
        self.values.append(value)
        self.null_bitmap.append(value is None)
        if value is not None:
            self._unique_values.add(value)

        self._stats_dirty = True

    def _forget_if_absent(self, value: Any) -> None:
        """Drop *value* from the distinct-value set once no row holds it."""
        # A value may occur in several rows, so it only stops being "unique"
        # when the last occurrence has gone.
        if value is not None and value not in self.values:
            self._unique_values.discard(value)

    def set_value(self, index: int, value: Any) -> None:
        """Overwrite the value at *index*; out-of-range indices are ignored."""
        if not 0 <= index < len(self.values):
            return

        old_value = self.values[index]
        self.values[index] = value
        self.null_bitmap[index] = value is None

        if value is not None:
            self._unique_values.add(value)
        # Checked after the write so that re-assigning the same value, or
        # overwriting one of several duplicates, keeps the value tracked.
        self._forget_if_absent(old_value)

        self._stats_dirty = True

    def get_value(self, index: int) -> Any:
        """Return the value at *index*, or ``None`` when out of range."""
        if 0 <= index < len(self.values):
            return self.values[index]
        return None

    def remove_at_index(self, index: int) -> None:
        """Delete the entry at *index*; out-of-range indices are ignored."""
        if not 0 <= index < len(self.values):
            return

        old_value = self.values.pop(index)
        del self.null_bitmap[index]
        self._forget_if_absent(old_value)

        self._stats_dirty = True

    def _update_statistics(self) -> None:
        """Recompute min/max if the column changed since the last call."""
        if not self._stats_dirty:
            return

        non_null_values = [v for v in self.values if v is not None]

        self._min_value = None
        self._max_value = None
        if non_null_values:
            try:
                lowest = min(non_null_values)
                highest = max(non_null_values)
            except (TypeError, ValueError):
                # Values that cannot be ordered against each other have no
                # meaningful min/max; leave both as None.
                pass
            else:
                self._min_value = lowest
                self._max_value = highest

        self._stats_dirty = False

    def get_statistics(self) -> Dict[str, Any]:
        """Return a dict of column statistics.

        Always contains name, data type name, total/non-null/null counts,
        null percentage, unique count and min/max.  When every non-null
        value is an ``int`` or ``float`` the dict additionally carries
        ``mean``, ``median``, ``std_dev`` and ``variance``.
        """
        self._update_statistics()

        non_null_values = [v for v in self.values if v is not None]
        null_count = sum(self.null_bitmap)

        stats = {
            'name': self.name,
            'data_type': self.data_type.__name__,
            'total_count': len(self.values),
            'non_null_count': len(non_null_values),
            'null_count': null_count,
            # max(1, ...) avoids dividing by zero on an empty column
            'null_percentage': (null_count / max(1, len(self.values))) * 100,
            'unique_count': len(self._unique_values),
            'min_value': self._min_value,
            'max_value': self._max_value
        }

        # Add numeric statistics if applicable
        if non_null_values and all(isinstance(v, (int, float)) for v in non_null_values):
            # stdev/variance need at least two samples; report 0 otherwise
            has_spread = len(non_null_values) > 1
            try:
                stats.update({
                    'mean': statistics.mean(non_null_values),
                    'median': statistics.median(non_null_values),
                    'std_dev': statistics.stdev(non_null_values) if has_spread else 0,
                    'variance': statistics.variance(non_null_values) if has_spread else 0
                })
            except (statistics.StatisticsError, ValueError):
                pass

        return stats

    def filter_indices(self, predicate: callable) -> List[int]:
        """Return the indices whose value makes *predicate* truthy.

        The predicate is also called for null entries (with ``None``).
        """
        return [i for i, value in enumerate(self.values) if predicate(value)]

    def __len__(self) -> int:
        """Return the number of entries (including nulls) in the column."""
        return len(self.values)
|
144
|
+
|
145
|
+
|
146
|
+
class xEdgePropertyStoreStrategy(aEdgeStrategy):
    """
    Edge Property Store strategy for columnar edge attributes.

    Efficiently stores and queries edge properties in columnar format
    for analytical workloads and complex property-based filtering.

    Every edge occupies one row position.  Source, target and edge id live
    in three parallel lists, and each property lives in its own
    ``PropertyColumn`` of the same length.  Lookup indices map endpoints to
    row positions and are renumbered whenever a row is deleted.
    """

    def __init__(self, traits: EdgeTrait = EdgeTrait.NONE, **options):
        """Initialize the Edge Property Store strategy.

        Recognised options: ``enable_compression`` (default ``True``) and
        ``batch_size`` (default ``1000``).  Both are only stored and
        reported by this class.
        """
        super().__init__(EdgeMode.EDGE_PROPERTY_STORE, traits, **options)

        self.enable_compression = options.get('enable_compression', True)
        self.default_batch_size = options.get('batch_size', 1000)

        # Core edge storage (parallel lists, one entry per edge row)
        self._source_vertices: List[str] = []  # Source vertex names
        self._target_vertices: List[str] = []  # Target vertex names
        self._edge_ids: List[str] = []  # Edge identifiers

        # Columnar property storage
        self._property_columns: Dict[str, PropertyColumn] = {}

        # Standard edge properties columns
        self._init_standard_columns()

        # Vertex management
        self._vertices: Set[str] = set()
        self._edge_count = 0
        self._next_edge_id = 0

        # Indices for fast lookups
        self._edge_index: Dict[Tuple[str, str], List[int]] = defaultdict(list)  # (source, target) -> [positions]
        self._vertex_out_edges: Dict[str, List[int]] = defaultdict(list)  # vertex -> [edge_positions]
        self._vertex_in_edges: Dict[str, List[int]] = defaultdict(list)  # vertex -> [edge_positions]

    def get_supported_traits(self) -> EdgeTrait:
        """Get the traits supported by the edge property store strategy."""
        return (EdgeTrait.COLUMNAR | EdgeTrait.MULTI | EdgeTrait.COMPRESSED)

    def _init_standard_columns(self) -> None:
        """Create the built-in weight, timestamp, label and category columns."""
        for column_name, data_type in (
            ('weight', float),
            ('timestamp', float),
            ('label', str),
            ('category', str),
        ):
            self._property_columns[column_name] = PropertyColumn(column_name, data_type)

    def _generate_edge_id(self) -> str:
        """Generate the next sequential edge ID (``edge_1``, ``edge_2``, ...)."""
        self._next_edge_id += 1
        return f"edge_{self._next_edge_id}"

    def _add_to_indices(self, position: int, source: str, target: str) -> None:
        """Register row *position* in the lookup indices and vertex set."""
        self._edge_index[(source, target)].append(position)
        self._vertex_out_edges[source].append(position)
        self._vertex_in_edges[target].append(position)
        self._vertices.add(source)
        self._vertices.add(target)

    def _remove_from_indices(self, position: int) -> None:
        """Unregister row *position* and renumber the rows that follow it.

        Must be called BEFORE the row is deleted from the core lists, since
        the endpoints are read from them.  Invalid positions are ignored.
        """
        if not 0 <= position < len(self._source_vertices):
            return

        source = self._source_vertices[position]
        target = self._target_vertices[position]
        edge_key = (source, target)

        # Remove from indices
        self._edge_index[edge_key].remove(position)
        if not self._edge_index[edge_key]:
            # Drop empty entries so has_edge() stays a plain membership test
            del self._edge_index[edge_key]

        self._vertex_out_edges[source].remove(position)
        self._vertex_in_edges[target].remove(position)

        # Update positions in indices (shift down): every row after the
        # removed one moves up by one once the caller deletes the row.
        for index in (self._edge_index, self._vertex_out_edges, self._vertex_in_edges):
            for positions in index.values():
                positions[:] = [p - 1 if p > position else p for p in positions]

    def _ensure_property_column(self, property_name: str, data_type: type = object) -> None:
        """Create column *property_name* if missing, null-filled for existing edges."""
        if property_name in self._property_columns:
            return

        column = PropertyColumn(property_name, data_type)

        # Backfill with None values for existing edges
        for _ in range(self._edge_count):
            column.append(None)

        self._property_columns[property_name] = column

    def _row_properties(self, position: int) -> Dict[str, Any]:
        """Return ``{column name: value}`` for the edge row at *position*."""
        return {
            prop_name: column.get_value(position)
            for prop_name, column in self._property_columns.items()
        }

    # ============================================================================
    # CORE EDGE OPERATIONS
    # ============================================================================

    def add_edge(self, source: str, target: str, **properties) -> str:
        """Add edge with properties to columnar store.

        An ``edge_id`` keyword, if given, is used as the edge identifier;
        otherwise one is generated.  Unknown property names create new
        columns typed after the supplied value.  Returns the edge id.
        """
        edge_id = properties.pop('edge_id', None)
        if edge_id is None:
            # Generate lazily so caller-supplied ids do not consume counter values
            edge_id = self._generate_edge_id()

        # Add to core storage
        position = len(self._source_vertices)
        self._source_vertices.append(source)
        self._target_vertices.append(target)
        self._edge_ids.append(edge_id)

        # Add to indices
        self._add_to_indices(position, source, target)

        # Add properties to columns
        for prop_name, value in properties.items():
            if prop_name not in self._property_columns:
                # Infer data type
                data_type = type(value) if value is not None else object
                self._ensure_property_column(prop_name, data_type)

            self._property_columns[prop_name].append(value)

        # Fill missing properties with None so all columns stay row-aligned
        for column_name, column in self._property_columns.items():
            if column_name not in properties:
                column.append(None)

        self._edge_count += 1
        return edge_id

    def remove_edge(self, source: str, target: str, edge_id: Optional[str] = None) -> bool:
        """Remove one edge between *source* and *target*.

        Without *edge_id* (``None`` or empty) the first stored edge between
        the endpoints is removed.  With *edge_id*, only the edge carrying
        that id is removed.  Returns ``True`` if an edge was removed and
        ``False`` if no matching edge exists.
        """
        positions = self._edge_index.get((source, target), [])

        if not positions:
            return False

        # Find specific edge by ID or use first
        if edge_id:
            position_to_remove = next(
                (pos for pos in positions if self._edge_ids[pos] == edge_id),
                None,
            )
            if position_to_remove is None:
                # An explicit id that matches nothing must not delete some
                # other parallel edge.
                return False
        else:
            position_to_remove = positions[0]

        # Remove from all structures (indices first: they read the core lists)
        self._remove_from_indices(position_to_remove)

        del self._source_vertices[position_to_remove]
        del self._target_vertices[position_to_remove]
        del self._edge_ids[position_to_remove]

        # Remove from all property columns
        for column in self._property_columns.values():
            column.remove_at_index(position_to_remove)

        self._edge_count -= 1
        return True

    def has_edge(self, source: str, target: str) -> bool:
        """Check if at least one edge from *source* to *target* exists."""
        return (source, target) in self._edge_index

    def get_edge_data(self, source: str, target: str) -> Optional[Dict[str, Any]]:
        """Get edge data with all properties.

        Returns the data of the first stored edge between the endpoints
        (source, target, edge_id, row position and every property column),
        or ``None`` when no such edge exists.
        """
        positions = self._edge_index.get((source, target), [])

        if not positions:
            return None

        # Return data for first matching edge
        position = positions[0]
        edge_data = {
            'source': source,
            'target': target,
            'edge_id': self._edge_ids[position],
            'position': position
        }

        # Add all properties
        edge_data.update(self._row_properties(position))

        return edge_data

    def neighbors(self, vertex: str, direction: str = 'out') -> Iterator[str]:
        """Yield each distinct neighbor of *vertex* once.

        *direction* is ``'out'`` (targets of outgoing edges), ``'in'``
        (sources of incoming edges) or ``'both'``.
        """
        neighbors_found = set()

        if direction in ['out', 'both']:
            for pos in self._vertex_out_edges.get(vertex, []):
                target = self._target_vertices[pos]
                if target not in neighbors_found:
                    neighbors_found.add(target)
                    yield target

        if direction in ['in', 'both']:
            for pos in self._vertex_in_edges.get(vertex, []):
                source = self._source_vertices[pos]
                if source not in neighbors_found:
                    neighbors_found.add(source)
                    yield source

    def degree(self, vertex: str, direction: str = 'out') -> int:
        """Get degree of vertex.

        ``'out'`` and ``'in'`` count edges (parallel edges included).  Any
        other value is treated as "both" and returns the number of DISTINCT
        neighboring vertices, not an edge count.
        """
        if direction == 'out':
            return len(self._vertex_out_edges.get(vertex, []))
        elif direction == 'in':
            return len(self._vertex_in_edges.get(vertex, []))
        else:  # both
            out_neighbors = set(self._target_vertices[pos] for pos in self._vertex_out_edges.get(vertex, []))
            in_neighbors = set(self._source_vertices[pos] for pos in self._vertex_in_edges.get(vertex, []))
            return len(out_neighbors | in_neighbors)

    def edges(self, data: bool = False) -> Iterator[tuple]:
        """Yield all edges in row order.

        Yields ``(source, target)`` tuples, or ``(source, target, dict)``
        with edge id and all property values when *data* is true.
        """
        endpoints = zip(self._source_vertices, self._target_vertices)
        for position, (source, target) in enumerate(endpoints):
            if data:
                edge_data = {'edge_id': self._edge_ids[position]}
                edge_data.update(self._row_properties(position))
                yield (source, target, edge_data)
            else:
                yield (source, target)

    def vertices(self) -> Iterator[str]:
        """Get all vertices."""
        return iter(self._vertices)

    def __len__(self) -> int:
        """Get number of edges."""
        return self._edge_count

    def vertex_count(self) -> int:
        """Get number of vertices."""
        return len(self._vertices)

    def clear(self) -> None:
        """Clear all edges and vertices; property columns are kept but emptied."""
        self._source_vertices.clear()
        self._target_vertices.clear()
        self._edge_ids.clear()

        for column in self._property_columns.values():
            column.values.clear()
            column.null_bitmap.clear()
            column._unique_values.clear()
            column._stats_dirty = True

        self._vertices.clear()
        self._edge_index.clear()
        self._vertex_out_edges.clear()
        self._vertex_in_edges.clear()

        self._edge_count = 0
        self._next_edge_id = 0

    def add_vertex(self, vertex: str) -> None:
        """Add vertex to graph."""
        self._vertices.add(vertex)

    def remove_vertex(self, vertex: str) -> bool:
        """Remove vertex and all its edges.

        Returns ``False`` when the vertex is unknown, ``True`` otherwise.
        """
        if vertex not in self._vertices:
            return False

        # Find all edges involving this vertex.  Snapshot them first because
        # removing an edge renumbers the rows that follow it.
        edges_to_remove = [
            (source, target, edge_id)
            for source, target, edge_id in zip(
                self._source_vertices, self._target_vertices, self._edge_ids
            )
            if source == vertex or target == vertex
        ]

        # Edges are addressed by endpoints + id, so removal order does not
        # affect correctness; reverse order keeps row shifting to a minimum.
        for source, target, edge_id in reversed(edges_to_remove):
            self.remove_edge(source, target, edge_id)

        # Remove vertex
        self._vertices.discard(vertex)
        return True

    # ============================================================================
    # COLUMNAR ANALYTICS OPERATIONS
    # ============================================================================

    def add_property_column(self, column_name: str, data_type: type = object, default_value: Any = None) -> None:
        """Add new property column, filled with *default_value* for existing edges.

        Does nothing if a column with that name already exists.
        """
        if column_name in self._property_columns:
            return

        column = PropertyColumn(column_name, data_type)

        # Backfill with default values
        for _ in range(self._edge_count):
            column.append(default_value)

        self._property_columns[column_name] = column

    def remove_property_column(self, column_name: str) -> bool:
        """Remove property column; returns whether a column was removed."""
        if column_name in self._property_columns:
            del self._property_columns[column_name]
            return True
        return False

    def get_property_columns(self) -> List[str]:
        """Get list of all property column names."""
        return list(self._property_columns.keys())

    def get_column_data(self, column_name: str) -> Optional[List[Any]]:
        """Get a copy of all values of a column, or ``None`` if it does not exist."""
        column = self._property_columns.get(column_name)
        if column is None:
            return None
        return column.values.copy()

    def set_edge_property(self, source: str, target: str, property_name: str, value: Any) -> bool:
        """Set property value for specific edge.

        Applies to the first stored edge between the endpoints and creates
        the column if needed.  Returns ``False`` if no such edge exists.
        """
        positions = self._edge_index.get((source, target), [])

        if not positions:
            return False

        # Ensure column exists
        if property_name not in self._property_columns:
            self._ensure_property_column(property_name, type(value) if value is not None else object)

        # Set value for first matching edge
        self._property_columns[property_name].set_value(positions[0], value)
        return True

    def get_edge_property(self, source: str, target: str, property_name: str) -> Any:
        """Get property value for specific edge.

        Reads from the first stored edge between the endpoints; returns
        ``None`` if the edge or the column does not exist.
        """
        positions = self._edge_index.get((source, target), [])

        if not positions or property_name not in self._property_columns:
            return None

        return self._property_columns[property_name].get_value(positions[0])

    def filter_edges_by_property(self, property_name: str, predicate: callable) -> List[Tuple[str, str, Dict[str, Any]]]:
        """Filter edges by property values.

        Returns ``(source, target, data)`` for every edge whose value in
        column *property_name* satisfies *predicate*; an unknown column
        yields an empty list.
        """
        if property_name not in self._property_columns:
            return []

        matching_indices = self._property_columns[property_name].filter_indices(predicate)

        result = []
        for index in matching_indices:
            edge_data = {'edge_id': self._edge_ids[index]}
            edge_data.update(self._row_properties(index))
            result.append((self._source_vertices[index], self._target_vertices[index], edge_data))

        return result

    def aggregate_property(self, property_name: str, operation: str = 'count') -> Any:
        """Aggregate property values across all edges.

        Supported operations: ``count``, ``unique``, ``min``, ``max``,
        ``sum``, ``avg``/``mean`` and ``median``.  Nulls are ignored.
        Returns ``None`` when the column is unknown, holds no non-null
        value, the operation is unknown, or the values do not support the
        operation (non-numeric for sum/avg/median, not mutually comparable
        for min/max).
        """
        if property_name not in self._property_columns:
            return None

        column = self._property_columns[property_name]
        non_null_values = [v for v in column.values if v is not None]

        if not non_null_values:
            return None

        if operation == 'count':
            return len(non_null_values)
        if operation == 'unique':
            return len(set(non_null_values))

        if operation in ('min', 'max'):
            pick = min if operation == 'min' else max
            try:
                return pick(non_null_values)
            except TypeError:
                # Mixed, incomparable values: report "no result" like the
                # numeric aggregates do instead of raising.
                return None

        if operation in ('sum', 'avg', 'mean', 'median'):
            if not all(isinstance(v, (int, float)) for v in non_null_values):
                return None
            if operation == 'sum':
                return sum(non_null_values)
            if operation == 'median':
                return statistics.median(non_null_values)
            return statistics.mean(non_null_values)

        return None

    def group_by_property(self, property_name: str) -> Dict[Any, List[int]]:
        """Group edge row positions by property value (``None`` is a group too)."""
        if property_name not in self._property_columns:
            return {}

        groups = defaultdict(list)
        for position, value in enumerate(self._property_columns[property_name].values):
            groups[value].append(position)

        return dict(groups)

    def get_property_statistics(self, property_name: str) -> Optional[Dict[str, Any]]:
        """Get statistics for a specific property column, or ``None`` if unknown."""
        column = self._property_columns.get(property_name)
        if column is None:
            return None
        return column.get_statistics()

    def get_all_statistics(self) -> Dict[str, Dict[str, Any]]:
        """Get statistics for all property columns."""
        return {name: column.get_statistics() for name, column in self._property_columns.items()}

    def export_to_dataframe_format(self) -> Dict[str, List[Any]]:
        """Export data as a dict of equally long column lists (copies)."""
        data = {
            'source': self._source_vertices.copy(),
            'target': self._target_vertices.copy(),
            'edge_id': self._edge_ids.copy()
        }

        for prop_name, column in self._property_columns.items():
            data[prop_name] = column.values.copy()

        return data

    def get_schema(self) -> Dict[str, str]:
        """Get schema information (column name -> type name) for all columns."""
        schema = {
            'source': 'str',
            'target': 'str',
            'edge_id': 'str'
        }

        for prop_name, column in self._property_columns.items():
            schema[prop_name] = column.data_type.__name__

        return schema

    def get_comprehensive_statistics(self) -> Dict[str, Any]:
        """Get comprehensive statistics about the property store."""
        column_stats = self.get_all_statistics()
        column_total = len(self._property_columns)

        return {
            'vertices': len(self._vertices),
            'edges': self._edge_count,
            'property_columns': column_total,
            'column_names': list(self._property_columns.keys()),
            'total_cells': self._edge_count * column_total,
            'memory_overhead': column_total * 100,  # Estimated
            'column_statistics': column_stats,
            'enable_compression': self.enable_compression,
            'batch_size': self.default_batch_size
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'EDGE_PROPERTY_STORE',
            'backend': 'Columnar storage with property indices',
            'enable_compression': self.enable_compression,
            'batch_size': self.default_batch_size,
            'property_columns': len(self._property_columns),
            'complexity': {
                'add_edge': 'O(p)',  # p = number of properties
                'remove_edge': 'O(p + degree)',
                'property_filter': 'O(e)',  # e = number of edges
                'property_aggregate': 'O(e)',
                'group_by': 'O(e)',
                'space': 'O(e * p)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics (sizes are rough estimates)."""
        stats = self.get_comprehensive_statistics()
        column_names = stats['column_names']

        return {
            'vertices': stats['vertices'],
            'edges': stats['edges'],
            'property_columns': stats['property_columns'],
            'total_cells': stats['total_cells'],
            # Show at most five column names, with an ellipsis if truncated
            'column_names': ', '.join(column_names[:5]) + ('...' if len(column_names) > 5 else ''),
            'avg_properties_per_edge': f"{len(self._property_columns):.1f}",
            'memory_usage': f"{stats['edges'] * len(self._property_columns) * 8 + stats['memory_overhead']} bytes (estimated)"
        }
|