exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,447 @@
|
|
1
|
+
"""
|
2
|
+
CSC (Compressed Sparse Column) Edge Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the CSC strategy for sparse graph representation
|
5
|
+
using compressed sparse column format for efficient column operations.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Set, Optional, Tuple
|
9
|
+
from collections import defaultdict
|
10
|
+
import bisect
|
11
|
+
from ._base_edge import aEdgeStrategy
|
12
|
+
from ...types import EdgeMode, EdgeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class xCSCStrategy(aEdgeStrategy):
    """
    CSC (Compressed Sparse Column) edge strategy for sparse graphs.

    Edges are stored column-wise: an edge ``source -> target`` is the entry
    at row ``id(source)``, column ``id(target)``. Three parallel arrays hold
    the matrix:

    * ``_col_ptr``     - ``_col_ptr[j]`` .. ``_col_ptr[j + 1]`` is the slice of
      ``_row_indices`` / ``_values`` belonging to column ``j``. It always has
      ``_num_cols + 1`` entries and its last entry equals the number of
      stored entries.
    * ``_row_indices`` - row ids, kept sorted inside each column slice.
    * ``_values``      - edge weights, parallel to ``_row_indices``.

    Vertex names are mapped to integer ids in creation order; ids are never
    reused after a vertex is removed.
    """

    def __init__(self, traits: EdgeTrait = EdgeTrait.NONE, **options):
        """
        Initialize the CSC strategy.

        Args:
            traits: Requested edge traits, forwarded to the base class.
            **options: Forwarded to the base class. Two keys are read here:
                ``weighted`` (default ``True``) - when false every edge is
                stored with weight ``1.0``; ``allow_duplicates`` (default
                ``False``) - when true, re-adding an existing edge stores a
                second entry instead of overwriting the weight.
        """
        super().__init__(EdgeMode.CSC, traits, **options)

        self.weighted = options.get('weighted', True)
        self.allow_duplicates = options.get('allow_duplicates', False)

        # CSC format: Compressed Sparse Column
        # col_ptr[j] to col_ptr[j+1] gives range in row_indices/values for column j
        self._col_ptr: List[int] = [0]      # Column pointers
        self._row_indices: List[int] = []   # Row indices
        self._values: List[float] = []      # Edge values/weights

        # Vertex management
        self._vertices: Set[str] = set()
        self._vertex_to_id: Dict[str, int] = {}
        self._id_to_vertex: Dict[int, str] = {}
        self._next_vertex_id = 0

        # Matrix dimensions
        self._num_rows = 0
        self._num_cols = 0
        self._nnz = 0  # Number of non-zeros

        # Quick access for compatibility
        self._edge_count = 0

    def get_supported_traits(self) -> EdgeTrait:
        """Get the traits supported by the CSC strategy."""
        return (EdgeTrait.SPARSE | EdgeTrait.COMPRESSED | EdgeTrait.CACHE_FRIENDLY | EdgeTrait.COLUMNAR)

    # ============================================================================
    # INTERNAL HELPERS
    # ============================================================================

    def _get_or_create_vertex_id(self, vertex: str) -> int:
        """Return the integer id of ``vertex``, registering it if unknown."""
        vertex_id = self._vertex_to_id.get(vertex)
        if vertex_id is None:
            vertex_id = self._next_vertex_id
            self._vertex_to_id[vertex] = vertex_id
            self._id_to_vertex[vertex_id] = vertex
            self._vertices.add(vertex)
            self._next_vertex_id += 1
        return vertex_id

    def _expand_matrix(self, new_rows: int, new_cols: int) -> None:
        """Grow the logical matrix to at least ``new_rows`` x ``new_cols``."""
        if new_rows > self._num_rows:
            self._num_rows = new_rows

        missing_cols = new_cols - self._num_cols
        if missing_cols > 0:
            # New columns are empty, so each one starts and ends at the
            # current end of the entry arrays.
            self._col_ptr.extend([len(self._row_indices)] * missing_cols)
            self._num_cols = new_cols

    def _column_bounds(self, col: int) -> Tuple[int, int]:
        """
        Return the ``(start, end)`` slice of the entry arrays for column ``col``.

        A column outside ``0 .. _num_cols - 1`` (for example a vertex that was
        registered but never used as a target) yields the empty range
        ``(0, 0)``.
        """
        if 0 <= col < self._num_cols:
            return self._col_ptr[col], self._col_ptr[col + 1]
        return 0, 0

    def _find_edge_in_column(self, col: int, row: int) -> int:
        """
        Find edge position in column, returns -1 if not found.

        Row ids are sorted within a column, so a bisection is used. When
        duplicates are stored, the leftmost matching entry is returned.
        """
        start, end = self._column_bounds(col)
        pos = bisect.bisect_left(self._row_indices, row, start, end)
        if pos < end and self._row_indices[pos] == row:
            return pos
        return -1

    def _insert_edge_in_column(self, col: int, row: int, value: float) -> None:
        """
        Insert an entry into column ``col`` keeping its row ids sorted.

        The column must already exist (see ``_expand_matrix``); ``_col_ptr``
        is indexed directly so that an invalid column raises instead of
        silently corrupting the pointer array.
        """
        start, end = self._col_ptr[col], self._col_ptr[col + 1]

        # Find insertion position inside this column's slice only
        pos = bisect.bisect_left(self._row_indices, row, start, end)

        self._row_indices.insert(pos, row)
        self._values.insert(pos, value)

        # Every later column (and the terminal pointer) shifts right by one
        for i in range(col + 1, len(self._col_ptr)):
            self._col_ptr[i] += 1

        self._nnz += 1

    def _remove_edge_from_column(self, col: int, pos: int) -> None:
        """Remove the entry at absolute position ``pos``, which lies in column ``col``."""
        del self._row_indices[pos]
        del self._values[pos]

        # Every later column (and the terminal pointer) shifts left by one
        for i in range(col + 1, len(self._col_ptr)):
            self._col_ptr[i] -= 1

        self._nnz -= 1

    def _lookup_edge(self, source: str, target: str) -> Optional[Tuple[int, int, int]]:
        """Return ``(row_id, col_id, position)`` of a stored edge, or ``None``."""
        row_id = self._vertex_to_id.get(source)
        col_id = self._vertex_to_id.get(target)
        if row_id is None or col_id is None:
            return None

        pos = self._find_edge_in_column(col_id, row_id)
        if pos == -1:
            return None
        return row_id, col_id, pos

    # ============================================================================
    # CORE EDGE OPERATIONS
    # ============================================================================

    def add_edge(self, source: str, target: str, **properties) -> str:
        """
        Add edge to CSC matrix.

        Only the ``weight`` property is stored (default ``1.0``; ignored when
        the strategy is unweighted). If the edge already exists and duplicates
        are not allowed, its weight is overwritten instead.

        Returns:
            The edge identifier ``"<source>-><target>"``.
        """
        # In CSC: source = row, target = column
        row_id = self._get_or_create_vertex_id(source)
        col_id = self._get_or_create_vertex_id(target)

        weight = properties.get('weight', 1.0) if self.weighted else 1.0

        # Expand matrix if needed
        self._expand_matrix(row_id + 1, col_id + 1)

        pos = self._find_edge_in_column(col_id, row_id)
        if pos != -1 and not self.allow_duplicates:
            # Update existing edge in place
            self._values[pos] = weight
            return f"{source}->{target}"

        # New edge, or an explicitly allowed duplicate
        self._insert_edge_in_column(col_id, row_id, weight)
        self._edge_count += 1

        return f"{source}->{target}"

    def remove_edge(self, source: str, target: str, edge_id: Optional[str] = None) -> bool:
        """
        Remove edge from CSC matrix.

        ``edge_id`` is accepted for interface compatibility and is not used.
        With duplicates stored, one entry is removed per call.

        Returns:
            ``True`` if an entry was removed, ``False`` if no such edge exists.
        """
        found = self._lookup_edge(source, target)
        if found is None:
            return False

        _, col_id, pos = found
        self._remove_edge_from_column(col_id, pos)
        self._edge_count -= 1
        return True

    def has_edge(self, source: str, target: str) -> bool:
        """Check if edge exists."""
        return self._lookup_edge(source, target) is not None

    def get_edge_data(self, source: str, target: str) -> Optional[Dict[str, Any]]:
        """Return a dict describing the edge, or ``None`` if it does not exist."""
        found = self._lookup_edge(source, target)
        if found is None:
            return None

        row_id, col_id, pos = found
        return {
            'source': source,
            'target': target,
            'weight': self._values[pos],
            'row_id': row_id,
            'col_id': col_id
        }

    def neighbors(self, vertex: str, direction: str = 'out') -> Iterator[str]:
        """
        Yield the neighbors of ``vertex``.

        Args:
            vertex: Vertex name; unknown vertices yield nothing.
            direction: ``'out'`` (targets of edges leaving the vertex),
                ``'in'`` (sources of edges entering it) or ``'both'``
                (outgoing first, then incoming). Any other value yields
                nothing.
        """
        vertex_id = self._vertex_to_id.get(vertex)
        if vertex_id is None:
            return

        if direction in ('out', 'both'):
            # Outgoing: vertex is the row, so every column has to be probed
            for col in range(self._num_cols):
                if self._find_edge_in_column(col, vertex_id) != -1:
                    name = self._id_to_vertex.get(col)
                    if name is not None:
                        yield name

        if direction in ('in', 'both'):
            # Incoming: vertex is the column, its slice lists the sources
            start, end = self._column_bounds(vertex_id)
            for i in range(start, end):
                name = self._id_to_vertex.get(self._row_indices[i])
                if name is not None:
                    yield name

    def degree(self, vertex: str, direction: str = 'out') -> int:
        """Get degree of vertex (number of items ``neighbors`` yields)."""
        return sum(1 for _ in self.neighbors(vertex, direction))

    def edges(self, data: bool = False) -> Iterator[tuple]:
        """
        Yield all edges in column-major order.

        Yields ``(source, target)`` tuples, or ``(source, target, edge_data)``
        when ``data`` is true, where ``edge_data`` holds ``weight``,
        ``row_id`` and ``col_id``.
        """
        for col in range(self._num_cols):
            target = self._id_to_vertex.get(col)
            # Compare against None: a falsy name such as "" is still a vertex
            if target is None:
                continue

            start, end = self._column_bounds(col)
            for i in range(start, end):
                row = self._row_indices[i]
                source = self._id_to_vertex.get(row)
                if source is None:
                    continue

                if data:
                    edge_data = {
                        'weight': self._values[i],
                        'row_id': row,
                        'col_id': col
                    }
                    yield (source, target, edge_data)
                else:
                    yield (source, target)

    def vertices(self) -> Iterator[str]:
        """Get all vertices."""
        return iter(self._vertices)

    def __len__(self) -> int:
        """Get number of edges."""
        return self._edge_count

    def vertex_count(self) -> int:
        """Get number of vertices."""
        return len(self._vertices)

    def clear(self) -> None:
        """Clear all data and reset vertex id allocation."""
        self._col_ptr = [0]
        self._row_indices.clear()
        self._values.clear()
        self._vertices.clear()
        self._vertex_to_id.clear()
        self._id_to_vertex.clear()

        self._num_rows = 0
        self._num_cols = 0
        self._nnz = 0
        self._edge_count = 0
        self._next_vertex_id = 0

    def add_vertex(self, vertex: str) -> None:
        """Add vertex to graph (no-op if it is already registered)."""
        self._get_or_create_vertex_id(vertex)

    def remove_vertex(self, vertex: str) -> bool:
        """
        Remove vertex and all its edges.

        The entry arrays are rebuilt in a single pass, dropping the vertex's
        whole column (incoming edges) and every entry whose row is the vertex
        (outgoing edges). Matrix dimensions are left unchanged and the
        vertex's id is not reused.

        Returns:
            ``True`` if the vertex existed, ``False`` otherwise.
        """
        vertex_id = self._vertex_to_id.get(vertex)
        if vertex_id is None:
            return False

        kept_rows: List[int] = []
        kept_values: List[float] = []
        new_col_ptr: List[int] = [0]

        for col in range(self._num_cols):
            if col != vertex_id:
                start, end = self._col_ptr[col], self._col_ptr[col + 1]
                for i in range(start, end):
                    row = self._row_indices[i]
                    if row != vertex_id:
                        kept_rows.append(row)
                        kept_values.append(self._values[i])
            # The removed vertex's own column stays in place, but empty
            new_col_ptr.append(len(kept_rows))

        removed = len(self._row_indices) - len(kept_rows)
        self._row_indices[:] = kept_rows
        self._values[:] = kept_values
        self._col_ptr[:] = new_col_ptr
        self._nnz -= removed
        self._edge_count -= removed

        # Remove vertex
        del self._vertex_to_id[vertex]
        del self._id_to_vertex[vertex_id]
        self._vertices.discard(vertex)

        return True

    # ============================================================================
    # CSC SPECIFIC OPERATIONS
    # ============================================================================

    def get_column(self, target: str) -> List[Tuple[str, float]]:
        """
        Get all incoming edges to target vertex (column in CSC).

        Returns:
            ``(source, weight)`` pairs in ascending source-id order; empty if
            the vertex is unknown or has no incoming edges.
        """
        col_id = self._vertex_to_id.get(target)
        if col_id is None:
            return []

        start, end = self._column_bounds(col_id)
        result = []
        for i in range(start, end):
            source = self._id_to_vertex.get(self._row_indices[i])
            if source is not None:
                result.append((source, self._values[i]))

        return result

    def matrix_vector_multiply(self, vector: Dict[str, float]) -> Dict[str, float]:
        """
        Multiply CSC matrix with vector (efficient column-wise).

        Args:
            vector: Maps vertex name to value; vertices missing from it are
                treated as zero (their columns are skipped).

        Returns:
            Maps each source vertex ``r`` to ``sum(weight(r -> c) * vector[c])``
            over the columns ``c`` present in ``vector``. Rows that receive no
            contribution are absent from the result.
        """
        result = defaultdict(float)

        for col in range(self._num_cols):
            col_vertex = self._id_to_vertex.get(col)
            if col_vertex is None or col_vertex not in vector:
                continue

            col_value = vector[col_vertex]
            start, end = self._column_bounds(col)

            for i in range(start, end):
                row_vertex = self._id_to_vertex.get(self._row_indices[i])
                if row_vertex is not None:
                    result[row_vertex] += self._values[i] * col_value

        return dict(result)

    def get_sparsity(self) -> float:
        """
        Get sparsity ratio (fraction of zero entries).

        Returns ``0.0`` for a matrix with no rows or no columns.
        """
        total_entries = self._num_rows * self._num_cols
        if total_entries == 0:
            return 0.0
        return 1.0 - (self._nnz / total_entries)

    def compress(self) -> None:
        """Compress storage by dropping empty columns at the end of the matrix."""
        while self._num_cols > 0 and self._col_ptr[self._num_cols - 1] == self._col_ptr[self._num_cols]:
            self._num_cols -= 1
            self._col_ptr.pop()

    def get_memory_usage(self) -> Dict[str, int]:
        """
        Get an estimated memory breakdown in bytes.

        The figures are nominal (4 bytes per index, 8 per value, 50 per
        vertex), not measured sizes of the Python objects.
        """
        col_ptr_bytes = len(self._col_ptr) * 4          # 4 bytes per int
        row_indices_bytes = len(self._row_indices) * 4
        values_bytes = len(self._values) * 8            # 8 bytes per float
        vertex_mapping_bytes = len(self._vertices) * 50  # Estimated

        return {
            'col_ptr_bytes': col_ptr_bytes,
            'row_indices_bytes': row_indices_bytes,
            'values_bytes': values_bytes,
            'vertex_mapping_bytes': vertex_mapping_bytes,
            'total_bytes': col_ptr_bytes + row_indices_bytes + values_bytes + vertex_mapping_bytes
        }

    def export_matrix(self) -> Dict[str, Any]:
        """Export copies of the CSC arrays, the vertex mappings and the dimensions."""
        return {
            'col_ptr': self._col_ptr.copy(),
            'row_indices': self._row_indices.copy(),
            'values': self._values.copy(),
            'vertex_to_id': self._vertex_to_id.copy(),
            'id_to_vertex': self._id_to_vertex.copy(),
            'dimensions': (self._num_rows, self._num_cols),
            'nnz': self._nnz
        }

    def get_statistics(self) -> Dict[str, Any]:
        """Get comprehensive CSC statistics."""
        memory = self.get_memory_usage()
        sparsity = self.get_sparsity()

        return {
            'vertices': len(self._vertices),
            'edges': self._edge_count,
            'matrix_dimensions': (self._num_rows, self._num_cols),
            'nnz': self._nnz,
            'sparsity': sparsity,
            'density': 1.0 - sparsity,
            'avg_edges_per_column': self._nnz / max(1, self._num_cols),
            'compression_ratio': self._nnz / max(1, self._num_rows * self._num_cols),
            'memory_usage': memory['total_bytes'],
            'weighted': self.weighted,
            'allow_duplicates': self.allow_duplicates
        }

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        # NOTE(review): the add/remove figures below cover only the bisection
        # inside a column; both operations also shift the tails of the entry
        # lists and rewrite the later column pointers.
        return {
            'strategy': 'CSC',
            'backend': 'Compressed Sparse Column matrix format',
            'weighted': self.weighted,
            'allow_duplicates': self.allow_duplicates,
            'complexity': {
                'add_edge': 'O(log k)',  # k = edges in column
                'remove_edge': 'O(log k)',
                'has_edge': 'O(log k)',
                'column_access': 'O(1)',
                'matrix_vector_mult': 'O(nnz)',
                'space': 'O(nnz + vertices)'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics, formatted for display."""
        stats = self.get_statistics()

        return {
            'vertices': stats['vertices'],
            'edges': stats['edges'],
            'matrix_size': f"{stats['matrix_dimensions'][0]}x{stats['matrix_dimensions'][1]}",
            'sparsity': f"{stats['sparsity'] * 100:.1f}%",
            'nnz': stats['nnz'],
            'avg_edges_per_col': f"{stats['avg_edges_per_column']:.1f}",
            'memory_usage': f"{stats['memory_usage']} bytes"
        }
|