orbitsi 0.1.0__cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orbitsi/__init__.py ADDED
File without changes
orbitsi/main.py ADDED
@@ -0,0 +1,59 @@
1
+ # orbitsi/main.py
2
+ import argparse
3
+ from orbitsi.utils import read_graph_from_file
4
+ from orbitsi.orbit import EVOKEOrbitCounter, ORCAOrbitCounter
5
+ from orbitsi.search import FilterEngine, OrderEngine, SearchEngine
6
+
7
def run_search(args):
    """Run the filter -> order -> search pipeline and print every match.

    Parameters:
    - args: parsed CLI namespace; reads ``data``, ``pattern``,
      ``orbit_counter`` and ``graphlet_size``.

    Prints the number of matches followed by one mapping per line.
    """
    data_graph = read_graph_from_file(args.data)
    pattern_graph = read_graph_from_file(args.pattern)

    counter_cls = EVOKEOrbitCounter if args.orbit_counter == "evoke" else ORCAOrbitCounter

    filter_engine = FilterEngine(
        data_graph=data_graph,
        pattern_graph=pattern_graph,
        orbit_counter_class=counter_cls,
        graphlet_size=args.graphlet_size,
    )

    # FilterEngine.run() returns a bare empty dict (not a 2-tuple) when some
    # pattern node is left without candidates. Unpacking that directly raises
    # ValueError, so the result is inspected before it is unpacked.
    filter_result = filter_engine.run()

    matches = []
    if filter_result:
        pattern_orbits, candidate_sets = filter_result
        if candidate_sets:
            order, pivot = OrderEngine(pattern_graph, pattern_orbits).run()
            search_engine = SearchEngine(data_graph, pattern_graph, candidate_sets, order, pivot)
            matches = search_engine.run()

    print(f"✅ Matches found: {len(matches)}")
    for match in matches:
        print(match)
28
+
29
+
30
def run_orbit_count(args):
    """Count node orbits of the graph stored at ``args.graph`` and print them.

    Parameters:
    - args: parsed CLI namespace; reads ``graph``, ``orbit_counter``,
      ``graphlet_size`` and ``induced``.
    """
    graph = read_graph_from_file(args.graph)

    if args.orbit_counter == "evoke":
        counter_cls = EVOKEOrbitCounter
    else:
        counter_cls = ORCAOrbitCounter

    orbit_counter = counter_cls(graph, size=args.graphlet_size)
    print(orbit_counter.get_orbits(induced=args.induced))
36
+
37
+
38
def cli_entrypoint():
    """Parse the ``orbitsi`` command line and dispatch to the selected handler.

    Two sub-commands are registered: ``search`` (handled by ``run_search``)
    and ``count-orbits`` (handled by ``run_orbit_count``).
    """
    parser = argparse.ArgumentParser(prog="orbitsi")
    subparsers = parser.add_subparsers(dest="command", required=True)

    def add_counter_options(sub):
        # Options that both sub-commands accept, registered in the same order.
        sub.add_argument("--orbit-counter", choices=["evoke", "orca"], default="evoke")
        sub.add_argument("--graphlet-size", type=int, choices=[4, 5], default=4)

    # --- search ---
    search_cmd = subparsers.add_parser("search", help="Subgraph isomorphism search")
    search_cmd.add_argument("--data", required=True, help="Path to data graph")
    search_cmd.add_argument("--pattern", required=True, help="Path to pattern graph")
    add_counter_options(search_cmd)
    search_cmd.set_defaults(func=run_search)

    # --- count-orbits ---
    count_cmd = subparsers.add_parser("count-orbits", help="Count node orbits in a graph")
    count_cmd.add_argument("--graph", required=True, help="Path to graph")
    add_counter_options(count_cmd)
    count_cmd.add_argument("--induced", action="store_true", help="Compute induced orbits")
    count_cmd.set_defaults(func=run_orbit_count)

    namespace = parser.parse_args()
    namespace.func(namespace)
@@ -0,0 +1,2 @@
1
+ from .evoke import EVOKEOrbitCounter
2
+ from .orca import ORCAOrbitCounter
orbitsi/orbit/evoke.py ADDED
@@ -0,0 +1,26 @@
1
+ import networkx as nx
2
+ import numpy as np
3
+ from _evoke_cpp import evoke_count
4
+ from .orbit_converter import OrbitMatrixConverter
5
+
6
class EVOKEOrbitCounter:
    """Orbit counter that delegates the counting to ``_evoke_cpp.evoke_count``."""

    def __init__(self, G: nx.Graph, size: int = 4):
        """Store the graph and graphlet size; counting is deferred until requested."""
        self.G = G
        self.size = size
        self.counts = None  # populated by count_orbits()
        self.converter = OrbitMatrixConverter()

    def _nx_to_cpp_graph(self) -> dict[int, list[int]]:
        """Return the graph as ``{node: [neighbours]}`` with every id cast to ``int``."""
        adjacency = {}
        for node in self.G.nodes:
            adjacency[int(node)] = [int(neighbour) for neighbour in self.G.neighbors(node)]
        return adjacency

    def count_orbits(self):
        """Call ``evoke_count`` on the graph, cache the result in ``self.counts`` and return it."""
        self.counts = evoke_count(self._nx_to_cpp_graph(), size=self.size, parallel=True)
        return self.counts

    def get_orbits(self, induced: bool = False) -> np.ndarray:
        """Return the orbit counts as an integer matrix with rows in sorted node order.

        The cached counts are returned as-is unless ``induced`` is true, in
        which case they are passed through
        ``OrbitMatrixConverter.noninduced_to_induced`` first.
        """
        if self.counts is None:
            self.count_orbits()

        rows = [self.counts[node] for node in sorted(self.counts)]
        orbit_matrix = np.array(rows, dtype=int)

        if induced:
            return self.converter.noninduced_to_induced(orbit_matrix)
        return orbit_matrix
@@ -0,0 +1,63 @@
1
+ import numpy as np
2
+ import os
3
+
4
class OrbitMatrixConverter:
    """Convert per-node orbit count matrices between non-induced and induced form.

    Input matrices have one row per node and either 15 or 73 columns. Column 0
    is copied unchanged; columns 1-3, 4-14 and (for 73 columns) 15-72 are each
    transformed with the conversion matrix stored under key 3, 4 and 5
    respectively.
    """

    def __init__(self):
        self.matrices = self._load_matrices()

    def _load_matrices(self):
        """Load conversion matrices as NumPy arrays, not matrix objects.

        Returns:
        - dict mapping the integer found after the first underscore of each
          archive key to the corresponding array.
        """
        this_dir = os.path.dirname(__file__)
        matrices_path = os.path.join(this_dir, 'orbit_conversion_matrices.npz')

        # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
        # which can execute arbitrary code. It is kept to preserve behaviour for
        # the data file shipped next to this module; never point this at an
        # untrusted file.
        # The context manager closes the archive handle once all arrays are read.
        with np.load(matrices_path, allow_pickle=True) as matrices_file:
            return {
                int(k.split('_')[1]): np.asarray(matrices_file[k])
                for k in matrices_file.files
            }

    def noninduced_to_induced(self, noninduced_matrix: np.ndarray) -> np.ndarray:
        """Convert non-induced orbit counts to induced counts.

        Parameters:
        - noninduced_matrix: array with 15 or 73 columns (one row per node).

        Returns:
        - integer array of the same shape.

        Raises:
        - ValueError: if the column count is neither 15 nor 73.
        """
        mat = self.matrices
        T = np.asarray(noninduced_matrix.T)  # ensure ndarray

        if noninduced_matrix.shape[1] == 15:
            induced = np.vstack([
                T[0:1, :],
                np.linalg.solve(mat[3], T[1:4, :]),
                np.linalg.solve(mat[4], T[4:15, :]),
            ])
        elif noninduced_matrix.shape[1] == 73:
            induced = np.vstack([
                T[0:1, :],
                np.linalg.solve(mat[3], T[1:4, :]),
                np.linalg.solve(mat[4], T[4:15, :]),
                np.linalg.solve(mat[5], T[15:, :]),
            ])
        else:
            raise ValueError("Unsupported orbit count size for conversion.")

        # np.linalg.solve works in floating point, so a count may come back as
        # e.g. 2.9999999; a plain int cast would truncate it to 2. Round to the
        # nearest integer before casting.
        return np.rint(induced.T).astype(int)

    def induced_to_noninduced(self, induced_matrix: np.ndarray) -> np.ndarray:
        """Convert induced orbit counts to non-induced counts.

        Parameters:
        - induced_matrix: array with 15 or 73 columns (one row per node).

        Returns:
        - integer array of the same shape.

        Raises:
        - ValueError: if the column count is neither 15 nor 73.
        """
        mat = self.matrices
        T = np.asarray(induced_matrix.T)  # ensure ndarray

        if induced_matrix.shape[1] == 15:
            noninduced = np.vstack([
                T[0:1, :],
                mat[3] @ T[1:4, :],
                mat[4] @ T[4:15, :],
            ])
        elif induced_matrix.shape[1] == 73:
            noninduced = np.vstack([
                T[0:1, :],
                mat[3] @ T[1:4, :],
                mat[4] @ T[4:15, :],
                mat[5] @ T[15:, :],
            ])
        else:
            raise ValueError("Unsupported orbit count size for conversion.")

        return np.asarray(noninduced.T, dtype=int)
orbitsi/orbit/orca.py ADDED
@@ -0,0 +1,34 @@
1
+ import networkx as nx
2
+ import numpy as np
3
+ from _orca_cpp import orca_count
4
+ from .orbit_converter import OrbitMatrixConverter
5
+
6
class ORCAOrbitCounter:
    """Orbit counter that delegates the counting to ``_orca_cpp.orca_count``."""

    def __init__(self, G: nx.Graph, size: int = 4):
        """Store the graph and graphlet size; counting is deferred until requested."""
        self.G = G
        self.size = size
        self.counts = None  # populated by count_orbits()
        self.converter = OrbitMatrixConverter()

    def _nx_to_cpp_adjlist(self) -> tuple[list[list[int]], dict]:
        """Build an index-based adjacency list for the graph.

        Returns ``(adj, mapping)`` where ``mapping`` sends each node to its
        position in sorted node order and ``adj[i]`` lists the positions of the
        neighbours of the node at position ``i``.
        """
        mapping = {node: index for index, node in enumerate(sorted(self.G.nodes()))}
        adj = [[] for _ in range(self.G.number_of_nodes())]
        for u, v in self.G.edges():
            iu, iv = mapping[u], mapping[v]
            # Undirected edge: record it on both endpoints.
            adj[iu].append(iv)
            adj[iv].append(iu)
        return adj, mapping

    def count_orbits(self) -> dict[int, list[int]]:
        """Call ``orca_count`` and cache its rows keyed by original node in ``self.counts``."""
        adj, mapping = self._nx_to_cpp_adjlist()
        orbit_matrix = orca_count(adj, self.size)
        index_to_node = dict(zip(mapping.values(), mapping.keys()))

        counts = {}
        for index, row in enumerate(orbit_matrix):
            counts[index_to_node[index]] = row
        self.counts = counts
        return self.counts

    def get_orbits(self, induced: bool = False) -> np.ndarray:
        """Return the orbit counts as an integer matrix with rows in sorted node order.

        The cached counts are returned as-is when ``induced`` is true; otherwise
        they are passed through ``OrbitMatrixConverter.induced_to_noninduced``.
        """
        if self.counts is None:
            self.count_orbits()

        rows = [self.counts[node] for node in sorted(self.counts)]
        orbit_matrix = np.array(rows, dtype=int)

        if induced:
            return orbit_matrix
        return self.converter.induced_to_noninduced(orbit_matrix)
@@ -0,0 +1,3 @@
1
+ from .filtering import FilterEngine
2
+ from .ordering import OrderEngine
3
+ from .searching import SearchEngine
@@ -0,0 +1,149 @@
1
+ import networkx as nx
2
+ from collections import defaultdict, Counter
3
+
4
class FilterEngine:
    """Build per-pattern-node candidate sets of data-graph nodes.

    Two filters are applied by ``run()``: a label-and-degree filter (LDF)
    followed by a neighbour-label-frequency filter (NLF). An orbit-count
    filter is implemented in ``orbit_filter`` but is not invoked by ``run()``.
    """

    def __init__(self, data_graph, pattern_graph, orbit_counter_class, graphlet_size=4):
        """Index data nodes by label and compute the pattern's orbit vectors.

        Parameters:
        - data_graph / pattern_graph: graphs whose nodes carry a ``label`` attribute.
        - orbit_counter_class: class called as ``cls(graph, size=...)`` that
          exposes ``get_orbits(induced=...)``.
        - graphlet_size: forwarded to the orbit counter as ``size``.
        """
        self.data_graph = data_graph
        self.pattern_graph = pattern_graph
        self.graphlet_size = graphlet_size
        self.orbit_counter_class = orbit_counter_class
        self.data_labels = nx.get_node_attributes(data_graph, "label")
        self.pattern_labels = nx.get_node_attributes(pattern_graph, "label")

        # label -> list of data nodes carrying that label
        self.label_index = defaultdict(list)
        for node, label in self.data_labels.items():
            self.label_index[label].append(node)

        # Orbit counters
        self.pattern_orbits = self._compute_orbits(pattern_graph, orbit_counter_class)

    def extract_induced_subgraph(self, data_graph, candidate_sets):
        """Return the subgraph induced by all candidate nodes, relabelled 0..n-1.

        Returns:
        - (G_sub, old_to_new, new_to_old): the relabelled graph and both id maps.
        """
        candidate_nodes = {v for candidates in candidate_sets.values() for v in candidates}

        # Mapping: original node -> new node id (0 to n-1)
        old_to_new = {v: i for i, v in enumerate(sorted(candidate_nodes))}
        new_to_old = {i: v for v, i in old_to_new.items()}

        # Build induced subgraph with relabeled nodes
        G_sub = nx.Graph()
        # Add every candidate explicitly: a candidate without any induced edge
        # would otherwise be missing from G_sub, and the row index -> node
        # remapping done in orbit_filter would be shifted.
        G_sub.add_nodes_from(old_to_new.values())
        for u, v in data_graph.subgraph(candidate_nodes).edges():
            G_sub.add_edge(old_to_new[u], old_to_new[v])

        return G_sub, old_to_new, new_to_old

    def remap_subgraph_to_graph(self, orbit_matrix, new_to_old):
        """Return ``{original_node: orbit_row_as_list}`` for each row of ``orbit_matrix``."""
        return {
            new_to_old[i]: row.tolist()
            for i, row in enumerate(orbit_matrix)
        }

    def _compute_orbits(self, graph, counter_class):
        """Return ``{node: orbit_vector}`` for ``graph`` using non-induced counts."""
        counter = counter_class(graph, size=self.graphlet_size)
        orbit_matrix = counter.get_orbits(induced=False)
        # Both orbit counters in this package emit rows in sorted node order,
        # so rows must be paired with sorted nodes, not with insertion order.
        return {node: orbit_matrix[i].tolist() for i, node in enumerate(sorted(graph.nodes()))}

    def run(self):
        """Apply the LDF and NLF filters in sequence, printing each stage.

        Returns:
        - ``(pattern_orbits, candidate_sets)`` on success;
        - an empty dict ``{}`` as soon as a stage leaves some pattern node
          without candidates. Callers must check for this before unpacking.
        """
        candidate_sets = self.ldf_filter()
        if not candidate_sets:
            return {}
        print("============================================================\nLDF:\n============================================================\n")
        self.printCandidateSets(candidate_sets)

        candidate_sets = self.nlf_filter(candidate_sets)
        if not candidate_sets:
            return {}
        print("============================================================\nNLF:\n============================================================\n")
        self.printCandidateSets(candidate_sets)

        # The orbit-based stage (self.orbit_filter) is intentionally not run
        # here; it was disabled in the original code.

        return self.pattern_orbits, candidate_sets

    def ldf_filter(self):
        """Label-and-degree filter.

        A data node is a candidate for pattern node ``u`` when it has the same
        label and at least the same degree. Returns ``{}`` if any pattern node
        ends up with no candidates.
        """
        candidate_sets = {}
        data_degrees = dict(self.data_graph.degree)

        for u in self.pattern_graph.nodes:
            label_u = self.pattern_labels[u]
            deg_u = self.pattern_graph.degree[u]

            labeled_nodes = self.label_index.get(label_u, [])
            candidates = [
                v for v in labeled_nodes
                if data_degrees[v] >= deg_u
            ]

            if not candidates:
                return {}
            candidate_sets[u] = candidates
        return candidate_sets

    def nlf_filter(self, candidate_sets):
        """Neighbour-label-frequency filter.

        Keeps candidate ``v`` for pattern node ``u`` only if, for every label
        among ``u``'s neighbours, ``v`` has at least as many neighbours with
        that label. Returns ``{}`` if any pattern node ends up with no candidates.
        """
        refined_sets = {}

        pattern_nlf = {
            u: Counter(self.pattern_labels[nbr] for nbr in self.pattern_graph.neighbors(u))
            for u in self.pattern_graph.nodes
        }

        # Compute only NLFs for candidate vertices actually used
        used_data_nodes = {v for cset in candidate_sets.values() for v in cset}
        data_nlf = {
            v: Counter(self.data_labels[nbr] for nbr in self.data_graph.neighbors(v))
            for v in used_data_nodes
        }

        for u, candidates in candidate_sets.items():
            u_nlf = pattern_nlf[u]
            filtered = [
                v for v in candidates
                if all(data_nlf[v].get(lbl, 0) >= cnt for lbl, cnt in u_nlf.items())
            ]

            if not filtered:
                return {}
            refined_sets[u] = filtered

        return refined_sets

    def orbit_filter(self, candidate_sets):
        """Orbit-count filter (not called by ``run()``).

        Counts orbits on the subgraph induced by all candidates and keeps
        candidate ``v`` for pattern node ``u`` only if every entry of ``v``'s
        orbit vector is at least the matching entry of ``u``'s. Stores the
        data orbit vectors in ``self.data_orbits``. Returns ``{}`` if any
        pattern node ends up with no candidates.
        """
        subgraph, old_to_new, new_to_old = self.extract_induced_subgraph(self.data_graph, candidate_sets)
        counter = self.orbit_counter_class(subgraph, size=self.graphlet_size)
        data_orbit_matrix = counter.get_orbits(induced=False)

        # Remap orbit vectors to original graph nodes using helper
        self.data_orbits = self.remap_subgraph_to_graph(data_orbit_matrix, new_to_old)

        # Filter based on orbit counts
        refined_sets = {}
        for u, candidates in candidate_sets.items():
            orbit_u = self.pattern_orbits[u]
            filtered = [
                v for v in candidates
                if all(ov >= ou for ov, ou in zip(self.data_orbits[v], orbit_u))
            ]

            if not filtered:
                return {}
            refined_sets[u] = filtered

        return refined_sets

    def printCandidateSets(self, candidate_sets):
        """Print the candidate list of every pattern node, or a notice if there are none."""
        if not candidate_sets:
            print("No match possible after filtering.")
        else:
            print("\n=== Candidate Sets ===")
            for u, cands in candidate_sets.items():
                print(f"Pattern Node {u} → Candidates: {cands}")
@@ -0,0 +1,68 @@
1
+ import networkx as nx
2
+ import numpy as np
3
+
4
class OrderEngine:
    """Compute a matching order over the pattern nodes.

    The first node is the one with the highest score; each following node is
    the unvisited node with the most already-ordered neighbours, ties broken
    by higher score.
    """

    def __init__(self, pattern_graph, pattern_orbits=None, method='auto'):
        """Select the scoring method.

        Parameters:
        - pattern_graph: the pattern graph.
        - pattern_orbits: optional ``{node: orbit_vector}``.
        - method: ``'auto'`` (orbit score when orbits are given, else degree),
          ``'orbit'`` or ``'degree'``.

        Raises:
        - ValueError: for an unknown method, or ``'orbit'`` without orbit data.
        """
        self.pattern_graph = pattern_graph
        self.pattern_orbits = pattern_orbits
        self.num_nodes = pattern_graph.number_of_nodes()

        if method == 'auto':
            self.use_orbit = pattern_orbits is not None
        elif method == 'orbit':
            if pattern_orbits is None:
                raise ValueError("Orbit method selected but no orbit data provided.")
            self.use_orbit = True
        elif method == 'degree':
            self.use_orbit = False
        else:
            raise ValueError("Invalid method: choose from 'auto', 'orbit', or 'degree'")

    def compute_score(self, node):
        """Return the sum of squared orbit counts of ``node``, or its degree."""
        if self.use_orbit:
            return sum(x * x for x in self.pattern_orbits[node])
        return self.pattern_graph.degree[node]

    def run(self):
        """Return ``(order, pivot)``.

        ``order`` lists the pattern nodes in matching order. ``pivot[i]`` is
        the first node of ``order[:i]`` adjacent to ``order[i]``, or ``None``
        when there is none (always the case for ``i == 0``).
        """
        order = []
        pivot = [None] * self.num_nodes
        # A set rather than a list indexed by node id, so node ids need not be
        # the integers 0..n-1.
        visited = set()

        orbit_strength = {
            u: self.compute_score(u)
            for u in self.pattern_graph.nodes
        }
        if not orbit_strength:
            # Empty pattern: nothing to order.
            return order, pivot

        start_node = max(orbit_strength, key=orbit_strength.get)
        order.append(start_node)
        visited.add(start_node)

        for _ in range(1, self.num_nodes):
            max_bn = -1
            selected = None

            for u in self.pattern_graph.nodes:
                if u in visited:
                    continue

                backward_neighbors = sum(
                    1 for v in order if self.pattern_graph.has_edge(u, v)
                )

                if backward_neighbors > max_bn or (
                    backward_neighbors == max_bn and
                    orbit_strength[u] > orbit_strength.get(selected, -1)
                ):
                    max_bn = backward_neighbors
                    selected = u

            # len(order) is the position `selected` is about to take.
            for v in order:
                if self.pattern_graph.has_edge(selected, v):
                    pivot[len(order)] = v
                    break

            visited.add(selected)
            order.append(selected)

        return order, pivot
@@ -0,0 +1,55 @@
1
+ import networkx as nx
2
+
3
class SearchEngine:
    """Backtracking search for embeddings of the pattern graph in the data graph.

    Pattern nodes are assigned in the given ``order``; each is tried against
    its candidate list, and an assignment is kept only if it is injective and
    preserves every pattern edge to already-assigned nodes.
    """

    def __init__(self, data_graph, pattern_graph, candidate_sets, order, pivot):
        self.data_graph = data_graph
        self.pattern_graph = pattern_graph
        self.candidate_sets = candidate_sets
        self.order = order
        self.pivot = pivot
        self.mapping = {}  # pattern_node -> data_node
        self.inverse_mapping = {}  # data_node -> pattern_node
        self.matches = []
        self._first_only = False  # set by run(); stops the search after one match

    def is_valid(self, u, v):
        """Return True if pattern node ``u`` may be mapped to data node ``v``."""
        # Ensure v isn't already used
        if v in self.inverse_mapping:
            return False

        # A separate pivot-edge check was disabled in the original code; the
        # loop below already tests every mapped pattern neighbour of u.

        # Enforce all adjacency constraints between u and previously mapped pattern nodes
        for u_prev, v_prev in self.mapping.items():
            if self.pattern_graph.has_edge(u, u_prev) and not self.data_graph.has_edge(v, v_prev):
                return False

        return True

    def backtrack(self, depth=0):
        """Extend the current partial mapping from position ``depth`` of the order.

        Complete mappings are appended (as copies) to ``self.matches``.
        """
        if depth == len(self.order):
            # Full match found
            self.matches.append(self.mapping.copy())
            return

        u = self.order[depth]
        for v in self.candidate_sets[u]:
            if not self.is_valid(u, v):
                continue
            # Assign
            self.mapping[u] = v
            self.inverse_mapping[v] = u
            self.backtrack(depth + 1)
            # Undo
            del self.mapping[u]
            del self.inverse_mapping[v]
            # When only one match is wanted, unwind as soon as one is recorded
            # (after undoing, so the mapping state is left clean).
            if self._first_only and self.matches:
                return

    def run(self, return_all=True):
        """Run the search.

        Parameters:
        - return_all: when True return every match; when False stop at the
          first match and return a list holding at most that one match.

        Returns:
        - list of ``{pattern_node: data_node}`` dicts.
        """
        self.matches = []
        self._first_only = not return_all
        self.backtrack()
        return self.matches if return_all else self.matches[:1]
orbitsi/utils.py ADDED
@@ -0,0 +1,32 @@
1
+ import networkx as nx
2
+
3
def read_graph_from_file(filepath):
    """
    Load an undirected graph with labelled vertices from a text file.

    Expected layout:
    - a header line: t N M
    - N vertex lines: 'v VertexID LabelId Degree' (the degree field is ignored)
    - M edge lines: 'e VertexID VertexID'

    Lines whose first token is neither 'v' nor 'e' (including the header and
    blank lines) are skipped.

    Parameters:
    - filepath: str, path to the graph file

    Returns:
    - G: networkx.Graph with node labels in the 'label' attribute
    """
    graph = nx.Graph()

    with open(filepath, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.split()
            if not fields:
                continue

            record_type = fields[0]
            if record_type == 'v':
                graph.add_node(int(fields[1]), label=int(fields[2]))
            elif record_type == 'e':
                graph.add_edge(int(fields[1]), int(fields[2]))

    return graph