cpg2py 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cpg2py/__init__.py CHANGED
@@ -1,40 +1,80 @@
1
1
  from csv import DictReader
2
2
  from pathlib import Path
3
- from .cpg import _Graph
4
- from .abc import *
5
3
 
6
- def cpg_graph(node_csv: Path, edge_csv: Path) -> "_Graph":
4
+ from ._abc import *
5
+ from ._cpg import CpgGraph, CpgNode, CpgEdge
6
+ from ._exceptions import CPGError, EdgeNotFoundError, NodeNotFoundError, TopFileNotFoundError
7
+ from ._logger import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
def _first_present(row: dict, *keys: str):
    """
    Returns the first non-None value stored in a CSV row under any of the keys.

    Args:
        row: Mapping produced by DictReader for one CSV line
        keys: Column names to try, in priority order

    Returns:
        First value that is not None, or None if every key is missing
    """
    for key in keys:
        value = row.get(key)
        if value is not None:
            return value
    return None


def cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> CpgGraph:
    """
    Creates a CPG graph from CSV files.

    Both files are read as tab-separated, UTF-8 encoded CSV with a header row.
    Node IDs are taken from the "id:int" column, falling back to "id"; rows
    with neither column are skipped. Edge endpoints and type are taken from
    "start" / "end" / "type", falling back to "start:str" / "end:str" /
    "type:str". Endpoints that are not yet known to the storage are added as
    nodes on the fly.

    Args:
        node_csv: Path to the nodes CSV file
        edge_csv: Path to the edges CSV file
        verbose: If True, log warnings for duplicate nodes/edges

    Returns:
        Graph instance loaded from CSV files
    """
    storage = Storage()

    with open(node_csv, "r", encoding="utf-8") as n_file:
        for node_props in DictReader(n_file, delimiter="\t"):
            nid = _first_present(node_props, "id:int", "id")
            if nid is None:
                continue
            if not storage.add_node(nid) and verbose:
                logger.warning("Node %s already exists in the graph", nid)
            if not storage.set_node_props(nid, node_props) and verbose:
                logger.warning("Failed to set properties for node %s", nid)

    with open(edge_csv, "r", encoding="utf-8") as e_file:
        for edge_props in DictReader(e_file, delimiter="\t"):
            # Resolve the fallback column BEFORE converting to str: str(None)
            # is the string "None", so testing the converted value against
            # None can never trigger the fallback. If neither column exists
            # the component still ends up as the literal string "None".
            f_nid = str(_first_present(edge_props, "start", "start:str"))
            t_nid = str(_first_present(edge_props, "end", "end:str"))
            e_type = str(_first_present(edge_props, "type", "type:str"))
            edge_id = (f_nid, t_nid, e_type)

            # Make sure both endpoints exist before the edge is inserted.
            for endpoint in (f_nid, t_nid):
                if not storage.contains_node(endpoint):
                    storage.add_node(endpoint)
                    if verbose:
                        logger.warning("Node %s does not exist", endpoint)

            if not storage.add_edge(edge_id) and verbose:
                logger.warning("Edge %s -> %s already exists in the graph", f_nid, t_nid)
            if not storage.set_edge_props(edge_id, edge_props) and verbose:
                logger.warning("Failed to set properties for edge %s", edge_id)

    return CpgGraph(storage)
38
65
 
39
66
 
40
- __all__ = ['cpg_graph', 'AbcGraphQuerier', 'AbcNodeQuerier', 'AbcEdgeQuerier', 'Storage']
67
+ __all__ = [
68
+ "cpg_graph",
69
+ "CpgGraph",
70
+ "CpgNode",
71
+ "CpgEdge",
72
+ "AbcGraphQuerier",
73
+ "AbcNodeQuerier",
74
+ "AbcEdgeQuerier",
75
+ "Storage",
76
+ "CPGError",
77
+ "NodeNotFoundError",
78
+ "EdgeNotFoundError",
79
+ "TopFileNotFoundError",
80
+ ]
@@ -1,6 +1,6 @@
1
+ from .edge import AbcEdgeQuerier
1
2
  from .graph import AbcGraphQuerier
2
3
  from .node import AbcNodeQuerier
3
- from .edge import AbcEdgeQuerier
4
4
  from .storage import Storage
5
5
 
6
- __all__ = ["Storage", "AbcGraphQuerier", "AbcNodeQuerier", "AbcEdgeQuerier"]
6
+ __all__ = ["Storage", "AbcGraphQuerier", "AbcNodeQuerier", "AbcEdgeQuerier"]
cpg2py/_abc/edge.py ADDED
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from typing import Any, Dict, Optional, Tuple
5
+
6
+ from .._exceptions import EdgeNotFoundError
7
+ from .storage import Storage
8
+
9
+
10
class AbcEdgeQuerier(abc.ABC):
    """
    Abstract base class for edge query operations.

    Provides interface for querying edge properties and accessing edge data.
    An edge is identified by the tuple (from_node, to_node, edge_type); all
    three components are normalized to strings on construction.
    """

    def __init__(self, graph: Storage, f_nid: str, t_nid: str, e_type: int | str = 0) -> None:
        """
        Initializes an edge querier.

        Args:
            graph: Storage instance containing the graph
            f_nid: Source node ID
            t_nid: Target node ID
            e_type: Edge type/ID (converted to its string form)

        Raises:
            EdgeNotFoundError: If edge does not exist in the graph
        """
        edge_id = (str(f_nid), str(t_nid), str(e_type))
        self.__graph: Storage = graph
        self.__edge_id: Tuple[str, str, str] = edge_id
        if not graph.contains_edge(edge_id):
            raise EdgeNotFoundError(f_nid, t_nid, str(e_type))

    @property
    def edge_id(self) -> Tuple[str, str, str]:
        """
        Returns the edge ID tuple.

        Returns:
            Edge ID tuple (from_node, to_node, edge_type), all strings
        """
        return self.__edge_id

    @property
    def from_nid(self) -> str:
        """
        Returns the source node ID.

        Returns:
            Source node ID string
        """
        return self.__edge_id[0]

    @property
    def to_nid(self) -> str:
        """
        Returns the target node ID.

        Returns:
            Target node ID string
        """
        return self.__edge_id[1]

    @property
    def edge_type(self) -> str:
        """
        Returns the edge type.

        Returns:
            Edge type string
        """
        return self.__edge_id[2]

    @property
    def properties(self) -> Optional[Dict[str, Any]]:
        """
        Returns all edge properties.

        Returns:
            Whatever the storage's get_edge_props returns for this edge ID
        """
        return self.__graph.get_edge_props(self.__edge_id)

    def get_property(self, *prop_names: str) -> Optional[Any]:
        """
        Gets an edge property by trying multiple possible property names.

        Names are tried in the order given and the lookup stops at the first
        value that is not None (falsy values such as 0 or "" are returned).

        Args:
            prop_names: Variable number of property name alternatives to try

        Returns:
            First found property value, or None if none found
        """
        for p_name in prop_names:
            value = self.__graph.get_edge_prop(self.__edge_id, p_name)
            if value is not None:
                return value
        return None
cpg2py/_abc/graph.py ADDED
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from collections import deque
5
+ from typing import Callable, Deque, Generic, Iterable, List, Optional, TypeVar
6
+
7
+ from .edge import AbcEdgeQuerier
8
+ from .node import AbcNodeQuerier
9
+ from .storage import Storage
10
+
11
+ # Type variables for generic graph querier
12
+ # Covariant: subtypes can be used where base types are expected
13
+ _NodeType = TypeVar("_NodeType", bound=AbcNodeQuerier, covariant=True)
14
+ _EdgeType = TypeVar("_EdgeType", bound=AbcEdgeQuerier, covariant=True)
15
+
16
+ # Type variables for concrete implementations (invariant)
17
+ _ConcreteNodeType = TypeVar("_ConcreteNodeType", bound=AbcNodeQuerier)
18
+ _ConcreteEdgeType = TypeVar("_ConcreteEdgeType", bound=AbcEdgeQuerier)
19
+
20
+
21
class AbcGraphQuerier(abc.ABC, Generic[_ConcreteNodeType, _ConcreteEdgeType]):
    """
    Abstract base class for graph query operations.

    Provides interface for querying nodes, edges, and traversing graph structures.

    This is a generic class that allows type-safe operations based on the concrete
    node and edge types. When you create a concrete implementation, specify the
    node and edge types:

    Example:
        class MyGraph(AbcGraphQuerier[MyNode, MyEdge]):
            ...

    Type Parameters:
        _ConcreteNodeType: The concrete node type returned by node() and related methods
        _ConcreteEdgeType: The concrete edge type returned by edge() and used in conditions
    """

    # Predicate aliases used to annotate the filter arguments below.
    __NodeCondition = Callable[[_NodeType], bool]
    __EdgeCondition = Callable[[_EdgeType], bool]

    # Default filter: accepts every node or edge.
    __always_true = lambda _: True

    __NodesResult = Iterable[_ConcreteNodeType]
    __EdgesResult = Iterable[_ConcreteEdgeType]

    def __init__(self, target: Storage, maxdepth: int = -1) -> None:
        """
        Initializes a graph querier.

        Args:
            target: Storage instance containing the graph
            maxdepth: Maximum depth for traversal operations (-1 for unlimited)
        """
        self.__graph: Storage = target
        self.__maxdepth: int = maxdepth

    @property
    def storage(self) -> Storage:
        """
        Returns the underlying storage instance.

        Returns:
            Storage instance
        """
        return self.__graph

    @abc.abstractmethod
    def node(self, whose_id_is: str) -> Optional[_ConcreteNodeType]:
        """
        Returns a node by its ID.

        Args:
            whose_id_is: Node ID to look up

        Returns:
            Node instance if found, None otherwise
        """
        raise NotImplementedError

    @abc.abstractmethod
    def edge(self, fid: str, tid: str, eid: str) -> Optional[_ConcreteEdgeType]:
        """
        Returns an edge by its source, target, and edge type.

        Args:
            fid: Source node ID
            tid: Target node ID
            eid: Edge type/ID

        Returns:
            Edge instance if found, None otherwise
        """
        raise NotImplementedError

    def nodes(
        self, who_satisifies: __NodeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns all nodes matching the condition.

        Args:
            who_satisifies: Node condition filter

        Yields:
            Nodes matching the condition
        """
        for nid in self.__graph.get_nodes():
            cur_node = self.node(whose_id_is=nid)
            if cur_node and who_satisifies(cur_node):
                yield cur_node

    def first_node(
        self, who_satisifies: __NodeCondition = __always_true
    ) -> Optional[_ConcreteNodeType]:
        """
        Returns the first node matching the condition.

        Args:
            who_satisifies: Node condition filter

        Returns:
            First matching node, or None if no match
        """
        # iter() keeps this working when an override of nodes() returns a
        # plain iterable (e.g. a list) instead of a generator.
        return next(iter(self.nodes(who_satisifies)), None)

    def edges(
        self, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteEdgeType]:
        """
        Returns all edges matching the condition.

        Args:
            who_satisifies: Edge condition filter

        Yields:
            Edges matching the condition
        """
        for from_id, to_id, edge_id in self.__graph.get_edges():
            cur_edge = self.edge(from_id, to_id, edge_id)
            if cur_edge and who_satisifies(cur_edge):
                yield cur_edge

    def succ(
        self, of: _ConcreteNodeType, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns successor nodes connected to the input node.

        Args:
            of: Source node
            who_satisifies: Edge condition filter

        Yields:
            Successor nodes reached through an outgoing edge matching the condition
        """
        for src, dst, edge_type in self.__graph.out_edges(of.node_id):
            edge = self.edge(src, dst, edge_type)
            if edge and who_satisifies(edge):
                node = self.node(whose_id_is=dst)
                if node:
                    yield node

    def prev(
        self, of: _ConcreteNodeType, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns predecessor nodes connected to the input node.

        Args:
            of: Target node
            who_satisifies: Edge condition filter

        Yields:
            Predecessor nodes reached through an incoming edge matching the condition
        """
        for src, dst, edge_type in self.__graph.in_edges(of.node_id):
            edge = self.edge(src, dst, edge_type)
            if edge and who_satisifies(edge):
                node = self.node(whose_id_is=src)
                if node:
                    yield node

    def __bfs_search(
        self, root: _ConcreteNodeType, condition: __EdgeCondition, reverse: bool
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns nodes from src node by BFS order (src node not included).

        The queue holds a single None sentinel that marks the end of the
        current BFS level; each time it is dequeued the remaining depth budget
        is decremented and the sentinel is re-queued behind the next level.

        NOTE(review): nodes are yielded when dequeued and the budget is checked
        before the next dequeue, so a positive maxdepth of k yields nodes at
        most k - 1 edges away from root (maxdepth=1 yields nothing). Confirm
        this is the intended meaning of maxdepth.

        Args:
            root: Starting node
            condition: Edge condition filter
            reverse: If True, traverse backwards

        Yields:
            Nodes in BFS order (excluding root)
        """
        if root is None:
            return
        # A set gives O(1) membership checks; node IDs are strings.
        visited_nids: set[str] = set()
        nodes_queue: Deque[Optional[_ConcreteNodeType]] = deque([root, None])
        expand = self.prev if reverse else self.succ
        depth = self.__maxdepth
        # The queue always contains the sentinel, so a length of 1 means no
        # nodes are left to visit. A negative depth never reaches 0.
        while depth != 0 and len(nodes_queue) > 1:
            cur_node = nodes_queue.popleft()
            if cur_node is None:
                nodes_queue.append(None)
                depth -= 1
            elif cur_node.node_id not in visited_nids:
                visited_nids.add(cur_node.node_id)
                nodes_queue.extend(expand(cur_node, condition))
                if root.node_id != cur_node.node_id:
                    yield cur_node

    def descendants(
        self, src: _ConcreteNodeType, condition: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns descendants from src node by BFS order (src node not included).

        Args:
            src: Source node
            condition: Edge condition filter

        Yields:
            Descendant nodes in BFS order
        """
        return self.__bfs_search(src, condition, reverse=False)

    def ancestors(
        self, src: _ConcreteNodeType, condition: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns ancestors from src node by BFS order (src node not included).

        Args:
            src: Source node
            condition: Edge condition filter

        Yields:
            Ancestor nodes in BFS order
        """
        return self.__bfs_search(src, condition, reverse=True)
cpg2py/_abc/node.py ADDED
@@ -0,0 +1,62 @@
1
+ import abc
2
+ from typing import Any, Dict, Optional
3
+
4
+ from .._exceptions import NodeNotFoundError
5
+ from .storage import Storage
6
+
7
+
8
class AbcNodeQuerier(abc.ABC):
    """
    Abstract base class for node query operations.

    Wraps a single node of a Storage-backed graph and exposes its ID and
    properties. The node ID is normalized to a string on construction.
    """

    def __init__(self, graph: Storage, nid: str) -> None:
        """
        Initializes a node querier.

        Args:
            graph: Storage instance containing the graph
            nid: Node ID to query

        Raises:
            NodeNotFoundError: If node does not exist in the graph
        """
        node_key = str(nid)
        self.__nid: str = node_key
        self.__graph: Storage = graph
        if graph.contains_node(node_key):
            return
        raise NodeNotFoundError(nid)

    @property
    def node_id(self) -> str:
        """
        Returns the node ID.

        Returns:
            Node ID string
        """
        return self.__nid

    @property
    def properties(self) -> Optional[Dict[str, Any]]:
        """
        Returns all node properties.

        Returns:
            Whatever the storage's get_node_props returns for this node ID
        """
        storage = self.__graph
        return storage.get_node_props(self.__nid)

    def get_property(self, *prop_names: str) -> Optional[Any]:
        """
        Gets a node property by trying multiple possible property names.

        Names are tried in the order given and the lookup stops at the first
        value that is not None.

        Args:
            prop_names: Variable number of property name alternatives to try

        Returns:
            First found property value, or None if none found
        """
        for candidate in prop_names:
            found = self.__graph.get_node_prop(self.__nid, candidate)
            if found is not None:
                return found
        return None