cpg2py 1.0.5__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cpg2py/__init__.py CHANGED
@@ -1,40 +1,103 @@
1
1
  from csv import DictReader
2
2
  from pathlib import Path
3
- from .cpg import _Graph
4
- from .abc import *
3
+ from typing import Union
5
4
 
6
- def cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> "_Graph":
5
+ from ._abc import Storage
6
+ from ._abc import AbcEdgeQuerier, AbcGraphQuerier, AbcNodeQuerier
7
+ from ._cpg import CpgEdge, CpgGraph, CpgNode
8
+ from ._exceptions import CPGError, EdgeNotFoundError, NodeNotFoundError, TopFileNotFoundError
9
+ from ._logger import get_logger
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def storage_from_json(path: Union[Path, str]) -> Storage:
    """
    Builds a fresh Storage and fills it from a JSON file.

    The actual parsing is delegated to ``Storage.load_json``; this function
    only creates the empty container and hands it back once loaded.

    Args:
        path: Location of the JSON file (Path or str).

    Returns:
        The newly created Storage instance after ``load_json`` has run.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If JSON structure is invalid (missing "nodes" or "edges").
        KeyError: If an edge object is missing "from", "to", or "type".
        (All propagated from ``Storage.load_json``, as stated by the
        original documentation of this function.)
    """
    loaded = Storage()
    loaded.load_json(path)
    return loaded
32
+
33
+
34
def _first_present(row: dict, *keys: str) -> Union[str, None]:
    """
    Returns the first non-None value stored in ``row`` under any of ``keys``.

    Args:
        row: One CSV record as produced by DictReader.
        keys: Column names to try, in priority order.

    Returns:
        The first value that is not None, or None when no key yields one.
    """
    for key in keys:
        value = row.get(key)
        if value is not None:
            return value
    return None


def cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> CpgGraph:
    """
    Creates a CPG graph from tab-separated CSV files.

    Node rows are keyed by the "id:int" column, falling back to "id"; rows
    with neither are skipped. Edge rows read "start", "end" and "type",
    falling back to "start:str", "end:str" and "type:str" respectively.
    Endpoints referenced by an edge but absent from the node file are
    created on the fly.

    Args:
        node_csv: Path to the nodes CSV file
        edge_csv: Path to the edges CSV file
        verbose: If True, log warnings for duplicate nodes/edges, implicitly
            created nodes, and failed property updates

    Returns:
        Graph instance loaded from CSV files
    """
    storage = Storage()

    # newline="" lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to a reader.
    with open(node_csv, "r", encoding="utf-8", newline="") as n_file:
        for node_props in DictReader(n_file, delimiter="\t"):
            nid = _first_present(node_props, "id:int", "id")
            if nid is None:
                # No usable identifier in this row; nothing to register.
                continue
            if not storage.add_node(nid) and verbose:
                logger.warning("Node %s already exists in the graph", nid)
            if not storage.set_node_props(nid, node_props) and verbose:
                logger.warning("Failed to set properties for node %s", nid)

    with open(edge_csv, "r", encoding="utf-8", newline="") as e_file:
        for edge_props in DictReader(e_file, delimiter="\t"):
            # Resolve the column BEFORE converting to str: str(None) is the
            # truthy string "None", which previously made the ":str" fallback
            # columns unreachable. If neither column exists the value is
            # still stringified to "None", exactly as before.
            f_nid = str(_first_present(edge_props, "start", "start:str"))
            t_nid = str(_first_present(edge_props, "end", "end:str"))
            e_type = str(_first_present(edge_props, "type", "type:str"))
            edge_id = (f_nid, t_nid, e_type)

            # Make sure both endpoints exist before the edge is inserted.
            for endpoint in (f_nid, t_nid):
                if not storage.contains_node(endpoint):
                    storage.add_node(endpoint)
                    if verbose:
                        logger.warning("Node %s does not exist", endpoint)

            if not storage.add_edge(edge_id) and verbose:
                logger.warning("Edge %s -> %s already exists in the graph", f_nid, t_nid)
            if not storage.set_edge_props(edge_id, edge_props) and verbose:
                logger.warning("Failed to set properties for edge %s", edge_id)

    return CpgGraph(storage)
38
87
 
39
88
 
40
- __all__ = ['cpg_graph', 'AbcGraphQuerier', 'AbcNodeQuerier', 'AbcEdgeQuerier', 'Storage']
89
+ __all__ = [
90
+ "cpg_graph",
91
+ "storage_from_json",
92
+ "CpgGraph",
93
+ "CpgNode",
94
+ "CpgEdge",
95
+ "AbcGraphQuerier",
96
+ "AbcNodeQuerier",
97
+ "AbcEdgeQuerier",
98
+ "Storage",
99
+ "CPGError",
100
+ "NodeNotFoundError",
101
+ "EdgeNotFoundError",
102
+ "TopFileNotFoundError",
103
+ ]
@@ -1,6 +1,6 @@
1
+ from .edge import AbcEdgeQuerier
1
2
  from .graph import AbcGraphQuerier
2
3
  from .node import AbcNodeQuerier
3
- from .edge import AbcEdgeQuerier
4
4
  from .storage import Storage
5
5
 
6
- __all__ = ["Storage", "AbcGraphQuerier", "AbcNodeQuerier", "AbcEdgeQuerier"]
6
+ __all__ = ["Storage", "AbcGraphQuerier", "AbcNodeQuerier", "AbcEdgeQuerier"]
cpg2py/_abc/edge.py ADDED
@@ -0,0 +1,96 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from typing import Any, Dict, Optional, Tuple
5
+
6
+ from .._exceptions import EdgeNotFoundError
7
+ from .storage import Storage
8
+
9
+
10
class AbcEdgeQuerier(abc.ABC):
    """
    Abstract base for objects that read and modify a single edge's properties.

    An edge is identified by the triple (source node ID, target node ID,
    edge type); all three parts are stored as strings.
    """

    def __init__(self, graph: Storage, f_nid: str, t_nid: str, e_type: str) -> None:
        """
        Binds the querier to one edge and verifies that the edge exists.

        Args:
            graph: Storage instance containing the graph.
            f_nid: Source node ID.
            t_nid: Target node ID.
            e_type: Edge type string.

        Raises:
            EdgeNotFoundError: If edge does not exist in the graph.
        """
        edge_key: Tuple[str, str, str] = (str(f_nid), str(t_nid), str(e_type))
        self.__graph: Storage = graph
        self.__edge_id: Tuple[str, str, str] = edge_key
        if graph.contains_edge(edge_key):
            return
        # The exception receives the caller's original (unconverted) values.
        raise EdgeNotFoundError(f_nid, t_nid, e_type)

    @property
    def edge_id(self) -> Tuple[str, str, str]:
        """Returns the edge identifier tuple (from_nid, to_nid, edge_type)."""
        return self.__edge_id

    @property
    def from_nid(self) -> str:
        """Returns the source node identifier (first element of the edge ID)."""
        source, _, _ = self.__edge_id
        return source

    @property
    def to_nid(self) -> str:
        """Returns the target node identifier (second element of the edge ID)."""
        _, target, _ = self.__edge_id
        return target

    @property
    def edge_type(self) -> str:
        """Returns the edge type string (third element of the edge ID)."""
        _, _, kind = self.__edge_id
        return kind

    @property
    def properties(self) -> Optional[Dict[str, Any]]:
        """Returns whatever the storage holds as this edge's property mapping."""
        storage = self.__graph
        return storage.get_edge_props(self.__edge_id)

    def get_property(self, *prop_names: str) -> Optional[Any]:
        """
        Looks up the edge property under several alternative names.

        Names are tried in the order given and the lookup stops at the first
        value that is not None.

        Args:
            prop_names: Property name alternatives to try.

        Returns:
            First found value, or None if none found.
        """
        for candidate in prop_names:
            found = self.__graph.get_edge_prop(self.__edge_id, candidate)
            if found is not None:
                return found
        return None

    def set_property(self, key: str, value: Any) -> bool:
        """
        Stores one property value on the edge.

        Args:
            key: Property key.
            value: Property value.

        Returns:
            The result reported by the storage's ``set_edge_prop``.
        """
        outcome = self.__graph.set_edge_prop(self.__edge_id, key, value)
        return outcome

    def set_properties(self, props: Dict[str, Any]) -> bool:
        """
        Stores several property values on the edge in one call.

        Args:
            props: Dictionary of property key-value pairs.

        Returns:
            The result reported by the storage's ``set_edge_props``.
        """
        outcome = self.__graph.set_edge_props(self.__edge_id, props)
        return outcome
cpg2py/_abc/graph.py ADDED
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from collections import deque
5
+ from typing import Callable, Deque, Generic, Iterable, List, Optional, TypeVar
6
+
7
+ from .edge import AbcEdgeQuerier
8
+ from .node import AbcNodeQuerier
9
+ from .storage import Storage
10
+
11
+ # Type variables for generic graph querier
12
+ # Covariant: subtypes can be used where base types are expected
13
+ _NodeType = TypeVar("_NodeType", bound=AbcNodeQuerier, covariant=True)
14
+ _EdgeType = TypeVar("_EdgeType", bound=AbcEdgeQuerier, covariant=True)
15
+
16
+ # Type variables for concrete implementations (invariant)
17
+ _ConcreteNodeType = TypeVar("_ConcreteNodeType", bound=AbcNodeQuerier)
18
+ _ConcreteEdgeType = TypeVar("_ConcreteEdgeType", bound=AbcEdgeQuerier)
19
+
20
+
21
class AbcGraphQuerier(abc.ABC, Generic[_ConcreteNodeType, _ConcreteEdgeType]):
    """
    Abstract base class for graph query operations.

    Provides interface for querying nodes, edges, and traversing graph structures.

    This is a generic class that allows type-safe operations based on the concrete
    node and edge types. When you create a concrete implementation, specify the
    node and edge types:

    Example:
        class MyGraph(AbcGraphQuerier[MyNode, MyEdge]):
            ...

    Type Parameters:
        _ConcreteNodeType: The concrete node type returned by node() and related methods
        _ConcreteEdgeType: The concrete edge type returned by edge() and used in conditions
    """

    # Signatures of the filter callbacks accepted by the query methods.
    __NodeCondition = Callable[[_NodeType], bool]
    __EdgeCondition = Callable[[_EdgeType], bool]

    # Default filter: accepts every node / edge.
    __always_true = lambda _: True

    __NodesResult = Iterable[_ConcreteNodeType]
    __EdgesResult = Iterable[_ConcreteEdgeType]

    def __init__(self, target: Storage, maxdepth: int = -1) -> None:
        """
        Initializes a graph querier.

        Args:
            target: Storage instance containing the graph
            maxdepth: Maximum depth for traversal operations (-1 for unlimited)
        """
        self.__graph: Storage = target
        self.__maxdepth: int = maxdepth

    @property
    def storage(self) -> Storage:
        """
        Returns the underlying storage instance.

        Returns:
            Storage instance
        """
        return self.__graph

    @abc.abstractmethod
    def node(self, whose_id_is: str) -> Optional[_ConcreteNodeType]:
        """
        Returns a node by its ID.

        Args:
            whose_id_is: Node ID to look up

        Returns:
            Node instance if found, None otherwise
        """
        raise NotImplementedError

    @abc.abstractmethod
    def edge(self, fid: str, tid: str, eid: str) -> Optional[_ConcreteEdgeType]:
        """
        Returns an edge by its source, target, and edge type.

        Args:
            fid: Source node ID
            tid: Target node ID
            eid: Edge type/ID

        Returns:
            Edge instance if found, None otherwise
        """
        raise NotImplementedError

    def nodes(
        self, who_satisifies: __NodeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns all nodes matching the condition.

        Every node ID reported by the storage is resolved through node();
        IDs that resolve to a falsy value are skipped.

        Args:
            who_satisifies: Node condition filter

        Yields:
            Nodes matching the condition
        """
        for nid in self.__graph.get_nodes():
            cur_node = self.node(whose_id_is=nid)
            if cur_node and who_satisifies(cur_node):
                yield cur_node

    def first_node(
        self, who_satisifies: __NodeCondition = __always_true
    ) -> Optional[_ConcreteNodeType]:
        """
        Returns the first node matching the condition.

        Args:
            who_satisifies: Node condition filter

        Returns:
            First matching node, or None if no match
        """
        return next(self.nodes(who_satisifies), None)

    def edges(
        self, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteEdgeType]:
        """
        Returns all edges matching the condition.

        Every edge triple reported by the storage is resolved through edge();
        triples that resolve to a falsy value are skipped.

        Args:
            who_satisifies: Edge condition filter

        Yields:
            Edges matching the condition
        """
        for from_id, to_id, edge_id in self.__graph.get_edges():
            cur_edge = self.edge(from_id, to_id, edge_id)
            if cur_edge and who_satisifies(cur_edge):
                yield cur_edge

    def succ(
        self, of: _ConcreteNodeType, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns successor nodes connected to the input node.

        Args:
            of: Source node
            who_satisifies: Edge condition filter

        Yields:
            Successor nodes reached over outgoing edges matching the condition
        """
        for src, dst, edge_type in self.__graph.out_edges(of.node_id):
            edge = self.edge(src, dst, edge_type)
            if edge and who_satisifies(edge):
                node = self.node(whose_id_is=dst)
                if node:
                    yield node

    def prev(
        self, of: _ConcreteNodeType, who_satisifies: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns predecessor nodes connected to the input node.

        Args:
            of: Target node
            who_satisifies: Edge condition filter

        Yields:
            Predecessor nodes reached over incoming edges matching the condition
        """
        for src, dst, edge_type in self.__graph.in_edges(of.node_id):
            edge = self.edge(src, dst, edge_type)
            if edge and who_satisifies(edge):
                node = self.node(whose_id_is=src)
                if node:
                    yield node

    def __bfs_search(
        self, root: _ConcreteNodeType, condition: __EdgeCondition, reverse: bool
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns nodes from src node by BFS order (src node not included).

        The queue holds a single None sentinel that separates BFS levels:
        each time it is dequeued it is re-appended and the remaining depth
        budget is decremented. A budget of -1 never reaches 0, so the
        traversal is then bounded only by the graph itself.

        NOTE(review): nodes are yielded when dequeued, so with maxdepth=1 the
        loop stops after the first sentinel and before any neighbour is
        yielded - confirm whether maxdepth is meant to count the root level.

        Args:
            root: Starting node
            condition: Edge condition filter
            reverse: If True, traverse backwards

        Yields:
            Nodes in BFS order (excluding root)
        """
        if root is None:
            return
        # A set gives constant-time membership checks; with a list every
        # dequeued node paid a linear scan over all visited IDs.
        visited_nids = set()
        nodes_queue: Deque[Optional[_ConcreteNodeType]] = deque([root, None])
        depth = self.__maxdepth
        # Stop when the depth budget is used up or only the sentinel remains.
        while depth != 0 and len(nodes_queue) > 1:
            cur_node = nodes_queue.popleft()
            if cur_node is None:
                # End of the current level: move the sentinel to the back.
                nodes_queue.append(None)
                depth -= 1
            elif cur_node.node_id not in visited_nids:
                visited_nids.add(cur_node.node_id)
                if not reverse:
                    n_nodes = self.succ(cur_node, condition)
                else:
                    n_nodes = self.prev(cur_node, condition)
                nodes_queue.extend(n_nodes)
                if root.node_id != cur_node.node_id:
                    yield cur_node

    def descendants(
        self, src: _ConcreteNodeType, condition: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns descendants from src node by BFS order (src node not included).

        Args:
            src: Source node
            condition: Edge condition filter

        Yields:
            Descendant nodes in BFS order
        """
        return self.__bfs_search(src, condition, reverse=False)

    def ancestors(
        self, src: _ConcreteNodeType, condition: __EdgeCondition = __always_true
    ) -> Iterable[_ConcreteNodeType]:
        """
        Returns ancestors from src node by BFS order (src node not included).

        Args:
            src: Source node
            condition: Edge condition filter

        Yields:
            Ancestor nodes in BFS order
        """
        return self.__bfs_search(src, condition, reverse=True)
cpg2py/_abc/node.py ADDED
@@ -0,0 +1,75 @@
1
+ import abc
2
+ from typing import Any, Dict, Optional
3
+
4
+ from .._exceptions import NodeNotFoundError
5
+ from .storage import Storage
6
+
7
+
8
class AbcNodeQuerier(abc.ABC):
    """
    Abstract base for objects that read and modify a single node's properties.

    The node is identified by its ID, which is stored as a string.
    """

    def __init__(self, graph: Storage, nid: str) -> None:
        """
        Binds the querier to one node and verifies that the node exists.

        Args:
            graph: Storage instance containing the graph.
            nid: Node identifier.

        Raises:
            NodeNotFoundError: If node does not exist in the graph.
        """
        node_key = str(nid)
        self.__nid: str = node_key
        self.__graph: Storage = graph
        if graph.contains_node(node_key):
            return
        raise NodeNotFoundError(str(nid))

    @property
    def node_id(self) -> str:
        """Returns the node identifier."""
        return self.__nid

    @property
    def properties(self) -> Optional[Dict[str, Any]]:
        """Returns whatever the storage holds as this node's property mapping."""
        storage = self.__graph
        return storage.get_node_props(self.__nid)

    def get_property(self, *prop_names: str) -> Optional[Any]:
        """
        Looks up the node property under several alternative names.

        Names are tried in the order given and the lookup stops at the first
        value that is not None.

        Args:
            prop_names: Property name alternatives to try.

        Returns:
            First found value, or None if none found.
        """
        for candidate in prop_names:
            found = self.__graph.get_node_prop(self.__nid, candidate)
            if found is not None:
                return found
        return None

    def set_property(self, key: str, value: Any) -> bool:
        """
        Stores one property value on the node.

        Args:
            key: Property key.
            value: Property value.

        Returns:
            The result reported by the storage's ``set_node_prop``.
        """
        outcome = self.__graph.set_node_prop(self.__nid, key, value)
        return outcome

    def set_properties(self, props: Dict[str, Any]) -> bool:
        """
        Stores several property values on the node in one call.

        Args:
            props: Dictionary of property key-value pairs.

        Returns:
            The result reported by the storage's ``set_node_props``.
        """
        outcome = self.__graph.set_node_props(self.__nid, props)
        return outcome