cpg2py 1.0.5__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cpg2py/_abc/storage.py ADDED
@@ -0,0 +1,258 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
4
+
5
+ from .._logger import get_logger
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
class Storage:
    """A directed multi-graph implementation supporting multiple edges between nodes.

    Nodes are keyed by string IDs and edges by ``(from_node, to_node, edge_type)``
    string triples, so two nodes may be joined by several edges as long as the
    edge types differ. Every node ID, edge ID component and property key handed
    to a public method is coerced with ``str()`` before it is used.

    Three dictionaries are kept in sync:

    * ``__nodes``  maps node ID -> node property dict
    * ``__edges``  maps edge ID -> edge property dict
    * ``__struct`` maps node ID -> list of incident edge IDs (incoming and
      outgoing together); a self-loop is listed exactly once
    """

    __NodeID = str
    __EdgeID = Tuple[str, str, str]
    __Property = Dict[str, Any]

    def __init__(self):
        """Initializes an empty directed graph."""
        self.__nodes: Dict[str, Dict[str, Any]] = {}
        self.__edges: Dict[Tuple[str, str, str], Dict[str, Any]] = {}
        self.__struct: Dict[str, List[Tuple[str, str, str]]] = {}

    ################################ INTERNAL HELPERS ################################

    @staticmethod
    def __edge_key(eid: __EdgeID) -> Tuple[str, str, str]:
        """Returns the edge ID with its first three components coerced to ``str``."""
        return (str(eid[0]), str(eid[1]), str(eid[2]))

    @staticmethod
    def __link(struct: Dict[str, List[Tuple[str, str, str]]], eid: Tuple[str, str, str]) -> None:
        """Registers ``eid`` in the adjacency lists of both of its endpoints."""
        struct[eid[0]].append(eid)
        # A self-loop has a single endpoint; listing it twice would make every
        # traversal (out_edges, in_edges, successors, predecessors) yield it twice.
        if eid[1] != eid[0]:
            struct[eid[1]].append(eid)

    ################################ GRAPH STRUCTURE APIs ################################

    def add_node(self, nid: __NodeID) -> bool:
        """
        Adds a node to the graph.

        Args:
            nid: Node ID to add

        Returns:
            True if node was added, False if it already exists
        """
        nid = str(nid)
        if nid in self.__nodes:
            return False
        self.__nodes[nid] = {}
        self.__struct[nid] = []
        return True

    def contains_node(self, nid: __NodeID) -> bool:
        """
        Checks if a node exists in the graph.

        Args:
            nid: Node ID to check

        Returns:
            True if node exists, False otherwise
        """
        return str(nid) in self.__nodes

    def add_edge(self, eid: __EdgeID) -> bool:
        """
        Adds a directed edge to the graph.

        Args:
            eid: Edge ID tuple (from_node, to_node, edge_type)

        Returns:
            True if edge was added, False if it already exists or nodes are missing
        """
        eid = self.__edge_key(eid)
        if eid in self.__edges:
            return False
        if eid[0] not in self.__nodes or eid[1] not in self.__nodes:
            return False
        self.__edges[eid] = {}
        self.__link(self.__struct, eid)
        return True

    def contains_edge(self, eid: __EdgeID) -> bool:
        """Checks if an edge exists in the graph."""
        return self.__edge_key(eid) in self.__edges

    def out_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns a lazy iterable of the edges leaving ``nid`` (empty for unknown nodes)."""
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, []) if eid[0] == nid)

    def in_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns a lazy iterable of the edges entering ``nid`` (empty for unknown nodes)."""
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, []) if eid[1] == nid)

    def successors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns a lazy iterable of target node IDs, one per outgoing edge of ``nid``."""
        nid = str(nid)
        return (eid[1] for eid in self.__struct.get(nid, []) if eid[0] == nid)

    def predecessors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns a lazy iterable of source node IDs, one per incoming edge of ``nid``."""
        nid = str(nid)
        return (eid[0] for eid in self.__struct.get(nid, []) if eid[1] == nid)

    ################################ GRAPH PROPERTIES APIs ################################

    def set_node_props(self, node: __NodeID, props: __Property) -> bool:
        """
        Merges ``props`` into the properties of a node.

        Existing keys are overwritten, keys absent from ``props`` are kept.

        Returns:
            True on success, False if the node does not exist
        """
        node = str(node)
        if node not in self.__nodes:
            return False
        self.__nodes[node].update({str(k): v for k, v in props.items()})
        return True

    def get_node_props(self, node: __NodeID) -> Optional[__Property]:
        """
        Returns the property dict of a node, or None if the node does not exist.

        The returned dict is the stored object itself, not a copy.
        """
        return self.__nodes.get(str(node), None)

    def set_node_prop(self, node: __NodeID, key: str, value: Any) -> bool:
        """
        Sets a single property of a node.

        Returns:
            True on success, False if the node does not exist
        """
        node, key = str(node), str(key)
        if node not in self.__nodes:
            return False
        self.__nodes[node][key] = value
        return True

    def get_node_prop(self, node: __NodeID, key: str) -> Optional[Any]:
        """Returns a single property of a node, or None if node or key is missing."""
        node, key = str(node), str(key)
        return self.__nodes.get(node, {}).get(key, None)

    def set_edge_props(self, eid: __EdgeID, props: __Property) -> bool:
        """
        Merges ``props`` into the properties of an edge.

        Existing keys are overwritten, keys absent from ``props`` are kept.

        Returns:
            True on success, False if the edge does not exist
        """
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        self.__edges[eid].update({str(k): v for k, v in props.items()})
        return True

    def get_edge_props(self, eid: __EdgeID) -> Optional[__Property]:
        """
        Returns the property dict of an edge, or None if the edge does not exist.

        The returned dict is the stored object itself, not a copy.
        """
        return self.__edges.get(self.__edge_key(eid))

    def set_edge_prop(self, eid: __EdgeID, key: str, value: Any) -> bool:
        """
        Sets a single property of an edge.

        Returns:
            True on success, False if the edge does not exist
        """
        eid = self.__edge_key(eid)
        key = str(key)
        if eid not in self.__edges:
            return False
        self.__edges[eid][key] = value
        return True

    def get_edge_prop(self, eid: __EdgeID, key: str) -> Optional[Any]:
        """Returns a single property of an edge, or None if edge or key is missing."""
        eid = self.__edge_key(eid)
        key = str(key)
        return self.__edges.get(eid, {}).get(key, None)

    def __repr__(self):
        """Returns a string representation of the graph (node and edge counts)."""
        return f"MultiDiGraph(nodes={len(self.__nodes)}, edges={len(self.__edges)})"

    ################################ GRAPH COMMON APIs ################################

    def get_nodes(self) -> Iterable[__NodeID]:
        """Returns a live view of all node IDs in the graph."""
        return self.__nodes.keys()

    def get_edges(self) -> Iterable[__EdgeID]:
        """Returns a live view of all edge IDs in the graph."""
        return self.__edges.keys()

    def remove_node(self, nid: __NodeID) -> bool:
        """
        Removes a node together with every edge that starts or ends at it.

        Returns:
            True if the node was removed, False if it does not exist
        """
        nid = str(nid)
        if nid not in self.__nodes:
            return False
        del self.__nodes[nid]
        for eid in self.__struct.pop(nid, []):
            self.__edges.pop(eid, None)
            # The edge is also listed under its other endpoint; drop it there
            # too, otherwise that node keeps reporting an edge that is gone.
            other = eid[1] if eid[0] == nid else eid[0]
            incident = self.__struct.get(other)
            if incident is not None and eid in incident:
                incident.remove(eid)
        return True

    def remove_edge(self, eid: __EdgeID) -> bool:
        """
        Removes an edge from the graph.

        Returns:
            True if the edge was removed, False if it does not exist
        """
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        self.__struct[eid[0]].remove(eid)
        if eid[1] != eid[0]:  # a self-loop is listed only once
            self.__struct[eid[1]].remove(eid)
        del self.__edges[eid]
        return True

    def save_json(self, path: Union[Path, str]) -> None:
        """
        Serializes the graph (nodes and edges with properties) to a UTF-8 JSON file.

        The file holds a top-level object with ``"nodes"`` (node ID -> property
        dict) and ``"edges"`` (list of ``{"from", "to", "type", "props"}``
        objects).

        Args:
            path: File path (Path or str).

        Raises:
            OSError: If the file cannot be written.
            TypeError: If a property value is not JSON-serializable.
        """
        payload: Dict[str, Any] = {
            "nodes": dict(self.__nodes),
            "edges": [
                {"from": eid[0], "to": eid[1], "type": eid[2], "props": props}
                for eid, props in self.__edges.items()
            ],
        }
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

    def load_json(self, path: Union[Path, str]) -> None:
        """
        Replaces the current graph with the contents of the JSON file.

        Loads nodes, edges, and their properties. Expects top-level keys
        "nodes" and "edges". Endpoints referenced by an edge but absent from
        "nodes" are created with empty properties. If the same edge appears
        more than once, the properties of the last occurrence win.

        The new graph is assembled on the side and swapped in only after the
        whole file has been processed, so a failed load leaves the current
        graph untouched.

        Args:
            path: File path (Path or str).

        Raises:
            OSError: If the file cannot be read.
            ValueError: If JSON structure is invalid (missing "nodes" or "edges").
            KeyError: If an edge object is missing "from", "to", or "type".
        """
        with open(path, "r", encoding="utf-8") as f:
            data: Dict[str, Any] = json.load(f)
        if not isinstance(data, dict) or "nodes" not in data or "edges" not in data:
            raise ValueError("JSON must contain top-level 'nodes' and 'edges'")

        nodes: Dict[str, Dict[str, Any]] = {}
        edges: Dict[Tuple[str, str, str], Dict[str, Any]] = {}
        struct: Dict[str, List[Tuple[str, str, str]]] = {}

        for nid, props in data["nodes"].items():
            nid_str = str(nid)
            nodes[nid_str] = dict(props) if props else {}
            struct[nid_str] = []

        for edge_obj in data["edges"]:
            eid = (str(edge_obj["from"]), str(edge_obj["to"]), str(edge_obj["type"]))
            props = edge_obj.get("props")
            if props is None:
                props = {}
            for endpoint in eid[:2]:
                if endpoint not in nodes:
                    nodes[endpoint] = {}
                    struct[endpoint] = []
            # Link only on first sight so a repeated record cannot duplicate
            # the adjacency entry.
            if eid not in edges:
                self.__link(struct, eid)
            edges[eid] = dict(props)

        self.__nodes, self.__edges, self.__struct = nodes, edges, struct
@@ -0,0 +1,5 @@
1
+ from .edge import CpgEdge
2
+ from .graph import CpgGraph
3
+ from .node import CpgNode
4
+
5
+ __all__ = ["CpgGraph", "CpgNode", "CpgEdge"]
cpg2py/_cpg/edge.py ADDED
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Tuple
4
+
5
+ from .._abc import AbcEdgeQuerier
6
+
7
+
8
class CpgEdge(AbcEdgeQuerier):
    """Concrete edge implementation with CPG-specific property accessors.

    Each accessor reads one or more property keys through ``get_property``;
    where several keys are listed they are alternative spellings of the same
    field.
    """

    @property
    def id(self) -> Tuple[str, str, str]:
        """Edge identifier, taken from ``edge_id``."""
        return self.edge_id

    @property
    def start(self) -> Optional[int]:
        """Source node ID as an int.

        Uses the "start" / "start:START_ID" property when it is a plain
        decimal string, otherwise falls back to ``int(self.from_nid)``
        (which raises ValueError if ``from_nid`` is not an integer literal).
        """
        start_str = str(self.get_property("start", "start:START_ID"))
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as "½" or "²" that int() cannot parse.
        return int(start_str) if start_str.isdecimal() else int(self.from_nid)

    @property
    def end(self) -> Optional[int]:
        """Target node ID as an int.

        Uses the "end" / "end:END_ID" property when it is a plain decimal
        string, otherwise falls back to ``int(self.to_nid)`` (which raises
        ValueError if ``to_nid`` is not an integer literal).
        """
        end_str = str(self.get_property("end", "end:END_ID"))
        # See start: only strings int() is guaranteed to accept are converted.
        return int(end_str) if end_str.isdecimal() else int(self.to_nid)

    @property
    def type(self) -> Optional[str]:
        """Edge type, read from the "type" / "type:TYPE" property."""
        return self.get_property("type", "type:TYPE")

    @property
    def var(self) -> Optional[str]:
        """Value of the "var" property."""
        return self.get_property("var")
cpg2py/_cpg/graph.py ADDED
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ from typing import Callable, Iterable, Optional
5
+
6
+ from .._abc import AbcGraphQuerier, Storage
7
+ from .._exceptions import EdgeNotFoundError, NodeNotFoundError, TopFileNotFoundError
8
+ from .._logger import get_logger
9
+ from .edge import CpgEdge
10
+ from .node import CpgNode
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class CpgGraph(AbcGraphQuerier[CpgNode, CpgEdge]):
    """
    Graph implementation for Object Property Diagram (OPG) used by ODgen and FAST.

    Provides concrete implementation of graph query operations for CPG data.

    This class is parameterized with CpgNode and CpgEdge types, ensuring type safety
    throughout the graph operations.
    """

    __EdgeCondition = Callable[[CpgEdge], bool]
    __always_true = lambda _: True

    def __init__(self, target: Storage) -> None:
        """
        Wraps a storage object.

        Args:
            target: Storage holding the graph data to query
        """
        super().__init__(target)
        # Per-instance memo for topfile_node (start node ID -> top file node).
        # Kept on the instance rather than in a class-level lru_cache so the
        # cache neither pins graph objects in memory nor needs them hashable.
        self.__topfile_cache = {}

    def node(self, whose_id_is: str) -> Optional[CpgNode]:
        """
        Returns a node by its ID.

        Args:
            whose_id_is: Node ID to look up

        Returns:
            Node instance if found

        Raises:
            NodeNotFoundError: If node is not found in the graph, or if
                constructing the node fails for any other reason (the
                original error is logged and chained as the cause)
        """
        try:
            return CpgNode(self.storage, whose_id_is)
        except NodeNotFoundError:
            raise
        except Exception as e:
            logger.exception("Unexpected error while finding node with id %s", whose_id_is)
            raise NodeNotFoundError(whose_id_is) from e

    def edge(self, fid: str, tid: str, eid: str) -> Optional[CpgEdge]:
        """
        Returns an edge by its source, target, and edge type.

        Args:
            fid: Source node ID
            tid: Target node ID
            eid: Edge type/ID

        Returns:
            Edge instance if found

        Raises:
            EdgeNotFoundError: If edge is not found in the graph, or if
                constructing the edge fails for any other reason (the
                original error is logged and chained as the cause)
        """
        try:
            return CpgEdge(self.storage, fid, tid, eid)
        except EdgeNotFoundError:
            raise
        except Exception as e:
            logger.exception(
                "Unexpected error while finding edge from %s to %s, eid is %s", fid, tid, eid
            )
            raise EdgeNotFoundError(fid, tid, str(eid)) from e

    def topfile_node(self, of_nid: str) -> CpgNode:
        """
        Finds the top file node from the input node.

        A node is a top file node when its type is "File" or its flags contain
        "TOPLEVEL_FILE". Otherwise the search recurses into the predecessors
        reached through PARENT_OF, ENTRY or EXIT edges and returns the first
        hit.

        Successful results are memoized per graph instance and are not
        invalidated when the underlying storage changes; failures are not
        cached.

        Args:
            of_nid: Starting node ID (must be hashable)

        Returns:
            Top file node

        Raises:
            TopFileNotFoundError: If top file node cannot be found
            NodeNotFoundError: If starting node is not found
        """
        cache = self.__topfile_cache
        if of_nid in cache:
            return cache[of_nid]

        of_node = self.node(of_nid)
        if of_node.type == "File" or "TOPLEVEL_FILE" in of_node.flags:
            cache[of_nid] = of_node
            return of_node

        parents = self.prev(of_node, lambda e: e.type in ("PARENT_OF", "ENTRY", "EXIT"))
        for pre in parents:
            try:
                top_file = self.topfile_node(pre.id)
            except TopFileNotFoundError:
                # This branch is a dead end; try the next predecessor.
                continue
            cache[of_nid] = top_file
            return top_file

        logger.error("Cannot find top file node from node %s", of_nid)
        raise TopFileNotFoundError(of_nid)

    def succ(self, of: CpgNode, who_satisifies: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns successor nodes connected to the input node.

        Delegates to the base class implementation.

        Args:
            of: Source node
            who_satisifies: Optional edge condition filter (defaults to accepting every edge)

        Returns:
            Iterable of successor nodes matching the condition
        """
        return super().succ(of, who_satisifies)

    def prev(self, of: CpgNode, who_satisifies: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns predecessor nodes connected to the input node.

        Delegates to the base class implementation.

        Args:
            of: Target node
            who_satisifies: Optional edge condition filter (defaults to accepting every edge)

        Returns:
            Iterable of predecessor nodes matching the condition
        """
        return super().prev(of, who_satisifies)

    def children(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns child nodes connected via PARENT_OF edges.

        Args:
            of: Parent node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of child nodes
        """
        return self.succ(of, lambda e: extra(e) and (e.type == "PARENT_OF"))

    def parent(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns parent nodes connected via PARENT_OF edges.

        Args:
            of: Child node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of parent nodes
        """
        return self.prev(of, lambda e: extra(e) and (e.type == "PARENT_OF"))

    def flow_to(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns successor nodes connected via FLOWS_TO edges.

        Args:
            of: Source node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of flow successor nodes
        """
        return self.succ(of, lambda e: extra(e) and (e.type == "FLOWS_TO"))

    def flow_from(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns predecessor nodes connected via FLOWS_TO edges.

        Args:
            of: Target node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of flow predecessor nodes
        """
        return self.prev(of, lambda e: extra(e) and (e.type == "FLOWS_TO"))
cpg2py/_cpg/node.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Optional
4
+
5
+ from .._abc import AbcNodeQuerier
6
+
7
+
8
class CpgNode(AbcNodeQuerier):
    """Concrete node implementation with CPG-specific property accessors.

    Each accessor reads one or more property keys through ``get_property``;
    where several keys are listed they are alternative spellings of the same
    field.
    """

    def __int_property(self, *names: str) -> Optional[int]:
        """Returns the named property as an int, or None if it is not a decimal string."""
        text = str(self.get_property(*names))
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as "½" or "²" that int() cannot parse.
        return int(text) if text.isdecimal() else None

    @property
    def id(self) -> str:
        """Node identifier, taken from ``node_id``."""
        return self.node_id

    @property
    def code(self) -> Optional[str]:
        """Value of the "code" property."""
        return self.get_property("code")

    @property
    def label(self) -> Optional[str]:
        """Node label, read from the "labels:label" / "labels" property."""
        return self.get_property("labels:label", "labels")

    @property
    def flags(self) -> List[str]:
        """Node flags, split on single spaces; an empty list when the property is None.

        Read from "flags:string_array" / "flags:string[]" / "flags".
        """
        flags_str = self.get_property("flags:string_array", "flags:string[]", "flags")
        return str(flags_str).split(" ") if flags_str is not None else []

    @property
    def line_num(self) -> Optional[int]:
        """Integer value of "lineno:int" / "lineno", or None if not a decimal string."""
        return self.__int_property("lineno:int", "lineno")

    @property
    def children_num(self) -> Optional[int]:
        """Integer value of "childnum:int" / "childnum", or None if not a decimal string."""
        return self.__int_property("childnum:int", "childnum")

    @property
    def func_id(self) -> Optional[int]:
        """Integer value of "funcid:int" / "funcid", or None if not a decimal string."""
        return self.__int_property("funcid:int", "funcid")

    @property
    def class_name(self) -> Optional[str]:
        """Value of the "classname" property."""
        return self.get_property("classname")

    @property
    def namespace(self) -> Optional[str]:
        """Value of the "namespace" property."""
        return self.get_property("namespace")

    @property
    def name(self) -> Optional[str]:
        """Value of the "name" property."""
        return self.get_property("name")

    @property
    def end_num(self) -> Optional[int]:
        """Integer value of "endlineno:int" / "endlineno", or None if not a decimal string."""
        return self.__int_property("endlineno:int", "endlineno")

    @property
    def comment(self) -> Optional[str]:
        """Value of the "doccomment" property."""
        return self.get_property("doccomment")

    @property
    def type(self) -> Optional[str]:
        """Value of the "type" property."""
        return self.get_property("type")
cpg2py/_exceptions.py ADDED
@@ -0,0 +1,43 @@
1
+ """Custom exceptions for cpg2py package."""
2
+
3
+ from typing import Optional
4
+
5
+
6
class CPGError(Exception):
    """Base exception for all CPG-related errors."""


class NodeNotFoundError(CPGError):
    """Raised when node identifier does not exist in storage.

    Attributes are set in ``__init__``: ``node_id`` holds the identifier that
    was looked up.
    """

    def __init__(self, node_id: str, message: Optional[str] = None) -> None:
        """
        Args:
            node_id: Identifier of the missing node
            message: Custom message; a default one is generated when None
        """
        self.node_id = node_id
        if message is None:
            message = f"Node with id '{node_id}' not found in graph"
        super().__init__(message)

    def __reduce__(self):
        """Supports pickle/copy by rebuilding from the constructor arguments.

        The default exception reduction replays ``self.args`` (only the
        message) into ``__init__``, which does not match this signature.
        """
        message = self.args[0] if self.args else None
        return (self.__class__, (self.node_id, message), self.__dict__)


class EdgeNotFoundError(CPGError):
    """Raised when edge identifier tuple does not exist in storage.

    Attributes are set in ``__init__``: ``from_id``, ``to_id`` and
    ``edge_type`` hold the components of the edge that was looked up.
    """

    def __init__(
        self, from_id: str, to_id: str, edge_type: str, message: Optional[str] = None
    ) -> None:
        """
        Args:
            from_id: Source node identifier of the missing edge
            to_id: Target node identifier of the missing edge
            edge_type: Type of the missing edge
            message: Custom message; a default one is generated when None
        """
        self.from_id = from_id
        self.to_id = to_id
        self.edge_type = edge_type
        if message is None:
            message = (
                f"Edge from '{from_id}' to '{to_id}' with type '{edge_type}' not found in graph"
            )
        super().__init__(message)

    def __reduce__(self):
        """Supports pickle/copy by rebuilding from the constructor arguments.

        The default exception reduction replays ``self.args`` (only the
        message) into ``__init__``, which would fail with a TypeError here.
        """
        message = self.args[0] if self.args else None
        return (
            self.__class__,
            (self.from_id, self.to_id, self.edge_type, message),
            self.__dict__,
        )


class TopFileNotFoundError(CPGError):
    """Raised when top file node cannot be found during upward traversal.

    Attributes are set in ``__init__``: ``node_id`` holds the identifier the
    traversal started from.
    """

    def __init__(self, node_id: str, message: Optional[str] = None) -> None:
        """
        Args:
            node_id: Identifier of the node the search started from
            message: Custom message; a default one is generated when None
        """
        self.node_id = node_id
        if message is None:
            message = f"Cannot find top file node from node '{node_id}'"
        super().__init__(message)

    def __reduce__(self):
        """Supports pickle/copy by rebuilding from the constructor arguments.

        The default exception reduction replays ``self.args`` (only the
        message) into ``__init__``, which does not match this signature.
        """
        message = self.args[0] if self.args else None
        return (self.__class__, (self.node_id, message), self.__dict__)
cpg2py/_logger.py ADDED
@@ -0,0 +1,53 @@
1
+ """
2
+ Logging configuration for cpg2py package.
3
+ """
4
+
5
+ import logging
6
+ import sys
7
+ from typing import Optional
8
+
9
+ _logger: Optional[logging.Logger] = None
10
+
11
+
12
+ def get_logger(name: Optional[str] = None) -> logging.Logger:
13
+ """
14
+ Returns a logger instance for the specified module.
15
+
16
+ Args:
17
+ name: Module name (defaults to 'cpg2py')
18
+
19
+ Returns:
20
+ Configured logger instance
21
+ """
22
+ global _logger
23
+
24
+ if _logger is None:
25
+ _logger = logging.getLogger("cpg2py")
26
+ _logger.setLevel(logging.WARNING)
27
+
28
+ if not _logger.handlers:
29
+ handler = logging.StreamHandler(sys.stderr)
30
+ handler.setLevel(logging.WARNING)
31
+
32
+ formatter = logging.Formatter(
33
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
34
+ )
35
+ handler.setFormatter(formatter)
36
+ _logger.addHandler(handler)
37
+
38
+ if name:
39
+ return _logger.getChild(name)
40
+ return _logger
41
+
42
+
43
def set_log_level(level: int) -> None:
    """
    Applies one logging level to the cpg2py logger and to each of its handlers.

    Args:
        level: Logging level (e.g., logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)
    """
    package_logger = get_logger()
    package_logger.setLevel(level)
    # Handlers filter independently of the logger, so they are lowered/raised too.
    for attached in package_logger.handlers:
        attached.setLevel(level)