cpg2py 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cpg2py/_abc/storage.py ADDED
@@ -0,0 +1,190 @@
1
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
2
+
3
+ from .._logger import get_logger
4
+
5
+ logger = get_logger(__name__)
6
+
7
+
8
class Storage:
    """A directed multi-graph supporting multiple typed edges between nodes.

    Nodes are identified by a string ID; edges by a ``(from_node, to_node,
    edge_type)`` triple of strings.  Every ID passed to the public API is
    coerced with ``str()`` before use, so ``1`` and ``"1"`` name the same node.

    Internally three tables are kept in sync:

    * ``__nodes``  - node ID -> property dict
    * ``__edges``  - edge ID -> property dict
    * ``__struct`` - node ID -> list of every edge ID incident to that node
      (outgoing and incoming); each incident edge is listed exactly once.
    """

    __NodeID = str
    __EdgeID = Tuple[str, str, str]
    __Property = Dict[str, Any]

    def __init__(self):
        """Initializes an empty directed graph."""
        self.__nodes: Dict[str, Dict[str, Any]] = {}
        self.__edges: Dict[Tuple[str, str, str], Dict[str, Any]] = {}
        self.__struct: Dict[str, List[Tuple[str, str, str]]] = {}

    @staticmethod
    def __edge_key(eid) -> Tuple[str, str, str]:
        """Normalizes an edge ID to a tuple of three strings."""
        return (str(eid[0]), str(eid[1]), str(eid[2]))

    ################################ GRAPH STRUCTURE APIs ################################

    def add_node(self, nid: __NodeID) -> bool:
        """
        Adds a node to the graph.

        Args:
            nid: Node ID to add

        Returns:
            True if node was added, False if it already exists
        """
        nid = str(nid)
        if nid in self.__nodes:
            return False
        self.__nodes[nid] = {}
        self.__struct[nid] = []
        return True

    def contains_node(self, nid: __NodeID) -> bool:
        """
        Checks if a node exists in the graph.

        Args:
            nid: Node ID to check

        Returns:
            True if node exists, False otherwise
        """
        return str(nid) in self.__nodes

    def add_edge(self, eid: __EdgeID) -> bool:
        """
        Adds a directed edge to the graph.

        Args:
            eid: Edge ID tuple (from_node, to_node, edge_type)

        Returns:
            True if edge was added, False if it already exists or nodes are missing
        """
        eid = self.__edge_key(eid)
        if eid in self.__edges:
            return False
        if eid[0] not in self.__nodes or eid[1] not in self.__nodes:
            return False
        self.__edges[eid] = {}
        self.__struct[eid[0]].append(eid)
        # A self-loop shares one adjacency list for both endpoints; register it
        # once so out_edges()/in_edges() do not yield it twice.
        if eid[1] != eid[0]:
            self.__struct[eid[1]].append(eid)
        return True

    def contains_edge(self, eid: __EdgeID) -> bool:
        """Checks if an edge exists in the graph."""
        return self.__edge_key(eid) in self.__edges

    def out_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns a lazy iterable of the edges leaving a given node (empty if unknown)."""
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, []) if eid[0] == nid)

    def in_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns a lazy iterable of the edges entering a given node (empty if unknown)."""
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, []) if eid[1] == nid)

    def successors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns a lazy iterable of target node IDs, one per outgoing edge."""
        nid = str(nid)
        return (eid[1] for eid in self.__struct.get(nid, []) if eid[0] == nid)

    def predecessors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns a lazy iterable of source node IDs, one per incoming edge."""
        nid = str(nid)
        return (eid[0] for eid in self.__struct.get(nid, []) if eid[1] == nid)

    ################################ GRAPH PROPERTIES APIs ################################

    def set_node_props(self, node: __NodeID, props: __Property) -> bool:
        """Merges ``props`` into a node's properties; returns False if the node is unknown."""
        node = str(node)
        if node not in self.__nodes:
            return False
        # Keys are stringified so lookups through get_node_prop() always match.
        self.__nodes[node].update({str(k): v for k, v in props.items()})
        return True

    def get_node_props(self, node: __NodeID) -> Optional[__Property]:
        """Returns the (live, internal) property dict of a node, or None if unknown."""
        return self.__nodes.get(str(node), None)

    def set_node_prop(self, node: __NodeID, key: str, value: Any) -> bool:
        """Sets a single property of a node; returns False if the node is unknown."""
        node, key = str(node), str(key)
        if node not in self.__nodes:
            return False
        self.__nodes[node][key] = value
        return True

    def get_node_prop(self, node: __NodeID, key: str) -> Optional[Any]:
        """Returns a single property of a node, or None if node or key is missing."""
        node, key = str(node), str(key)
        return self.__nodes.get(node, {}).get(key, None)

    def set_edge_props(self, eid: __EdgeID, props: __Property) -> bool:
        """Merges ``props`` into an edge's properties; returns False if the edge is unknown."""
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        self.__edges[eid].update({str(k): v for k, v in props.items()})
        return True

    def get_edge_props(self, eid: __EdgeID) -> Optional[__Property]:
        """Returns the (live, internal) property dict of an edge, or None if unknown."""
        return self.__edges.get(self.__edge_key(eid))

    def set_edge_prop(self, eid: __EdgeID, key: str, value: Any) -> bool:
        """Sets a single property of an edge; returns False if the edge is unknown."""
        eid = self.__edge_key(eid)
        key = str(key)
        if eid not in self.__edges:
            return False
        self.__edges[eid][key] = value
        return True

    def get_edge_prop(self, eid: __EdgeID, key: str) -> Optional[Any]:
        """Returns a single property of an edge, or None if edge or key is missing."""
        eid = self.__edge_key(eid)
        key = str(key)
        return self.__edges.get(eid, {}).get(key, None)

    def __repr__(self):
        """Returns a string representation of the graph."""
        return f"MultiDiGraph(nodes={len(self.__nodes)}, edges={len(self.__edges)})"

    ################################ GRAPH COMMON APIs ################################

    def get_nodes(self) -> Iterable[__NodeID]:
        """Returns a view over all node IDs in the graph."""
        return self.__nodes.keys()

    def get_edges(self) -> Iterable[__EdgeID]:
        """Returns a view over all edge IDs in the graph."""
        return self.__edges.keys()

    def remove_node(self, nid: __NodeID) -> bool:
        """
        Removes a node and every edge incident to it.

        Returns:
            True if the node was removed, False if it did not exist
        """
        nid = str(nid)
        if nid not in self.__nodes:
            return False
        self.__nodes.pop(nid)
        for eid in self.__struct.pop(nid, []):
            self.__edges.pop(eid, None)
            # Also unlink the edge from the node at its other end; otherwise
            # that node would keep reporting an edge that no longer exists.
            other = eid[1] if eid[0] == nid else eid[0]
            if other == nid:
                continue
            incident = self.__struct.get(other)
            if incident is not None and eid in incident:
                incident.remove(eid)
        return True

    def remove_edge(self, eid: __EdgeID) -> bool:
        """
        Removes an edge from the graph.

        Returns:
            True if the edge was removed, False if it did not exist
        """
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        # The set collapses both endpoints of a self-loop into one entry,
        # mirroring the single registration done by add_edge().
        for endpoint in {eid[0], eid[1]}:
            self.__struct[endpoint].remove(eid)
        self.__edges.pop(eid)
        return True
@@ -0,0 +1,5 @@
1
+ from .edge import CpgEdge
2
+ from .graph import CpgGraph
3
+ from .node import CpgNode
4
+
5
+ __all__ = ["CpgGraph", "CpgNode", "CpgEdge"]
cpg2py/_cpg/edge.py ADDED
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Tuple
4
+
5
+ from .._abc import AbcEdgeQuerier, AbcGraphQuerier
6
+
7
+
8
class CpgEdge(AbcEdgeQuerier):
    """Edge view exposing CPG edge attributes as read-only properties.

    ``edge_id``, ``from_nid``, ``to_nid`` and ``get_property`` are not defined
    in this class; presumably they come from ``AbcEdgeQuerier`` - TODO confirm.
    ``get_property`` is called with one or more candidate key names;
    NOTE(review): it looks like alternative spellings of the same key are
    tried, confirm against the base class.
    """

    def __init__(self, graph: AbcGraphQuerier, f_nid: str, t_nid: str, e_type: str) -> None:
        """Forwards the graph handle and the (from, to, type) triple to the base class."""
        super().__init__(graph, f_nid, t_nid, e_type)

    @property
    def id(self) -> Tuple[str, str, str]:
        """The edge identifier, as reported by ``self.edge_id``."""
        return self.edge_id

    @property
    def start(self) -> Optional[int]:
        """The stored ``start`` property as an int, else ``int(self.from_nid)``."""
        start_str = str(self.get_property("start", "start:START_ID"))
        # isdecimal() matches what int() can parse; isnumeric() also accepts
        # characters such as superscripts or fractions that make int() raise.
        return int(start_str) if start_str.isdecimal() else int(self.from_nid)

    @property
    def end(self) -> Optional[int]:
        """The stored ``end`` property as an int, else ``int(self.to_nid)``."""
        end_str = str(self.get_property("end", "end:END_ID"))
        return int(end_str) if end_str.isdecimal() else int(self.to_nid)

    @property
    def type(self) -> Optional[str]:
        """The edge type as stored under ``type`` / ``type:TYPE``."""
        return self.get_property("type", "type:TYPE")

    @property
    def var(self) -> Optional[str]:
        """The value stored under the ``var`` property."""
        return self.get_property("var")
cpg2py/_cpg/graph.py ADDED
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ from typing import Callable, Iterable, Optional
5
+
6
+ from .._abc import AbcGraphQuerier, Storage
7
+ from .._exceptions import EdgeNotFoundError, NodeNotFoundError, TopFileNotFoundError
8
+ from .._logger import get_logger
9
+ from .edge import CpgEdge
10
+ from .node import CpgNode
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class CpgGraph(AbcGraphQuerier[CpgNode, CpgEdge]):
    """
    Graph implementation for Object Property Diagram (OPG) used by ODgen and FAST.

    Concrete graph querier over CPG data, parameterized with CpgNode and
    CpgEdge so that query results are typed accordingly.  ``self.storage``,
    and the base ``succ``/``prev`` this class delegates to, are not defined
    here; presumably they are provided by ``AbcGraphQuerier`` - TODO confirm.
    """

    __EdgeCondition = Callable[[CpgEdge], bool]
    __always_true = lambda _edge: True

    def __init__(self, target: Storage) -> None:
        """Hands the backing storage to the base querier."""
        super().__init__(target)

    def node(self, whose_id_is: str) -> Optional[CpgNode]:
        """
        Looks up a node by its ID.

        Args:
            whose_id_is: Node ID to look up

        Returns:
            Node instance if found

        Raises:
            NodeNotFoundError: If node is not found in the graph, or if any
                other error occurs while building the node (logged, then
                re-raised as NodeNotFoundError with the cause chained)
        """
        try:
            return CpgNode(self.storage, whose_id_is)
        except Exception as err:
            if isinstance(err, NodeNotFoundError):
                # Already the documented error type: propagate untouched.
                raise
            logger.exception("Unexpected error while finding node with id %s", whose_id_is)
            raise NodeNotFoundError(whose_id_is) from err

    def edge(self, fid: str, tid: str, eid: str) -> Optional[CpgEdge]:
        """
        Looks up an edge by its source, target, and edge type.

        Args:
            fid: Source node ID
            tid: Target node ID
            eid: Edge type/ID

        Returns:
            Edge instance if found

        Raises:
            EdgeNotFoundError: If edge is not found in the graph, or if any
                other error occurs while building the edge (logged, then
                re-raised as EdgeNotFoundError with the cause chained)
        """
        try:
            return CpgEdge(self.storage, fid, tid, eid)
        except Exception as err:
            if isinstance(err, EdgeNotFoundError):
                raise
            logger.exception(
                "Unexpected error while finding edge from %s to %s, eid is %s", fid, tid, eid
            )
            raise EdgeNotFoundError(fid, tid, str(eid)) from err

    @functools.lru_cache()
    def topfile_node(self, of_nid: str) -> CpgNode:
        """
        Finds the top file node reachable upwards from the input node.

        A node qualifies when its type is "File" or its flags contain
        "TOPLEVEL_FILE"; otherwise predecessors over PARENT_OF / ENTRY / EXIT
        edges are searched recursively, first match wins.

        NOTE(review): lru_cache on a method keys on (self, of_nid), so the
        cache holds a reference to the graph instance and results are not
        refreshed if the underlying storage changes.

        Args:
            of_nid: Starting node ID

        Returns:
            Top file node

        Raises:
            TopFileNotFoundError: If top file node cannot be found
            NodeNotFoundError: If starting node is not found
        """
        start = self.node(of_nid)
        if start.type == "File" or "TOPLEVEL_FILE" in start.flags:
            return start
        upward_types = ("PARENT_OF", "ENTRY", "EXIT")
        for candidate in self.prev(start, lambda e: e.type in upward_types):
            try:
                return self.topfile_node(candidate.id)
            except TopFileNotFoundError:
                # This branch is a dead end; try the next predecessor.
                pass
        logger.error("Cannot find top file node from node %s", of_nid)
        raise TopFileNotFoundError(of_nid)

    def succ(self, of: CpgNode, who_satisifies: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns successor nodes connected to the input node.

        Args:
            of: Source node
            who_satisifies: Optional edge condition filter (accepts every edge by default)

        Returns:
            Whatever the base class ``succ`` produces for these arguments
        """
        return super().succ(of, who_satisifies)

    def prev(self, of: CpgNode, who_satisifies: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns predecessor nodes connected to the input node.

        Args:
            of: Target node
            who_satisifies: Optional edge condition filter (accepts every edge by default)

        Returns:
            Whatever the base class ``prev`` produces for these arguments
        """
        return super().prev(of, who_satisifies)

    def children(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns child nodes connected via PARENT_OF edges.

        Args:
            of: Parent node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of child nodes
        """

        def accepts(e: CpgEdge) -> bool:
            return extra(e) and (e.type == "PARENT_OF")

        return self.succ(of, accepts)

    def parent(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns parent nodes connected via PARENT_OF edges.

        Args:
            of: Child node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of parent nodes
        """

        def accepts(e: CpgEdge) -> bool:
            return extra(e) and (e.type == "PARENT_OF")

        return self.prev(of, accepts)

    def flow_to(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns successor nodes connected via FLOWS_TO edges.

        Args:
            of: Source node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of flow successor nodes
        """

        def accepts(e: CpgEdge) -> bool:
            return extra(e) and (e.type == "FLOWS_TO")

        return self.succ(of, accepts)

    def flow_from(self, of: CpgNode, extra: __EdgeCondition = __always_true) -> Iterable[CpgNode]:
        """
        Returns predecessor nodes connected via FLOWS_TO edges.

        Args:
            of: Target node
            extra: Additional edge condition filter, evaluated before the type check

        Returns:
            Iterable of flow predecessor nodes
        """

        def accepts(e: CpgEdge) -> bool:
            return extra(e) and (e.type == "FLOWS_TO")

        return self.prev(of, accepts)
cpg2py/_cpg/node.py ADDED
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Optional
4
+
5
+ from .._abc import AbcGraphQuerier, AbcNodeQuerier
6
+
7
+
8
class CpgNode(AbcNodeQuerier):
    """Node view exposing CPG node attributes as read-only properties.

    ``node_id`` and ``get_property`` are not defined in this class; presumably
    they come from ``AbcNodeQuerier`` - TODO confirm.  ``get_property`` is
    called with one or more candidate key names; NOTE(review): it looks like
    alternative spellings of the same key are tried, confirm against the base.
    """

    def __init__(self, graph: AbcGraphQuerier, nid: str) -> None:
        """Forwards the graph handle and node ID to the base class."""
        super().__init__(graph, nid)

    def __int_property(self, *names: str) -> Optional[int]:
        """Reads a property and returns it as an int, or None if it is not a decimal string."""
        raw = str(self.get_property(*names))
        # isdecimal() matches what int() can parse; isnumeric() also accepts
        # characters such as superscripts or fractions that make int() raise.
        return int(raw) if raw.isdecimal() else None

    @property
    def id(self) -> str:
        """The node identifier, as reported by ``self.node_id``."""
        return self.node_id

    @property
    def code(self) -> Optional[str]:
        """The value stored under the ``code`` property."""
        return self.get_property("code")

    @property
    def label(self) -> Optional[str]:
        """The value stored under ``labels:label`` / ``labels``."""
        return self.get_property("labels:label", "labels")

    @property
    def flags(self) -> List[str]:
        """The flags property split on single spaces; empty list when unset."""
        flags_str = self.get_property("flags:string_array", "flags:string[]", "flags")
        return str(flags_str).split(" ") if flags_str is not None else []

    @property
    def line_num(self) -> Optional[int]:
        """The ``lineno`` property as an int, or None if absent or non-decimal."""
        return self.__int_property("lineno:int", "lineno")

    @property
    def children_num(self) -> Optional[int]:
        """The ``childnum`` property as an int, or None if absent or non-decimal."""
        return self.__int_property("childnum:int", "childnum")

    @property
    def func_id(self) -> Optional[int]:
        """The ``funcid`` property as an int, or None if absent or non-decimal."""
        return self.__int_property("funcid:int", "funcid")

    @property
    def class_name(self) -> Optional[str]:
        """The value stored under the ``classname`` property."""
        return self.get_property("classname")

    @property
    def namespace(self) -> Optional[str]:
        """The value stored under the ``namespace`` property."""
        return self.get_property("namespace")

    @property
    def name(self) -> Optional[str]:
        """The value stored under the ``name`` property."""
        return self.get_property("name")

    @property
    def end_num(self) -> Optional[int]:
        """The ``endlineno`` property as an int, or None if absent or non-decimal."""
        return self.__int_property("endlineno:int", "endlineno")

    @property
    def comment(self) -> Optional[str]:
        """The value stored under the ``doccomment`` property."""
        return self.get_property("doccomment")

    @property
    def type(self) -> Optional[str]:
        """The value stored under the ``type`` property."""
        return self.get_property("type")
cpg2py/_exceptions.py ADDED
@@ -0,0 +1,41 @@
1
+ """
2
+ Custom exceptions for cpg2py package.
3
+ """
4
+
5
+
6
class CPGError(Exception):
    """Common ancestor of every exception raised by cpg2py.

    Catch this type to handle any CPG-related failure in one place.
    """
8
+
9
+
10
class NodeNotFoundError(CPGError):
    """Signals that a requested node does not exist in the graph.

    Attributes:
        node_id: ID of the node that was looked up
    """

    def __init__(self, node_id: str, message: str = None):
        """Stores the node ID and builds a default message unless one is supplied."""
        self.node_id = node_id
        text = f"Node with id '{node_id}' not found in graph" if message is None else message
        super().__init__(text)
18
+
19
+
20
class EdgeNotFoundError(CPGError):
    """Signals that a requested edge does not exist in the graph.

    Attributes:
        from_id: ID of the edge's source node
        to_id: ID of the edge's target node
        edge_type: Type of the edge that was looked up
    """

    def __init__(self, from_id: str, to_id: str, edge_type: str, message: str = None):
        """Stores the edge triple and builds a default message unless one is supplied."""
        self.from_id, self.to_id, self.edge_type = from_id, to_id, edge_type
        if message is not None:
            text = message
        else:
            text = f"Edge from '{from_id}' to '{to_id}' with type '{edge_type}' not found in graph"
        super().__init__(text)
32
+
33
+
34
class TopFileNotFoundError(CPGError):
    """Signals that no top file node could be found for a node.

    Attributes:
        node_id: ID of the node the search started from
    """

    def __init__(self, node_id: str, message: str = None):
        """Stores the node ID and builds a default message unless one is supplied."""
        self.node_id = node_id
        text = f"Cannot find top file node from node '{node_id}'" if message is None else message
        super().__init__(text)
cpg2py/_logger.py ADDED
@@ -0,0 +1,53 @@
1
+ """
2
+ Logging configuration for cpg2py package.
3
+ """
4
+
5
+ import logging
6
+ import sys
7
+ from typing import Optional
8
+
9
# Lazily created package root logger; populated on the first get_logger() call.
_logger: Optional[logging.Logger] = None


def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Returns a logger instance for the specified module.

    On first use the package root logger ``cpg2py`` is created, set to
    WARNING, and (only if it has no handlers yet) given a stderr handler with
    a timestamped format.

    Args:
        name: Module name (defaults to 'cpg2py'). A fully qualified module
            name such as ``cpg2py._abc.storage`` (what ``__name__`` yields
            inside the package) is accepted as-is: the leading package prefix
            is stripped so the result is ``cpg2py._abc.storage`` rather than
            ``cpg2py.cpg2py._abc.storage``.

    Returns:
        Configured logger instance (the root logger, or a child of it)
    """
    global _logger

    if _logger is None:
        _logger = logging.getLogger("cpg2py")
        _logger.setLevel(logging.WARNING)

        # Do not stack a second handler on a logger the application has
        # already configured.
        if not _logger.handlers:
            handler = logging.StreamHandler(sys.stderr)
            handler.setLevel(logging.WARNING)

            formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
            )
            handler.setFormatter(formatter)
            _logger.addHandler(handler)

    if not name or name == _logger.name:
        return _logger

    # getChild() prepends the root name itself, so drop it from names that
    # already carry it to avoid a doubled "cpg2py.cpg2py." prefix.
    prefix = _logger.name + "."
    if name.startswith(prefix):
        name = name[len(prefix):]
    return _logger.getChild(name) if name else _logger
41
+
42
+
43
def set_log_level(level: int) -> None:
    """
    Applies a logging level to the cpg2py root logger and all of its handlers.

    Args:
        level: Logging level (e.g., logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR)
    """
    root = get_logger()
    # The logger itself first, then each attached handler, so that both the
    # logger-level and handler-level thresholds agree.
    for target in (root, *root.handlers):
        target.setLevel(level)