cpg2py 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cpg2py/__init__.py +69 -29
- cpg2py/{abc → _abc}/__init__.py +2 -2
- cpg2py/_abc/edge.py +96 -0
- cpg2py/_abc/graph.py +247 -0
- cpg2py/_abc/node.py +62 -0
- cpg2py/_abc/storage.py +190 -0
- cpg2py/_cpg/__init__.py +5 -0
- cpg2py/_cpg/edge.py +32 -0
- cpg2py/_cpg/graph.py +183 -0
- cpg2py/_cpg/node.py +67 -0
- cpg2py/_exceptions.py +41 -0
- cpg2py/_logger.py +53 -0
- cpg2py-1.1.0.dist-info/METADATA +407 -0
- cpg2py-1.1.0.dist-info/RECORD +17 -0
- {cpg2py-1.0.4.dist-info → cpg2py-1.1.0.dist-info}/WHEEL +1 -1
- cpg2py/abc/edge.py +0 -39
- cpg2py/abc/graph.py +0 -98
- cpg2py/abc/node.py +0 -26
- cpg2py/abc/storage.py +0 -153
- cpg2py/cpg/__init__.py +0 -3
- cpg2py/cpg/edge.py +0 -31
- cpg2py/cpg/graph.py +0 -97
- cpg2py/cpg/node.py +0 -65
- cpg2py-1.0.4.dist-info/METADATA +0 -261
- cpg2py-1.0.4.dist-info/RECORD +0 -15
- {cpg2py-1.0.4.dist-info → cpg2py-1.1.0.dist-info}/LICENSE +0 -0
- {cpg2py-1.0.4.dist-info → cpg2py-1.1.0.dist-info}/top_level.txt +0 -0
cpg2py/abc/storage.py
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict, Iterable, Optional, Tuple
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Storage:
    """A directed multi-graph supporting multiple typed edges between nodes.

    Nodes are keyed by a string ID and edges by a ``(from_id, to_id, edge_type)``
    tuple of strings. Every node ID, edge ID component and property key handed
    to a method is coerced with ``str()`` before it is used.
    """

    __NodeID = str
    __EdgeID = Tuple[str, str, str]
    __Property = Dict[str, Any]

    def __init__(self):
        """Initializes an empty directed graph."""
        self.__nodes = {}   # node id -> property dict
        self.__edges = {}   # edge id -> property dict
        self.__struct = {}  # node id -> incident edge ids (incoming and outgoing)

    @staticmethod
    def __edge_key(eid: __EdgeID) -> __EdgeID:
        """Returns ``eid`` normalized to a tuple of three strings."""
        return (str(eid[0]), str(eid[1]), str(eid[2]))

    ################################ GRAPH STRUCTURE APIs ################################

    def add_node(self, nid: __NodeID) -> bool:
        """Adds a node; returns False when the node already exists."""
        nid = str(nid)
        if nid in self.__nodes:
            return False
        self.__nodes[nid] = {}
        self.__struct[nid] = []
        return True

    def contains_node(self, nid: __NodeID) -> bool:
        """Checks if a node exists in the graph."""
        return str(nid) in self.__nodes

    def add_edge(self, eid: __EdgeID) -> bool:
        """Adds the directed edge ``(from_id, to_id, edge_type)``.

        Returns False when the edge already exists or when either endpoint is
        not a node of the graph.
        """
        eid = self.__edge_key(eid)
        if eid in self.__edges:
            return False
        if eid[0] not in self.__nodes or eid[1] not in self.__nodes:
            return False
        self.__edges[eid] = {}
        self.__struct[eid[0]].append(eid)
        # A self-loop has a single endpoint: record it once so that
        # out_edges()/in_edges() do not report it twice.
        if eid[1] != eid[0]:
            self.__struct[eid[1]].append(eid)
        return True

    def contains_edge(self, eid: __EdgeID) -> bool:
        """Checks if an edge exists in the graph."""
        return self.__edge_key(eid) in self.__edges

    def out_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns an iterator over the outgoing edges of a node.

        An unknown node yields nothing.
        """
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, ()) if eid[0] == nid)

    def in_edges(self, nid: __NodeID) -> Iterable[__EdgeID]:
        """Returns an iterator over the incoming edges of a node.

        An unknown node yields nothing.
        """
        nid = str(nid)
        return (eid for eid in self.__struct.get(nid, ()) if eid[1] == nid)

    def successors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns an iterator over the successor node IDs of a node.

        A successor reached through several edges is yielded once per edge.
        """
        nid = str(nid)
        return (eid[1] for eid in self.__struct.get(nid, ()) if eid[0] == nid)

    def predecessors(self, nid: __NodeID) -> Iterable[__NodeID]:
        """Returns an iterator over the predecessor node IDs of a node.

        A predecessor reached through several edges is yielded once per edge.
        """
        nid = str(nid)
        return (eid[0] for eid in self.__struct.get(nid, ()) if eid[1] == nid)

    ################################ GRAPH PROPERTIES APIs ################################

    def set_node_props(self, node: __NodeID, props: __Property) -> bool:
        """Merges ``props`` into a node's properties; False if the node is unknown."""
        node = str(node)
        if node not in self.__nodes:
            return False
        self.__nodes[node].update({str(k): v for k, v in props.items()})
        return True

    def get_node_props(self, node: __NodeID) -> Optional[__Property]:
        """Returns the property dict of a node, or None if the node is unknown.

        The returned dict is the stored one, not a copy.
        """
        return self.__nodes.get(str(node), None)

    def set_node_prop(self, node: __NodeID, key: str, value: Any) -> bool:
        """Sets a single property of a node; False if the node is unknown."""
        node, key = str(node), str(key)
        if node not in self.__nodes:
            return False
        self.__nodes[node][key] = value
        return True

    def get_node_prop(self, node: __NodeID, key: str) -> Optional[Any]:
        """Returns one property of a node, or None if node or key is unknown."""
        node, key = str(node), str(key)
        return self.__nodes.get(node, {}).get(key, None)

    def set_edge_props(self, eid: __EdgeID, props: __Property) -> bool:
        """Merges ``props`` into an edge's properties; False if the edge is unknown."""
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        self.__edges[eid].update({str(k): v for k, v in props.items()})
        return True

    def get_edge_props(self, eid: __EdgeID) -> Optional[__Property]:
        """Returns the property dict of an edge, or None if the edge is unknown.

        The returned dict is the stored one, not a copy.
        """
        return self.__edges.get(self.__edge_key(eid))

    def set_edge_prop(self, eid: __EdgeID, key: str, value: Any) -> bool:
        """Sets a single property of an edge; False if the edge is unknown."""
        eid = self.__edge_key(eid)
        key = str(key)
        if eid not in self.__edges:
            return False
        self.__edges[eid][key] = value
        return True

    def get_edge_prop(self, eid: __EdgeID, key: str) -> Optional[Any]:
        """Returns one property of an edge, or None if edge or key is unknown."""
        eid = self.__edge_key(eid)
        return self.__edges.get(eid, {}).get(str(key), None)

    def __repr__(self):
        """Returns a short summary with the node and edge counts."""
        return f"MultiDiGraph(nodes={len(self.__nodes)}, edges={len(self.__edges)})"

    ################################ GRAPH COMMON APIs ################################

    def get_nodes(self) -> Iterable[__NodeID]:
        """Returns a view of all node IDs in the graph."""
        return self.__nodes.keys()

    def get_edges(self) -> Iterable[__EdgeID]:
        """Returns a view of all edge IDs in the graph."""
        return self.__edges.keys()

    def remove_node(self, nid: __NodeID) -> bool:
        """Removes a node and every edge incident to it; False if unknown."""
        nid = str(nid)
        if nid not in self.__nodes:
            return False
        # Iterate over a copy: remove_edge() mutates this adjacency list, and
        # also cleans the edge out of the neighbour's list.
        for eid in list(self.__struct[nid]):
            self.remove_edge(eid)
        del self.__struct[nid]
        del self.__nodes[nid]
        return True

    def remove_edge(self, eid: __EdgeID) -> bool:
        """Removes an edge from the graph; False if the edge is unknown."""
        eid = self.__edge_key(eid)
        if eid not in self.__edges:
            return False
        # The set collapses a self-loop to one endpoint, matching add_edge().
        for endpoint in {eid[0], eid[1]}:
            self.__struct[endpoint].remove(eid)
        del self.__edges[eid]
        return True
|
cpg2py/cpg/__init__.py
DELETED
cpg2py/cpg/edge.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from typing import Optional, Tuple
|
|
3
|
-
from ..abc import AbcGraphQuerier, AbcEdgeQuerier
|
|
4
|
-
|
|
5
|
-
class _Edge(AbcEdgeQuerier):
    """Edge querier exposing the `start`, `end`, `type` and `var` edge properties."""

    def __init__(self, graph: AbcGraphQuerier, f_nid: str, t_nid: str, e_type: str) -> None:
        """Builds the edge querier by forwarding all arguments to AbcEdgeQuerier."""
        super().__init__(graph, f_nid, t_nid, e_type)

    @property
    def id(self) -> Tuple[str, str, str]:
        """The edge identifier, as provided by the base class's `edge_id`."""
        return self.edge_id

    @property
    def start(self) -> Optional[int]:
        """The start node ID as an int.

        Uses the `start` / `start:START_ID` property when it is numeric,
        otherwise falls back to the first component of the edge ID.
        """
        start_str = str(self.get_property('start', 'start:START_ID'))
        if start_str.isnumeric():
            return int(start_str)
        # `self.__from_id` written inside this class is name-mangled to
        # `_Edge__from_id`, which `_Edge` never sets, so the old fallback could
        # only raise AttributeError.
        # NOTE(review): assumes edge_id is ordered (from_id, to_id, type) - confirm
        # against AbcEdgeQuerier.
        return int(self.edge_id[0])

    @property
    def end(self) -> Optional[int]:
        """The end node ID as an int.

        Uses the `end` / `end:END_ID` property when it is numeric, otherwise
        falls back to the second component of the edge ID.
        """
        end_str = str(self.get_property('end', 'end:END_ID'))
        if end_str.isnumeric():
            return int(end_str)
        # Same name-mangling issue as in `start` (`self.__to_id`).
        # NOTE(review): assumes edge_id is ordered (from_id, to_id, type) - confirm.
        return int(self.edge_id[1])

    @property
    def type(self) -> Optional[str]:
        """The edge type, read from the `type` / `type:TYPE` property."""
        return self.get_property('type', 'type:TYPE')

    @property
    def var(self) -> Optional[str]:
        """The value of the `var` property."""
        return self.get_property('var')
|
cpg2py/cpg/graph.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
from .edge import _Edge
|
|
5
|
-
from .node import _Node
|
|
6
|
-
from ..abc import AbcGraphQuerier, Storage
|
|
7
|
-
|
|
8
|
-
from typing import Callable, Iterable, Optional
|
|
9
|
-
import json, functools
|
|
10
|
-
|
|
11
|
-
class _Graph(AbcGraphQuerier):
    '''
    Graph querier that builds `_Node` / `_Edge` objects and adds traversal
    helpers filtered by edge type (PARENT_OF, FLOWS_TO, ENTRY, EXIT).

    OPG is Object Property Diagram used by ODgen and FAST
    '''
    __EdgeCondition = Callable[[_Edge], bool]
    __always_true = lambda _: True

    def __init__(self, target: Storage) -> None:
        ''' Forward the storage to AbcGraphQuerier. '''
        super().__init__(target)

    def node(self, whose_id_is: str) -> Optional[_Node]:
        '''
        build the node querier for the given id.
        Any exception is printed and turned into a None result.
        '''
        try:
            found = _Node(self.storage, whose_id_is)
        except Exception as e:
            print(
                f'✘ {_Graph} ERROR:'
                f'Cannot find node with id {whose_id_is}.'
                f'(exception is {e})'
            )
            return None
        return found

    def edge(self, fid: str, tid: str, eid: str) -> Optional[_Edge]:
        '''
        build the edge querier for the given (from, to, type) triple.
        Any exception is printed and turned into a None result.
        '''
        try:
            found = _Edge(self.storage, fid, tid, eid)
        except Exception as e:
            print(
                f'✘ {_Graph} ERROR:'
                f'Cannot find edge from {fid} to {tid}, and eid is {str(eid)}.'
                f'(exception is {e})'
            )
            return None
        return found

    # NOTE(review): lru_cache on a method keys on `self` and keeps the graph
    # alive for the lifetime of the cache - confirm this is intended.
    @functools.lru_cache()
    def topfile_node(self, of_nid: str) -> _Node:
        '''
        find the top file node from the input node.
        The walk goes backwards over PARENT_OF / ENTRY / EXIT edges and
        raises when no candidate leads to a file node.
        '''
        current = self.node(of_nid)
        if current.type == "File" or 'TOPLEVEL_FILE' in current.flags:
            return current

        def leads_upwards(e):
            return e.type in ["PARENT_OF", "ENTRY", "EXIT"]

        # NOTE(review): the recursive call raises rather than returning None,
        # so the `is not None` test below looks unreachable as False - confirm.
        for ancestor in self.prev(current, leads_upwards):
            located = self.topfile_node(ancestor.id)
            if located is not None:
                return located
        raise Exception(f'❌ INNER ERROR(500): CANNOT FIND THE TOPFILE.')

    def succ(self, of: _Node, who_satisifies: __EdgeCondition = __always_true) -> Iterable[_Node]:
        '''
        return the next nodes connected with the input one.
        '''
        # NOTE(review): this passes `of.id` while `prev` passes `of` itself;
        # one of the two probably disagrees with AbcGraphQuerier - confirm.
        start_id = of.id
        return super().succ(start_id, who_satisifies)

    def prev(self, of, who_satisifies = __always_true) -> Iterable[_Node]:
        '''
        return the previous nodes connected with the input one.
        '''
        return super().prev(of, who_satisifies)

    def children(self, of: _Node, extra: __EdgeCondition = __always_true) -> Iterable[_Node]:
        '''
        return the next nodes connected with the input one.
        Only PARENT_OF edges that also satisfy `extra` are followed.
        '''
        def accept(e):
            return extra(e) and (e.type == "PARENT_OF")
        return self.succ(of, accept)

    def parent(self, of: _Node, extra:__EdgeCondition = __always_true) -> Iterable[_Node]:
        '''
        return the prev nodes connected with the input one.
        Only PARENT_OF edges that also satisfy `extra` are followed.
        '''
        def accept(e):
            return extra(e) and (e.type == "PARENT_OF")
        return self.prev(of, accept)

    def flow_to(self, of: _Node, extra: __EdgeCondition = __always_true) -> Iterable[_Node]:
        '''
        return the next nodes connected with the input one.
        Only FLOWS_TO edges that also satisfy `extra` are followed.
        '''
        def accept(e):
            return extra(e) and (e.type == "FLOWS_TO")
        return self.succ(of, accept)

    def flow_from(self, of: _Node, extra: __EdgeCondition = __always_true) -> Iterable[_Node]:
        '''
        return the previous nodes connected with the input one.
        Only FLOWS_TO edges that also satisfy `extra` are followed.
        '''
        def accept(e):
            return extra(e) and (e.type == "FLOWS_TO")
        return self.prev(of, accept)
|
cpg2py/cpg/node.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from typing import List, Optional
|
|
3
|
-
|
|
4
|
-
from ..abc import AbcNodeQuerier, AbcGraphQuerier
|
|
5
|
-
|
|
6
|
-
class _Node(AbcNodeQuerier):
    """Node querier exposing node properties as read-only attributes."""

    def __init__(self, graph: AbcGraphQuerier, nid: str) -> None:
        """Builds the node querier by forwarding all arguments to AbcNodeQuerier."""
        super().__init__(graph, nid)

    def __optional_int(self, *names: str) -> Optional[int]:
        """Reads a property under the given names and parses it as an int.

        Returns None when the stringified value is not numeric.
        """
        text = str(self.get_property(*names))
        if not text.isnumeric():
            return None
        return int(text)

    @property
    def id(self) -> str:
        """The node identifier, as provided by the base class's `node_id`."""
        return self.node_id

    @property
    def code(self) -> Optional[str]:
        """The value of the `code` property."""
        return self.get_property('code')

    @property
    def label(self) -> Optional[str]:
        """The node label, read from `labels:label` / `labels`."""
        return self.get_property('labels:label', 'labels')

    @property
    def flags(self) -> List[str]:
        """The node flags split on single spaces; empty list when unset."""
        raw = self.get_property('flags:string_array', 'flags:string[]', 'flags')
        if raw is None:
            return []
        return str(raw).split(' ')

    @property
    def line_num(self) -> Optional[int]:
        """The `lineno:int` / `lineno` property as an int, or None."""
        return self.__optional_int('lineno:int', 'lineno')

    @property
    def children_num(self) -> Optional[int]:
        """The `childnum:int` / `childnum` property as an int, or None."""
        return self.__optional_int('childnum:int', 'childnum')

    @property
    def func_id(self) -> Optional[int]:
        """The `funcid:int` / `funcid` property as an int, or None."""
        return self.__optional_int('funcid:int', 'funcid')

    @property
    def class_name(self) -> Optional[str]:
        """The value of the `classname` property."""
        return self.get_property('classname')

    @property
    def namespace(self) -> Optional[str]:
        """The value of the `namespace` property."""
        return self.get_property('namespace')

    @property
    def name(self) -> Optional[str]:
        """The value of the `name` property."""
        return self.get_property('name')

    @property
    def end_num(self) -> Optional[int]:
        """The `endlineno:int` / `endlineno` property as an int, or None."""
        return self.__optional_int('endlineno:int', 'endlineno')

    @property
    def comment(self) -> Optional[str]:
        """The value of the `doccomment` property."""
        return self.get_property('doccomment')

    @property
    def type(self) -> Optional[str]:
        """The value of the `type` property."""
        return self.get_property('type')
|
cpg2py-1.0.4.dist-info/METADATA
DELETED
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: cpg2py
|
|
3
|
-
Version: 1.0.4
|
|
4
|
-
Summary: A graph-based data structure designed for querying CSV files in Joern format in Python
|
|
5
|
-
Home-page: https://github.com/YichaoXu/cpg2py
|
|
6
|
-
Author: Yichao Xu
|
|
7
|
-
Author-email: Yichao Xu <yxu166@jhu.edu>
|
|
8
|
-
License: MIT License
|
|
9
|
-
|
|
10
|
-
Copyright (c) 2025 Yichao Xu
|
|
11
|
-
|
|
12
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
-
in the Software without restriction, including without limitation the rights
|
|
15
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
-
furnished to do so, subject to the following conditions:
|
|
18
|
-
|
|
19
|
-
The above copyright notice and this permission notice shall be included in all
|
|
20
|
-
copies or substantial portions of the Software.
|
|
21
|
-
|
|
22
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
-
SOFTWARE.
|
|
29
|
-
|
|
30
|
-
Project-URL: Homepage, https://github.com/YichaoXu/cpg2py
|
|
31
|
-
Project-URL: Repository, https://github.com/YichaoXu/cpg2py
|
|
32
|
-
Keywords: Joern,CPG,Graph,CSV
|
|
33
|
-
Requires-Python: >=3.6
|
|
34
|
-
Description-Content-Type: text/markdown
|
|
35
|
-
License-File: LICENSE
|
|
36
|
-
Dynamic: author
|
|
37
|
-
Dynamic: home-page
|
|
38
|
-
Dynamic: requires-python
|
|
39
|
-
|
|
40
|
-
# **cpg2py: Graph-Based Query Engine for Joern CSV Files**
|
|
41
|
-
|
|
42
|
-
`cpg2py` is a Python library that provides a lightweight **graph-based query engine** for analyzing **Code Property Graphs (CPG)** extracted from Joern CSV files. The library offers an **abstract base class (ABC) architecture**, allowing users to extend and implement their own custom graph queries.
|
|
43
|
-
|
|
44
|
-
---
|
|
45
|
-
|
|
46
|
-
## **🚀 Features**
|
|
47
|
-
|
|
48
|
-
- **MultiDiGraph Representation**: A directed multi-graph with support for multiple edges between nodes.
|
|
49
|
-
- **CSV-Based Graph Construction**: Reads `nodes.csv` and `rels.csv` to construct a graph structure.
|
|
50
|
-
- **Extensible Abstract Base Classes (ABC)**:
|
|
51
|
-
- `AbcGraphQuerier` for implementing **custom graph queries**.
|
|
52
|
-
- `AbcNodeQuerier` for interacting with **nodes**.
|
|
53
|
-
- `AbcEdgeQuerier` for interacting with **edges**.
|
|
54
|
-
- **Built-in Query Mechanisms**:
|
|
55
|
-
- **Retrieve all edges**.
|
|
56
|
-
- **Get incoming (`in_edges`) and outgoing (`out_edges`) edges of a node**.
|
|
57
|
-
- **Find successors (`succ`) and predecessors (`prev`)**.
|
|
58
|
-
- **Traverse AST, Control Flow, and Data Flow Graphs**.
|
|
59
|
-
|
|
60
|
-
---
|
|
61
|
-
|
|
62
|
-
## **📚 Installation**
|
|
63
|
-
|
|
64
|
-
To install the package, use:
|
|
65
|
-
|
|
66
|
-
```bash
|
|
67
|
-
pip install git+https://github.com/YichaoXu/cpg2py.git
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
Or install the released package from PyPI:
|
|
71
|
-
|
|
72
|
-
```bash
|
|
73
|
-
pip install cpg2py
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
---
|
|
77
|
-
|
|
78
|
-
## **📂 File Structure**
|
|
79
|
-
|
|
80
|
-
- **`nodes.csv`** (Example):
|
|
81
|
-
```csv
|
|
82
|
-
id:int labels:label type flags:string_array lineno:int code childnum:int funcid:int classname namespace endlineno:int name doccomment
|
|
83
|
-
0 Filesystem Directory "input"
|
|
84
|
-
1 Filesystem File "example.php"
|
|
85
|
-
2 AST AST_TOPLEVEL TOPLEVEL_FILE 1 "" 25 "/input/example.php"
|
|
86
|
-
|
|
87
|
-
````
|
|
88
|
-
- **`rels.csv`** (Example):
|
|
89
|
-
```csv
|
|
90
|
-
start end type
|
|
91
|
-
2 3 ENTRY
|
|
92
|
-
2 4 EXIT
|
|
93
|
-
6 7 ENTRY
|
|
94
|
-
6 9 PARENT_OF
|
|
95
|
-
````
|
|
96
|
-
|
|
97
|
-
---
|
|
98
|
-
|
|
99
|
-
## **📚 Usage**
|
|
100
|
-
|
|
101
|
-
### **1️⃣ Load Graph from Joern CSVs**
|
|
102
|
-
|
|
103
|
-
```python
|
|
104
|
-
from cpg2py import cpg_graph
|
|
105
|
-
|
|
106
|
-
# Load graph from CSV files
|
|
107
|
-
graph = cpg_graph("nodes.csv", "rels.csv")
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
---
|
|
111
|
-
|
|
112
|
-
### **2️⃣ Query Nodes & Edges**
|
|
113
|
-
|
|
114
|
-
```python
|
|
115
|
-
# Get a specific node
|
|
116
|
-
node = graph.node("2")
|
|
117
|
-
print(node.name, node.type) # Example output: "/input/example.php" AST_TOPLEVEL
|
|
118
|
-
|
|
119
|
-
# Get a specific edge
|
|
120
|
-
edge = graph.edge("2", "3", "ENTRY")
|
|
121
|
-
print(edge.type) # Output: ENTRY
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
---
|
|
125
|
-
|
|
126
|
-
### **3️⃣ Get Node Connections**
|
|
127
|
-
|
|
128
|
-
```python
|
|
129
|
-
# Get all successor nodes (nodes reached via outgoing edges)
|
|
130
|
-
outgoing_edges = graph.succ(node)
|
|
131
|
-
for out_node in outgoing_edges:
|
|
132
|
-
print(out_node.id, out_node.name)
|
|
133
|
-
|
|
134
|
-
# Get all predecessor nodes (nodes reached via incoming edges)
|
|
135
|
-
incoming_edges = graph.prev(node)
|
|
136
|
-
for in_node in incoming_edges:
|
|
137
|
-
print(in_node.id, in_node.name)
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
---
|
|
141
|
-
|
|
142
|
-
### **4️⃣ AST and Flow Queries**
|
|
143
|
-
|
|
144
|
-
```python
|
|
145
|
-
# Get top-level file node for a given node
|
|
146
|
-
top_file = graph.topfile_node("5")
|
|
147
|
-
print(top_file.name) # Output: "example.php"
|
|
148
|
-
|
|
149
|
-
# Get child nodes in the AST hierarchy
|
|
150
|
-
children = graph.children(node)
|
|
151
|
-
print([child.id for child in children])
|
|
152
|
-
|
|
153
|
-
# Get data flow successors
|
|
154
|
-
flow_successors = graph.flow_to(node)
|
|
155
|
-
print([succ.id for succ in flow_successors])
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
---
|
|
159
|
-
|
|
160
|
-
## **🛠 Abstract Base Classes (ABC)**
|
|
161
|
-
|
|
162
|
-
The following abstract base classes (`ABC`) provide interfaces for extending **node**, **edge**, and **graph** querying behavior.
|
|
163
|
-
|
|
164
|
-
---
|
|
165
|
-
|
|
166
|
-
### **🔹 AbcNodeQuerier (Abstract Node Interface)**
|
|
167
|
-
|
|
168
|
-
This class defines how nodes interact with the graph storage.
|
|
169
|
-
|
|
170
|
-
```python
|
|
171
|
-
from cpg2py.abc import AbcNodeQuerier
|
|
172
|
-
|
|
173
|
-
class MyNodeQuerier(AbcNodeQuerier):
|
|
174
|
-
def __init__(self, graph, nid):
|
|
175
|
-
super().__init__(graph, nid)
|
|
176
|
-
|
|
177
|
-
@property
|
|
178
|
-
def name(self):
|
|
179
|
-
return self.get_property("name")
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
---
|
|
183
|
-
|
|
184
|
-
### **🔹 AbcEdgeQuerier (Abstract Edge Interface)**
|
|
185
|
-
|
|
186
|
-
Defines the querying mechanisms for edges in the graph.
|
|
187
|
-
|
|
188
|
-
```python
|
|
189
|
-
from cpg2py.abc import AbcEdgeQuerier
|
|
190
|
-
|
|
191
|
-
class MyEdgeQuerier(AbcEdgeQuerier):
|
|
192
|
-
def __init__(self, graph, f_nid, t_nid, e_type):
|
|
193
|
-
super().__init__(graph, f_nid, t_nid, e_type)
|
|
194
|
-
|
|
195
|
-
@property
|
|
196
|
-
def type(self):
|
|
197
|
-
return self.get_property("type")
|
|
198
|
-
```
|
|
199
|
-
|
|
200
|
-
---
|
|
201
|
-
|
|
202
|
-
### **🔹 AbcGraphQuerier (Abstract Graph Interface)**
|
|
203
|
-
|
|
204
|
-
This class provides an interface for implementing custom graph query mechanisms.
|
|
205
|
-
|
|
206
|
-
```python
|
|
207
|
-
from cpg2py.abc import AbcGraphQuerier
|
|
208
|
-
|
|
209
|
-
class MyGraphQuerier(AbcGraphQuerier):
|
|
210
|
-
def node(self, nid: str):
|
|
211
|
-
return MyNodeQuerier(self.storage, nid)
|
|
212
|
-
|
|
213
|
-
def edge(self, fid, tid, eid):
|
|
214
|
-
return MyEdgeQuerier(self.storage, fid, tid, eid)
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
---
|
|
218
|
-
|
|
219
|
-
## **🔍 Querying The Graph**
|
|
220
|
-
|
|
221
|
-
After implementing the abstract classes, you can perform advanced queries:
|
|
222
|
-
|
|
223
|
-
```python
|
|
224
|
-
graph = MyGraphQuerier(storage)
|
|
225
|
-
|
|
226
|
-
# Query node properties
|
|
227
|
-
node = graph.node("5")
|
|
228
|
-
print(node.name) # Example Output: "main"
|
|
229
|
-
|
|
230
|
-
# Query edge properties
|
|
231
|
-
edge = graph.edge("5", "6", "FLOWS_TO")
|
|
232
|
-
print(edge.type) # Output: "FLOWS_TO"
|
|
233
|
-
```
|
|
234
|
-
|
|
235
|
-
---
|
|
236
|
-
|
|
237
|
-
## **🐝 API Reference**
|
|
238
|
-
|
|
239
|
-
For more detailed API documentation, please see our [API docs](docs/APIs.md).
|
|
240
|
-
|
|
241
|
-
- **Graph Functions**:
|
|
242
|
-
- `cpg_graph(node_csv, edge_csv)`: Loads graph from CSV files.
|
|
243
|
-
- `graph.node(nid)`: Retrieves a node by ID.
|
|
244
|
-
- `graph.edge(fid, tid, eid)`: Retrieves an edge.
|
|
245
|
-
- `graph.succ(node)`: Gets successor nodes.
|
|
246
|
-
- `graph.prev(node)`: Gets predecessor nodes.
|
|
247
|
-
- **Node Properties**:
|
|
248
|
-
- `.name`: Node name.
|
|
249
|
-
- `.type`: Node type.
|
|
250
|
-
- `.line_num`: Source code line number.
|
|
251
|
-
- **Edge Properties**:
|
|
252
|
-
- `.start`: Edge start node.
|
|
253
|
-
- `.end`: Edge end node.
|
|
254
|
-
- `.type`: Edge type.
|
|
255
|
-
|
|
256
|
-
---
|
|
257
|
-
|
|
258
|
-
## **🌟 License**
|
|
259
|
-
|
|
260
|
-
This project is licensed under the **MIT License**.
|
|
261
|
-
|
cpg2py-1.0.4.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
cpg2py/__init__.py,sha256=4GXaMrMxlqWKpjrwAZlSwBCG-9_osMeChuFS-WRqtKg,1918
|
|
2
|
-
cpg2py/abc/__init__.py,sha256=HgDXsJkGcPQvr6ac3hMrUU9TuQibMaP7-oXbXLs_iLI,207
|
|
3
|
-
cpg2py/abc/edge.py,sha256=zLlRebDKT2qK5XOJq23_UUCESgGsvgalbXdx_uUcKks,1240
|
|
4
|
-
cpg2py/abc/graph.py,sha256=DbeHN6qsNaqcqL9cUquJWC16BihDxy1MEhyuL-qVMBI,3902
|
|
5
|
-
cpg2py/abc/node.py,sha256=E2EQv_KisEPuslFaglZZvSBm-VY5LYCFBOFY1yiPzDY,844
|
|
6
|
-
cpg2py/abc/storage.py,sha256=BF82Vs_7FxGYSPiM_6JwQVzmaqMzGy2r-WSk8pQQclY,5976
|
|
7
|
-
cpg2py/cpg/__init__.py,sha256=fO59Yd7OISyxdjdZDJ5zFM41r4cYKawOsvpqV4gWrGM,48
|
|
8
|
-
cpg2py/cpg/edge.py,sha256=brG7cQl7tjihbUCBYyjNUnMLsKIif-XQc_TFdGz8ZYo,1007
|
|
9
|
-
cpg2py/cpg/graph.py,sha256=n4EjAhhjzl0XRJBwlAmK5AcSrECiqhjA6dXt2Ucf7IM,3448
|
|
10
|
-
cpg2py/cpg/node.py,sha256=-2xl8c-eSbe4Kv4xPAEf6POlCYOV4j0voxJAKDZj3Cc,2037
|
|
11
|
-
cpg2py-1.0.4.dist-info/LICENSE,sha256=vTjbt7iL1hUilI8E87FoQerEDa9nbpeip26iA6bguHI,1066
|
|
12
|
-
cpg2py-1.0.4.dist-info/METADATA,sha256=Xhx-JFz1RECxW-aLxPhfy4elJgFoaTjxxiXR28YD-c8,7077
|
|
13
|
-
cpg2py-1.0.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
14
|
-
cpg2py-1.0.4.dist-info/top_level.txt,sha256=xDY8faKh5Rczvsqb5Jt9Sq-Y7EOImh7jh-m1oVTnH5k,7
|
|
15
|
-
cpg2py-1.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|