cpg2py 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cpg2py/__init__.py CHANGED
@@ -1,14 +1,36 @@
1
1
  from csv import DictReader
2
2
  from pathlib import Path
3
+ from typing import Union
3
4
 
4
- from ._abc import *
5
- from ._cpg import CpgGraph, CpgNode, CpgEdge
5
+ from ._abc import Storage
6
+ from ._abc import AbcEdgeQuerier, AbcGraphQuerier, AbcNodeQuerier
7
+ from ._cpg import CpgEdge, CpgGraph, CpgNode
6
8
  from ._exceptions import CPGError, EdgeNotFoundError, NodeNotFoundError, TopFileNotFoundError
7
9
  from ._logger import get_logger
8
10
 
9
11
  logger = get_logger(__name__)
10
12
 
11
13
 
14
+ def storage_from_json(path: Union[Path, str]) -> Storage:
15
+ """
16
+ Creates a Storage instance populated from a JSON file.
17
+
18
+ Args:
19
+ path: Path to JSON file (Path or str).
20
+
21
+ Returns:
22
+ New Storage instance containing the graph.
23
+
24
+ Raises:
25
+ OSError: If the file cannot be read.
26
+ ValueError: If JSON structure is invalid (missing "nodes" or "edges").
27
+ KeyError: If an edge object is missing "from", "to", or "type".
28
+ """
29
+ storage = Storage()
30
+ storage.load_json(path)
31
+ return storage
32
+
33
+
12
34
  def cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> CpgGraph:
13
35
  """
14
36
  Creates a CPG graph from CSV files.
@@ -66,6 +88,7 @@ def cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> CpgGraph
66
88
 
67
89
  __all__ = [
68
90
  "cpg_graph",
91
+ "storage_from_json",
69
92
  "CpgGraph",
70
93
  "CpgNode",
71
94
  "CpgEdge",
cpg2py/_abc/edge.py CHANGED
@@ -9,88 +9,88 @@ from .storage import Storage
9
9
 
10
10
  class AbcEdgeQuerier(abc.ABC):
11
11
  """
12
- Abstract base class for edge query operations.
13
-
14
- Provides interface for querying edge properties and accessing edge data.
12
+ Abstract base class for edge property access, queries, and updates.
15
13
  """
16
14
 
17
- def __init__(self, graph: Storage, f_nid: str, t_nid: str, e_type: int = 0) -> None:
15
+ def __init__(self, graph: Storage, f_nid: str, t_nid: str, e_type: str) -> None:
18
16
  """
19
- Initializes an edge querier.
17
+ Initializes edge querier and validates edge existence.
20
18
 
21
19
  Args:
22
- graph: Storage instance containing the graph
23
- f_nid: Source node ID
24
- t_nid: Target node ID
25
- e_type: Edge type/ID
20
+ graph: Storage instance containing the graph.
21
+ f_nid: Source node ID.
22
+ t_nid: Target node ID.
23
+ e_type: Edge type string.
26
24
 
27
25
  Raises:
28
- EdgeNotFoundError: If edge does not exist in the graph
26
+ EdgeNotFoundError: If edge does not exist in the graph.
29
27
  """
30
28
  self.__graph: Storage = graph
31
29
  self.__edge_id: Tuple[str, str, str] = (str(f_nid), str(t_nid), str(e_type))
32
30
  if not graph.contains_edge(self.__edge_id):
33
- raise EdgeNotFoundError(f_nid, t_nid, str(e_type))
31
+ raise EdgeNotFoundError(f_nid, t_nid, e_type)
34
32
 
35
33
  @property
36
- def edge_id(self) -> Tuple[str, str, int]:
34
+ def edge_id(self) -> Tuple[str, str, str]:
37
35
  """
38
- Returns the edge ID tuple.
39
-
40
- Returns:
41
- Edge ID tuple (from_node, to_node, edge_type)
36
+ Returns the edge identifier tuple (from_nid, to_nid, edge_type).
42
37
  """
43
38
  return self.__edge_id
44
39
 
45
40
  @property
46
41
  def from_nid(self) -> str:
47
- """
48
- Returns the source node ID.
49
-
50
- Returns:
51
- Source node ID string
52
- """
42
+ """Returns the source node identifier."""
53
43
  return self.__edge_id[0]
54
44
 
55
45
  @property
56
46
  def to_nid(self) -> str:
57
- """
58
- Returns the target node ID.
59
-
60
- Returns:
61
- Target node ID string
62
- """
47
+ """Returns the target node identifier."""
63
48
  return self.__edge_id[1]
64
49
 
65
50
  @property
66
51
  def edge_type(self) -> str:
52
+ """Returns the edge type string."""
53
+ return self.__edge_id[2]
54
+
55
+ @property
56
+ def properties(self) -> Optional[Dict[str, Any]]:
57
+ """Returns all edge properties dictionary, or None if not found."""
58
+ return self.__graph.get_edge_props(self.__edge_id)
59
+
60
+ def get_property(self, *prop_names: str) -> Optional[Any]:
67
61
  """
68
- Returns the edge type.
62
+ Returns first found property value trying multiple name alternatives.
63
+
64
+ Args:
65
+ prop_names: Property name alternatives to try.
69
66
 
70
67
  Returns:
71
- Edge type string
68
+ First found value, or None if none found.
72
69
  """
73
- return self.__edge_id[2]
70
+ prop_values = (self.__graph.get_edge_prop(self.__edge_id, p_name) for p_name in prop_names)
71
+ return next((value for value in prop_values if value is not None), None)
74
72
 
75
- @property
76
- def properties(self) -> Optional[Dict[str, Any]]:
73
+ def set_property(self, key: str, value: Any) -> bool:
77
74
  """
78
- Returns all edge properties.
75
+ Sets single edge property value.
76
+
77
+ Args:
78
+ key: Property key.
79
+ value: Property value.
79
80
 
80
81
  Returns:
81
- Dictionary of edge properties, or None if edge not found
82
+ True if property was set, False if edge does not exist.
82
83
  """
83
- return self.__graph.get_edge_props(self.__edge_id)
84
+ return self.__graph.set_edge_prop(self.__edge_id, key, value)
84
85
 
85
- def get_property(self, *prop_names: str) -> Optional[Any]:
86
+ def set_properties(self, props: Dict[str, Any]) -> bool:
86
87
  """
87
- Gets an edge property by trying multiple possible property names.
88
+ Updates multiple edge properties at once.
88
89
 
89
90
  Args:
90
- prop_names: Variable number of property name alternatives to try
91
+ props: Dictionary of property key-value pairs.
91
92
 
92
93
  Returns:
93
- First found property value, or None if none found
94
+ True if properties were updated, False if edge does not exist.
94
95
  """
95
- prop_values = (self.__graph.get_edge_prop(self.__edge_id, p_name) for p_name in prop_names)
96
- return next((value for value in prop_values if value is not None), None)
96
+ return self.__graph.set_edge_props(self.__edge_id, props)
cpg2py/_abc/node.py CHANGED
@@ -7,56 +7,69 @@ from .storage import Storage
7
7
 
8
8
  class AbcNodeQuerier(abc.ABC):
9
9
  """
10
- Abstract base class for node query operations.
11
-
12
- Provides interface for querying node properties and accessing node data.
10
+ Abstract base class for node property access, queries, and updates.
13
11
  """
14
12
 
15
13
  def __init__(self, graph: Storage, nid: str) -> None:
16
14
  """
17
- Initializes a node querier.
15
+ Initializes node querier and validates node existence.
18
16
 
19
17
  Args:
20
- graph: Storage instance containing the graph
21
- nid: Node ID to query
18
+ graph: Storage instance containing the graph.
19
+ nid: Node identifier.
22
20
 
23
21
  Raises:
24
- NodeNotFoundError: If node does not exist in the graph
22
+ NodeNotFoundError: If node does not exist in the graph.
25
23
  """
26
24
  self.__nid: str = str(nid)
27
25
  self.__graph: Storage = graph
28
26
  if not graph.contains_node(self.__nid):
29
- raise NodeNotFoundError(nid)
27
+ raise NodeNotFoundError(str(nid))
30
28
 
31
29
  @property
32
30
  def node_id(self) -> str:
31
+ """Returns the node identifier."""
32
+ return self.__nid
33
+
34
+ @property
35
+ def properties(self) -> Optional[Dict[str, Any]]:
36
+ """Returns all node properties dictionary, or None if not found."""
37
+ return self.__graph.get_node_props(self.__nid)
38
+
39
+ def get_property(self, *prop_names: str) -> Optional[Any]:
33
40
  """
34
- Returns the node ID.
41
+ Returns first found property value trying multiple name alternatives.
42
+
43
+ Args:
44
+ prop_names: Property name alternatives to try.
35
45
 
36
46
  Returns:
37
- Node ID string
47
+ First found value, or None if none found.
38
48
  """
39
- return self.__nid
49
+ prop_values = (self.__graph.get_node_prop(self.__nid, p_name) for p_name in prop_names)
50
+ return next((value for value in prop_values if value is not None), None)
40
51
 
41
- @property
42
- def properties(self) -> Optional[Dict[str, Any]]:
52
+ def set_property(self, key: str, value: Any) -> bool:
43
53
  """
44
- Returns all node properties.
54
+ Sets single node property value.
55
+
56
+ Args:
57
+ key: Property key.
58
+ value: Property value.
45
59
 
46
60
  Returns:
47
- Dictionary of node properties, or None if node not found
61
+ True if property was set, False if node does not exist.
48
62
  """
49
- return self.__graph.get_node_props(self.__nid)
63
+ return self.__graph.set_node_prop(self.__nid, key, value)
50
64
 
51
- def get_property(self, *prop_names: str) -> Optional[Any]:
65
+ def set_properties(self, props: Dict[str, Any]) -> bool:
52
66
  """
53
- Gets a node property by trying multiple possible property names.
67
+ Updates multiple node properties at once.
54
68
 
55
69
  Args:
56
- prop_names: Variable number of property name alternatives to try
70
+ props: Dictionary of property key-value pairs.
57
71
 
58
72
  Returns:
59
- First found property value, or None if none found
73
+ True if properties were updated, False if node does not exist.
60
74
  """
61
- prop_values = (self.__graph.get_node_prop(self.__nid, p_name) for p_name in prop_names)
62
- return next((value for value in prop_values if value is not None), None)
75
+ return self.__graph.set_node_props(self.__nid, props)
cpg2py/_abc/storage.py CHANGED
@@ -1,4 +1,6 @@
1
- from typing import Any, Dict, Iterable, List, Optional, Tuple
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
2
4
 
3
5
  from .._logger import get_logger
4
6
 
@@ -188,3 +190,69 @@ class Storage:
188
190
  self.__struct[eid[1]].remove(eid)
189
191
  self.__edges.pop(eid)
190
192
  return True
193
+
194
+ def save_json(self, path: Union[Path, str]) -> None:
195
+ """
196
+ Serializes the graph (nodes and edges with properties) to a UTF-8 JSON file.
197
+
198
+ Args:
199
+ path: File path (Path or str).
200
+
201
+ Raises:
202
+ OSError: If the file cannot be written.
203
+ TypeError: If a property value is not JSON-serializable.
204
+ """
205
+ payload: Dict[str, Any] = {
206
+ "nodes": dict(self.__nodes),
207
+ "edges": [
208
+ {"from": eid[0], "to": eid[1], "type": eid[2], "props": props}
209
+ for eid, props in self.__edges.items()
210
+ ],
211
+ }
212
+ with open(path, "w", encoding="utf-8") as f:
213
+ json.dump(payload, f, ensure_ascii=False, indent=2)
214
+
215
+ def load_json(self, path: Union[Path, str]) -> None:
216
+ """
217
+ Replaces the current graph with the contents of the JSON file.
218
+
219
+ Clears existing nodes and edges, then loads nodes, edges, and their
220
+ properties. Expects top-level keys "nodes" and "edges".
221
+
222
+ Args:
223
+ path: File path (Path or str).
224
+
225
+ Raises:
226
+ OSError: If the file cannot be read.
227
+ ValueError: If JSON structure is invalid (missing "nodes" or "edges").
228
+ KeyError: If an edge object is missing "from", "to", or "type".
229
+ """
230
+ with open(path, "r", encoding="utf-8") as f:
231
+ data: Dict[str, Any] = json.load(f)
232
+ if "nodes" not in data or "edges" not in data:
233
+ raise ValueError("JSON must contain top-level 'nodes' and 'edges'")
234
+ self.__nodes = {}
235
+ self.__edges = {}
236
+ self.__struct = {}
237
+ nodes_data: Dict[str, Dict[str, Any]] = data["nodes"]
238
+ for nid, props in nodes_data.items():
239
+ nid_str = str(nid)
240
+ self.__nodes[nid_str] = dict(props) if props else {}
241
+ self.__struct[nid_str] = []
242
+ for edge_obj in data["edges"]:
243
+ from_nid = str(edge_obj["from"])
244
+ to_nid = str(edge_obj["to"])
245
+ etype = str(edge_obj["type"])
246
+ props = edge_obj.get("props")
247
+ if props is None:
248
+ props = {}
249
+ eid = (from_nid, to_nid, etype)
250
+ if from_nid not in self.__nodes:
251
+ self.__nodes[from_nid] = {}
252
+ self.__struct[from_nid] = []
253
+ if to_nid not in self.__nodes:
254
+ self.__nodes[to_nid] = {}
255
+ self.__struct[to_nid] = []
256
+ self.__edges[eid] = dict(props)
257
+ self.__struct[from_nid].append(eid)
258
+ self.__struct[to_nid].append(eid)
cpg2py/_cpg/edge.py CHANGED
@@ -2,12 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Optional, Tuple
4
4
 
5
- from .._abc import AbcEdgeQuerier, AbcGraphQuerier
5
+ from .._abc import AbcEdgeQuerier
6
6
 
7
7
 
8
8
  class CpgEdge(AbcEdgeQuerier):
9
- def __init__(self, graph: AbcGraphQuerier, f_nid: str, t_nid: str, e_type: str) -> None:
10
- super().__init__(graph, f_nid, t_nid, e_type)
9
+ """Concrete edge implementation with CPG-specific property accessors."""
11
10
 
12
11
  @property
13
12
  def id(self) -> Tuple[str, str, str]:
cpg2py/_cpg/node.py CHANGED
@@ -2,12 +2,11 @@ from __future__ import annotations
2
2
 
3
3
  from typing import List, Optional
4
4
 
5
- from .._abc import AbcGraphQuerier, AbcNodeQuerier
5
+ from .._abc import AbcNodeQuerier
6
6
 
7
7
 
8
8
  class CpgNode(AbcNodeQuerier):
9
- def __init__(self, graph: AbcGraphQuerier, nid: str) -> None:
10
- super().__init__(graph, nid)
9
+ """Concrete node implementation with CPG-specific property accessors."""
11
10
 
12
11
  @property
13
12
  def id(self) -> str:
cpg2py/_exceptions.py CHANGED
@@ -1,6 +1,6 @@
1
- """
2
- Custom exceptions for cpg2py package.
3
- """
1
+ """Custom exceptions for cpg2py package."""
2
+
3
+ from typing import Optional
4
4
 
5
5
 
6
6
  class CPGError(Exception):
@@ -8,9 +8,9 @@ class CPGError(Exception):
8
8
 
9
9
 
10
10
  class NodeNotFoundError(CPGError):
11
- """Raised when a node cannot be found in the graph."""
11
+ """Raised when node identifier does not exist in storage."""
12
12
 
13
- def __init__(self, node_id: str, message: str = None):
13
+ def __init__(self, node_id: str, message: Optional[str] = None) -> None:
14
14
  self.node_id = node_id
15
15
  if message is None:
16
16
  message = f"Node with id '{node_id}' not found in graph"
@@ -18,9 +18,11 @@ class NodeNotFoundError(CPGError):
18
18
 
19
19
 
20
20
  class EdgeNotFoundError(CPGError):
21
- """Raised when an edge cannot be found in the graph."""
21
+ """Raised when edge identifier tuple does not exist in storage."""
22
22
 
23
- def __init__(self, from_id: str, to_id: str, edge_type: str, message: str = None):
23
+ def __init__(
24
+ self, from_id: str, to_id: str, edge_type: str, message: Optional[str] = None
25
+ ) -> None:
24
26
  self.from_id = from_id
25
27
  self.to_id = to_id
26
28
  self.edge_type = edge_type
@@ -32,9 +34,9 @@ class EdgeNotFoundError(CPGError):
32
34
 
33
35
 
34
36
  class TopFileNotFoundError(CPGError):
35
- """Raised when top file node cannot be found."""
37
+ """Raised when top file node cannot be found during upward traversal."""
36
38
 
37
- def __init__(self, node_id: str, message: str = None):
39
+ def __init__(self, node_id: str, message: Optional[str] = None) -> None:
38
40
  self.node_id = node_id
39
41
  if message is None:
40
42
  message = f"Cannot find top file node from node '{node_id}'"
@@ -0,0 +1,180 @@
1
+ Metadata-Version: 2.4
2
+ Name: cpg2py
3
+ Version: 1.2.0
4
+ Summary: A graph-based data structure designed for querying CSV files in Joern format in Python
5
+ Author-email: samhsu-dev <yxu166@jhu.edu>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 samhsu-dev
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/samhsu-dev/cpg2py
29
+ Project-URL: Repository, https://github.com/samhsu-dev/cpg2py
30
+ Project-URL: Documentation, https://github.com/samhsu-dev/cpg2py
31
+ Keywords: Joern,CPG,Graph,CSV
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.8
37
+ Classifier: Programming Language :: Python :: 3.9
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Programming Language :: Python :: 3.13
42
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
43
+ Requires-Python: >=3.8
44
+ Description-Content-Type: text/markdown
45
+ License-File: LICENSE
46
+ Provides-Extra: test
47
+ Requires-Dist: pytest>=7.4.0; extra == "test"
48
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
49
+ Provides-Extra: dev
50
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
51
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
52
+ Requires-Dist: black>=23.0.0; extra == "dev"
53
+ Requires-Dist: isort>=5.12.0; extra == "dev"
54
+ Requires-Dist: pylint>=2.17.0; extra == "dev"
55
+ Dynamic: license-file
56
+
57
+ # cpg2py
58
+
59
+ Python graph query engine for Code Property Graphs from Joern CSV exports. Directed multi-graph with generic ABCs for custom node/edge/graph types.
60
+
61
+ **Features**: Load from `nodes.csv` + `rels.csv`; query/update nodes and edges (`get_property`, `set_property`, `set_properties`); traverse succ/prev/children/parent/flow_to/flow_from; JSON persistence (`save_json`, `load_json`, `storage_from_json`). Concrete types: `CpgGraph`, `CpgNode`, `CpgEdge`.
62
+
63
+ ---
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install cpg2py
69
+ ```
70
+
71
+ From source (e.g. with [uv](https://github.com/astral-sh/uv)):
72
+
73
+ ```bash
74
+ git clone https://github.com/samhsu-dev/cpg2py.git && cd cpg2py
75
+ uv sync --dev
76
+ uv run pytest tests/
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Input format
82
+
83
+ - **nodes.csv**: tab-delimited; must include a node ID column (e.g. `id:int` or `id`). All other columns become node properties.
84
+ - **rels.csv**: tab-delimited; columns `start`, `end`, `type` (or `start:str`, `end:str`, `type:str`).
85
+
86
+ ---
87
+
88
+ ## Usage
89
+
90
+ **Load from CSV**
91
+
92
+ ```python
93
+ from pathlib import Path
94
+ from cpg2py import cpg_graph, CpgGraph, CpgNode, CpgEdge
95
+
96
+ graph: CpgGraph = cpg_graph(Path("nodes.csv"), Path("rels.csv"))
97
+ ```
98
+
99
+ **Nodes and edges** (an edge is identified by `(from_id, to_id, edge_type)`; `edge_type` is a string)
100
+
101
+ ```python
102
+ node: CpgNode = graph.node("2")
103
+ node.name
104
+ node.set_property("name", "x")
105
+ node.set_properties({"k": "v"})
106
+
107
+ edge: CpgEdge = graph.edge("2", "3", "ENTRY")
108
+ edge.from_nid, edge.to_nid, edge.type
109
+ edge.set_property("weight", 0.5)
110
+ ```
111
+
112
+ **Traversal**
113
+
114
+ ```python
115
+ graph.succ(node) # successors
116
+ graph.prev(node) # predecessors
117
+ graph.children(node)
118
+ graph.parent(node)
119
+ graph.flow_to(node)
120
+ graph.flow_from(node)
121
+ graph.topfile_node("5") # top-level file node for given node ID
122
+ ```
123
+
124
+ **Filtered iteration** (optional predicate)
125
+
126
+ ```python
127
+ graph.nodes(lambda n: n.type == "Function")
128
+ graph.edges(lambda e: e.edge_type == "FLOWS_TO")
129
+ graph.succ(node, who_satisifies=lambda e: e.edge_type == "PARENT_OF")
130
+ graph.descendants(node, condition=...)
131
+ graph.ancestors(node, condition=...)
132
+ ```
133
+
134
+ **JSON persistence** (`Storage` and `storage_from_json` are importable from `cpg2py`)
135
+
136
+ ```python
137
+ graph.storage.save_json("graph.json")
138
+
139
+ storage = Storage()
140
+ storage.load_json("graph.json")
141
+ graph2 = CpgGraph(storage)
142
+
143
+ # or
144
+ storage = storage_from_json(Path("graph.json"))
145
+ ```
146
+
147
+ JSON schema: `{"nodes": { "<id>": { "<key>": <value>, ... }, ... }, "edges": [ {"from": str, "to": str, "type": str, "props": {...} }, ... ]}`. See [design.md](docs/design.md).
148
+
149
+ ---
150
+
151
+ ## Extending (ABCs)
152
+
153
+ Implement `AbcGraphQuerier[MyNode, MyEdge]`, `AbcNodeQuerier`, `AbcEdgeQuerier`; inject `Storage`. Full interface and contracts: [docs/design.md](docs/design.md).
154
+
155
+ Minimal custom graph:
156
+
157
+ ```python
158
+ from cpg2py import AbcGraphQuerier, AbcNodeQuerier, AbcEdgeQuerier, Storage
159
+ from typing import Optional
160
+
161
+ class MyNode(AbcNodeQuerier): pass
162
+ class MyEdge(AbcEdgeQuerier): pass
163
+
164
+ class MyGraph(AbcGraphQuerier[MyNode, MyEdge]):
165
+ def node(self, whose_id_is: str) -> Optional[MyNode]:
166
+ return MyNode(self.storage, whose_id_is)
167
+ def edge(self, fid: str, tid: str, eid: str) -> Optional[MyEdge]:
168
+ return MyEdge(self.storage, fid, tid, eid)
169
+
170
+ g = MyGraph(Storage())
171
+ ```
172
+
173
+ ---
174
+
175
+ Interface specifications (classes, methods, signatures, validation): [docs/design.md](docs/design.md).
176
+
177
+ ---
178
+
179
+ ## License
180
+ MIT.
@@ -0,0 +1,17 @@
1
+ cpg2py/__init__.py,sha256=j_4bRx0t-P10QnOXahQ89avNJne_1mVio0Gd62JIRLs,3548
2
+ cpg2py/_exceptions.py,sha256=OlnptpATmuzRQS0HoC24MoDPcPrY01uGMhVPJdK7Zuo,1376
3
+ cpg2py/_logger.py,sha256=HjJAhs4ZAB_IMoBMVRe3KPGS5GRygGy0eNncvIqu8ls,1309
4
+ cpg2py/_abc/__init__.py,sha256=XN7RKxMpnZqvjDAUWxXXLGKY1jrPpKBAxUbq-pydwh4,208
5
+ cpg2py/_abc/edge.py,sha256=Co3TXbfR2kd7G5oXFVEO0YjDND0zzFecf_R6yRHcyzw,2956
6
+ cpg2py/_abc/graph.py,sha256=LDF2ylbJYQubUV2LboJlkiQaqWtLW3LsyKBkTJOl4Gs,7795
7
+ cpg2py/_abc/node.py,sha256=ZnMf0TTi2Pv69AhzmomI8a8z8_05WgzVpK9p8VisTr8,2275
8
+ cpg2py/_abc/storage.py,sha256=E8lB6uo4zMCvgFmg6LJ5x00AoAFT-gha8nY3FxcJAQE,9252
9
+ cpg2py/_cpg/__init__.py,sha256=BYNrxYo0G3XoIaUN9llEYkZoCstYJgt88OrJZCkQW7E,126
10
+ cpg2py/_cpg/edge.py,sha256=bqzv9dmAVMFSyDmyCLhE0SKIWZZ9gu9UWLC3j6dfhAQ,886
11
+ cpg2py/_cpg/graph.py,sha256=Aks2VsDolKdT1NdEz9Vn5ABIWBTPDGvEq7PbrehLVuI,5674
12
+ cpg2py/_cpg/node.py,sha256=wSBCTCvA8UWQZG9CJn0QyBlQPt9PiXlPd0B2Ddlr7BI,1945
13
+ cpg2py-1.2.0.dist-info/licenses/LICENSE,sha256=ZAjdPt8K7uO5xDb4RyEuUTNZtFGx9_sfSQCy0OP64cU,1067
14
+ cpg2py-1.2.0.dist-info/METADATA,sha256=a-sNUByG1k8a0xX_HFQKRmINP8LDcJ9FMuz5RcqjzNU,5915
15
+ cpg2py-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ cpg2py-1.2.0.dist-info/top_level.txt,sha256=xDY8faKh5Rczvsqb5Jt9Sq-Y7EOImh7jh-m1oVTnH5k,7
17
+ cpg2py-1.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.3)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 Yichao Xu
3
+ Copyright (c) 2025 samhsu-dev
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,407 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: cpg2py
3
- Version: 1.1.0
4
- Summary: A graph-based data structure designed for querying CSV files in Joern format in Python
5
- Author-email: samhsu-dev <yxu166@jhu.edu>
6
- License: MIT License
7
-
8
- Copyright (c) 2025 Yichao Xu
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
27
-
28
- Project-URL: Homepage, https://github.com/YichaoXu/cpg2py
29
- Project-URL: Repository, https://github.com/YichaoXu/cpg2py
30
- Project-URL: Documentation, https://github.com/YichaoXu/cpg2py
31
- Keywords: Joern,CPG,Graph,CSV
32
- Classifier: Development Status :: 4 - Beta
33
- Classifier: Intended Audience :: Developers
34
- Classifier: License :: OSI Approved :: MIT License
35
- Classifier: Programming Language :: Python :: 3
36
- Classifier: Programming Language :: Python :: 3.8
37
- Classifier: Programming Language :: Python :: 3.9
38
- Classifier: Programming Language :: Python :: 3.10
39
- Classifier: Programming Language :: Python :: 3.11
40
- Classifier: Programming Language :: Python :: 3.12
41
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
42
- Requires-Python: >=3.8
43
- Description-Content-Type: text/markdown
44
- License-File: LICENSE
45
- Provides-Extra: dev
46
- Requires-Dist: pytest>=7.0.0; extra == "dev"
47
- Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
48
- Provides-Extra: test
49
-
50
- # **cpg2py: Graph-Based Query Engine for Joern CSV Files**
51
-
52
- `cpg2py` is a Python library that provides a lightweight **graph-based query engine** for analyzing **Code Property Graphs (CPG)** extracted from Joern CSV files. The library offers an **abstract base class (ABC) architecture**, allowing users to extend and implement their own custom graph queries.
53
-
54
- ---
55
-
56
- ## **🚀 Features**
57
-
58
- - **MultiDiGraph Representation**: A directed multi-graph with support for multiple edges between nodes.
59
- - **CSV-Based Graph Construction**: Reads `nodes.csv` and `rels.csv` to construct a graph structure.
60
- - **Type-Safe Generic Types**: Uses Python generics for type-safe graph operations (similar to Java generics).
61
- - **Extensible Abstract Base Classes (ABC)**:
62
- - `AbcGraphQuerier` for implementing **custom graph queries** with generic type support.
63
- - `AbcNodeQuerier` for interacting with **nodes**.
64
- - `AbcEdgeQuerier` for interacting with **edges**.
65
- - **Built-in Query Mechanisms**:
66
- - **Retrieve all nodes and edges** with type-safe iteration.
67
- - **Get incoming and outgoing edges** of a node.
68
- - **Find successors and predecessors** with type preservation.
69
- - **Traverse AST, Control Flow, and Data Flow Graphs**.
70
- - **Concrete Implementation**: `CpgGraph`, `CpgNode`, and `CpgEdge` provide ready-to-use implementations.
71
-
72
- ---
73
-
74
- ## **📚 Installation**
75
-
76
- ### Using pip
77
-
78
- To install the package, use:
79
-
80
- ```bash
81
- pip install git+https://github.com/samhsu-dev/cpg2py.git
82
- ```
83
-
84
- ### Using uv (Recommended)
85
-
86
- This project uses [uv](https://github.com/astral-sh/uv) for fast and reliable package management.
87
-
88
- **Install uv:**
89
- ```bash
90
- curl -LsSf https://astral.sh/uv/install.sh | sh
91
- ```
92
-
93
- **Clone and install:**
94
- ```bash
95
- git clone https://github.com/samhsu-dev/cpg2py.git
96
- cd cpg2py
97
- uv sync --dev # Install with dev dependencies
98
- ```
99
-
100
- **For development:**
101
- ```bash
102
- uv sync --dev
103
- uv run pytest tests/ # Run tests
104
- ```
105
-
106
- Or clone the pip repository:
107
-
108
- ```bash
109
- pip install cpg2py
110
- ```
111
-
112
- ---
113
-
114
- ## **📂 File Structure**
115
-
116
- - **`nodes.csv`** (Example):
117
- ```csv
118
- id:int labels:label type flags:string_array lineno:int code childnum:int funcid:int classname namespace endlineno:int name doccomment
119
- 0 Filesystem Directory "input"
120
- 1 Filesystem File "example.php"
121
- 2 AST AST_TOPLEVEL TOPLEVEL_FILE 1 "" 25 "/input/example.php"
122
-
123
- ````
124
- - **`rels.csv`** (Example):
125
- ```csv
126
- start end type
127
- 2 3 ENTRY
128
- 2 4 EXIT
129
- 6 7 ENTRY
130
- 6 9 PARENT_OF
131
- ````
132
-
133
- ---
134
-
135
- ## **🎯 Type Safety with Generics**
136
-
137
- `cpg2py` uses Python's generic types (similar to Java generics) to provide type-safe operations:
138
-
139
- ```python
140
- from typing import Iterable
141
- from cpg2py import cpg_graph, CpgGraph, CpgNode, CpgEdge
142
-
143
- # Type checker knows graph is CpgGraph[CpgNode, CpgEdge]
144
- graph: CpgGraph = cpg_graph("nodes.csv", "rels.csv")
145
-
146
- # Type checker knows node is CpgNode (not just AbcNodeQuerier)
147
- node: CpgNode = graph.node("5")
148
-
149
- # Type checker knows successors are Iterable[CpgNode]
150
- successors: Iterable[CpgNode] = graph.succ(node)
151
- for succ in successors:
152
- succ.code # Type-safe: IDE knows succ is CpgNode
153
- ```
154
-
155
- This ensures that:
156
- - Return types are preserved throughout graph operations
157
- - IDE autocomplete works correctly
158
- - Type checkers (mypy, pyright) can verify type correctness
159
-
160
- For more details, see [Generics Documentation](docs/GENERICS.md).
161
-
162
- ---
163
-
164
- ## **📚 Usage**
165
-
166
- ### **1️⃣ Load Graph from Joern CSVs**
167
-
168
- ```python
169
- from cpg2py import cpg_graph
170
-
171
- # Load graph from CSV files
172
- graph = cpg_graph("nodes.csv", "rels.csv")
173
- ```
174
-
175
- The `cpg_graph` function returns a `CpgGraph` instance, which is the concrete implementation of the graph querier.
176
-
177
- ---
178
-
179
- ### **2️⃣ Query Nodes & Edges**
180
-
181
- ```python
182
- from cpg2py import CpgGraph, CpgNode, CpgEdge
183
-
184
- # Get a specific node (returns CpgNode)
185
- node: CpgNode = graph.node("2")
186
- print(node.name, node.type) # Example output: "/tmp/example.php" AST_TOPLEVEL
187
-
188
- # Get a specific edge (returns CpgEdge)
189
- edge: CpgEdge = graph.edge("2", "3", "ENTRY")
190
- print(edge.type) # Output: ENTRY
191
- ```
192
-
193
- ---
194
-
195
- ### **3️⃣ Get Node Connections**
196
-
197
- ```python
198
- # Get all outgoing edges from a node
199
- outgoing_edges = graph.succ(node)
200
- for out_node in outgoing_edges:
201
- print(out_node.id, out_node.name) # out_node is CpgNode
202
-
203
- # Get all incoming edges to a node
204
- incoming_edges = graph.prev(node)
205
- for in_node in incoming_edges:
206
- print(in_node.id, in_node.name) # in_node is CpgNode
207
- ```
208
-
209
- ---
210
-
211
- ### **4️⃣ AST and Flow Queries**
212
-
213
- ```python
214
- # Get top-level file node for a given node
215
- top_file: CpgNode = graph.topfile_node("5")
216
- print(top_file.name) # Output: "example.php"
217
-
218
- # Get child nodes in the AST hierarchy
219
- children = graph.children(node)
220
- print([child.id for child in children]) # children are CpgNode instances
221
-
222
- # Get data flow successors
223
- flow_successors = graph.flow_to(node)
224
- print([succ.id for succ in flow_successors]) # successors are CpgNode instances
225
- ```
226
-
227
- ---
228
-
229
- ## **🛠 Abstract Base Classes (ABC)**
230
-
231
- The following abstract base classes (`ABC`) provide interfaces for extending **node**, **edge**, and **graph** querying behavior. All ABCs are imported directly from the main `cpg2py` package.
232
-
233
- ---
234
-
235
- ### **🔹 AbcNodeQuerier (Abstract Node Interface)**
236
-
237
- This class defines how nodes interact with the graph storage.
238
-
239
- ```python
240
- from cpg2py import AbcNodeQuerier, Storage
241
-
242
- class MyNodeQuerier(AbcNodeQuerier):
243
- def __init__(self, graph: Storage, nid: str):
244
- super().__init__(graph, nid)
245
-
246
- @property
247
- def name(self):
248
- return self.get_property("name")
249
- ```
250
-
251
- ---
252
-
253
- ### **🔹 AbcEdgeQuerier (Abstract Edge Interface)**
254
-
255
- Defines the querying mechanisms for edges in the graph.
256
-
257
- ```python
258
- from cpg2py import AbcEdgeQuerier, Storage
259
-
260
- class MyEdgeQuerier(AbcEdgeQuerier):
261
- def __init__(self, graph: Storage, f_nid: str, t_nid: str, e_type: str):
262
- super().__init__(graph, f_nid, t_nid, e_type)
263
-
264
- @property
265
- def type(self):
266
- return self.get_property("type")
267
- ```
268
-
269
- ---
270
-
271
- ### **🔹 AbcGraphQuerier (Abstract Graph Interface)**
272
-
273
- This class provides an interface for implementing custom graph query mechanisms. It's a generic class that supports type-safe operations.
274
-
275
- ```python
276
- from cpg2py import AbcGraphQuerier, Storage
277
- from typing import Optional
278
-
279
- class MyGraphQuerier(AbcGraphQuerier[MyNodeQuerier, MyEdgeQuerier]):
280
- def node(self, nid: str) -> Optional[MyNodeQuerier]:
281
- return MyNodeQuerier(self.storage, nid)
282
-
283
- def edge(self, fid: str, tid: str, eid: str) -> Optional[MyEdgeQuerier]:
284
- return MyEdgeQuerier(self.storage, fid, tid, eid)
285
- ```
286
-
287
- **Note**: `AbcGraphQuerier` is a generic class parameterized by node and edge types, ensuring type safety throughout graph operations. The concrete implementation `CpgGraph` is defined as `AbcGraphQuerier[CpgNode, CpgEdge]`.
288
-
289
- ---
290
-
291
- ## **🔍 Querying The Graph**
292
-
293
- After implementing the abstract classes, you can perform advanced queries:
294
-
295
- ```python
296
- from cpg2py import Storage
297
-
298
- storage = Storage()
299
- graph = MyGraphQuerier(storage)
300
-
301
- # Query node properties
302
- node = graph.node("5")
303
- print(node.name) # Example Output: "main"
304
-
305
- # Query edge properties
306
- edge = graph.edge("5", "6", "FLOWS_TO")
307
- print(edge.type) # Output: "FLOWS_TO"
308
- ```
309
-
310
- ### **Using the Built-in CpgGraph**
311
-
312
- You can also use the built-in `CpgGraph` implementation directly:
313
-
314
- ```python
315
- from typing import Iterable
316
- from cpg2py import cpg_graph, CpgGraph, CpgNode, CpgEdge
317
-
318
- # Load from CSV files
319
- graph: CpgGraph = cpg_graph("nodes.csv", "rels.csv")
320
-
321
- # Type-safe operations
322
- node: CpgNode = graph.node("5")
323
- edge: CpgEdge = graph.edge("5", "6", "FLOWS_TO")
324
-
325
- # Type-safe iteration
326
- successors: Iterable[CpgNode] = graph.succ(node)
327
- for succ in successors:
328
- print(succ.code) # Type checker knows succ is CpgNode
329
- ```
330
-
331
- ---
332
-
333
- ## **🐝 API Reference**
334
-
335
- For more detailed API documentation, see the [API reference](docs/APIs.md).
336
-
337
- ### **Main Package Exports**
338
-
339
- All public APIs are available directly from the `cpg2py` package:
340
-
341
- ```python
342
- from cpg2py import (
343
- # Factory function
344
- cpg_graph,
345
-
346
- # Concrete implementations
347
- CpgGraph,
348
- CpgNode,
349
- CpgEdge,
350
-
351
- # Abstract base classes
352
- AbcGraphQuerier,
353
- AbcNodeQuerier,
354
- AbcEdgeQuerier,
355
- Storage,
356
-
357
- # Exceptions
358
- CPGError,
359
- NodeNotFoundError,
360
- EdgeNotFoundError,
361
- TopFileNotFoundError,
362
- )
363
- ```
364
-
365
- ### **Graph Functions**
366
-
367
- - `cpg_graph(node_csv: Path, edge_csv: Path, verbose: bool = False) -> CpgGraph`: Loads graph from CSV files and returns a `CpgGraph` instance.
368
- - `graph.node(nid: str) -> Optional[CpgNode]`: Retrieves a node by ID (returns `CpgNode`).
369
- - `graph.edge(fid: str, tid: str, eid: str) -> Optional[CpgEdge]`: Retrieves an edge (returns `CpgEdge`).
370
- - `graph.succ(node: CpgNode) -> Iterable[CpgNode]`: Gets successor nodes.
371
- - `graph.prev(node: CpgNode) -> Iterable[CpgNode]`: Gets predecessor nodes.
372
- - `graph.children(node: CpgNode) -> Iterable[CpgNode]`: Gets child nodes via PARENT_OF edges.
373
- - `graph.parent(node: CpgNode) -> Iterable[CpgNode]`: Gets parent nodes via PARENT_OF edges.
374
- - `graph.flow_to(node: CpgNode) -> Iterable[CpgNode]`: Gets data flow successors.
375
- - `graph.flow_from(node: CpgNode) -> Iterable[CpgNode]`: Gets data flow predecessors.
376
- - `graph.topfile_node(nid: str) -> CpgNode`: Finds the top-level file node.
377
-
378
- ### **Node Properties (CpgNode)**
379
-
380
- - `.id`: Node ID (string).
381
- - `.name`: Node name.
382
- - `.type`: Node type.
383
- - `.code`: Source code content.
384
- - `.label`: Node label.
385
- - `.line_num`: Source code line number.
386
- - `.flags`: List of node flags.
387
- - `.children_num`: Number of children.
388
- - `.func_id`: Function ID.
389
- - `.class_name`: Class name.
390
- - `.namespace`: Namespace.
391
- - `.end_num`: End line number.
392
- - `.comment`: Documentation comment.
393
-
394
- ### **Edge Properties (CpgEdge)**
395
-
396
- - `.id`: Edge ID tuple `(from_node, to_node, edge_type)`.
397
- - `.start`: Edge start position.
398
- - `.end`: Edge end position.
399
- - `.type`: Edge type.
400
- - `.var`: Variable name (if applicable).
401
-
402
- ---
403
-
404
- ## **🌟 License**
405
-
406
- This project is licensed under the **MIT License**.
407
-
@@ -1,17 +0,0 @@
1
- cpg2py/__init__.py,sha256=T8Nt4ZLMtCSQjqRINYlYZH9iLiANj3onJqGZk08he4A,2888
2
- cpg2py/_exceptions.py,sha256=AR2zPeds9fNcCA9iFZGAGluMg3_lgsKHR_4WUmyuD_w,1240
3
- cpg2py/_logger.py,sha256=HjJAhs4ZAB_IMoBMVRe3KPGS5GRygGy0eNncvIqu8ls,1309
4
- cpg2py/_abc/__init__.py,sha256=XN7RKxMpnZqvjDAUWxXXLGKY1jrPpKBAxUbq-pydwh4,208
5
- cpg2py/_abc/edge.py,sha256=CqRz4xmDPplH3lyzRnoWjMj-JgFLFoL8YAJ6XLUsPjA,2571
6
- cpg2py/_abc/graph.py,sha256=LDF2ylbJYQubUV2LboJlkiQaqWtLW3LsyKBkTJOl4Gs,7795
7
- cpg2py/_abc/node.py,sha256=s-AQ2oOVtw9wXjBODYtaRKAp8tkPgddMLX2_6eJglcg,1724
8
- cpg2py/_abc/storage.py,sha256=vUng2nBlhBOJL6YrPTBZaAjkpLdcQLCAW9lOhz7hPxw,6623
9
- cpg2py/_cpg/__init__.py,sha256=BYNrxYo0G3XoIaUN9llEYkZoCstYJgt88OrJZCkQW7E,126
10
- cpg2py/_cpg/edge.py,sha256=GcGfwWzQJY0P1GkL8BsQ-vVcsHRlbUp-Le81lPLNt-w,973
11
- cpg2py/_cpg/graph.py,sha256=Aks2VsDolKdT1NdEz9Vn5ABIWBTPDGvEq7PbrehLVuI,5674
12
- cpg2py/_cpg/node.py,sha256=hP6jfw9miUyuqksnX9Sr5pz7NYaY4ESigZZ0cmobMcM,1988
13
- cpg2py-1.1.0.dist-info/LICENSE,sha256=vTjbt7iL1hUilI8E87FoQerEDa9nbpeip26iA6bguHI,1066
14
- cpg2py-1.1.0.dist-info/METADATA,sha256=VaniBLcK3R1Vp52Iyma3L0NonkN35bH7b41p0H5wJfw,12262
15
- cpg2py-1.1.0.dist-info/WHEEL,sha256=WnJ8fYhv8N4SYVK2lLYNI6N0kVATA7b0piVUNvqIIJE,91
16
- cpg2py-1.1.0.dist-info/top_level.txt,sha256=xDY8faKh5Rczvsqb5Jt9Sq-Y7EOImh7jh-m1oVTnH5k,7
17
- cpg2py-1.1.0.dist-info/RECORD,,