graphcontainer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphcontainer-0.1.0/LICENSE +21 -0
- graphcontainer-0.1.0/PKG-INFO +182 -0
- graphcontainer-0.1.0/README.md +137 -0
- graphcontainer-0.1.0/pyproject.toml +42 -0
- graphcontainer-0.1.0/setup.cfg +4 -0
- graphcontainer-0.1.0/setup.py +4 -0
- graphcontainer-0.1.0/src/graphcontainer/__init__.py +123 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/__init__.py +35 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/base.py +50 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/expla_graphs.py +222 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/fastinsight.py +195 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/freebasekg.py +221 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/g_retriever.py +644 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/hipporag.py +833 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/hipporag_raw.py +482 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/lightrag.py +462 -0
- graphcontainer-0.1.0/src/graphcontainer/adapters/tog.py +302 -0
- graphcontainer-0.1.0/src/graphcontainer/base.py +34 -0
- graphcontainer-0.1.0/src/graphcontainer/core.py +112 -0
- graphcontainer-0.1.0/src/graphcontainer/index.py +18 -0
- graphcontainer-0.1.0/src/graphcontainer/indexers.py +177 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/__init__.py +26 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/contracts.py +166 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/embeddings.py +241 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/generator.py +83 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/pipeline.py +96 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/__init__.py +13 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/base.py +29 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/fastinsight.py +786 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/hybrid.py +25 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/one_hop.py +177 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/utils.py +132 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/retrievers/vector.py +123 -0
- graphcontainer-0.1.0/src/graphcontainer/rag/service.py +150 -0
- graphcontainer-0.1.0/src/graphcontainer/types.py +17 -0
- graphcontainer-0.1.0/src/graphcontainer/utils.py +14 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/__init__.py +55 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/client.py +309 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/live_visualizer.py +2036 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/web/app.js +2239 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/web/index.html +204 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/web/logo.png +0 -0
- graphcontainer-0.1.0/src/graphcontainer/visualizer/web/style.css +1343 -0
- graphcontainer-0.1.0/src/graphcontainer.egg-info/PKG-INFO +182 -0
- graphcontainer-0.1.0/src/graphcontainer.egg-info/SOURCES.txt +46 -0
- graphcontainer-0.1.0/src/graphcontainer.egg-info/dependency_links.txt +1 -0
- graphcontainer-0.1.0/src/graphcontainer.egg-info/requires.txt +14 -0
- graphcontainer-0.1.0/src/graphcontainer.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Seonho AN and Chaejeong HYUN
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graphcontainer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified workflow for working with graph RAG systems
|
|
5
|
+
Author: asmath472
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Seonho AN and Chaejeong HYUN
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Requires-Python: >=3.11
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: chromadb>=1.4.1
|
|
31
|
+
Requires-Dist: flagembedding>=1.3.5
|
|
32
|
+
Requires-Dist: openai>=1.108.1
|
|
33
|
+
Requires-Dist: pandas>=3.0.0
|
|
34
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
35
|
+
Requires-Dist: pyct>=0.6.0
|
|
36
|
+
Requires-Dist: pydantic>=2.12.5
|
|
37
|
+
Requires-Dist: pyvis>=0.3.2
|
|
38
|
+
Requires-Dist: sentence-transformers>=5.2.0
|
|
39
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
40
|
+
Requires-Dist: torch>=2.10.0
|
|
41
|
+
Requires-Dist: tqdm>=4.67.1
|
|
42
|
+
Requires-Dist: transformers>=4.57.6
|
|
43
|
+
Requires-Dist: ollama>=0.6.1
|
|
44
|
+
Dynamic: license-file
|
|
45
|
+
|
|
46
|
+
## GraphContainer
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<img src="graph-container-workflow.jpg" alt="GraphContainer workflow" width="760">
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
**GraphContainer** provides a unified workflow for working with graph RAG systems. It is designed to load graphs produced by different methods, convert them into a shared internal representation, run retrieval pipelines on top of that representation, visualize retrieval traces in a browser, and execute experiments through a consistent interface.
|
|
53
|
+
|
|
54
|
+
[](https://youtu.be/O02eNJLwkU0)
|
|
55
|
+
|
|
56
|
+
### Overview
|
|
57
|
+
|
|
58
|
+
The main idea behind GraphContainer is simple: different graph RAG methods store graph data in different formats, but once those graphs are converted into a common structure, they can be searched, visualized, and compared in a much more consistent way. In this repository, that common structure is implemented through the Unified Graph State, which stores nodes, edges, adjacency information, and vector indexes in a form that downstream components can access without caring about the original source format.
|
|
59
|
+
|
|
60
|
+
At the core of the implementation are `SimpleGraphContainer` and `SearchableGraphContainer`. `SimpleGraphContainer` is responsible for holding the in-memory graph itself, while `SearchableGraphContainer` extends that base structure with pluggable vector indexes such as `node_vector`. On top of this container layer, the repository provides adapters for different upstream graph formats, including `import_graph_from_component_graph` (Component Graph), `import_graph_from_attribute_bundle_graph` (Attribute Bundle Graph), `import_graph_from_topology_semantic_graph` (Topology-Semantic Graph), and `import_graph_from_subgraph_union_graph` (Subgraph Union Graph). These adapters are the entry points that translate method-specific graph storage into the unified internal graph state used by the rest of the system.
|
|
61
|
+
|
|
62
|
+
Once a graph has been loaded, retrieval is handled by the RAG modules under `src/graphcontainer/rag`. The embedding path is managed through `src/graphcontainer/rag/embeddings.py`, and the retrieval logic lives in `src/graphcontainer/rag/retrievers.py`. The repository currently includes two retrieval strategies: `OneHopRetriever`, which starts from vector-retrieved seed nodes and expands to their immediate neighbors, and `FastInsightRetriever`, which applies a multi-stage retrieval process with seed selection, deeper exploration, and final filtering. In the current experiment setup, the initial retrieval size is set to `10`, and FastInsight keeps the final `5` nodes before answer generation.
|
|
63
|
+
|
|
64
|
+
The end-to-end experiment pipeline is implemented in [test/rag_experiment.py](./test/rag_experiment.py). This script loads the available graphs, applies the retrievers, builds prompts from the retrieved content, sends the prompts to the generator model, and writes the outputs as JSONL files. In other words, the implementation path is: load a graph from a method-specific source, convert it into the unified graph container, run retrieval on top of the shared representation, assemble the retrieved evidence into a prompt, generate an answer, and finally save the result for evaluation.
|
|
65
|
+
|
|
66
|
+
### Installation
|
|
67
|
+
|
|
68
|
+
Before running the project, make sure `uv` itself is installed. On macOS and Linux, you can install it with the official standalone installer:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
On Windows PowerShell, you can install it with:
|
|
75
|
+
|
|
76
|
+
```powershell
|
|
77
|
+
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
If you prefer another installation method, such as Homebrew, WinGet, Scoop, or `pipx`, you can use the official `uv` installation guide.
|
|
81
|
+
|
|
82
|
+
If you want to install the published package from PyPI, use:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install graphcontainer
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If you are developing locally from this repository, install the project dependencies with:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
uv sync
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
After installation, restart your shell and run the command below to activate the virtual environment.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
source .venv/bin/activate
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
You can also download example graphs from the Google Drive link below. Place them in the project root directory.
|
|
101
|
+
|
|
102
|
+
[](https://drive.google.com/file/d/1pK8mK2Jgp3T4gVUOuHQrVz2xSpDBIGdV/view?usp=sharing)
|
|
103
|
+
|
|
104
|
+
<!-- [Google Drive link](https://drive.google.com/file/d/1pK8mK2Jgp3T4gVUOuHQrVz2xSpDBIGdV/view?usp=sharing). Place this on the project root directory. -->
|
|
105
|
+
|
|
106
|
+
### Web-based Visualizer
|
|
107
|
+
|
|
108
|
+
The web interface is powered by the live visualizer. You can launch it directly from the command line by pointing it to a graph source:
|
|
109
|
+
|
|
110
|
+
<!-- ```bash
|
|
111
|
+
uv run python -m graphcontainer.visualizer.live_visualizer \
|
|
112
|
+
--source data/rag_storage/fastinsight/scifact-openai \
|
|
113
|
+
--host 127.0.0.1 \
|
|
114
|
+
--port 8765 \
|
|
115
|
+
--hops 2
|
|
116
|
+
``` -->
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
python serve.py --graph component_graph:./data/rag_storage/fastinsight/scifact-openai \
|
|
120
|
+
--host 127.0.0.1 \
|
|
121
|
+
--port 8765 \
|
|
122
|
+
--hops 2
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
After the server starts, open `http://127.0.0.1:8765` in your browser. The page renders the graph or subgraph associated with the current retrieval session and lets you inspect how the retriever moved through the graph. Nodes and edges selected during retrieval can be highlighted, and the visualizer keeps track of session progress so that a query can be inspected step by step instead of only as a final result.
|
|
127
|
+
|
|
128
|
+
If you already have a graph object in memory, you can launch the same interface from Python by using `serve_graph`:
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from graphcontainer import serve_graph
|
|
132
|
+
|
|
133
|
+
visualizer = serve_graph(
|
|
134
|
+
graph,
|
|
135
|
+
host="127.0.0.1",
|
|
136
|
+
port=8765,
|
|
137
|
+
default_hops=2,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
print(visualizer.url)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
If your graph is stored in Component Graph format, you can also serve it directly from storage:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from graphcontainer import serve_component_graph
|
|
147
|
+
|
|
148
|
+
visualizer = serve_component_graph(
|
|
149
|
+
"data/rag_storage/fastinsight/scifact-openai",
|
|
150
|
+
host="127.0.0.1",
|
|
151
|
+
port=8765,
|
|
152
|
+
default_hops=2,
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
In practice, the web page is useful for understanding what happened during retrieval rather than only checking the final answer. A typical flow is to start the visualizer, open the browser page, submit a query or connect to an existing retrieval session, and then inspect the highlighted nodes, edges, and progress updates. This makes it easier to see which evidence was selected, how graph traversal expanded from the initial seeds, and how the retrieved subgraph contributed to the final answer.
|
|
157
|
+
|
|
158
|
+
### Run Experiments
|
|
159
|
+
|
|
160
|
+
The default experiment path in this repository is provided through [scripts/run_batch_experiment.sh](./scripts/run_batch_experiment.sh). This script is intentionally fixed to the current experimental setup and can be run with:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
uv run bash scripts/run_batch_experiment.sh
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
By default, this runs the experiment on the `bsard` dataset with `query_limit=-1`, `top_k=10`, `index_name=node_vector`, `ollama_url=http://localhost:11434/v1`, `ollama_model=gemma3:12b`, and `max_context_chunks=10`. The current setup uses `text-embedding-3-small` for embeddings, and the experiment script iterates over the available graph imports while applying both retrieval methods to each graph.
|
|
167
|
+
|
|
168
|
+
If you want to run the experiment entry point directly rather than going through the batch script, you can execute:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
uv run python test/rag_experiment.py \
|
|
172
|
+
--dataset bsard \
|
|
173
|
+
--query_limit -1 \
|
|
174
|
+
--top_k 10 \
|
|
175
|
+
--index_name node_vector \
|
|
176
|
+
--output_dir ./output/bsard \
|
|
177
|
+
--ollama_url http://localhost:11434/v1 \
|
|
178
|
+
--ollama_model gemma3:12b \
|
|
179
|
+
--max_context_chunks 10
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
The outputs are saved as JSONL files under `./output/bsard/`, typically in files named like `<graph_name>_<retriever>.jsonl`. Each line contains a single query-output pair in the form `{"query": "question text", "output": "generated answer"}`. This makes the results easy to evaluate later with a separate judging or comparison pipeline.
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
## GraphContainer
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="graph-container-workflow.jpg" alt="GraphContainer workflow" width="760">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
**GraphContainer** provides a unified workflow for working with graph RAG systems. It is designed to load graphs produced by different methods, convert them into a shared internal representation, run retrieval pipelines on top of that representation, visualize retrieval traces in a browser, and execute experiments through a consistent interface.
|
|
8
|
+
|
|
9
|
+
[](https://youtu.be/O02eNJLwkU0)
|
|
10
|
+
|
|
11
|
+
### Overview
|
|
12
|
+
|
|
13
|
+
The main idea behind GraphContainer is simple: different graph RAG methods store graph data in different formats, but once those graphs are converted into a common structure, they can be searched, visualized, and compared in a much more consistent way. In this repository, that common structure is implemented through the Unified Graph State, which stores nodes, edges, adjacency information, and vector indexes in a form that downstream components can access without caring about the original source format.
|
|
14
|
+
|
|
15
|
+
At the core of the implementation are `SimpleGraphContainer` and `SearchableGraphContainer`. `SimpleGraphContainer` is responsible for holding the in-memory graph itself, while `SearchableGraphContainer` extends that base structure with pluggable vector indexes such as `node_vector`. On top of this container layer, the repository provides adapters for different upstream graph formats, including `import_graph_from_component_graph` (Component Graph), `import_graph_from_attribute_bundle_graph` (Attribute Bundle Graph), `import_graph_from_topology_semantic_graph` (Topology-Semantic Graph), and `import_graph_from_subgraph_union_graph` (Subgraph Union Graph). These adapters are the entry points that translate method-specific graph storage into the unified internal graph state used by the rest of the system.
|
|
16
|
+
|
|
17
|
+
Once a graph has been loaded, retrieval is handled by the RAG modules under `src/graphcontainer/rag`. The embedding path is managed through `src/graphcontainer/rag/embeddings.py`, and the retrieval logic lives in `src/graphcontainer/rag/retrievers.py`. The repository currently includes two retrieval strategies: `OneHopRetriever`, which starts from vector-retrieved seed nodes and expands to their immediate neighbors, and `FastInsightRetriever`, which applies a multi-stage retrieval process with seed selection, deeper exploration, and final filtering. In the current experiment setup, the initial retrieval size is set to `10`, and FastInsight keeps the final `5` nodes before answer generation.
|
|
18
|
+
|
|
19
|
+
The end-to-end experiment pipeline is implemented in [test/rag_experiment.py](./test/rag_experiment.py). This script loads the available graphs, applies the retrievers, builds prompts from the retrieved content, sends the prompts to the generator model, and writes the outputs as JSONL files. In other words, the implementation path is: load a graph from a method-specific source, convert it into the unified graph container, run retrieval on top of the shared representation, assemble the retrieved evidence into a prompt, generate an answer, and finally save the result for evaluation.
|
|
20
|
+
|
|
21
|
+
### Installation
|
|
22
|
+
|
|
23
|
+
Before running the project, make sure `uv` itself is installed. On macOS and Linux, you can install it with the official standalone installer:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
On Windows PowerShell, you can install it with:
|
|
30
|
+
|
|
31
|
+
```powershell
|
|
32
|
+
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
If you prefer another installation method, such as Homebrew, WinGet, Scoop, or `pipx`, you can use the official `uv` installation guide.
|
|
36
|
+
|
|
37
|
+
If you want to install the published package from PyPI, use:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install graphcontainer
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
If you are developing locally from this repository, install the project dependencies with:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uv sync
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
After installation, restart your shell and run the command below to activate the virtual environment.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
source .venv/bin/activate
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
You can also download example graphs from the Google Drive link below. Place them in the project root directory.
|
|
56
|
+
|
|
57
|
+
[](https://drive.google.com/file/d/1pK8mK2Jgp3T4gVUOuHQrVz2xSpDBIGdV/view?usp=sharing)
|
|
58
|
+
|
|
59
|
+
<!-- [Google Drive link](https://drive.google.com/file/d/1pK8mK2Jgp3T4gVUOuHQrVz2xSpDBIGdV/view?usp=sharing). Place this on the project root directory. -->
|
|
60
|
+
|
|
61
|
+
### Web-based Visualizer
|
|
62
|
+
|
|
63
|
+
The web interface is powered by the live visualizer. You can launch it directly from the command line by pointing it to a graph source:
|
|
64
|
+
|
|
65
|
+
<!-- ```bash
|
|
66
|
+
uv run python -m graphcontainer.visualizer.live_visualizer \
|
|
67
|
+
--source data/rag_storage/fastinsight/scifact-openai \
|
|
68
|
+
--host 127.0.0.1 \
|
|
69
|
+
--port 8765 \
|
|
70
|
+
--hops 2
|
|
71
|
+
``` -->
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
python serve.py --graph component_graph:./data/rag_storage/fastinsight/scifact-openai \
|
|
75
|
+
--host 127.0.0.1 \
|
|
76
|
+
--port 8765 \
|
|
77
|
+
--hops 2
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
After the server starts, open `http://127.0.0.1:8765` in your browser. The page renders the graph or subgraph associated with the current retrieval session and lets you inspect how the retriever moved through the graph. Nodes and edges selected during retrieval can be highlighted, and the visualizer keeps track of session progress so that a query can be inspected step by step instead of only as a final result.
|
|
82
|
+
|
|
83
|
+
If you already have a graph object in memory, you can launch the same interface from Python by using `serve_graph`:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from graphcontainer import serve_graph
|
|
87
|
+
|
|
88
|
+
visualizer = serve_graph(
|
|
89
|
+
graph,
|
|
90
|
+
host="127.0.0.1",
|
|
91
|
+
port=8765,
|
|
92
|
+
default_hops=2,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
print(visualizer.url)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
If your graph is stored in Component Graph format, you can also serve it directly from storage:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from graphcontainer import serve_component_graph
|
|
102
|
+
|
|
103
|
+
visualizer = serve_component_graph(
|
|
104
|
+
"data/rag_storage/fastinsight/scifact-openai",
|
|
105
|
+
host="127.0.0.1",
|
|
106
|
+
port=8765,
|
|
107
|
+
default_hops=2,
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
In practice, the web page is useful for understanding what happened during retrieval rather than only checking the final answer. A typical flow is to start the visualizer, open the browser page, submit a query or connect to an existing retrieval session, and then inspect the highlighted nodes, edges, and progress updates. This makes it easier to see which evidence was selected, how graph traversal expanded from the initial seeds, and how the retrieved subgraph contributed to the final answer.
|
|
112
|
+
|
|
113
|
+
### Run Experiments
|
|
114
|
+
|
|
115
|
+
The default experiment path in this repository is provided through [scripts/run_batch_experiment.sh](./scripts/run_batch_experiment.sh). This script is intentionally fixed to the current experimental setup and can be run with:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
uv run bash scripts/run_batch_experiment.sh
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
By default, this runs the experiment on the `bsard` dataset with `query_limit=-1`, `top_k=10`, `index_name=node_vector`, `ollama_url=http://localhost:11434/v1`, `ollama_model=gemma3:12b`, and `max_context_chunks=10`. The current setup uses `text-embedding-3-small` for embeddings, and the experiment script iterates over the available graph imports while applying both retrieval methods to each graph.
|
|
122
|
+
|
|
123
|
+
If you want to run the experiment entry point directly rather than going through the batch script, you can execute:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
uv run python test/rag_experiment.py \
|
|
127
|
+
--dataset bsard \
|
|
128
|
+
--query_limit -1 \
|
|
129
|
+
--top_k 10 \
|
|
130
|
+
--index_name node_vector \
|
|
131
|
+
--output_dir ./output/bsard \
|
|
132
|
+
--ollama_url http://localhost:11434/v1 \
|
|
133
|
+
--ollama_model gemma3:12b \
|
|
134
|
+
--max_context_chunks 10
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The outputs are saved as JSONL files under `./output/bsard/`, typically in files named like `<graph_name>_<retriever>.jsonl`. Each line contains a single query-output pair in the form `{"query": "question text", "output": "generated answer"}`. This makes the results easy to evaluate later with a separate judging or comparison pipeline.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "graphcontainer"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Unified workflow for working with graph RAG systems"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { file = "LICENSE" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "asmath472" },
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"chromadb>=1.4.1",
|
|
17
|
+
"flagembedding>=1.3.5",
|
|
18
|
+
"openai>=1.108.1",
|
|
19
|
+
"pandas>=3.0.0",
|
|
20
|
+
"python-dotenv>=1.2.1",
|
|
21
|
+
"pyct>=0.6.0",
|
|
22
|
+
"pydantic>=2.12.5",
|
|
23
|
+
"pyvis>=0.3.2",
|
|
24
|
+
"sentence-transformers>=5.2.0",
|
|
25
|
+
"tiktoken>=0.12.0",
|
|
26
|
+
"torch>=2.10.0",
|
|
27
|
+
"tqdm>=4.67.1",
|
|
28
|
+
"transformers>=4.57.6",
|
|
29
|
+
"ollama>=0.6.1",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[tool.setuptools]
|
|
33
|
+
include-package-data = true
|
|
34
|
+
package-dir = {"" = "src"}
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["src"]
|
|
38
|
+
include = ["graphcontainer*"]
|
|
39
|
+
exclude = ["test*"]
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.package-data]
|
|
42
|
+
graphcontainer = ["visualizer/web/*"]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Top-level public API for :mod:`graphcontainer`.

Re-exports the container classes, adapters, indexers, visualizer helpers,
and RAG components so callers can import everything from the package root.
"""

from .base import BaseGraphContainer
from .core import SearchableGraphContainer, SimpleGraphContainer
from .index import BaseIndexer
from .indexers import ChromaCollectionIndexer, InMemoryVectorIndexer, PGVectorIndexer
from .types import EdgeRecord, NodeRecord
from .adapters import (
    FastInsightAdapter,
    GraphAdapter,
    GraphAdapterError,
    LightRAGAdapter,
    UnsupportedSourceError,
    import_graph_from_attribute_bundle_graph,
    import_graph_from_component_graph,
    import_graph_from_fastinsight,
    import_graph_from_lightrag,
    import_graph_from_subgraph_union_graph,
    import_graph_from_topology_semantic_graph,
)
from .visualizer import (
    LiveGraphVisualizer,
    LiveVisualizerClient,
    VisualizerHTTPError,
    clear_session,
    create_session,
    delete_session,
    get_config,
    get_session_snapshot,
    get_session_subgraph,
    health,
    list_sessions,
    set_progress,
    serve_attribute_bundle_graph,
    serve_component_graph,
    serve_fastinsight,
    serve_g_retriever,
    serve_graph,
    serve_hipporag,
    serve_lightrag,
    serve_multi,
    serve_subgraph_union_graph,
    serve_topology_semantic_graph,
    serve_tog,
    update_session,
)
from .rag import (
    BaseRetriever,
    ChatMessage,
    ChatRequest,
    ChatResponse,
    GraphRAGPipeline,
    GraphRAGService,
    FastInsightRetriever,
    HybridRetriever,
    OneHopRetriever,
    OpenAIChatGenerator,
    OpenAIEmbedder,
    RetrievedNode,
    RetrievalResult,
    VectorRetriever,
)

# NOTE: "render_graph_html" and "render_fastinsight_html" were previously
# listed in __all__ without ever being imported into this module, which made
# ``from graphcontainer import *`` raise AttributeError.  They are removed
# from __all__ until the corresponding functions are actually re-exported.
__all__ = [
    "BaseGraphContainer",
    "SearchableGraphContainer",
    "SimpleGraphContainer",
    "BaseIndexer",
    "EdgeRecord",
    "NodeRecord",
    "GraphAdapter",
    "GraphAdapterError",
    "UnsupportedSourceError",
    "FastInsightAdapter",
    "LightRAGAdapter",
    "InMemoryVectorIndexer",
    "ChromaCollectionIndexer",
    "PGVectorIndexer",
    "import_graph_from_component_graph",
    "import_graph_from_attribute_bundle_graph",
    "import_graph_from_topology_semantic_graph",
    "import_graph_from_subgraph_union_graph",
    "import_graph_from_fastinsight",
    "import_graph_from_lightrag",
    "LiveGraphVisualizer",
    "serve_graph",
    "serve_component_graph",
    "serve_attribute_bundle_graph",
    "serve_topology_semantic_graph",
    "serve_subgraph_union_graph",
    "serve_fastinsight",
    "serve_lightrag",
    "serve_hipporag",
    "serve_g_retriever",
    "serve_tog",
    "serve_multi",
    "VisualizerHTTPError",
    "LiveVisualizerClient",
    "health",
    "get_config",
    "list_sessions",
    "create_session",
    "get_session_snapshot",
    "get_session_subgraph",
    "update_session",
    "set_progress",
    "clear_session",
    "delete_session",
    "ChatMessage",
    "ChatRequest",
    "ChatResponse",
    "RetrievedNode",
    "RetrievalResult",
    "OpenAIEmbedder",
    "OpenAIChatGenerator",
    "BaseRetriever",
    "OneHopRetriever",
    "VectorRetriever",
    "HybridRetriever",
    "FastInsightRetriever",
    "GraphRAGPipeline",
    "GraphRAGService",
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# src/GraphContainer/adapters/__init__.py
"""Adapters that translate method-specific graph storage formats into the
unified graph container representation.

Each ``import_graph_from_*`` function is the entry point for one upstream
format; the corresponding ``*Adapter`` class implements the conversion.
"""
from .base import GraphAdapter, GraphAdapterError, UnsupportedSourceError
from .fastinsight import FastInsightAdapter, import_graph_from_fastinsight
from .expla_graphs import ExplaGraphsAdapter, import_graph_from_expla_graphs
from .freebasekg import FreebaseKGAdapter, import_graph_from_freebasekg
from .g_retriever import GRetrieverAdapter, import_graph_from_g_retriever
from .lightrag import LightRAGAdapter, import_graph_from_lightrag
from .hipporag import HippoRAGAdapter, import_graph_from_hipporag

# Public aliases: map the documentation's generic graph-format names onto the
# concrete adapter entry points that implement them.
import_graph_from_component_graph = import_graph_from_fastinsight  # Component Graph
import_graph_from_attribute_bundle_graph = import_graph_from_lightrag  # Attribute Bundle Graph
import_graph_from_topology_semantic_graph = import_graph_from_hipporag  # Topology-Semantic Graph
import_graph_from_subgraph_union_graph = import_graph_from_g_retriever  # Subgraph Union Graph

__all__ = [
    "GraphAdapter",
    "GraphAdapterError",
    "UnsupportedSourceError",
    "FastInsightAdapter",
    "ExplaGraphsAdapter",
    "FreebaseKGAdapter",
    "GRetrieverAdapter",
    "LightRAGAdapter",
    "HippoRAGAdapter",
    "import_graph_from_component_graph",
    "import_graph_from_attribute_bundle_graph",
    "import_graph_from_topology_semantic_graph",
    "import_graph_from_subgraph_union_graph",
    "import_graph_from_fastinsight",
    "import_graph_from_expla_graphs",
    "import_graph_from_freebasekg",
    "import_graph_from_lightrag",
    "import_graph_from_hipporag",
    "import_graph_from_g_retriever",
]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# src/GraphContainer/adapters/base.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from ..core import SimpleGraphContainer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GraphAdapterError(RuntimeError):
    """Base error for adapter-level failures.

    All adapter-specific exceptions in this package derive from this class,
    so callers can catch it to handle any import/export failure uniformly.
    """
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class UnsupportedSourceError(GraphAdapterError):
    """Raised when an adapter cannot import the given source.

    Typically raised by ``import_graph`` when ``can_import`` would have
    returned False for the same source.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GraphAdapter(ABC):
    """Base interface for importing / exporting graphs.

    Concrete adapters translate one method-specific graph storage format
    (e.g. FastInsight, LightRAG) into a :class:`SimpleGraphContainer`, and
    optionally serialize a container back out to that format.
    """

    def __init__(self, *, name: str, version: str = "0.1.0"):
        # Identifying metadata for the adapter; subclasses carry no other
        # required state at this level.
        self.name = name
        self.version = version

    @abstractmethod
    def can_import(self, source: Any) -> bool:
        """Return True if this adapter can import the given source.

        ``source`` is adapter-defined (commonly a storage path or an
        in-memory object); implementations should answer without mutating it.
        """

    @abstractmethod
    def import_graph(
        self,
        source: Any,
        container: Optional[SimpleGraphContainer] = None,
        *,
        keep_source_reference: bool = False,
    ) -> SimpleGraphContainer:
        """Load a graph from source into a container.

        If ``container`` is None, implementations are expected to create a
        fresh :class:`SimpleGraphContainer`; otherwise the graph is loaded
        into the one provided.

        keep_source_reference is adapter-defined optional metadata.
        """

    @abstractmethod
    def export_graph(
        self,
        container: SimpleGraphContainer,
        destination: Any,
        *,
        overwrite: bool = False,
    ) -> Dict[str, Any]:
        """Serialize container into destination format and return result metadata."""