graphcontainer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphcontainer/__init__.py +123 -0
- graphcontainer/adapters/__init__.py +35 -0
- graphcontainer/adapters/base.py +50 -0
- graphcontainer/adapters/expla_graphs.py +222 -0
- graphcontainer/adapters/fastinsight.py +195 -0
- graphcontainer/adapters/freebasekg.py +221 -0
- graphcontainer/adapters/g_retriever.py +644 -0
- graphcontainer/adapters/hipporag.py +833 -0
- graphcontainer/adapters/hipporag_raw.py +482 -0
- graphcontainer/adapters/lightrag.py +462 -0
- graphcontainer/adapters/tog.py +302 -0
- graphcontainer/base.py +34 -0
- graphcontainer/core.py +112 -0
- graphcontainer/index.py +18 -0
- graphcontainer/indexers.py +177 -0
- graphcontainer/rag/__init__.py +26 -0
- graphcontainer/rag/contracts.py +166 -0
- graphcontainer/rag/embeddings.py +241 -0
- graphcontainer/rag/generator.py +83 -0
- graphcontainer/rag/pipeline.py +96 -0
- graphcontainer/rag/retrievers/__init__.py +13 -0
- graphcontainer/rag/retrievers/base.py +29 -0
- graphcontainer/rag/retrievers/fastinsight.py +786 -0
- graphcontainer/rag/retrievers/hybrid.py +25 -0
- graphcontainer/rag/retrievers/one_hop.py +177 -0
- graphcontainer/rag/retrievers/utils.py +132 -0
- graphcontainer/rag/retrievers/vector.py +123 -0
- graphcontainer/rag/service.py +150 -0
- graphcontainer/types.py +17 -0
- graphcontainer/utils.py +14 -0
- graphcontainer/visualizer/__init__.py +55 -0
- graphcontainer/visualizer/client.py +309 -0
- graphcontainer/visualizer/live_visualizer.py +2036 -0
- graphcontainer/visualizer/web/app.js +2239 -0
- graphcontainer/visualizer/web/index.html +204 -0
- graphcontainer/visualizer/web/logo.png +0 -0
- graphcontainer/visualizer/web/style.css +1343 -0
- graphcontainer-0.1.0.dist-info/METADATA +182 -0
- graphcontainer-0.1.0.dist-info/RECORD +42 -0
- graphcontainer-0.1.0.dist-info/WHEEL +5 -0
- graphcontainer-0.1.0.dist-info/licenses/LICENSE +21 -0
- graphcontainer-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from .base import BaseGraphContainer
|
|
2
|
+
from .core import SearchableGraphContainer, SimpleGraphContainer
|
|
3
|
+
from .index import BaseIndexer
|
|
4
|
+
from .indexers import ChromaCollectionIndexer, InMemoryVectorIndexer, PGVectorIndexer
|
|
5
|
+
from .types import EdgeRecord, NodeRecord
|
|
6
|
+
from .adapters import (
|
|
7
|
+
FastInsightAdapter,
|
|
8
|
+
GraphAdapter,
|
|
9
|
+
GraphAdapterError,
|
|
10
|
+
LightRAGAdapter,
|
|
11
|
+
UnsupportedSourceError,
|
|
12
|
+
import_graph_from_attribute_bundle_graph,
|
|
13
|
+
import_graph_from_component_graph,
|
|
14
|
+
import_graph_from_fastinsight,
|
|
15
|
+
import_graph_from_lightrag,
|
|
16
|
+
import_graph_from_subgraph_union_graph,
|
|
17
|
+
import_graph_from_topology_semantic_graph,
|
|
18
|
+
)
|
|
19
|
+
from .visualizer import (
|
|
20
|
+
LiveGraphVisualizer,
|
|
21
|
+
LiveVisualizerClient,
|
|
22
|
+
VisualizerHTTPError,
|
|
23
|
+
clear_session,
|
|
24
|
+
create_session,
|
|
25
|
+
delete_session,
|
|
26
|
+
get_config,
|
|
27
|
+
get_session_snapshot,
|
|
28
|
+
get_session_subgraph,
|
|
29
|
+
health,
|
|
30
|
+
list_sessions,
|
|
31
|
+
set_progress,
|
|
32
|
+
serve_attribute_bundle_graph,
|
|
33
|
+
serve_component_graph,
|
|
34
|
+
serve_fastinsight,
|
|
35
|
+
serve_g_retriever,
|
|
36
|
+
serve_graph,
|
|
37
|
+
serve_hipporag,
|
|
38
|
+
serve_lightrag,
|
|
39
|
+
serve_multi,
|
|
40
|
+
serve_subgraph_union_graph,
|
|
41
|
+
serve_topology_semantic_graph,
|
|
42
|
+
serve_tog,
|
|
43
|
+
update_session,
|
|
44
|
+
)
|
|
45
|
+
from .rag import (
|
|
46
|
+
BaseRetriever,
|
|
47
|
+
ChatMessage,
|
|
48
|
+
ChatRequest,
|
|
49
|
+
ChatResponse,
|
|
50
|
+
GraphRAGPipeline,
|
|
51
|
+
GraphRAGService,
|
|
52
|
+
FastInsightRetriever,
|
|
53
|
+
HybridRetriever,
|
|
54
|
+
OneHopRetriever,
|
|
55
|
+
OpenAIChatGenerator,
|
|
56
|
+
OpenAIEmbedder,
|
|
57
|
+
RetrievedNode,
|
|
58
|
+
RetrievalResult,
|
|
59
|
+
VectorRetriever,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Public API of the package.
#
# Every entry must be a name bound by the imports at the top of this module;
# ``from graphcontainer import *`` raises AttributeError for any entry that is
# not.  ``render_graph_html`` and ``render_fastinsight_html`` used to be listed
# here without being imported anywhere in this module, so they are no longer
# advertised.
__all__ = [
    # Containers, indexers and record types.
    "BaseGraphContainer",
    "SearchableGraphContainer",
    "SimpleGraphContainer",
    "BaseIndexer",
    "EdgeRecord",
    "NodeRecord",
    # Adapters and their import helpers.
    "GraphAdapter",
    "GraphAdapterError",
    "UnsupportedSourceError",
    "FastInsightAdapter",
    "LightRAGAdapter",
    "InMemoryVectorIndexer",
    "ChromaCollectionIndexer",
    "PGVectorIndexer",
    "import_graph_from_component_graph",
    "import_graph_from_attribute_bundle_graph",
    "import_graph_from_topology_semantic_graph",
    "import_graph_from_subgraph_union_graph",
    "import_graph_from_fastinsight",
    "import_graph_from_lightrag",
    # Live visualizer and its client helpers.
    "LiveGraphVisualizer",
    "serve_graph",
    "serve_component_graph",
    "serve_attribute_bundle_graph",
    "serve_topology_semantic_graph",
    "serve_subgraph_union_graph",
    "serve_fastinsight",
    "serve_lightrag",
    "serve_hipporag",
    "serve_g_retriever",
    "serve_tog",
    "serve_multi",
    "VisualizerHTTPError",
    "LiveVisualizerClient",
    "health",
    "get_config",
    "list_sessions",
    "create_session",
    "get_session_snapshot",
    "get_session_subgraph",
    "update_session",
    "set_progress",
    "clear_session",
    "delete_session",
    # RAG contracts, components and services.
    "ChatMessage",
    "ChatRequest",
    "ChatResponse",
    "RetrievedNode",
    "RetrievalResult",
    "OpenAIEmbedder",
    "OpenAIChatGenerator",
    "BaseRetriever",
    "OneHopRetriever",
    "VectorRetriever",
    "HybridRetriever",
    "FastInsightRetriever",
    "GraphRAGPipeline",
    "GraphRAGService",
]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# src/GraphContainer/adapters/__init__.py
|
|
2
|
+
from .base import GraphAdapter, GraphAdapterError, UnsupportedSourceError
|
|
3
|
+
from .fastinsight import FastInsightAdapter, import_graph_from_fastinsight
|
|
4
|
+
from .expla_graphs import ExplaGraphsAdapter, import_graph_from_expla_graphs
|
|
5
|
+
from .freebasekg import FreebaseKGAdapter, import_graph_from_freebasekg
|
|
6
|
+
from .g_retriever import GRetrieverAdapter, import_graph_from_g_retriever
|
|
7
|
+
from .lightrag import LightRAGAdapter, import_graph_from_lightrag
|
|
8
|
+
from .hipporag import HippoRAGAdapter, import_graph_from_hipporag
|
|
9
|
+
|
|
10
|
+
# Alternative names for four of the import helpers.  Each alias is bound to
# the very same function object as the helper it is assigned from.
import_graph_from_component_graph = import_graph_from_fastinsight
import_graph_from_attribute_bundle_graph = import_graph_from_lightrag
import_graph_from_topology_semantic_graph = import_graph_from_hipporag
import_graph_from_subgraph_union_graph = import_graph_from_g_retriever

# Names re-exported by ``from graphcontainer.adapters import *``.
__all__ = [
    # Base interface and errors.
    "GraphAdapter",
    "GraphAdapterError",
    "UnsupportedSourceError",
    # Concrete adapters.
    "FastInsightAdapter",
    "ExplaGraphsAdapter",
    "FreebaseKGAdapter",
    "GRetrieverAdapter",
    "LightRAGAdapter",
    "HippoRAGAdapter",
    # Import helpers published under their alias names.
    "import_graph_from_component_graph",
    "import_graph_from_attribute_bundle_graph",
    "import_graph_from_topology_semantic_graph",
    "import_graph_from_subgraph_union_graph",
    # Import helpers published under their adapter-specific names.
    "import_graph_from_fastinsight",
    "import_graph_from_expla_graphs",
    "import_graph_from_freebasekg",
    "import_graph_from_lightrag",
    "import_graph_from_hipporag",
    "import_graph_from_g_retriever",
]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# src/GraphContainer/adapters/base.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from ..core import SimpleGraphContainer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GraphAdapterError(RuntimeError):
    """Common ancestor of the errors raised by graph adapters.

    It derives from ``RuntimeError``, so callers may catch either type.
    """
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class UnsupportedSourceError(GraphAdapterError):
    """Signals that an adapter was handed a source it is unable to import."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GraphAdapter(ABC):
    """Abstract contract for adapters that import and export graphs.

    Subclasses must implement :meth:`can_import`, :meth:`import_graph` and
    :meth:`export_graph`; the class cannot be instantiated until they do.
    """

    def __init__(self, *, name: str, version: str = "0.1.0"):
        """Record the adapter's ``name`` and ``version`` on the instance.

        Both arguments are keyword-only.
        """
        self.version = version
        self.name = name

    @abstractmethod
    def can_import(self, source: Any) -> bool:
        """Tell whether this adapter is able to import ``source``."""

    @abstractmethod
    def import_graph(
        self,
        source: Any,
        container: Optional[SimpleGraphContainer] = None,
        *,
        keep_source_reference: bool = False,
    ) -> SimpleGraphContainer:
        """Read the graph found at ``source`` into a container and return it.

        ``keep_source_reference`` is optional metadata whose exact meaning is
        left to each adapter.
        """

    @abstractmethod
    def export_graph(
        self,
        container: SimpleGraphContainer,
        destination: Any,
        *,
        overwrite: bool = False,
    ) -> Dict[str, Any]:
        """Write ``container`` to ``destination`` and return result metadata."""
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from hashlib import md5
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, Iterable, Iterator, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
from ..core import SearchableGraphContainer, SimpleGraphContainer
|
|
11
|
+
from ..types import EdgeRecord, NodeRecord
|
|
12
|
+
from ..utils import container_or_new
|
|
13
|
+
from .base import GraphAdapter, GraphAdapterError, UnsupportedSourceError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Non-greedy match of one parenthesised group; capture group 1 holds the text
# found between the opening and the closing parenthesis.
_TRIPLE_RE = re.compile(r"\((.*?)\)")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _normalize_source(source: Any) -> Path:
|
|
20
|
+
if isinstance(source, (str, Path)):
|
|
21
|
+
path = Path(source)
|
|
22
|
+
if path.is_dir():
|
|
23
|
+
return path / "train_dev.tsv"
|
|
24
|
+
return path
|
|
25
|
+
raise UnsupportedSourceError(f"Unsupported source type: {type(source)}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _iter_tsv_rows(path: Path) -> Iterable[Dict[str, Any]]:
|
|
29
|
+
if not path.exists():
|
|
30
|
+
raise UnsupportedSourceError(f"Required file not found: {path}")
|
|
31
|
+
|
|
32
|
+
with path.open("r", encoding="utf-8", newline="") as f:
|
|
33
|
+
reader = csv.DictReader(f, delimiter="\t")
|
|
34
|
+
for row in reader:
|
|
35
|
+
if row:
|
|
36
|
+
yield row
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _normalize_text(value: Any) -> str:
|
|
40
|
+
if value is None:
|
|
41
|
+
return ""
|
|
42
|
+
return " ".join(str(value).strip().lower().split())
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _parse_triplets(graph_text: str) -> Iterator[Tuple[str, str, str]]:
    """Yield ``(source, relation, target)`` triples found in ``graph_text``.

    Each parenthesised group is split on ``;`` into at most three parts and
    every part is stripped.  Groups that do not produce exactly three parts,
    or whose source or target is empty, are skipped; an empty relation is
    allowed.  A falsy ``graph_text`` yields nothing.
    """
    for match in _TRIPLE_RE.finditer(graph_text or ""):
        fields = [piece.strip() for piece in match.group(1).split(";", 2)]
        if len(fields) != 3:
            continue
        head, relation, tail = fields
        if head and tail:
            yield head, relation, tail
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ExplaGraphsAdapter(GraphAdapter):
    """Adapter for Expla_graphs train_dev.tsv format.

    Import reads the ``graph`` column of every TSV row, extracts the
    parenthesised ``source; relation; target`` triples and adds one
    ``Entity`` node per distinct normalised label plus one edge per distinct
    ``(source id, relation, target id)`` combination.  Export writes
    ``nodes.jsonl`` and ``edges.jsonl`` into a destination directory.
    """

    def __init__(self):
        super().__init__(name="expla_graphs", version="0.1.0")

    def can_import(self, source: Any) -> bool:
        """Return True when ``source`` resolves to an existing ``.tsv``/``.txt`` file.

        Any exception raised while resolving or probing the path is treated
        as "cannot import" and reported as False.
        """
        try:
            src = _normalize_source(source)
            return src.is_file() and src.suffix.lower() in {".tsv", ".txt"}
        except Exception:
            return False

    @staticmethod
    def _ensure_node(
        graph: SimpleGraphContainer,
        node_key_to_id: Dict[Tuple[str, str], str],
        label: str,
        provenance: Dict[str, Any],
    ) -> str:
        """Return the node id for ``label``, adding the node on first sight.

        The id is ``node-`` followed by the MD5 hex digest of
        ``entity|<normalised label>``, so labels that normalise to the same
        text share one node.  The node keeps the first raw label seen as its
        text.
        """
        key = ("entity", _normalize_text(label))
        node_id = node_key_to_id.get(key)
        if node_id is None:
            node_id = f"node-{md5(f'{key[0]}|{key[1]}'.encode()).hexdigest()}"
            node_key_to_id[key] = node_id
            graph.add_node(
                NodeRecord(
                    id=node_id,
                    type="Entity",
                    text=label,
                    # NOTE(review): "original_label" stores the generated id,
                    # not the raw label text -- confirm this is intended.
                    metadata={"original_label": node_id, **provenance},
                )
            )
        return node_id

    def import_graph(
        self,
        source: Any,
        container: Optional[SimpleGraphContainer] = None,
        *,
        keep_source_reference: bool = False,
        **kwargs: Any,
    ) -> SearchableGraphContainer:
        """Load the triples of an Expla_graphs TSV file into a container.

        Args:
            source: ``str`` or ``Path`` of the TSV file, or of a directory
                containing ``train_dev.tsv``.
            container: passed to ``container_or_new`` to obtain the graph
                that receives the nodes and edges.
            keep_source_reference: when True, every node and edge created
                here gets ``_source_path`` and ``_source_style`` metadata.
            **kwargs: accepted for signature compatibility; not used.

        Returns:
            The graph obtained from ``container_or_new``.

        Raises:
            UnsupportedSourceError: if ``source`` has an unsupported type or
                the resolved file does not exist.
        """
        src_path = _normalize_source(source)
        graph = container_or_new(container)

        # Provenance keys copied into the metadata of every created record.
        provenance: Dict[str, Any] = {}
        if keep_source_reference:
            provenance["_source_path"] = str(src_path)
            provenance["_source_style"] = "expla_graphs"

        # Deduplicate nodes/edges across rows by default.
        node_key_to_id: Dict[Tuple[str, str], str] = {}
        seen_edges = set()

        for row in _iter_tsv_rows(src_path):
            graph_text = row.get("graph")
            if not graph_text:
                continue

            for src, rel, dst in _parse_triplets(graph_text):
                src_id = self._ensure_node(graph, node_key_to_id, src, provenance)
                dst_id = self._ensure_node(graph, node_key_to_id, dst, provenance)
                # An empty relation falls back to the generic "related".
                rel_text = _normalize_text(rel) or "related"

                edge_key = (src_id, rel_text, dst_id)
                if edge_key in seen_edges:
                    continue
                seen_edges.add(edge_key)

                graph.add_edge(
                    EdgeRecord(
                        source=src_id,
                        target=dst_id,
                        relation=rel_text,
                        weight=1.0,
                        metadata=dict(provenance),
                    )
                )

        return graph

    def export_graph(
        self,
        container: SimpleGraphContainer,
        destination: Any,
        *,
        overwrite: bool = False,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Write ``container`` as ``nodes.jsonl`` and ``edges.jsonl``.

        Args:
            container: graph whose ``nodes`` mapping and ``edges`` sequence
                are serialised.
            destination: directory to write into; created when missing.
            overwrite: allow writing into a directory that already has
                entries.
            **kwargs: accepted for signature compatibility; not used.

        Returns:
            A dict with the destination, both file paths and the node and
            edge counts.

        Raises:
            GraphAdapterError: if ``destination`` exists, is not empty and
                ``overwrite`` is False.
        """
        dest = Path(destination)
        if dest.exists() and not overwrite and any(dest.iterdir()):
            raise GraphAdapterError(f"Destination is not empty: {dest}")
        dest.mkdir(parents=True, exist_ok=True)

        node_path = dest / "nodes.jsonl"
        edge_path = dest / "edges.jsonl"

        with node_path.open("w", encoding="utf-8") as f:
            for node in container.nodes.values():
                # Metadata is spread last, so a metadata key named "id",
                # "text" or "type" replaces the record's own value.
                payload = {
                    "id": node.id,
                    "text": node.text,
                    "type": node.type,
                    **node.metadata,
                }
                if node.embedding is not None:
                    payload["embedding"] = (
                        node.embedding.tolist()
                        if hasattr(node.embedding, "tolist")
                        else list(node.embedding)
                    )
                f.write(json.dumps(payload, ensure_ascii=False) + "\n")

        with edge_path.open("w", encoding="utf-8") as f:
            for edge in container.edges:
                payload = {
                    "source": edge.source,
                    "target": edge.target,
                    "relation": edge.relation,
                    "weight": edge.weight,
                    **edge.metadata,
                }
                f.write(json.dumps(payload, ensure_ascii=False) + "\n")

        return {
            "destination": str(dest),
            "node_file": str(node_path),
            "edge_file": str(edge_path),
            "nodes": len(container.nodes),
            "edges": len(container.edges),
        }
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def import_graph_from_expla_graphs(
    source: Any,
    *,
    container: Optional[SimpleGraphContainer] = None,
    keep_source_reference: bool = False,
    **kwargs: Any,
) -> SearchableGraphContainer:
    """Import an Expla_graphs source through a fresh ``ExplaGraphsAdapter``.

    All arguments are handed to ``ExplaGraphsAdapter.import_graph`` and its
    result is returned unchanged.
    """
    adapter = ExplaGraphsAdapter()
    return adapter.import_graph(
        source,
        container,
        keep_source_reference=keep_source_reference,
        **kwargs,
    )
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Iterator, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
from ..core import SearchableGraphContainer, SimpleGraphContainer
|
|
9
|
+
from ..types import EdgeRecord, NodeRecord
|
|
10
|
+
from ..utils import container_or_new
|
|
11
|
+
from .base import GraphAdapter, GraphAdapterError, UnsupportedSourceError
|
|
12
|
+
from ..indexers import ChromaCollectionIndexer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _normalize_source(source: Any) -> Path:
|
|
16
|
+
if isinstance(source, (str, Path)):
|
|
17
|
+
return Path(source)
|
|
18
|
+
raise UnsupportedSourceError(f"Unsupported source type: {type(source)}")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _iter_jsonl(path: Path) -> Iterator[Dict[str, Any]]:
|
|
22
|
+
if not path.exists():
|
|
23
|
+
raise UnsupportedSourceError(f"Required file not found: {path}")
|
|
24
|
+
|
|
25
|
+
with path.open("r", encoding="utf-8") as f:
|
|
26
|
+
for line in f:
|
|
27
|
+
line = line.strip()
|
|
28
|
+
if not line:
|
|
29
|
+
continue
|
|
30
|
+
item = json.loads(line)
|
|
31
|
+
if isinstance(item, dict):
|
|
32
|
+
yield item
|
|
33
|
+
elif isinstance(item, list):
|
|
34
|
+
# edges.jsonl may use list rows.
|
|
35
|
+
yield {"_list": item}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _parse_edge(raw: Dict[str, Any]) -> Optional[Tuple[str, str]]:
|
|
39
|
+
if "_list" in raw:
|
|
40
|
+
row = raw["_list"]
|
|
41
|
+
if isinstance(row, list) and len(row) >= 2:
|
|
42
|
+
return str(row[0]), str(row[1])
|
|
43
|
+
return None
|
|
44
|
+
|
|
45
|
+
source = raw.get("src", raw.get("source"))
|
|
46
|
+
target = raw.get("tgt", raw.get("target"))
|
|
47
|
+
if source is None or target is None:
|
|
48
|
+
return None
|
|
49
|
+
return str(source), str(target)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class FastInsightAdapter(GraphAdapter):
    """Adapter for the component-graph directory layout.

    A source directory holds ``nodes.jsonl`` and ``edges.jsonl`` and may hold
    a manifest file whose ``vector_store`` entry names a ChromaDB collection
    to attach to the imported graph as a vector index.
    """

    def __init__(self):
        super().__init__(name="component_graph", version="0.1.0")

    def can_import(self, source: Any) -> bool:
        """Return True when ``source`` contains both JSONL graph files.

        Any exception raised while resolving or probing the path is treated
        as "cannot import" and reported as False.
        """
        try:
            src = _normalize_source(source)
            return (src / "nodes.jsonl").exists() and (src / "edges.jsonl").exists()
        except Exception:
            return False

    def _get_manifest(self, path: Path) -> Dict[str, Any]:
        """Return the decoded manifest found in ``path``, or ``{}`` if none.

        ``manifest.json`` is tried first, then the ``menifest.json`` spelling.
        """
        for name in ("manifest.json", "menifest.json"):
            p = path / name
            if p.exists():
                return json.loads(p.read_text(encoding="utf-8"))
        return {}

    def import_graph(
        self,
        source: Any,
        container: Optional[SimpleGraphContainer] = None,
        *,
        keep_source_reference: bool = False,
        **kwargs: Any,
    ) -> SearchableGraphContainer:
        """Load nodes, edges and (optionally) a vector index from ``source``.

        Args:
            source: ``str`` or ``Path`` of the directory to read.
            container: passed to ``container_or_new`` to obtain the graph
                that receives the nodes and edges.
            keep_source_reference: when True, node metadata gets
                ``_source_path`` and ``_source_style`` unless already set.
            **kwargs: optional ``collection_name``, ``vector_store_path`` and
                ``distance_metric``; each takes priority over the manifest.

        Returns:
            The graph obtained from ``container_or_new``.

        Raises:
            UnsupportedSourceError: if ``source`` has an unsupported type or
                a required JSONL file is missing.
        """
        src_path = _normalize_source(source)
        manifest = self._get_manifest(src_path)
        # A manifest with ``"vector_store": null`` must behave like one
        # without the key instead of failing on ``None.get``.
        v_cfg = manifest.get("vector_store") or {}

        # Priority: kwargs > manifest > default
        col_name = kwargs.get("collection_name") or v_cfg.get("collection_name")
        db_path = kwargs.get("vector_store_path") or v_cfg.get("path")
        # ``or`` chaining keeps an explicit ``distance_metric=None`` from
        # being turned into the literal string "None".
        distance_metric = str(
            kwargs.get("distance_metric") or v_cfg.get("distance_metric") or "cosine"
        )

        graph = container_or_new(container)

        for raw in _iter_jsonl(src_path / "nodes.jsonl"):
            node_id = raw.get("id")
            if node_id is None:
                continue
            # Every key other than id/text/type is carried over as metadata.
            metadata = {k: v for k, v in raw.items() if k not in {"id", "text", "type"}}
            if keep_source_reference:
                metadata.setdefault("_source_path", str(src_path))
                metadata.setdefault("_source_style", "component_graph")
            graph.add_node(
                NodeRecord(
                    id=str(node_id),
                    type=str(raw.get("type", "Chunk")),
                    text=raw.get("text"),
                    metadata=metadata,
                )
            )

        for raw in _iter_jsonl(src_path / "edges.jsonl"):
            endpoints = _parse_edge(raw)
            if endpoints is None:
                continue
            source_id, target_id = endpoints
            # Only the endpoints are imported; other edge fields are dropped.
            graph.add_edge(EdgeRecord(source=source_id, target=target_id))

        if col_name:
            # Imported lazily so chromadb is only needed when a collection
            # is actually configured.
            import chromadb

            if not db_path:
                db_path = f"./data/database/chroma_db/{col_name}"
            client = chromadb.PersistentClient(path=str(Path(str(db_path)).resolve()))
            col = client.get_or_create_collection(name=str(col_name))
            indexer = ChromaCollectionIndexer(
                col,
                persist_path=str(db_path),
                distance_metric=distance_metric,
            )
            # The same indexer is registered under the collection name and
            # under the fixed "node_vector" key.
            graph.attach_index(str(col_name), indexer)
            graph.attach_index("node_vector", indexer)

        return graph

    def export_graph(
        self,
        container: SimpleGraphContainer,
        destination: Any,
        *,
        overwrite: bool = False,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Write ``container`` as JSONL files plus a ``manifest.json``.

        Args:
            container: graph whose ``nodes`` mapping and ``edges`` sequence
                are serialised; edges are written as ``[source, target]``.
            destination: directory to write into; created when missing.
            overwrite: allow writing into a directory that already has
                entries.
            **kwargs: optional ``collection_name``, ``vector_store_path`` and
                ``distance_metric`` used for the fallback manifest entry.

        Returns:
            A dict with the destination, both file paths and the node and
            edge counts.

        Raises:
            GraphAdapterError: if ``destination`` exists, is not empty and
                ``overwrite`` is False.
        """
        dest = Path(destination)
        if dest.exists() and not overwrite and any(dest.iterdir()):
            raise GraphAdapterError(f"Destination is not empty: {dest}")
        dest.mkdir(parents=True, exist_ok=True)

        node_path = dest / "nodes.jsonl"
        edge_path = dest / "edges.jsonl"

        with node_path.open("w", encoding="utf-8") as f:
            for n in container.nodes.values():
                payload = {"id": n.id, "text": n.text, "type": n.type, **n.metadata}
                f.write(json.dumps(payload, ensure_ascii=False) + "\n")

        with edge_path.open("w", encoding="utf-8") as f:
            for e in container.edges:
                f.write(json.dumps([e.source, e.target], ensure_ascii=False) + "\n")

        # Prefer the description reported by an attached "node_vector" index.
        store_info = None
        if isinstance(container, SearchableGraphContainer):
            idx = container.get_index("node_vector")
            if idx is not None and hasattr(idx, "describe_store"):
                store_info = idx.describe_store()

        col_name = kwargs.get("collection_name", dest.name)
        vector_path = kwargs.get("vector_store_path", f"./chroma_db/{col_name}")
        manifest = {
            "vector_store": store_info
            or {
                "library": "chromadb",
                "path": vector_path,
                "collection_name": col_name,
                "distance_metric": kwargs.get("distance_metric", "cosine"),
            }
        }
        (dest / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")

        return {
            "destination": str(dest),
            "node_file": str(node_path),
            "edge_file": str(edge_path),
            "nodes": len(container.nodes),
            "edges": len(container.edges),
        }
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def import_graph_from_fastinsight(
    source: Any,
    *,
    container: Optional[SimpleGraphContainer] = None,
    keep_source_reference: bool = False,
    **kwargs: Any,
) -> SearchableGraphContainer:
    """Import a component-graph directory through a fresh ``FastInsightAdapter``.

    All arguments are handed to ``FastInsightAdapter.import_graph`` and its
    result is returned unchanged.
    """
    adapter = FastInsightAdapter()
    return adapter.import_graph(
        source,
        container,
        keep_source_reference=keep_source_reference,
        **kwargs,
    )
|