code-graph-rag 0.0.88__tar.gz → 0.0.100__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_graph_rag-0.0.88/code_graph_rag.egg-info → code_graph_rag-0.0.100}/PKG-INFO +1 -1
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100/code_graph_rag.egg-info}/PKG-INFO +1 -1
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/cli.py +12 -7
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/config.py +6 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/constants.py +32 -3
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/cypher_queries.py +21 -0
- code_graph_rag-0.0.100/codebase_rag/embedder.py +183 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/exceptions.py +1 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/graph_loader.py +12 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/graph_updater.py +217 -46
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/logs.py +44 -1
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/parser_loader.py +12 -13
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/unixcoder.py +11 -0
- code_graph_rag-0.0.100/codebase_rag/vector_store.py +169 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/pyproject.toml +1 -1
- code_graph_rag-0.0.88/codebase_rag/embedder.py +0 -48
- code_graph_rag-0.0.88/codebase_rag/vector_store.py +0 -80
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/LICENSE +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/PYPI_README.md +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/README.md +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/cgr/__init__.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/code_graph_rag.egg-info/SOURCES.txt +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/code_graph_rag.egg-info/dependency_links.txt +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/code_graph_rag.egg-info/entry_points.txt +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/code_graph_rag.egg-info/requires.txt +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/code_graph_rag.egg-info/top_level.txt +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/__init__.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/cli_help.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/decorators.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/language_spec.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/main.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/models.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/prompts.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/readme_sections.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/schema_builder.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/schemas.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/tool_errors.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codebase_rag/types_defs.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codec/__init__.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codec/schema_pb2.py +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/codec/schema_pb2.pyi +0 -0
- {code_graph_rag-0.0.88 → code_graph_rag-0.0.100}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-graph-rag
|
|
3
|
-
Version: 0.0.88
|
|
3
|
+
Version: 0.0.100
|
|
4
4
|
Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-graph-rag
|
|
3
|
-
Version: 0.0.88
|
|
3
|
+
Version: 0.0.100
|
|
4
4
|
Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
|
|
@@ -169,12 +169,12 @@ def start(
|
|
|
169
169
|
parsers, queries = load_parsers()
|
|
170
170
|
|
|
171
171
|
updater = GraphUpdater(
|
|
172
|
-
ingestor,
|
|
173
|
-
repo_to_update,
|
|
174
|
-
parsers,
|
|
175
|
-
queries,
|
|
176
|
-
unignore_paths,
|
|
177
|
-
exclude_paths,
|
|
172
|
+
ingestor=ingestor,
|
|
173
|
+
repo_path=repo_to_update,
|
|
174
|
+
parsers=parsers,
|
|
175
|
+
queries=queries,
|
|
176
|
+
unignore_paths=unignore_paths,
|
|
177
|
+
exclude_paths=exclude_paths,
|
|
178
178
|
)
|
|
179
179
|
updater.run()
|
|
180
180
|
|
|
@@ -245,7 +245,12 @@ def index(
|
|
|
245
245
|
)
|
|
246
246
|
parsers, queries = load_parsers()
|
|
247
247
|
updater = GraphUpdater(
|
|
248
|
-
ingestor,
|
|
248
|
+
ingestor=ingestor,
|
|
249
|
+
repo_path=repo_to_index,
|
|
250
|
+
parsers=parsers,
|
|
251
|
+
queries=queries,
|
|
252
|
+
unignore_paths=unignore_paths,
|
|
253
|
+
exclude_paths=exclude_paths,
|
|
249
254
|
)
|
|
250
255
|
|
|
251
256
|
updater.run()
|
|
@@ -246,9 +246,15 @@ class AppConfig(BaseSettings):
|
|
|
246
246
|
QDRANT_COLLECTION_NAME: str = "code_embeddings"
|
|
247
247
|
QDRANT_VECTOR_DIM: int = 768
|
|
248
248
|
QDRANT_TOP_K: int = 5
|
|
249
|
+
QDRANT_UPSERT_RETRIES: int = Field(default=3, gt=0)
|
|
250
|
+
QDRANT_RETRY_BASE_DELAY: float = Field(default=0.5, gt=0)
|
|
251
|
+
QDRANT_BATCH_SIZE: int = Field(default=50, gt=0)
|
|
249
252
|
EMBEDDING_MAX_LENGTH: int = 512
|
|
250
253
|
EMBEDDING_PROGRESS_INTERVAL: int = 10
|
|
251
254
|
|
|
255
|
+
FLUSH_THREAD_POOL_SIZE: int = Field(default=4, gt=0)
|
|
256
|
+
FILE_FLUSH_INTERVAL: int = Field(default=500, gt=0)
|
|
257
|
+
|
|
252
258
|
CACHE_MAX_ENTRIES: int = 1000
|
|
253
259
|
CACHE_MAX_MEMORY_MB: int = 500
|
|
254
260
|
CACHE_EVICTION_DIVISOR: int = 10
|
|
@@ -150,6 +150,8 @@ V1_PATH = "/v1"
|
|
|
150
150
|
HTTP_OK = 200
|
|
151
151
|
|
|
152
152
|
UNIXCODER_MODEL = "microsoft/unixcoder-base"
|
|
153
|
+
EMBEDDING_DEFAULT_BATCH_SIZE = 32
|
|
154
|
+
EMBEDDING_CACHE_FILENAME = ".embedding_cache.json"
|
|
153
155
|
|
|
154
156
|
KEY_NODES = "nodes"
|
|
155
157
|
KEY_RELATIONSHIPS = "relationships"
|
|
@@ -417,14 +419,21 @@ CSPROJ_SUFFIX = ".csproj"
|
|
|
417
419
|
# (H) Cypher queries
|
|
418
420
|
CYPHER_DEFAULT_LIMIT = 50
|
|
419
421
|
|
|
420
|
-
|
|
422
|
+
_CYPHER_EMBEDDING_BASE = """
|
|
421
423
|
MATCH (m:Module)-[:DEFINES]->(n)
|
|
422
424
|
WHERE (n:Function OR n:Method)
|
|
423
425
|
AND m.qualified_name STARTS WITH ($project_name + '.')
|
|
424
|
-
|
|
426
|
+
"""
|
|
427
|
+
|
|
428
|
+
CYPHER_QUERY_EMBEDDINGS = (
|
|
429
|
+
_CYPHER_EMBEDDING_BASE
|
|
430
|
+
+ """RETURN id(n) AS node_id, n.qualified_name AS qualified_name,
|
|
425
431
|
n.start_line AS start_line, n.end_line AS end_line,
|
|
426
432
|
m.path AS path
|
|
427
433
|
"""
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
CYPHER_QUERY_PROJECT_NODE_IDS = _CYPHER_EMBEDDING_BASE + "RETURN id(n) AS node_id\n"
|
|
428
437
|
|
|
429
438
|
|
|
430
439
|
class SupportedLanguage(StrEnum):
|
|
@@ -883,7 +892,7 @@ PYINSTALLER_ARG_HIDDEN_IMPORT = "--hidden-import"
|
|
|
883
892
|
PYINSTALLER_ARG_EXCLUDE_MODULE = "--exclude-module"
|
|
884
893
|
PYINSTALLER_ENTRY_POINT = "main.py"
|
|
885
894
|
|
|
886
|
-
PYINSTALLER_EXCLUDED_MODULES = ["logfire"
|
|
895
|
+
PYINSTALLER_EXCLUDED_MODULES = ["logfire"]
|
|
887
896
|
|
|
888
897
|
# (H) TOML parsing constants
|
|
889
898
|
TOML_KEY_PROJECT = "project"
|
|
@@ -908,6 +917,7 @@ PYINSTALLER_PACKAGES: list["PyInstallerPackage"] = [
|
|
|
908
917
|
PyInstallerPackage(name="loguru", collect_all=True),
|
|
909
918
|
PyInstallerPackage(name="toml", collect_all=True),
|
|
910
919
|
PyInstallerPackage(name="protobuf", collect_all=True),
|
|
920
|
+
PyInstallerPackage(name="genai_prices", collect_all=True),
|
|
911
921
|
]
|
|
912
922
|
|
|
913
923
|
ALLOWED_COMMENT_MARKERS = frozenset(
|
|
@@ -964,6 +974,22 @@ CYPHER_PREFIX = "cypher"
|
|
|
964
974
|
CYPHER_SEMICOLON = ";"
|
|
965
975
|
CYPHER_BACKTICK = "`"
|
|
966
976
|
CYPHER_MATCH_KEYWORD = "MATCH"
|
|
977
|
+
CYPHER_DANGEROUS_KEYWORDS: frozenset[str] = frozenset(
|
|
978
|
+
{
|
|
979
|
+
"DELETE",
|
|
980
|
+
"DETACH",
|
|
981
|
+
"DROP",
|
|
982
|
+
"CREATE INDEX",
|
|
983
|
+
"CREATE CONSTRAINT",
|
|
984
|
+
"REMOVE",
|
|
985
|
+
"SET",
|
|
986
|
+
"MERGE",
|
|
987
|
+
"CREATE",
|
|
988
|
+
"CALL",
|
|
989
|
+
"LOAD CSV",
|
|
990
|
+
"FOREACH",
|
|
991
|
+
}
|
|
992
|
+
)
|
|
967
993
|
|
|
968
994
|
# (H) Tool success messages
|
|
969
995
|
MSG_SURGICAL_SUCCESS = "Successfully applied surgical code replacement in: {path}"
|
|
@@ -1572,6 +1598,9 @@ GOMOD_COMMENT_PREFIX = "//"
|
|
|
1572
1598
|
# (H) Gemfile parsing patterns
|
|
1573
1599
|
GEMFILE_GEM_PREFIX = "gem "
|
|
1574
1600
|
|
|
1601
|
+
# (H) Incremental update hash cache
|
|
1602
|
+
HASH_CACHE_FILENAME = ".cgr-hash-cache.json"
|
|
1603
|
+
|
|
1575
1604
|
# (H) Import processor cache config
|
|
1576
1605
|
IMPORT_CACHE_TTL = 3600
|
|
1577
1606
|
IMPORT_CACHE_DIR = ".cache/codebase_rag"
|
|
@@ -126,3 +126,24 @@ def build_merge_relationship_query(
|
|
|
126
126
|
)
|
|
127
127
|
query += CYPHER_SET_PROPS_RETURN_COUNT if has_props else CYPHER_RETURN_COUNT
|
|
128
128
|
return query
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def build_create_node_query(label: str, id_key: str) -> str:
|
|
132
|
+
return f"CREATE (n:{label} {{{id_key}: row.id}})\nSET n += row.props"
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def build_create_relationship_query(
|
|
136
|
+
from_label: str,
|
|
137
|
+
from_key: str,
|
|
138
|
+
rel_type: str,
|
|
139
|
+
to_label: str,
|
|
140
|
+
to_key: str,
|
|
141
|
+
has_props: bool = False,
|
|
142
|
+
) -> str:
|
|
143
|
+
query = (
|
|
144
|
+
f"MATCH (a:{from_label} {{{from_key}: row.from_val}}), "
|
|
145
|
+
f"(b:{to_label} {{{to_key}: row.to_val}})\n"
|
|
146
|
+
f"CREATE (a)-[r:{rel_type}]->(b)\n"
|
|
147
|
+
)
|
|
148
|
+
query += CYPHER_SET_PROPS_RETURN_COUNT if has_props else CYPHER_RETURN_COUNT
|
|
149
|
+
return query
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
from . import constants as cs
|
|
11
|
+
from . import exceptions as ex
|
|
12
|
+
from . import logs as ls
|
|
13
|
+
from .config import settings
|
|
14
|
+
from .utils.dependencies import has_torch, has_transformers
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EmbeddingCache:
|
|
18
|
+
__slots__ = ("_cache", "_path")
|
|
19
|
+
|
|
20
|
+
def __init__(self, path: Path | None = None) -> None:
|
|
21
|
+
self._cache: dict[str, list[float]] = {}
|
|
22
|
+
self._path = path
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def _content_hash(content: str) -> str:
|
|
26
|
+
return hashlib.sha256(content.encode()).hexdigest()
|
|
27
|
+
|
|
28
|
+
def get(self, content: str) -> list[float] | None:
|
|
29
|
+
return self._cache.get(self._content_hash(content))
|
|
30
|
+
|
|
31
|
+
def put(self, content: str, embedding: list[float]) -> None:
|
|
32
|
+
self._cache[self._content_hash(content)] = embedding
|
|
33
|
+
|
|
34
|
+
def get_many(self, snippets: list[str]) -> dict[int, list[float]]:
|
|
35
|
+
results: dict[int, list[float]] = {}
|
|
36
|
+
for i, snippet in enumerate(snippets):
|
|
37
|
+
if (cached := self.get(snippet)) is not None:
|
|
38
|
+
results[i] = cached
|
|
39
|
+
return results
|
|
40
|
+
|
|
41
|
+
def put_many(self, snippets: list[str], embeddings: list[list[float]]) -> None:
|
|
42
|
+
for snippet, embedding in zip(snippets, embeddings):
|
|
43
|
+
self.put(snippet, embedding)
|
|
44
|
+
|
|
45
|
+
def save(self) -> None:
|
|
46
|
+
if self._path is None:
|
|
47
|
+
return
|
|
48
|
+
try:
|
|
49
|
+
self._path.parent.mkdir(parents=True, exist_ok=True)
|
|
50
|
+
with self._path.open("w", encoding="utf-8") as f:
|
|
51
|
+
json.dump(self._cache, f)
|
|
52
|
+
except Exception as e:
|
|
53
|
+
logger.warning(ls.EMBEDDING_CACHE_SAVE_FAILED, path=self._path, error=e)
|
|
54
|
+
|
|
55
|
+
def load(self) -> None:
|
|
56
|
+
if self._path is None or not self._path.exists():
|
|
57
|
+
return
|
|
58
|
+
try:
|
|
59
|
+
with self._path.open("r", encoding="utf-8") as f:
|
|
60
|
+
self._cache = json.load(f)
|
|
61
|
+
logger.debug(
|
|
62
|
+
ls.EMBEDDING_CACHE_LOADED, count=len(self._cache), path=self._path
|
|
63
|
+
)
|
|
64
|
+
except Exception as e:
|
|
65
|
+
logger.warning(ls.EMBEDDING_CACHE_LOAD_FAILED, path=self._path, error=e)
|
|
66
|
+
self._cache = {}
|
|
67
|
+
|
|
68
|
+
def clear(self) -> None:
|
|
69
|
+
self._cache.clear()
|
|
70
|
+
|
|
71
|
+
def __len__(self) -> int:
|
|
72
|
+
return len(self._cache)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
_embedding_cache: EmbeddingCache | None = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_embedding_cache() -> EmbeddingCache:
|
|
79
|
+
global _embedding_cache
|
|
80
|
+
if _embedding_cache is None:
|
|
81
|
+
cache_path = Path(settings.QDRANT_DB_PATH) / cs.EMBEDDING_CACHE_FILENAME
|
|
82
|
+
_embedding_cache = EmbeddingCache(path=cache_path)
|
|
83
|
+
_embedding_cache.load()
|
|
84
|
+
return _embedding_cache
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def clear_embedding_cache() -> None:
|
|
88
|
+
global _embedding_cache
|
|
89
|
+
if _embedding_cache is not None:
|
|
90
|
+
_embedding_cache.clear()
|
|
91
|
+
_embedding_cache = None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
if has_torch() and has_transformers():
|
|
95
|
+
import numpy as np
|
|
96
|
+
import torch
|
|
97
|
+
from numpy.typing import NDArray
|
|
98
|
+
|
|
99
|
+
from .unixcoder import UniXcoder
|
|
100
|
+
|
|
101
|
+
@lru_cache(maxsize=1)
|
|
102
|
+
def get_model() -> UniXcoder:
|
|
103
|
+
model = UniXcoder(cs.UNIXCODER_MODEL)
|
|
104
|
+
model.eval()
|
|
105
|
+
if torch.cuda.is_available():
|
|
106
|
+
model = model.cuda()
|
|
107
|
+
return model
|
|
108
|
+
|
|
109
|
+
def embed_code(code: str, max_length: int | None = None) -> list[float]:
|
|
110
|
+
cache = get_embedding_cache()
|
|
111
|
+
if (cached := cache.get(code)) is not None:
|
|
112
|
+
return cached
|
|
113
|
+
|
|
114
|
+
if max_length is None:
|
|
115
|
+
max_length = settings.EMBEDDING_MAX_LENGTH
|
|
116
|
+
model = get_model()
|
|
117
|
+
device = next(model.parameters()).device
|
|
118
|
+
tokens = model.tokenize([code], max_length=max_length)
|
|
119
|
+
tokens_tensor = torch.tensor(tokens).to(device)
|
|
120
|
+
with torch.no_grad():
|
|
121
|
+
_, sentence_embeddings = model(tokens_tensor)
|
|
122
|
+
embedding: NDArray[np.float32] = sentence_embeddings.cpu().numpy()
|
|
123
|
+
result: list[float] = embedding[0].tolist()
|
|
124
|
+
|
|
125
|
+
cache.put(code, result)
|
|
126
|
+
return result
|
|
127
|
+
|
|
128
|
+
def embed_code_batch(
|
|
129
|
+
snippets: list[str],
|
|
130
|
+
max_length: int | None = None,
|
|
131
|
+
batch_size: int = cs.EMBEDDING_DEFAULT_BATCH_SIZE,
|
|
132
|
+
) -> list[list[float]]:
|
|
133
|
+
if not snippets:
|
|
134
|
+
return []
|
|
135
|
+
|
|
136
|
+
if max_length is None:
|
|
137
|
+
max_length = settings.EMBEDDING_MAX_LENGTH
|
|
138
|
+
|
|
139
|
+
cache = get_embedding_cache()
|
|
140
|
+
cached_results = cache.get_many(snippets)
|
|
141
|
+
|
|
142
|
+
if len(cached_results) == len(snippets):
|
|
143
|
+
logger.debug(ls.EMBEDDING_CACHE_HIT, count=len(snippets))
|
|
144
|
+
return [cached_results[i] for i in range(len(snippets))]
|
|
145
|
+
|
|
146
|
+
uncached_indices = [i for i in range(len(snippets)) if i not in cached_results]
|
|
147
|
+
uncached_snippets = [snippets[i] for i in uncached_indices]
|
|
148
|
+
|
|
149
|
+
model = get_model()
|
|
150
|
+
device = next(model.parameters()).device
|
|
151
|
+
|
|
152
|
+
all_new_embeddings: list[list[float]] = []
|
|
153
|
+
for start in range(0, len(uncached_snippets), batch_size):
|
|
154
|
+
batch = uncached_snippets[start : start + batch_size]
|
|
155
|
+
tokens_list = model.tokenize(batch, max_length=max_length, padding=True)
|
|
156
|
+
tokens_tensor = torch.tensor(tokens_list).to(device)
|
|
157
|
+
with torch.no_grad():
|
|
158
|
+
_, sentence_embeddings = model(tokens_tensor)
|
|
159
|
+
batch_np: NDArray[np.float32] = sentence_embeddings.cpu().numpy()
|
|
160
|
+
for row in batch_np:
|
|
161
|
+
all_new_embeddings.append(row.tolist())
|
|
162
|
+
|
|
163
|
+
cache.put_many(uncached_snippets, all_new_embeddings)
|
|
164
|
+
|
|
165
|
+
results: list[list[float]] = [[] for _ in snippets]
|
|
166
|
+
for i, emb in cached_results.items():
|
|
167
|
+
results[i] = emb
|
|
168
|
+
for idx, orig_i in enumerate(uncached_indices):
|
|
169
|
+
results[orig_i] = all_new_embeddings[idx]
|
|
170
|
+
|
|
171
|
+
return results
|
|
172
|
+
|
|
173
|
+
else:
|
|
174
|
+
|
|
175
|
+
def embed_code(code: str, max_length: int | None = None) -> list[float]:
|
|
176
|
+
raise RuntimeError(ex.SEMANTIC_EXTRA)
|
|
177
|
+
|
|
178
|
+
def embed_code_batch(
|
|
179
|
+
snippets: list[str],
|
|
180
|
+
max_length: int | None = None,
|
|
181
|
+
batch_size: int = cs.EMBEDDING_DEFAULT_BATCH_SIZE,
|
|
182
|
+
) -> list[list[float]]:
|
|
183
|
+
raise RuntimeError(ex.SEMANTIC_EXTRA)
|
|
@@ -42,6 +42,7 @@ NO_LANGUAGES = "No Tree-sitter languages available."
|
|
|
42
42
|
# (H) LLM errors
|
|
43
43
|
LLM_INIT_CYPHER = "Failed to initialize CypherGenerator: {error}"
|
|
44
44
|
LLM_INVALID_QUERY = "LLM did not generate a valid query. Output: {output}"
|
|
45
|
+
LLM_DANGEROUS_QUERY = "LLM generated a destructive Cypher query (found '{keyword}'). Query rejected: {query}"
|
|
45
46
|
LLM_GENERATION_FAILED = "Cypher generation failed: {error}"
|
|
46
47
|
LLM_INIT_ORCHESTRATOR = "Failed to initialize RAG Orchestrator: {error}"
|
|
47
48
|
|
|
@@ -13,6 +13,18 @@ from .types_defs import GraphData, GraphMetadata, GraphSummary, PropertyValue
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class GraphLoader:
|
|
16
|
+
__slots__ = (
|
|
17
|
+
"file_path",
|
|
18
|
+
"_data",
|
|
19
|
+
"_nodes",
|
|
20
|
+
"_relationships",
|
|
21
|
+
"_nodes_by_id",
|
|
22
|
+
"_nodes_by_label",
|
|
23
|
+
"_outgoing_rels",
|
|
24
|
+
"_incoming_rels",
|
|
25
|
+
"_property_indexes",
|
|
26
|
+
)
|
|
27
|
+
|
|
16
28
|
def __init__(self, file_path: str):
|
|
17
29
|
self.file_path = Path(file_path)
|
|
18
30
|
self._data: GraphData | None = None
|