devmem-agents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devmem/__init__.py +5 -0
- devmem/api.py +257 -0
- devmem/config.py +34 -0
- devmem/embeddings.py +119 -0
- devmem/ingest.py +184 -0
- devmem/live_backend.py +344 -0
- devmem/main.py +11 -0
- devmem/models.py +157 -0
- devmem/retrieval_eval.py +145 -0
- devmem/service.py +280 -0
- devmem/storage/__init__.py +4 -0
- devmem/storage/milvus_store.py +321 -0
- devmem/storage/neptune_store.py +194 -0
- devmem/storage/record_store.py +974 -0
- devmem_agents-0.1.0.dist-info/METADATA +100 -0
- devmem_agents-0.1.0.dist-info/RECORD +19 -0
- devmem_agents-0.1.0.dist-info/WHEEL +5 -0
- devmem_agents-0.1.0.dist-info/licenses/LICENSE +21 -0
- devmem_agents-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Neptune adapter for devmem repository ingestion."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import boto3
|
|
11
|
+
import urllib3
|
|
12
|
+
from botocore import UNSIGNED
|
|
13
|
+
from botocore.config import Config as BotoConfig
|
|
14
|
+
|
|
15
|
+
from devmem.live_backend import LiveBackendConfig, neptune_tunnel
|
|
16
|
+
|
|
17
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NeptuneStore:
    """Write project/file lineage facts into Neptune (openCypher).

    The store owns two resources: the context manager returned by
    ``neptune_tunnel(cfg)`` and a boto3 ``neptunedata`` client.  Call
    :meth:`connect` before issuing queries and :meth:`close` afterwards, or
    use the instance as a context manager::

        with NeptuneStore(cfg, namespace="dev") as store:
            store.health_check()

    Every node and relationship written by this class carries an ``ns``
    property equal to ``namespace``; every read filters on it.
    """

    def __init__(self, cfg: LiveBackendConfig, *, namespace: str) -> None:
        """Remember configuration only; no network activity happens here.

        Args:
            cfg: Backend settings.  The attributes read by this class are
                ``neptune_use_https``, ``neptune_region``,
                ``neptune_use_ssh_tunnel``, ``neptune_iam_auth``,
                ``neptune_endpoint`` and ``neptune_port``.
            namespace: Value stored in / matched against the ``ns`` property.
        """
        self.cfg = cfg
        self.namespace = namespace
        self._client: Any | None = None
        self._tunnel_ctx = None
        self._endpoint_host: str | None = None
        self._endpoint_port: int | None = None

    def __enter__(self) -> NeptuneStore:
        """Connect (unless a client already exists) and return ``self``."""
        if self._client is None:
            self.connect()
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Release the client and tunnel; exceptions are never suppressed."""
        self.close()

    def connect(self) -> None:
        """Connect to Neptune, optionally through SSH tunnel.

        Any connection held from an earlier call is closed first so that a
        repeated ``connect()`` cannot orphan a tunnel.  If building the client
        fails, the freshly opened tunnel context is exited before the error
        propagates.

        Raises:
            Exception: whatever ``neptune_tunnel`` or boto3 raises.
        """
        self.close()

        tunnel_ctx = neptune_tunnel(self.cfg)
        host, port = tunnel_ctx.__enter__()
        try:
            client = self._build_client(host, port)
        except BaseException as exc:
            # Do not leak the tunnel when client construction fails.
            tunnel_ctx.__exit__(type(exc), exc, exc.__traceback__)
            raise

        # Publish state only once everything succeeded, so close() never
        # sees a context manager that was not entered.
        self._tunnel_ctx = tunnel_ctx
        self._endpoint_host = host
        self._endpoint_port = port
        self._client = client

    def _build_client(self, host: str, port: int) -> Any:
        """Create the boto3 ``neptunedata`` client for ``host:port``."""
        scheme = "https" if self.cfg.neptune_use_https else "http"
        kwargs: dict[str, Any] = {
            "endpoint_url": f"{scheme}://{host}:{port}",
            "region_name": self.cfg.neptune_region,
        }

        if self.cfg.neptune_use_ssh_tunnel:
            # NOTE(review): certificate verification is switched off whenever
            # the tunnel is used -- presumably because the certificate names
            # the real endpoint rather than the tunnelled address; confirm.
            # disable_warnings() silences InsecureRequestWarning for the whole
            # process, not only for this client.
            kwargs["verify"] = False
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        if not self.cfg.neptune_iam_auth:
            # Without IAM auth, requests are sent unsigned.
            kwargs["config"] = BotoConfig(signature_version=UNSIGNED)

        client = boto3.Session().client("neptunedata", **kwargs)

        if self.cfg.neptune_use_ssh_tunnel and self.cfg.neptune_iam_auth and self.cfg.neptune_endpoint:
            sign_host = self.cfg.neptune_endpoint

            def _inject_host_header(request, **_kwargs):
                # Runs before signing: the Host header names the configured
                # endpoint instead of the address the client connects to
                # (presumably so the signature matches what Neptune expects).
                request.headers["Host"] = f"{sign_host}:{self.cfg.neptune_port}"

            client.meta.events.register("before-sign.neptunedata.*", _inject_host_header)

        return client

    def close(self) -> None:
        """Release the client and exit the tunnel context when opened.

        Safe to call repeatedly or without a prior :meth:`connect`.  State is
        cleared before any cleanup runs, so the store is left disconnected
        even when cleanup raises; the tunnel is exited even if closing the
        client fails.  After closing, :meth:`endpoint_summary` reports the
        configured endpoint again rather than a stale tunnel address.
        """
        tunnel_ctx, client = self._tunnel_ctx, self._client
        self._tunnel_ctx = None
        self._client = None
        self._endpoint_host = None
        self._endpoint_port = None

        try:
            # Clients that expose close() get their connections released.
            close_client = getattr(client, "close", None)
            if callable(close_client):
                close_client()
        finally:
            if tunnel_ctx is not None:
                tunnel_ctx.__exit__(None, None, None)

    def _exec(self, query: str, params: dict[str, Any]) -> dict[str, Any]:
        """Run an openCypher query with JSON-encoded parameters.

        Raises:
            RuntimeError: if :meth:`connect` has not been called.
        """
        if self._client is None:
            raise RuntimeError("Neptune client is not initialized")
        return self._client.execute_open_cypher_query(
            openCypherQuery=query,
            parameters=json.dumps(params, separators=(",", ":")),
        )

    @staticmethod
    def _coerce_scalar(value: Any) -> Any:
        """Unwrap a ``{"stringValue": ...}``-style dict into its payload.

        The first of ``stringValue``, ``intValue``, ``longValue``,
        ``doubleValue`` and ``booleanValue`` present in the dict wins.
        Anything else, including dicts with none of those keys, is returned
        unchanged.
        """
        if isinstance(value, dict):
            for key in ("stringValue", "intValue", "longValue", "doubleValue", "booleanValue"):
                if key in value:
                    return value[key]
        return value

    def health_check(self) -> None:
        """Validate openCypher query execution."""
        self._exec("RETURN 1 AS ok", {})

    @staticmethod
    def _file_id(project_id: str, path: str) -> str:
        """Return the SHA-256 hex digest of ``project_id`` followed by ``path``.

        NOTE(review): the two parts are hashed back to back with no
        separator, so ("ab", "c") and ("a", "bc") produce the same id.  This
        is kept as-is because ids derived here are stored as ``file_id`` on
        ``DevMemFile`` nodes; changing the scheme would need a migration.
        """
        import hashlib

        digest = hashlib.sha256()
        digest.update(project_id.encode("utf-8"))
        digest.update(path.encode("utf-8"))
        return digest.hexdigest()

    def upsert_project(self, *, project_id: str, name: str, repo_path: str, repo_id: str) -> None:
        """Create or update the ``DevMemProject`` node for ``project_id``.

        The node is matched on ``(project_id, ns)``; ``name``, ``repo_path``,
        ``repo_id`` and ``updated_at`` (current UTC time, ISO-8601) are
        overwritten on every call.
        """
        ts = datetime.now(timezone.utc).isoformat()
        query = """
        MERGE (p:DevMemProject {project_id: $project_id, ns: $ns})
        SET p.name = $name,
            p.repo_path = $repo_path,
            p.repo_id = $repo_id,
            p.updated_at = $updated_at
        RETURN p.project_id AS project_id
        """
        self._exec(
            query,
            {
                "project_id": project_id,
                "ns": self.namespace,
                "name": name,
                "repo_path": repo_path,
                "repo_id": repo_id,
                "updated_at": ts,
            },
        )

    def upsert_file(
        self,
        *,
        project_id: str,
        repo_id: str,
        path: str,
        ext: str,
        sha: str,
        size_bytes: int,
    ) -> None:
        """Create or update a ``DevMemFile`` node and link it to its project.

        The file node is matched on ``(file_id, ns)`` where ``file_id`` comes
        from :meth:`_file_id`.  The project node is merged as well, so it is
        created if it does not exist yet.  A ``HAS_FILE`` relationship from
        project to file is merged and its ``updated_at`` refreshed.
        """
        ts = datetime.now(timezone.utc).isoformat()
        file_id = self._file_id(project_id, path)

        query = """
        MERGE (p:DevMemProject {project_id: $project_id, ns: $ns})
        MERGE (f:DevMemFile {file_id: $file_id, ns: $ns})
        SET f.path = $path,
            f.ext = $ext,
            f.sha = $sha,
            f.size_bytes = $size_bytes,
            f.repo_id = $repo_id,
            f.updated_at = $updated_at
        MERGE (p)-[r:HAS_FILE {ns: $ns}]->(f)
        SET r.updated_at = $updated_at
        RETURN f.file_id AS file_id
        """

        self._exec(
            query,
            {
                "project_id": project_id,
                "repo_id": repo_id,
                "file_id": file_id,
                "ns": self.namespace,
                "path": path,
                "ext": ext,
                "sha": sha,
                "size_bytes": int(size_bytes),
                "updated_at": ts,
            },
        )

    def list_project_files(self, *, project_id: str, repo_id: str | None = None, limit: int = 20000) -> list[str]:
        """Return file paths linked to a project in this namespace.

        Args:
            project_id: Project whose ``HAS_FILE`` edges are followed.
            repo_id: When given, only files whose ``repo_id`` matches.
            limit: Maximum number of rows requested from Neptune.

        Returns:
            Non-empty paths as strings, in the order Neptune returned them.

        Raises:
            ValueError: if ``limit`` is negative.
        """
        row_limit = int(limit)
        if row_limit < 0:
            # Fail fast instead of sending an invalid LIMIT to the server.
            raise ValueError("limit must be non-negative")

        # LIMIT is interpolated as a validated integer; everything else is
        # passed as a query parameter.
        query = f"""
        MATCH (p:DevMemProject {{project_id: $project_id, ns: $ns}})-[:HAS_FILE {{ns: $ns}}]->(f:DevMemFile {{ns: $ns}})
        WHERE $repo_id IS NULL OR f.repo_id = $repo_id
        RETURN f.path AS path
        LIMIT {row_limit}
        """
        response = self._exec(
            query,
            {
                "project_id": project_id,
                "repo_id": repo_id,
                "ns": self.namespace,
            },
        )

        paths: list[str] = []
        for row in response.get("results") or []:
            if not isinstance(row, dict):
                continue
            path = self._coerce_scalar(row.get("path"))
            if path:
                paths.append(str(path))
        return paths

    def endpoint_summary(self) -> str:
        """Return ``host:port`` of the live connection, else the configured one."""
        if self._endpoint_host and self._endpoint_port:
            return f"{self._endpoint_host}:{self._endpoint_port}"
        return f"{self.cfg.neptune_endpoint}:{self.cfg.neptune_port}"
|