devmem-agents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,194 @@
1
+ """Neptune adapter for devmem repository ingestion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from datetime import datetime, timezone
8
+ from typing import Any
9
+
10
+ import boto3
11
+ import urllib3
12
+ from botocore import UNSIGNED
13
+ from botocore.config import Config as BotoConfig
14
+
15
+ from devmem.live_backend import LiveBackendConfig, neptune_tunnel
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class NeptuneStore:
    """Write project/file lineage facts into Neptune (openCypher).

    Every node and relationship written or matched by this class carries an
    ``ns`` property set to the store's ``namespace``.

    Typical use is ``connect()`` -> ``upsert_*`` / ``list_project_files`` ->
    ``close()``.
    """

    def __init__(self, cfg: LiveBackendConfig, *, namespace: str) -> None:
        """Remember the configuration; nothing is opened until ``connect()``.

        Args:
            cfg: Backend configuration; its ``neptune_*`` attributes are read
                when connecting.
            namespace: Value stored in the ``ns`` property of everything this
                store writes or matches.
        """
        self.cfg = cfg
        self.namespace = namespace
        self._client: Any | None = None
        self._tunnel_ctx: Any | None = None
        self._endpoint_host: str | None = None
        self._endpoint_port: int | None = None

    def connect(self) -> None:
        """Connect to Neptune, optionally through SSH tunnel.

        Enters the ``neptune_tunnel(cfg)`` context to obtain ``(host, port)``
        and builds a boto3 ``neptunedata`` client against that address.

        The tunnel is released again if building the client fails, and a
        tunnel left open by an earlier ``connect()`` is closed first, so no
        call path leaves an entered tunnel context behind.
        """
        # Re-connecting must not orphan a tunnel opened by an earlier call.
        if self._tunnel_ctx is not None:
            self.close()

        tunnel_ctx = neptune_tunnel(self.cfg)
        host, port = tunnel_ctx.__enter__()
        # Only remember the context once it has really been entered, so that
        # close() never calls __exit__ on a context that was not entered.
        self._tunnel_ctx = tunnel_ctx
        self._endpoint_host = host
        self._endpoint_port = port

        try:
            self._client = self._build_client(host, port)
        except BaseException:
            # Release the tunnel before propagating the failure; BaseException
            # so an interrupt during setup does not leak it either.
            self.close()
            raise

    def _build_client(self, host: str, port: int) -> Any:
        """Create the boto3 ``neptunedata`` client for ``host:port``."""
        cfg = self.cfg
        scheme = "https" if cfg.neptune_use_https else "http"
        client_kwargs: dict[str, Any] = {
            "endpoint_url": f"{scheme}://{host}:{port}",
            "region_name": cfg.neptune_region,
        }

        if cfg.neptune_use_ssh_tunnel:
            # NOTE(review): TLS verification is switched off whenever the SSH
            # tunnel is used -- presumably because the server certificate does
            # not match the tunnel's local address; confirm this is intended.
            # disable_warnings() changes warning filters for the whole process.
            client_kwargs["verify"] = False
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        if not cfg.neptune_iam_auth:
            # Without IAM auth, requests are sent unsigned.
            client_kwargs["config"] = BotoConfig(signature_version=UNSIGNED)

        client = boto3.Session().client("neptunedata", **client_kwargs)

        if cfg.neptune_use_ssh_tunnel and cfg.neptune_iam_auth and cfg.neptune_endpoint:
            sign_host = cfg.neptune_endpoint

            def _inject_host_header(request, **_kwargs):
                # Put the configured endpoint (not the tunnel address) in the
                # Host header before the request is signed.
                request.headers["Host"] = f"{sign_host}:{self.cfg.neptune_port}"

            client.meta.events.register("before-sign.neptunedata.*", _inject_host_header)

        return client

    def close(self) -> None:
        """Close tunnel context when opened.

        The client and tunnel references are always cleared, even when the
        tunnel's ``__exit__`` raises; that exception still propagates.
        """
        tunnel_ctx, self._tunnel_ctx = self._tunnel_ctx, None
        self._client = None
        if tunnel_ctx is not None:
            tunnel_ctx.__exit__(None, None, None)

    def _exec(self, query: str, params: dict[str, Any]) -> dict[str, Any]:
        """Run one openCypher query and return the client's response.

        Args:
            query: openCypher text.
            params: Query parameters; sent as compact JSON.

        Raises:
            RuntimeError: If no client has been created yet.
        """
        client = self._client
        if client is None:
            raise RuntimeError("Neptune client is not initialized")
        encoded_params = json.dumps(params, separators=(",", ":"))
        return client.execute_open_cypher_query(
            openCypherQuery=query,
            parameters=encoded_params,
        )

    @staticmethod
    def _coerce_scalar(value: Any) -> Any:
        """Unwrap a typed-value dict to its plain value.

        If ``value`` is a dict containing one of ``stringValue``, ``intValue``,
        ``longValue``, ``doubleValue`` or ``booleanValue`` (checked in that
        order), the value under the first key found is returned. Anything else
        is returned unchanged.
        """
        if not isinstance(value, dict):
            return value
        for typed_key in ("stringValue", "intValue", "longValue", "doubleValue", "booleanValue"):
            if typed_key in value:
                return value[typed_key]
        return value

    def health_check(self) -> None:
        """Validate openCypher query execution."""
        self._exec("RETURN 1 AS ok", {})

    @staticmethod
    def _file_id(project_id: str, path: str) -> str:
        """Return the SHA-256 hex digest identifying a file within a project.

        NOTE(review): ``project_id`` and ``path`` are hashed back to back with
        no separator, so two different pairs whose concatenation is equal
        (``("ab", "c")`` and ``("a", "bc")``) get the same id. Left unchanged
        because the id is persisted as ``file_id`` on ``DevMemFile`` nodes.
        """
        import hashlib

        digest = hashlib.sha256()
        for part in (project_id, path):
            digest.update(part.encode("utf-8"))
        return digest.hexdigest()

    def upsert_project(self, *, project_id: str, name: str, repo_path: str, repo_id: str) -> None:
        """Create or update the ``DevMemProject`` node for ``project_id``.

        The node is merged on ``(project_id, ns)``; ``name``, ``repo_path``,
        ``repo_id`` and ``updated_at`` (current UTC time, ISO 8601) are
        overwritten on every call.
        """
        query = """
            MERGE (p:DevMemProject {project_id: $project_id, ns: $ns})
            SET p.name = $name,
                p.repo_path = $repo_path,
                p.repo_id = $repo_id,
                p.updated_at = $updated_at
            RETURN p.project_id AS project_id
        """
        params = {
            "project_id": project_id,
            "ns": self.namespace,
            "name": name,
            "repo_path": repo_path,
            "repo_id": repo_id,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        self._exec(query, params)

    def upsert_file(
        self,
        *,
        project_id: str,
        repo_id: str,
        path: str,
        ext: str,
        sha: str,
        size_bytes: int,
    ) -> None:
        """Create or update a ``DevMemFile`` node and link it to its project.

        The project node is merged on ``(project_id, ns)`` and the file node
        on ``(file_id, ns)``, where ``file_id`` comes from ``_file_id``. The
        file properties are overwritten, and a ``HAS_FILE`` relationship from
        project to file is merged and timestamped with the same UTC time.
        """
        query = """
            MERGE (p:DevMemProject {project_id: $project_id, ns: $ns})
            MERGE (f:DevMemFile {file_id: $file_id, ns: $ns})
            SET f.path = $path,
                f.ext = $ext,
                f.sha = $sha,
                f.size_bytes = $size_bytes,
                f.repo_id = $repo_id,
                f.updated_at = $updated_at
            MERGE (p)-[r:HAS_FILE {ns: $ns}]->(f)
            SET r.updated_at = $updated_at
            RETURN f.file_id AS file_id
        """
        params = {
            "project_id": project_id,
            "repo_id": repo_id,
            "file_id": self._file_id(project_id, path),
            "ns": self.namespace,
            "path": path,
            "ext": ext,
            "sha": sha,
            "size_bytes": int(size_bytes),
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        self._exec(query, params)

    def list_project_files(self, *, project_id: str, repo_id: str | None = None, limit: int = 20000) -> list[str]:
        """Return file paths linked to a project in this namespace.

        Args:
            project_id: Project whose ``HAS_FILE`` links are followed.
            repo_id: When not ``None``, only files with this ``repo_id`` are
                returned.
            limit: Maximum number of rows requested.

        Returns:
            The non-empty ``path`` values from the response, as strings.
        """
        # LIMIT is inlined into the query text; int() guarantees that only a
        # number can ever be interpolated there.
        query = f"""
            MATCH (p:DevMemProject {{project_id: $project_id, ns: $ns}})-[:HAS_FILE {{ns: $ns}}]->(f:DevMemFile {{ns: $ns}})
            WHERE $repo_id IS NULL OR f.repo_id = $repo_id
            RETURN f.path AS path
            LIMIT {int(limit)}
        """
        response = self._exec(
            query,
            {
                "project_id": project_id,
                "repo_id": repo_id,
                "ns": self.namespace,
            },
        )

        paths: list[str] = []
        for row in response.get("results") or []:
            if not isinstance(row, dict):
                continue
            path = self._coerce_scalar(row.get("path"))
            if path:
                paths.append(str(path))
        return paths

    def endpoint_summary(self) -> str:
        """Return ``host:port`` for the endpoint in use.

        Uses the address obtained from the tunnel context when both parts are
        set, otherwise the configured ``neptune_endpoint``/``neptune_port``.
        """
        if self._endpoint_host and self._endpoint_port:
            return f"{self._endpoint_host}:{self._endpoint_port}"
        return f"{self.cfg.neptune_endpoint}:{self.cfg.neptune_port}"