devmem-agents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devmem/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """devmem package."""
2
+
3
+ from devmem.main import app
4
+
5
+ __all__ = ["app"]
devmem/api.py ADDED
@@ -0,0 +1,257 @@
1
+ """HTTP API for devmem."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter, HTTPException, Query, Response
6
+
7
+ from devmem.models import (
8
+ ApiResponse,
9
+ ArtifactUpsertRequest,
10
+ ContextPullRequest,
11
+ DecisionUpsertRequest,
12
+ FactUpsertRequest,
13
+ FeedbackRecordRequest,
14
+ HandoffCreateRequest,
15
+ HybridSearchRequest,
16
+ SessionCommitRequest,
17
+ SessionStartRequest,
18
+ TaskSimilarRequest,
19
+ TaskUpdateRequest,
20
+ )
21
+ from devmem.service import NamespaceError, service
22
+
23
# Every route registered on this router is served under the /v1 prefix.
router = APIRouter(prefix="/v1")

# Successor endpoint for the deprecated single-purpose write routes. Those
# routes keep working, but each response carries a `Deprecation: true` header
# (plus a Link to this path) so clients know to migrate to
# POST /v1/sessions/commit.
_DEPRECATED_SUPERSEDED_BY = "/v1/sessions/commit"
28
+
29
+
30
def _handle_namespace(exc: NamespaceError) -> None:
    """Convert a namespace failure into an HTTP 400 response.

    This helper never returns normally: it always raises ``HTTPException``
    with ``str(exc)`` as the detail, chained to the original error.
    """
    bad_request = HTTPException(status_code=400, detail=str(exc))
    raise bad_request from exc
32
+
33
+
34
def _mark_deprecated(response: Response) -> None:
    """Flag *response* as coming from a deprecated endpoint.

    Sets the ``Deprecation`` header and a ``Link`` header that points at the
    successor endpoint with ``rel="successor-version"``.
    """
    headers = response.headers
    headers["Deprecation"] = "true"
    headers["Link"] = '<{}>; rel="successor-version"'.format(_DEPRECATED_SUPERSEDED_BY)
37
+
38
+
39
@router.get("/health", response_model=ApiResponse)
def health() -> ApiResponse:
    """Return whatever ``service.health()`` reports, wrapped in ``ApiResponse``."""
    status = service.health()
    return ApiResponse(data=status)
42
+
43
+
44
@router.post("/sessions/start", response_model=ApiResponse)
def sessions_start(req: SessionStartRequest) -> ApiResponse:
    """Start a session and return its serialized form.

    A ``NamespaceError`` from the service is passed to ``_handle_namespace``,
    which raises the corresponding HTTP error.
    """
    try:
        started = service.start_session(req)
    except NamespaceError as err:
        _handle_namespace(err)
    body = started.model_dump()
    return ApiResponse(data=body)
51
+
52
+
53
@router.post("/sessions/commit", response_model=ApiResponse)
def sessions_commit(req: SessionCommitRequest) -> ApiResponse:
    """Finalize a session in a single call.

    Stands in for the older three-request sequence (`/artifacts/upsert`,
    `/decisions/upsert`, `/tasks/update`) by sending artifacts, decisions, an
    optional handoff and an optional task update to the service together.
    When `client_commit_id` is supplied it acts as an idempotency key: a
    retry with the same id returns the original result without re-inserting
    any rows.
    """
    try:
        artifact_rows = [artifact.model_dump() for artifact in req.artifacts]
        decision_rows = [decision.model_dump() for decision in req.decisions]

        handoff_payload = None
        if req.handoff:
            handoff_payload = req.handoff.model_dump()

        task_payload = None
        if req.task_update:
            task_payload = req.task_update.model_dump()

        outcome = service.commit_session(
            namespace=req.namespace,
            session_id=req.session_id,
            artifacts=artifact_rows,
            decisions=decision_rows,
            handoff=handoff_payload,
            task_update=task_payload,
            client_commit_id=req.client_commit_id,
        )
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=outcome)
75
+
76
+
77
@router.post("/context/pull", response_model=ApiResponse)
def context_pull(req: ContextPullRequest) -> ApiResponse:
    """Fetch context for a session/task via ``service.pull_context``.

    The request fields are forwarded unchanged; the service's return value
    becomes the response ``data``.
    """
    query = {
        "namespace": req.namespace,
        "session_id": req.session_id,
        "project": req.project,
        "repo": req.repo,
        "task": req.task,
        "top_k": req.top_k,
    }
    try:
        context = service.pull_context(**query)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=context)
91
+
92
+
93
@router.post("/search/hybrid", response_model=ApiResponse)
def search_hybrid(req: HybridSearchRequest) -> ApiResponse:
    """Run ``service.hybrid_search`` and return the hits under ``results``."""
    try:
        hits = service.hybrid_search(
            namespace=req.namespace,
            q=req.q,
            project=req.project,
            repo=req.repo,
            top_k=req.top_k,
        )
    except NamespaceError as err:
        _handle_namespace(err)
    body = {"results": hits}
    return ApiResponse(data=body)
106
+
107
+
108
@router.post("/tasks/similar", response_model=ApiResponse)
def tasks_similar(req: TaskSimilarRequest) -> ApiResponse:
    """Look up similar tasks via ``service.similar_tasks``; hits go under ``results``."""
    try:
        matches = service.similar_tasks(
            namespace=req.namespace,
            q=req.q,
            project=req.project,
            repo=req.repo,
            top_k=req.top_k,
        )
    except NamespaceError as err:
        _handle_namespace(err)
    body = {"results": matches}
    return ApiResponse(data=body)
121
+
122
+
123
@router.post("/artifacts/upsert", response_model=ApiResponse)
def artifacts_upsert(req: ArtifactUpsertRequest, response: Response) -> ApiResponse:
    """Upsert one artifact (deprecated).

    New clients should call POST /v1/sessions/commit, which writes several
    record kinds atomically. The deprecation headers are attached before the
    write is attempted.
    """
    _mark_deprecated(response)
    try:
        fields = req.model_dump()
        stored = service.upsert_artifact(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=stored)
132
+
133
+
134
@router.get("/artifacts", response_model=ApiResponse)
def artifacts_list(
    namespace: str = Query(...),
    project: str | None = Query(default=None),
    repo: str | None = Query(default=None),
    session_id: str | None = Query(default=None),
    artifact_type: str | None = Query(default=None),
    since: str | None = Query(default=None, description="ISO-8601 timestamp; rows created_at >= since"),
    limit: int = Query(default=50, ge=1, le=500),
    offset: int = Query(default=0, ge=0),
) -> ApiResponse:
    """List artifacts in a namespace with optional filters and paging.

    Only ``namespace`` is required. ``limit`` is bounded to 1..500 and
    ``offset`` must be non-negative; both are echoed back next to the rows.
    """
    filters = {
        "namespace": namespace,
        "project": project,
        "repo": repo,
        "session_id": session_id,
        "artifact_type": artifact_type,
        "since": since,
        "limit": limit,
        "offset": offset,
    }
    try:
        rows = service.list_artifacts(**filters)
    except NamespaceError as err:
        _handle_namespace(err)
    page = {"results": rows, "limit": limit, "offset": offset}
    return ApiResponse(data=page)
159
+
160
+
161
@router.post("/facts/upsert", response_model=ApiResponse)
def facts_upsert(req: FactUpsertRequest) -> ApiResponse:
    """Upsert one fact; the full request dump is passed to the service."""
    try:
        fields = req.model_dump()
        stored = service.upsert_fact(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=stored)
168
+
169
+
170
@router.post("/decisions/upsert", response_model=ApiResponse)
def decisions_upsert(req: DecisionUpsertRequest, response: Response) -> ApiResponse:
    """Upsert one decision (deprecated).

    New clients should call POST /v1/sessions/commit, which writes several
    record kinds atomically. The deprecation headers are attached before the
    write is attempted.
    """
    _mark_deprecated(response)
    try:
        fields = req.model_dump()
        stored = service.upsert_decision(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=stored)
179
+
180
+
181
@router.get("/decisions", response_model=ApiResponse)
def decisions_list(
    namespace: str = Query(...),
    project: str | None = Query(default=None),
    repo: str | None = Query(default=None),
    session_id: str | None = Query(default=None),
    since: str | None = Query(default=None),
    limit: int = Query(default=50, ge=1, le=500),
    offset: int = Query(default=0, ge=0),
) -> ApiResponse:
    """List decisions in a namespace with optional filters and paging.

    Only ``namespace`` is required. ``limit`` is bounded to 1..500 and
    ``offset`` must be non-negative; both are echoed back next to the rows.
    """
    filters = {
        "namespace": namespace,
        "project": project,
        "repo": repo,
        "session_id": session_id,
        "since": since,
        "limit": limit,
        "offset": offset,
    }
    try:
        rows = service.list_decisions(**filters)
    except NamespaceError as err:
        _handle_namespace(err)
    page = {"results": rows, "limit": limit, "offset": offset}
    return ApiResponse(data=page)
204
+
205
+
206
@router.post("/handoffs/create", response_model=ApiResponse)
def handoffs_create(req: HandoffCreateRequest) -> ApiResponse:
    """Create a handoff record; the full request dump is passed to the service."""
    try:
        fields = req.model_dump()
        created = service.create_handoff(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=created)
213
+
214
+
215
@router.get("/handoffs", response_model=ApiResponse)
def handoffs_list(
    namespace: str = Query(...),
    project: str | None = Query(default=None),
    repo: str | None = Query(default=None),
    session_id: str | None = Query(default=None),
    since: str | None = Query(default=None),
    limit: int = Query(default=50, ge=1, le=500),
    offset: int = Query(default=0, ge=0),
) -> ApiResponse:
    """List handoffs in a namespace with optional filters and paging.

    Only ``namespace`` is required. ``limit`` is bounded to 1..500 and
    ``offset`` must be non-negative; both are echoed back next to the rows.
    """
    filters = {
        "namespace": namespace,
        "project": project,
        "repo": repo,
        "session_id": session_id,
        "since": since,
        "limit": limit,
        "offset": offset,
    }
    try:
        rows = service.list_handoffs(**filters)
    except NamespaceError as err:
        _handle_namespace(err)
    page = {"results": rows, "limit": limit, "offset": offset}
    return ApiResponse(data=page)
238
+
239
+
240
@router.post("/tasks/update", response_model=ApiResponse)
def tasks_update(req: TaskUpdateRequest, response: Response) -> ApiResponse:
    """Update a task (deprecated).

    New clients should call POST /v1/sessions/commit and pass ``task_update``
    in the body. The deprecation headers are attached before the write is
    attempted.
    """
    _mark_deprecated(response)
    try:
        fields = req.model_dump()
        updated = service.update_task(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=updated)
249
+
250
+
251
@router.post("/feedback/record", response_model=ApiResponse)
def feedback_record(req: FeedbackRecordRequest) -> ApiResponse:
    """Record feedback; the full request dump is passed to the service."""
    try:
        fields = req.model_dump()
        recorded = service.record_feedback(req.namespace, fields)
    except NamespaceError as err:
        _handle_namespace(err)
    return ApiResponse(data=recorded)
devmem/config.py ADDED
@@ -0,0 +1,34 @@
1
+ """Configuration for devmem."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import Field
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
class Settings(BaseSettings):
    """Runtime configuration for devmem.

    Values come from environment variables carrying the ``DEVMEM_`` prefix
    and from a local ``.env`` file; settings not declared here are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="DEVMEM_",
        env_file=".env",
        extra="ignore",
    )

    # Service identity.
    namespace: str = Field(default="devlib_v1")
    service_name: str = Field(default="devmem-gateway")
    service_version: str = Field(default="0.1.0")

    # Backing-service endpoints (placeholders until real integration lands).
    milvus_uri: str | None = Field(default=None)
    neptune_endpoint: str | None = Field(default=None)
    aurora_dsn: str | None = Field(default=None)

    # sentence-transformers model used for embeddings. If it cannot be
    # loaded, devmem falls back to SHA-256 pseudo-embeddings. `embedding_dim`
    # has to equal the model's output width and the Milvus collection schema.
    embedding_model: str = Field(default="sentence-transformers/all-MiniLM-L6-v2")
    embedding_dim: int = Field(default=384)

    # SQLAlchemy-style DSN for the record store, for example
    # postgresql+psycopg://user:pw@host/db. Left unset, devmem keeps records
    # in memory, which suits tests but not production.
    record_store_dsn: str | None = Field(default=None)


# Shared settings instance, built once at import time.
settings = Settings()
devmem/embeddings.py ADDED
@@ -0,0 +1,119 @@
1
+ """Embedding provider for devmem.
2
+
3
+ Uses `sentence-transformers/all-MiniLM-L6-v2` (384-dim) by default. The model
4
+ is lazy-loaded on first use and cached for the process lifetime. If the
5
+ `sentence-transformers` package is not installed (or model download fails),
6
+ the provider falls back to a deterministic SHA-256 pseudo-embedding and logs
7
+ a loud warning so operators know vector search quality is degraded.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import logging
14
+ import threading
15
+ from typing import Iterable
16
+
17
+ import numpy as np
18
+
19
+ from devmem.config import settings
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ _MODEL_LOCK = threading.Lock()
24
+ _MODEL: object | None = None
25
+ _USING_FALLBACK: bool = False
26
+
27
+
28
def embedding_dim() -> int:
    """Return ``settings.embedding_dim`` coerced to ``int``.

    The value has to agree with the Milvus collection schema.
    """
    configured = settings.embedding_dim
    return int(configured)
31
+
32
+
33
def is_fallback_active() -> bool:
    """Tell whether pseudo-embeddings are in use because the real model failed to load."""
    fallback_flag = _USING_FALLBACK
    return fallback_flag
36
+
37
+
38
def _load_model() -> object | None:
    """Attempt to build the configured sentence-transformers model.

    Returns the model instance, or ``None`` when either the package import or
    the model construction fails. Both failures are logged (warning for the
    missing package, error with traceback for a failed load) and never raised.
    """
    try:
        from sentence_transformers import SentenceTransformer  # type: ignore
    except Exception as import_err:
        logger.warning(
            "sentence-transformers not available (%s); devmem will use SHA-256 "
            "pseudo-embeddings. Install `sentence-transformers` to enable semantic search.",
            import_err,
        )
        return None

    model_name = settings.embedding_model
    try:
        loaded = SentenceTransformer(model_name)
        logger.info("Loaded embedding model=%s dim=%s", model_name, embedding_dim())
    except Exception as load_err:
        logger.error(
            "Failed to load embedding model %s (%s); falling back to SHA-256 pseudo-embeddings.",
            model_name,
            load_err,
            exc_info=True,
        )
        return None
    return loaded
61
+
62
+
63
def _get_model() -> object | None:
    """Return the cached model, loading it on first use.

    The load is attempted at most once per process: after a failure the
    fallback flag is set and later calls return ``None`` without retrying.
    The state is checked once without the lock and again while holding it,
    so only one caller performs the load.
    """
    global _MODEL, _USING_FALLBACK
    resolved = _MODEL is not None or _USING_FALLBACK
    if not resolved:
        with _MODEL_LOCK:
            # Re-check under the lock: another thread may have finished first.
            if not (_MODEL is not None or _USING_FALLBACK):
                _MODEL = _load_model()
                _USING_FALLBACK = _MODEL is None
    return _MODEL
73
+
74
+
75
def _pseudo_embed(text: str) -> list[float]:
    """Build a deterministic, non-semantic stand-in vector from a SHA-256 digest.

    The digest bytes are repeated to fill ``embedding_dim()`` slots, mapped
    from 0..255 to -0.5..0.5, and scaled to unit length when the norm is
    non-zero.
    """
    width = embedding_dim()
    payload = (text or "").encode("utf-8", errors="ignore")
    seed = hashlib.sha256(payload).digest()
    tiled = seed * ((width // len(seed)) + 1)
    values = np.frombuffer(tiled[:width], dtype=np.uint8).astype(np.float32)
    values = (values / 255.0) - 0.5
    length = float(np.linalg.norm(values))
    if length > 0:
        values /= length
    return values.tolist()
87
+
88
+
89
def embed_one(text: str) -> list[float]:
    """Embed a single string.

    Uses the loaded model with normalized output when available; otherwise
    returns the hash-based pseudo-embedding. A falsy ``text`` is embedded as
    the empty string.
    """
    encoder = _get_model()
    if encoder is None:
        return _pseudo_embed(text)
    row = encoder.encode([text or ""], normalize_embeddings=True)[0]
    return list(map(float, row))
96
+
97
+
98
def embed_many(texts: Iterable[str]) -> list[list[float]]:
    """Embed a batch of strings, one vector per input in order.

    Falsy entries are embedded as the empty string. An empty batch returns
    ``[]`` without touching the model. Without a model every entry gets its
    pseudo-embedding.
    """
    batch = []
    for entry in texts:
        batch.append(entry or "")
    if not batch:
        return []

    encoder = _get_model()
    if encoder is None:
        return list(map(_pseudo_embed, batch))

    rows = encoder.encode(batch, normalize_embeddings=True, batch_size=32)
    return [list(map(float, row)) for row in rows]
108
+
109
+
110
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of *a* and *b*.

    Inputs need not be unit-normed. The result is ``0.0`` when either vector
    is empty or has zero norm.
    """
    if a and b:
        left = np.asarray(a, dtype=np.float32)
        right = np.asarray(b, dtype=np.float32)
        scale = float(np.linalg.norm(left)) * float(np.linalg.norm(right))
        if scale != 0.0:
            return float(np.dot(left, right) / scale)
    return 0.0
devmem/ingest.py ADDED
@@ -0,0 +1,184 @@
1
+ """Repository scanning and memory ingestion pipeline for devmem."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import logging
7
+ import os
8
+ import subprocess
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ from typing import Iterable
12
+
13
+ from devmem.live_backend import LiveBackendConfig
14
+ from devmem.storage.milvus_store import MilvusStore
15
+ from devmem.storage.neptune_store import NeptuneStore
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _ALLOWED_EXTENSIONS = {
20
+ ".py", ".md", ".toml", ".json", ".yaml", ".yml", ".js", ".ts", ".tsx", ".jsx",
21
+ ".css", ".html", ".sql", ".sh", ".txt", ".ini", ".cfg", ".conf", ".rst", ".csv", ".svg",
22
+ }
23
+ _ALLOWED_FILENAMES = {
24
+ "Makefile",
25
+ "Dockerfile",
26
+ "Jenkinsfile",
27
+ "Procfile",
28
+ }
29
+
30
+ _SKIP_PREFIXES = {
31
+ ".git/",
32
+ "node_modules/",
33
+ ".venv/",
34
+ "venv/",
35
+ "data/",
36
+ "logs/",
37
+ "__pycache__/",
38
+ ".pytest_cache/",
39
+ "react-app/node_modules/",
40
+ }
41
+
42
+
43
@dataclass
class IngestResult:
    """Counters describing one repository ingestion run; all start at zero."""

    # Number of candidate files returned by the repository scan.
    files_seen: int = 0
    # Files whose chunks were written to the vector store.
    files_indexed: int = 0
    # Files passed over (unreadable, binary, filtered out, or without chunks).
    files_skipped: int = 0
    # Total chunk count reported by the vector store across indexed files.
    chunks_written: int = 0
    # File nodes upserted into the knowledge graph.
    kg_file_nodes: int = 0
50
+
51
+
52
+ def _is_binary(blob: bytes) -> bool:
53
+ return b"\x00" in blob
54
+
55
+
56
+ def _sha256(blob: bytes) -> str:
57
+ return hashlib.sha256(blob).hexdigest()
58
+
59
+
60
+ def _chunk_text(text: str, *, chunk_size: int = 1800, overlap: int = 200) -> list[str]:
61
+ if not text.strip():
62
+ return []
63
+ if chunk_size <= overlap:
64
+ raise ValueError("chunk_size must be greater than overlap")
65
+
66
+ chunks: list[str] = []
67
+ start = 0
68
+ n = len(text)
69
+ while start < n:
70
+ end = min(start + chunk_size, n)
71
+ chunks.append(text[start:end])
72
+ if end >= n:
73
+ break
74
+ start = max(0, end - overlap)
75
+ return chunks
76
+
77
+
78
def _tracked_files(repo_path: Path) -> list[Path]:
    """List candidate files under *repo_path*.

    Prefers ``git ls-files -z`` so only tracked files are returned. If the
    git invocation fails for any reason, a warning is logged and every file
    found by walking the directory tree is returned instead.

    Args:
        repo_path: Root directory of the repository to scan.

    Returns:
        Paths rooted at *repo_path*. In git mode, entries that are not regular
        files on disk are dropped.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(repo_path), "ls-files", "-z"],
            check=True,
            capture_output=True,
        )
    except Exception:
        # Deliberate best-effort: any failure to run git falls back to a walk.
        logger.warning("git ls-files failed; falling back to filesystem walk", exc_info=True)
        walked: list[Path] = []
        for root, _dirs, names in os.walk(repo_path):
            walked.extend(Path(root) / name for name in names)
        return walked

    # `-z` emits raw NUL-separated path bytes. os.fsdecode maps them to str
    # with the filesystem encoding so non-UTF-8 names still resolve on disk;
    # decoding with errors="ignore" would silently alter such names and the
    # files would then be dropped by the is_file() check.
    tracked = (
        repo_path / os.fsdecode(raw_name)
        for raw_name in proc.stdout.split(b"\x00")
        if raw_name
    )
    return [path for path in tracked if path.is_file()]
95
+
96
+
97
def _should_index(rel_path: str, suffix: str, size_bytes: int) -> bool:
    """Decide whether a file qualifies for indexing.

    A file qualifies when it is at most 512 KiB, has an allowed extension or
    an allowed bare file name, and its repository-relative path does not
    start with any skipped prefix (backslashes are treated as slashes).
    """
    max_bytes = 512 * 1024
    if size_bytes > max_bytes:
        return False

    known_extension = suffix.lower() in _ALLOWED_EXTENSIONS
    known_filename = Path(rel_path).name in _ALLOWED_FILENAMES
    if not (known_extension or known_filename):
        return False

    posix_path = rel_path.replace("\\", "/")
    return not posix_path.startswith(tuple(_SKIP_PREFIXES))
108
+
109
+
110
def ingest_repository(
    *,
    repo_path: Path,
    project_id: str,
    project_name: str,
    repo_id: str,
    namespace: str,
    cfg: LiveBackendConfig,
) -> IngestResult:
    """Scan a repository and write memory records to Milvus and Neptune.

    Connects to both stores, upserts the project node, calls
    ``milvus.clear_project_data()`` (presumably removing this project's
    previous vectors; confirm against ``MilvusStore``), then walks the
    candidate files. Each indexable text file is chunked, registered as a
    file node in Neptune and has its chunks replaced in Milvus.

    Args:
        repo_path: Root directory of the repository.
        project_id: Identifier passed to both stores.
        project_name: Human-readable name stored on the project node.
        repo_id: Identifier of the repository within the project.
        namespace: Namespace passed to both stores.
        cfg: Backend connection configuration.

    Returns:
        An ``IngestResult`` with the counters for this run.

    Raises:
        Whatever the stores raise while connecting or writing. Per-file stat
        and read errors are not raised; the file is counted as skipped.
    """
    result = IngestResult()

    milvus = MilvusStore(cfg, namespace=namespace, project_id=project_id, repo_id=repo_id)
    neptune = NeptuneStore(cfg, namespace=namespace)

    try:
        milvus.connect()
        neptune.connect()
        neptune.health_check()

        neptune.upsert_project(
            project_id=project_id,
            name=project_name,
            repo_path=str(repo_path),
            repo_id=repo_id,
        )
        milvus.clear_project_data()

        tracked = _tracked_files(repo_path)
        result.files_seen = len(tracked)

        for file_path in tracked:
            rel_path = file_path.relative_to(repo_path).as_posix()
            suffix = file_path.suffix.lower()

            # Filter on metadata first so oversized or irrelevant files are
            # never loaded into memory just to be rejected.
            try:
                size_on_disk = file_path.stat().st_size
            except OSError:
                logger.debug("Skipping unreadable file path=%s", rel_path, exc_info=True)
                result.files_skipped += 1
                continue

            if not _should_index(rel_path, suffix, size_on_disk):
                result.files_skipped += 1
                continue

            try:
                blob = file_path.read_bytes()
            except Exception:
                logger.debug("Skipping unreadable file path=%s", rel_path, exc_info=True)
                result.files_skipped += 1
                continue

            # Re-check against the bytes actually read: the size cap must hold
            # for the real content even if the file changed after stat().
            if _is_binary(blob) or not _should_index(rel_path, suffix, len(blob)):
                result.files_skipped += 1
                continue

            text = blob.decode("utf-8", errors="ignore")
            sha = _sha256(blob)
            chunks = _chunk_text(text)
            if not chunks:
                result.files_skipped += 1
                continue

            neptune.upsert_file(
                project_id=project_id,
                repo_id=repo_id,
                path=rel_path,
                ext=suffix,
                sha=sha,
                size_bytes=len(blob),
            )
            result.kg_file_nodes += 1

            inserted = milvus.replace_file_chunks(path=rel_path, sha=sha, chunks=chunks)
            result.chunks_written += inserted
            result.files_indexed += 1

        milvus.flush()
        return result
    finally:
        # Close both stores even if closing the first one raises.
        try:
            neptune.close()
        finally:
            milvus.close()