data-olympus 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data_olympus/__init__.py +14 -0
  2. data_olympus/_bin/_kb_detect_workspace.sh +57 -0
  3. data_olympus/_bin/_kb_enforce.py +619 -0
  4. data_olympus/_bin/_kb_fallback.py +361 -0
  5. data_olympus/_bin/kb +957 -0
  6. data_olympus/_bin/kb-enforce-hook +337 -0
  7. data_olympus/_bin/opencode/data-olympus-gate.ts +102 -0
  8. data_olympus/audit_log.py +275 -0
  9. data_olympus/audit_trailers.py +42 -0
  10. data_olympus/auth.py +139 -0
  11. data_olympus/cli/__init__.py +1 -0
  12. data_olympus/cli/import_cmd.py +115 -0
  13. data_olympus/cli/indexgen.py +60 -0
  14. data_olympus/cli/main.py +151 -0
  15. data_olympus/cli/report_cmd.py +181 -0
  16. data_olympus/config.py +261 -0
  17. data_olympus/cooccurrence.py +393 -0
  18. data_olympus/dedup.py +57 -0
  19. data_olympus/durable.py +51 -0
  20. data_olympus/embeddings.py +317 -0
  21. data_olympus/enforce_policy.py +297 -0
  22. data_olympus/format/__init__.py +16 -0
  23. data_olympus/format/document.py +39 -0
  24. data_olympus/format/frontmatter.py +35 -0
  25. data_olympus/format/lint.py +92 -0
  26. data_olympus/format/validate.py +71 -0
  27. data_olympus/git_ops.py +397 -0
  28. data_olympus/health.py +114 -0
  29. data_olympus/importer/__init__.py +13 -0
  30. data_olympus/importer/adr.py +286 -0
  31. data_olympus/importer/flat.py +170 -0
  32. data_olympus/importer/model.py +106 -0
  33. data_olympus/importer/okf.py +192 -0
  34. data_olympus/importer/run.py +416 -0
  35. data_olympus/importer/stamp.py +227 -0
  36. data_olympus/index.py +1745 -0
  37. data_olympus/markdown_parse.py +103 -0
  38. data_olympus/models.py +480 -0
  39. data_olympus/onboarding.py +131 -0
  40. data_olympus/onboarding_inflight.py +137 -0
  41. data_olympus/onboarding_playbook.py +99 -0
  42. data_olympus/pending.py +533 -0
  43. data_olympus/principals.py +168 -0
  44. data_olympus/prompts.py +35 -0
  45. data_olympus/push_queue.py +261 -0
  46. data_olympus/query_expansion.py +200 -0
  47. data_olympus/rate_limit.py +81 -0
  48. data_olympus/refresh.py +329 -0
  49. data_olympus/report.py +133 -0
  50. data_olympus/rest_api.py +845 -0
  51. data_olympus/safe_id.py +36 -0
  52. data_olympus/search_gate.py +64 -0
  53. data_olympus/search_shortcut.py +146 -0
  54. data_olympus/server.py +1115 -0
  55. data_olympus/session_metrics.py +303 -0
  56. data_olympus/setup_wizard.py +751 -0
  57. data_olympus/thin_pointer.py +20 -0
  58. data_olympus/tools_audit.py +41 -0
  59. data_olympus/tools_enforce.py +198 -0
  60. data_olympus/tools_onboarding.py +585 -0
  61. data_olympus/tools_read.py +230 -0
  62. data_olympus/tools_write.py +878 -0
  63. data_olympus/trigram.py +126 -0
  64. data_olympus/viewer/__init__.py +1 -0
  65. data_olympus/viewer/generator.py +375 -0
  66. data_olympus/worktrees.py +147 -0
  67. data_olympus/write_gate.py +382 -0
  68. data_olympus-0.3.0.dist-info/METADATA +97 -0
  69. data_olympus-0.3.0.dist-info/RECORD +73 -0
  70. data_olympus-0.3.0.dist-info/WHEEL +4 -0
  71. data_olympus-0.3.0.dist-info/entry_points.txt +3 -0
  72. data_olympus-0.3.0.dist-info/licenses/LICENSE +202 -0
  73. data_olympus-0.3.0.dist-info/licenses/NOTICE +8 -0
@@ -0,0 +1,361 @@
1
+ #!/usr/bin/env python3
2
+ """Local-grep fallback for bin/kb when the data-olympus-mcp endpoint is unreachable.
3
+
4
+ Walks the local KB checkout, uses ripgrep (or grep) to satisfy search/get/list/outline/health,
5
+ and emits the same JSON shape as the REST endpoint with `degraded: true` set.
6
+
7
+ Used by bin/kb subcommands when curl to KB_ENDPOINT fails. Not intended for direct invocation
8
+ but works standalone too.
9
+
10
+ Usage:
11
+ _kb_fallback.py search <query> [--limit N] [--tier T] [--category C]
12
+ _kb_fallback.py get <id>
13
+ _kb_fallback.py list <tier> [<category>]
14
+ _kb_fallback.py outline
15
+ _kb_fallback.py health
16
+
17
+ Environment:
18
+ KB_LOCAL_PATH Path to the local KB checkout (default: current directory)
19
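+ KB_ENDPOINT The REST endpoint that was unreachable (for the warning message)
+ KB_TAXONOMY_PATH Optional JSON file of [prefix, tier, category] triples that replaces the default path rules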
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import argparse
24
+ import datetime
25
+ import json
26
+ import os
27
+ import re
28
+ import subprocess
29
+ import sys
30
+ from pathlib import Path
31
+
32
+ DEFAULT_KB_LOCAL_PATH = Path.cwd()
33
+ DEFAULT_ENDPOINT = "http://localhost:8080"
34
+
35
+ _EXCLUDED = {".git", ".pytest_cache", ".mypy_cache", ".ruff_cache", ".venv",
36
+ "__pycache__", "node_modules", ".worktrees", "to-delete",
37
+ "archive", "_archive", "data-olympus-mcp",
38
+ "test-fixtures", "cli-fixtures"}
39
+
40
+ # Inline minimal front-matter parser (subset of markdown_parse for the package; bin/ has no deps).
41
+ _FM_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
42
+ _LIST_RE = re.compile(r"^\[(.*)\]$")
43
+
44
+ # Mirror of src/data_olympus/index.py _DEFAULT_PATH_RULES and its
45
+ # KB_TAXONOMY_PATH loader. If you update one, update the other. See SPEC.md.
46
+ _DEFAULT_PATH_RULES: list[tuple[str, str, str]] = [
47
+ # T1 Universal, applies to every project, every stack.
48
+ ("universal/foundation/", "T1", "foundation"),
49
+ ("universal/quality/", "T1", "quality"),
50
+ ("universal/security/", "T1", "security"),
51
+ ("universal/infrastructure/", "T1", "infrastructure"),
52
+ ("universal/database/", "T1", "database"),
53
+ ("universal/api/", "T1", "api"),
54
+ ("universal/services/", "T1", "services"),
55
+
56
+ # T2 Stack-specific, classified dynamically: tech-stacks/<stack>/...
57
+ ("tech-stacks/", "T2", "stack"),
58
+
59
+ # Meta tiers (kept distinct from T1-T4).
60
+ ("decisions/", "decisions", "decisions"),
61
+ ("workflows/", "workflows", "workflows"),
62
+ ("memory/inbox/", "memory", "memory-inbox"),
63
+ ("memory/accepted/", "memory", "memory-accepted"),
64
+ ("memory/", "memory", "memory"),
65
+ ("tooling/", "tooling", "tooling"),
66
+ ("templates/", "templates", "templates"),
67
+
68
+ # T3 / T4 catch-all (project tree). The classifier post-processes
69
+ # this hit; see _classify for the T3 vs T4 distinction.
70
+ ("projects/", "T3", "project"),
71
+ ]
72
+
73
+
74
+ def _load_path_rules() -> list[tuple[str, str, str]]:
75
+ """Active taxonomy: KB_TAXONOMY_PATH JSON if set, else the default.
76
+
77
+ The JSON must be a list of [prefix, tier, category] triples; a malformed
78
+ file raises ValueError rather than silently misclassifying every document.
79
+ """
80
+ path = os.environ.get("KB_TAXONOMY_PATH", "").strip()
81
+ if not path:
82
+ return _DEFAULT_PATH_RULES
83
+ data = json.loads(Path(path).read_text(encoding="utf-8"))
84
+ if not isinstance(data, list) or not all(
85
+ isinstance(r, (list, tuple)) and len(r) == 3 for r in data
86
+ ):
87
+ raise ValueError(
88
+ f"KB_TAXONOMY_PATH={path!r} must be a JSON list of "
89
+ f"[prefix, tier, category] triples"
90
+ )
91
+ return [(str(r[0]), str(r[1]), str(r[2])) for r in data]
92
+
93
+
94
def _classify(rel: str) -> tuple[str, str]:
    """Infer (tier, category) for a KB-relative path.

    The path is matched against the active rules in order and the first
    matching prefix wins. Two prefixes get extra treatment:

    * ``projects/<name>/components/<component>/<file>...`` becomes
      ``("T4", "component:<name>/<component>")``; any other path under
      ``projects/`` becomes ``("T3", "project:<name>")``.
    * ``tech-stacks/<stack>/...`` keeps the rule's tier and gets the
      category ``<rule category>:<stack>``.

    Paths that match no rule are classified as ``("meta", "meta")``.
    """
    normalized = rel.replace("\\", "/")
    segments = normalized.split("/")

    for prefix, tier, category in _load_path_rules():
        if not normalized.startswith(prefix):
            continue

        if prefix == "projects/":
            # At least five segments are required so that segments[3] is a
            # real component DIRECTORY, not a loose file inside components/.
            if len(segments) >= 5 and segments[2] == "components":
                return "T4", f"component:{segments[1]}/{segments[3]}"
            if len(segments) >= 2:
                # Dropping ".md" turns projects/index.md into project:index;
                # for real project directories it changes nothing.
                return "T3", f"project:{segments[1].removesuffix('.md')}"
        elif prefix == "tech-stacks/" and len(segments) >= 2 and segments[1]:
            return tier, f"{category}:{segments[1].removesuffix('.md')}"

        return tier, category

    return "meta", "meta"
126
+
127
+
128
def _parse_md(path: Path) -> dict[str, object]:
    """Read a Markdown file and split it into front matter and body.

    Only a minimal subset of YAML front matter is understood: one
    ``key: value`` pair per line, where the value is either a scalar or an
    inline ``[a, b, c]`` list. Surrounding single or double quotes are
    stripped from scalars and list items. Lines without a colon are ignored.

    Args:
        path: The Markdown file to read.

    Returns:
        A dict with ``front_matter`` (the parsed mapping, empty when the file
        has no front-matter fence), ``body`` (the text after the fence, or
        the whole text when there is none) and ``full_text``.
    """
    # "utf-8-sig" drops a leading BOM, which would otherwise stop _FM_RE from
    # matching at the start of the text and silently discard the front matter.
    # errors="replace" keeps one badly encoded file from aborting a scan that
    # walks every document in the checkout.
    text = path.read_text(encoding="utf-8-sig", errors="replace")
    fm: dict[str, object] = {}

    fence = _FM_RE.match(text)
    if fence is None:
        return {"front_matter": fm, "body": text, "full_text": text}

    for raw_line in fence.group(1).splitlines():
        line = raw_line.strip()
        if not line or ":" not in line:
            continue
        key, _, value = line.partition(":")
        key, value = key.strip(), value.strip()
        inline_list = _LIST_RE.match(value)
        if inline_list:
            fm[key] = [
                item.strip().strip("'\"")
                for item in inline_list.group(1).split(",")
                if item.strip()
            ]
        else:
            fm[key] = value.strip("'\"")

    return {"front_matter": fm, "body": text[fence.end():], "full_text": text}
147
+
148
+
149
def _iter_md_files(kb: Path) -> list[Path]:
    """Return every ``*.md`` path under *kb*, sorted, minus excluded trees.

    A path is dropped when any component of its path relative to *kb*
    appears in ``_EXCLUDED``.
    """
    kept: list[Path] = []
    for candidate in kb.rglob("*.md"):
        if _EXCLUDED.isdisjoint(candidate.relative_to(kb).parts):
            kept.append(candidate)
    kept.sort()
    return kept
154
+
155
+
156
def _classify_doc(rel: str, fm: dict[str, object]) -> tuple[str, str, str, str]:
    """Return ``(id, title, tier, category)`` for one document.

    Non-empty front-matter values take precedence. Otherwise the id is the
    relative path without its suffix, with components joined by ``-``; the
    title is the empty string; tier and category come from the path rules.
    All four values are returned as strings.
    """
    inferred_tier, inferred_category = _classify(rel)

    doc_id = fm.get("id")
    if not doc_id:
        doc_id = "-".join(Path(rel).with_suffix("").parts)

    title = fm.get("title") or ""
    tier = fm.get("tier") or inferred_tier
    category = fm.get("category") or inferred_category
    return str(doc_id), str(title), str(tier), str(category)
164
+
165
+
166
+ def _mtime_iso(path: Path) -> str:
167
+ mt = path.stat().st_mtime
168
+ return datetime.datetime.fromtimestamp(mt, tz=datetime.UTC).isoformat()
169
+
170
+
171
+ def _warning(kb: Path, endpoint: str, ranker: str) -> str:
172
+ return f"MCP unreachable at {endpoint}; using local {ranker} fallback over {kb}"
173
+
174
+
175
+ def _ranker() -> str:
176
+ """Return 'rg' or 'grep' depending on which is on PATH."""
177
+ if subprocess.run(["which", "rg"], capture_output=True).returncode == 0:
178
+ return "rg"
179
+ return "grep"
180
+
181
+
182
def cmd_health(*, kb_local_path: Path, endpoint: str) -> str:
    """Return the degraded-mode health report as a JSON string.

    ``total_rules`` is the number of Markdown files found under
    *kb_local_path*. Every other field is a fixed placeholder, apart from
    ``warning``, which names the unreachable endpoint and the local search
    tool in use.
    """
    rule_count = len(_iter_md_files(kb_local_path))
    notice = _warning(kb_local_path, endpoint, _ranker())
    # Key order is kept as-is because it determines the emitted JSON text.
    payload: dict[str, object] = {
        "kb_commit": "fallback",
        "index_built_at": None,
        "total_rules": rule_count,
        "last_git_pull_at": None,
        "last_git_push_at": None,
        "staleness_seconds": None,
        "degraded": True,
        "db_size_bytes": 0,
        "pending_count": 0,
        "push_queue_size": 0,
        "last_index_build_status": "ok",
        "last_index_error": None,
        "last_index_error_at": None,
        "last_index_conflicts": [],
        "warning": notice,
    }
    return json.dumps(payload)
202
+
203
+
204
def cmd_outline(*, kb_local_path: Path, endpoint: str) -> str:
    """Return a JSON outline of document counts per tier and category.

    Tiers are listed in sorted order. Within a tier, categories keep the
    order in which they were first seen while scanning the files.
    """
    counts: dict[str, dict[str, int]] = {}
    for md_file in _iter_md_files(kb_local_path):
        rel = str(md_file.relative_to(kb_local_path))
        front_matter = _parse_md(md_file)["front_matter"]
        _doc_id, _title, tier, category = _classify_doc(rel, front_matter)
        per_tier = counts.setdefault(tier, {})
        per_tier[category] = per_tier.get(category, 0) + 1

    tier_list = []
    for tier_name in sorted(counts):
        categories = [
            {"name": cat_name, "count": total}
            for cat_name, total in counts[tier_name].items()
        ]
        tier_list.append({"name": tier_name, "categories": categories})

    notice = _warning(kb_local_path, endpoint, _ranker())
    return json.dumps({
        "tiers": tier_list,
        "source_commit": "fallback",
        "degraded": True,
        "warning": notice,
    })
222
+
223
+
224
def cmd_search(*, query: str, limit: int, kb_local_path: Path, endpoint: str,
               tier: str | None = None, category: str | None = None) -> str:
    """Search the local checkout with ripgrep (or grep) and return JSON.

    The external tool lists files whose content matches *query*
    (case-insensitive; the tool interprets the query as a regular
    expression). Matching ``.md`` files outside the excluded directories are
    then filtered by *tier* / *category* and returned in path order.

    Args:
        query: Search pattern handed to rg/grep.
        limit: Maximum number of hits; zero or a negative value yields none.
        kb_local_path: Root of the local KB checkout.
        endpoint: Unreachable REST endpoint, echoed in the warning.
        tier: If given, keep only documents of this tier.
        category: If given, keep only documents of this category.

    Returns:
        A JSON string with ``query``, ``hits``, ``source_commit``,
        ``total_returned``, ``degraded`` and ``warning``. Every hit has
        ``score`` 0.0 because the fallback does no ranking.
    """
    ranker = _ranker()
    # "-e" marks the query as the pattern even when it starts with "-", and
    # "--" ends option parsing before the path. Without them a query such as
    # "--pre=<cmd>" would be read as a command-line option of the search tool.
    if ranker == "rg":
        argv = ["rg", "--files-with-matches", "--ignore-case",
                "-e", query, "--", str(kb_local_path)]
    else:
        argv = ["grep", "-rilE", "-e", query, "--", str(kb_local_path)]
    result = subprocess.run(argv, capture_output=True, text=True)

    raw_paths = [Path(p) for p in result.stdout.splitlines() if p.endswith(".md")]
    needle = query.lower()
    hits: list[dict[str, object]] = []
    for p in sorted(raw_paths):
        # Checked before appending so that limit <= 0 returns no hits.
        if len(hits) >= limit:
            break
        rel_path = p.relative_to(kb_local_path)
        if any(part in _EXCLUDED for part in rel_path.parts):
            continue
        rel = str(rel_path)
        parsed = _parse_md(p)
        id_, title, t, c = _classify_doc(rel, parsed["front_matter"])
        if tier and t != tier:
            continue
        if category and c != category:
            continue
        # Tiny snippet: up to 160 chars of body around the first literal,
        # case-insensitive occurrence of the query; when there is none (for
        # example the query only matched as a regex) use the body's start.
        body = parsed["body"]
        idx = body.lower().find(needle)
        snippet = body[max(0, idx - 40):idx + 120] if idx >= 0 else body[:160]
        hits.append({
            "id": id_,
            "path": rel,
            "title": title,
            "snippet": snippet,
            "score": 0.0,  # fallback has no FTS5 ranking
        })
    return json.dumps({
        "query": query,
        "hits": hits,
        "source_commit": "fallback",
        "total_returned": len(hits),
        "degraded": True,
        "warning": _warning(kb_local_path, endpoint, ranker),
    })
270
+
271
+
272
def cmd_get(*, id: str, kb_local_path: Path, endpoint: str) -> str:
    """Return the document whose id equals *id* as a JSON string.

    Files are scanned in order and the first match wins. ``tags`` is taken
    from the front matter only when it is a list; ``last_modified`` comes
    from the file's mtime. If no document matches, a JSON object with
    ``error`` set to ``"not_found"`` is returned instead.
    """
    # Linear scan; fallback is slow by design
    for md_file in _iter_md_files(kb_local_path):
        rel = str(md_file.relative_to(kb_local_path))
        parsed = _parse_md(md_file)
        front_matter = parsed["front_matter"]
        doc_id, title, tier, category = _classify_doc(rel, front_matter)
        if doc_id != id:
            continue

        declared_tags = front_matter.get("tags") or []
        tags = list(declared_tags) if isinstance(declared_tags, list) else []
        document = {
            "id": id,
            "path": rel,
            "title": title,
            "tier": tier,
            "category": category,
            "tags": tags,
            "content_markdown": parsed["full_text"],
            "last_modified": _mtime_iso(md_file),
            "last_modified_source": "mtime-fallback",
            "source_commit": "fallback",
            "degraded": True,
            "warning": _warning(kb_local_path, endpoint, _ranker()),
        }
        return json.dumps(document)

    missing = {
        "error": "not_found",
        "message": f"no document with id={id!r}",
        "degraded": True,
        "warning": _warning(kb_local_path, endpoint, _ranker()),
    }
    return json.dumps(missing)
301
+
302
+
303
def cmd_list(*, tier: str, category: str | None, kb_local_path: Path, endpoint: str) -> str:
    """Return a JSON listing of the documents in *tier*, sorted by id.

    When *category* is given (and non-empty), only documents of that
    category are included.
    """
    matching = []
    for md_file in sorted(_iter_md_files(kb_local_path)):
        rel = str(md_file.relative_to(kb_local_path))
        front_matter = _parse_md(md_file)["front_matter"]
        doc_id, title, doc_tier, doc_category = _classify_doc(rel, front_matter)
        tier_ok = doc_tier == tier
        category_ok = not category or doc_category == category
        if tier_ok and category_ok:
            matching.append({"id": doc_id, "title": title, "path": rel})

    # sorted() is stable, so documents sharing an id stay in path order.
    entries = sorted(matching, key=lambda entry: entry["id"])
    notice = _warning(kb_local_path, endpoint, _ranker())
    return json.dumps({
        "tier": tier,
        "category": category,
        "entries": entries,
        "source_commit": "fallback",
        "total": len(entries),
        "degraded": True,
        "warning": notice,
    })
324
+
325
+
326
def main() -> int:
    """Parse the command line, run the chosen subcommand and print its JSON.

    The KB location and the endpoint named in warnings come from the
    KB_LOCAL_PATH and KB_ENDPOINT environment variables, falling back to the
    module defaults. Always returns 0.
    """
    parser = argparse.ArgumentParser(prog="_kb_fallback.py")
    commands = parser.add_subparsers(dest="cmd", required=True)

    search_parser = commands.add_parser("search")
    search_parser.add_argument("query")
    search_parser.add_argument("--limit", type=int, default=20)
    search_parser.add_argument("--tier")
    search_parser.add_argument("--category")

    get_parser = commands.add_parser("get")
    get_parser.add_argument("id")

    list_parser = commands.add_parser("list")
    list_parser.add_argument("tier")
    list_parser.add_argument("category", nargs="?")

    commands.add_parser("outline")
    commands.add_parser("health")

    args = parser.parse_args()
    kb = Path(os.environ.get("KB_LOCAL_PATH", str(DEFAULT_KB_LOCAL_PATH)))
    endpoint = os.environ.get("KB_ENDPOINT", DEFAULT_ENDPOINT)

    # Each handler is a thunk so that only the selected subcommand's
    # attributes are read from the parsed arguments.
    handlers = {
        "search": lambda: cmd_search(query=args.query, limit=args.limit,
                                     kb_local_path=kb, endpoint=endpoint,
                                     tier=args.tier, category=args.category),
        "get": lambda: cmd_get(id=args.id, kb_local_path=kb, endpoint=endpoint),
        "list": lambda: cmd_list(tier=args.tier, category=args.category,
                                 kb_local_path=kb, endpoint=endpoint),
        "outline": lambda: cmd_outline(kb_local_path=kb, endpoint=endpoint),
        "health": lambda: cmd_health(kb_local_path=kb, endpoint=endpoint),
    }
    handler = handlers.get(args.cmd)
    if handler is not None:
        print(handler())
    return 0


if __name__ == "__main__":
    sys.exit(main())