ms8-macos 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. ms8/__init__.py +8 -0
  2. ms8/__main__.py +15 -0
  3. ms8/absorb/__init__.py +7 -0
  4. ms8/absorb/chunker.py +44 -0
  5. ms8/absorb/cli.py +241 -0
  6. ms8/absorb/fs_watcher.py +176 -0
  7. ms8/absorb/governance.py +144 -0
  8. ms8/absorb/health.py +61 -0
  9. ms8/absorb/incremental_processor.py +187 -0
  10. ms8/absorb/kg.py +126 -0
  11. ms8/absorb/ocr.py +56 -0
  12. ms8/absorb/parser.py +176 -0
  13. ms8/absorb/project_memory/__init__.py +10 -0
  14. ms8/absorb/project_memory/cli.py +206 -0
  15. ms8/absorb/project_memory/generator.py +613 -0
  16. ms8/absorb/project_memory/health.py +397 -0
  17. ms8/absorb/project_memory/parser.py +67 -0
  18. ms8/absorb/project_memory/repository.py +252 -0
  19. ms8/absorb/project_memory/scanner.py +169 -0
  20. ms8/absorb/project_memory/scope.py +366 -0
  21. ms8/absorb/project_memory/search.py +205 -0
  22. ms8/absorb/project_memory/submit.py +95 -0
  23. ms8/absorb/project_memory/watch.py +282 -0
  24. ms8/absorb/repository.py +508 -0
  25. ms8/absorb/reviewer.py +353 -0
  26. ms8/absorb/scope.py +184 -0
  27. ms8/absorb/search.py +136 -0
  28. ms8/absorb/spotlight_bootstrap.py +82 -0
  29. ms8/agent_native/__init__.py +6 -0
  30. ms8/agent_native/agent_cli.py +600 -0
  31. ms8/agent_native/onboarding.py +295 -0
  32. ms8/agent_native/permission.py +65 -0
  33. ms8/agent_native/report.py +16 -0
  34. ms8/agent_native/task_spec.py +12 -0
  35. ms8/agent_native/task_templates.py +326 -0
  36. ms8/app/__init__.py +5 -0
  37. ms8/app/classifier/__init__.py +3 -0
  38. ms8/app/classifier/context_builder.py +48 -0
  39. ms8/app/classifier/hybrid_classifier.py +82 -0
  40. ms8/app/classifier/llm_classifier.py +39 -0
  41. ms8/app/classifier/rule_classifier.py +27 -0
  42. ms8/app/classifier/threshold_manager.py +25 -0
  43. ms8/app/config.py +152 -0
  44. ms8/app/extractors/__init__.py +0 -0
  45. ms8/app/extractors/action_extractor.py +66 -0
  46. ms8/app/extractors/entity_extractor.py +121 -0
  47. ms8/app/extractors/technical_extractor.py +9 -0
  48. ms8/app/feedback/__init__.py +0 -0
  49. ms8/app/feedback/feedback_service.py +47 -0
  50. ms8/app/feedback/rule_optimizer.py +117 -0
  51. ms8/app/integrations/__init__.py +0 -0
  52. ms8/app/integrations/ollama_client.py +57 -0
  53. ms8/app/main.py +12 -0
  54. ms8/app/memory/__init__.py +0 -0
  55. ms8/app/memory/indexer.py +230 -0
  56. ms8/app/memory/models.py +9 -0
  57. ms8/app/memory/repository.py +170 -0
  58. ms8/app/memory/search.py +48 -0
  59. ms8/app/observability/__init__.py +0 -0
  60. ms8/app/observability/logger.py +44 -0
  61. ms8/app/observability/metrics.py +14 -0
  62. ms8/app/observability/trace.py +16 -0
  63. ms8/app/pipeline/__init__.py +37 -0
  64. ms8/app/pipeline/consistency.py +8 -0
  65. ms8/app/pipeline/decision.py +19 -0
  66. ms8/app/pipeline/dedupe.py +51 -0
  67. ms8/app/pipeline/memory_admission_engine.py +242 -0
  68. ms8/app/pipeline/memory_pipeline.py +414 -0
  69. ms8/app/pipeline/quality_gate.py +28 -0
  70. ms8/app/pipeline/risk_scoring.py +45 -0
  71. ms8/app/review/__init__.py +0 -0
  72. ms8/app/review/batch_review.py +112 -0
  73. ms8/app/review/review_service.py +82 -0
  74. ms8/app/rules/__init__.py +3 -0
  75. ms8/app/rules/base.py +53 -0
  76. ms8/app/rules/block_rules.py +177 -0
  77. ms8/app/rules/category_rules.py +58 -0
  78. ms8/app/rules/conflict_rules.py +72 -0
  79. ms8/app/rules/dedupe_rules.py +27 -0
  80. ms8/app/rules/extraction_rules.py +14 -0
  81. ms8/app/rules/preprocess_rules.py +41 -0
  82. ms8/app/rules/privacy_rules.py +99 -0
  83. ms8/app/rules/registry.py +19 -0
  84. ms8/app/rules/tag_rules.py +12 -0
  85. ms8/app/schemas/__init__.py +0 -0
  86. ms8/app/schemas/feedback_schema.py +16 -0
  87. ms8/app/schemas/pipeline_schema.py +70 -0
  88. ms8/app/schemas/review_schema.py +37 -0
  89. ms8/ask.py +55 -0
  90. ms8/cli.py +1956 -0
  91. ms8/compression_governance.py +133 -0
  92. ms8/connect/AGENTS.md +63 -0
  93. ms8/connect/CONNECT_GUIDE.md +86 -0
  94. ms8/connect/__init__.py +13 -0
  95. ms8/connect/adapter_registry/__init__.py +4 -0
  96. ms8/connect/adapter_registry/adapters.json +18 -0
  97. ms8/connect/adapter_registry/registry.py +92 -0
  98. ms8/connect/adapter_registry/scan_tools.py +23 -0
  99. ms8/connect/config/mcp_config.yaml +2 -0
  100. ms8/connect/integration_hooks/service_models.py +23 -0
  101. ms8/connect/local_llm_adapter/__init__.py +1 -0
  102. ms8/connect/local_llm_adapter/adapter_llm.py +54 -0
  103. ms8/connect/mcp_server/__init__.py +13 -0
  104. ms8/connect/mcp_server/mcp_server.py +392 -0
  105. ms8/connect/mcp_server/memory_access_policy.py +87 -0
  106. ms8/connect/mcp_server/memory_service_interface.py +736 -0
  107. ms8/connect/mcp_server/stdio_server.py +330 -0
  108. ms8/connect/profiles/README.md +29 -0
  109. ms8/connect/scripts/__init__.py +2 -0
  110. ms8/connect/scripts/apply_client_configs.py +160 -0
  111. ms8/connect/scripts/bootstrap.py +542 -0
  112. ms8/connect/scripts/client_config.py +504 -0
  113. ms8/connect/scripts/common.py +118 -0
  114. ms8/connect/scripts/connect.py +196 -0
  115. ms8/connect/scripts/generate_client_configs.py +44 -0
  116. ms8/connect/scripts/install_env.py +20 -0
  117. ms8/connect/scripts/rollback_client_configs.py +108 -0
  118. ms8/connect/scripts/scan_register.py +26 -0
  119. ms8/connect/scripts/smoke_test.py +72 -0
  120. ms8/connect/scripts/status.py +149 -0
  121. ms8/connect/scripts/verify_client_configs.py +131 -0
  122. ms8/dashboard.py +252 -0
  123. ms8/demo.py +75 -0
  124. ms8/doctor.py +791 -0
  125. ms8/engine.py +724 -0
  126. ms8/engine_core/__init__.py +10 -0
  127. ms8/engine_core/admission_compat.py +78 -0
  128. ms8/engine_core/agent_skills_standard.py +399 -0
  129. ms8/engine_core/auto_memory.py +695 -0
  130. ms8/engine_core/built_in_skills.py +526 -0
  131. ms8/engine_core/config.py +1170 -0
  132. ms8/engine_core/context_material.py +424 -0
  133. ms8/engine_core/context_understanding.py +555 -0
  134. ms8/engine_core/core.py +4974 -0
  135. ms8/engine_core/enhanced_self_improvement.py +373 -0
  136. ms8/engine_core/enhanced_subagents.py +684 -0
  137. ms8/engine_core/expression_preference_profile.py +172 -0
  138. ms8/engine_core/file_store.py +54 -0
  139. ms8/engine_core/file_write_guard.py +108 -0
  140. ms8/engine_core/git_utils.py +211 -0
  141. ms8/engine_core/governance.py +168 -0
  142. ms8/engine_core/knowledge_arbitration.py +113 -0
  143. ms8/engine_core/knowledge_feedback.py +330 -0
  144. ms8/engine_core/knowledge_graph.py +3271 -0
  145. ms8/engine_core/knowledge_rules.py +54 -0
  146. ms8/engine_core/learning.py +983 -0
  147. ms8/engine_core/license.py +229 -0
  148. ms8/engine_core/local_llm.py +964 -0
  149. ms8/engine_core/maintenance/__init__.py +1 -0
  150. ms8/engine_core/maintenance/self_check/__init__.py +10 -0
  151. ms8/engine_core/maintenance/self_check/check_runner.py +405 -0
  152. ms8/engine_core/maintenance/self_check/check_specs.py +3231 -0
  153. ms8/engine_core/maintenance/self_check/reporter.py +884 -0
  154. ms8/engine_core/maintenance/self_repair/__init__.py +13 -0
  155. ms8/engine_core/maintenance/self_repair/repair_audit.py +151 -0
  156. ms8/engine_core/maintenance/self_repair/repair_cli.py +74 -0
  157. ms8/engine_core/maintenance/self_repair/repair_orchestrator.py +176 -0
  158. ms8/engine_core/maintenance/self_repair/repair_policies.py +1245 -0
  159. ms8/engine_core/maintenance/self_repair/repair_runner.py +1037 -0
  160. ms8/engine_core/maintenance/self_repair/repair_schema.py +87 -0
  161. ms8/engine_core/maintenance/self_repair/repair_validator.py +33 -0
  162. ms8/engine_core/maintenance_manager.py +581 -0
  163. ms8/engine_core/maintenance_policy.py +820 -0
  164. ms8/engine_core/memory_blocks.py +110 -0
  165. ms8/engine_core/memory_section_parser.py +96 -0
  166. ms8/engine_core/meta_cognition.py +847 -0
  167. ms8/engine_core/metrics_contract.py +58 -0
  168. ms8/engine_core/monitoring.py +1117 -0
  169. ms8/engine_core/pattern_recognition.py +562 -0
  170. ms8/engine_core/policy_engine_iface.py +54 -0
  171. ms8/engine_core/policy_engine_loader.py +213 -0
  172. ms8/engine_core/policy_engine_open.py +286 -0
  173. ms8/engine_core/priority_engine.py +217 -0
  174. ms8/engine_core/record_gateway.py +46 -0
  175. ms8/engine_core/response_mode_router.py +336 -0
  176. ms8/engine_core/response_mode_types.py +77 -0
  177. ms8/engine_core/security/__init__.py +21 -0
  178. ms8/engine_core/security/cli.py +11 -0
  179. ms8/engine_core/security/crypto_manager.py +6 -0
  180. ms8/engine_core/security/encryption/__init__.py +15 -0
  181. ms8/engine_core/security/encryption/cli.py +86 -0
  182. ms8/engine_core/security/encryption/crypto_manager.py +267 -0
  183. ms8/engine_core/security/encryption/file_crypto.py +61 -0
  184. ms8/engine_core/security/encryption/key_manager.py +261 -0
  185. ms8/engine_core/security/encryption/recovery.py +23 -0
  186. ms8/engine_core/security/encryption/security_schema.py +55 -0
  187. ms8/engine_core/security/file_crypto.py +6 -0
  188. ms8/engine_core/security/key_manager.py +6 -0
  189. ms8/engine_core/security/recovery.py +6 -0
  190. ms8/engine_core/security/security_schema.py +6 -0
  191. ms8/engine_core/security/shadow/__init__.py +10 -0
  192. ms8/engine_core/security/shadow/shadow_audit.py +42 -0
  193. ms8/engine_core/security/shadow/shadow_capacity_guard.py +66 -0
  194. ms8/engine_core/security/shadow/shadow_checkpoint_guard.py +54 -0
  195. ms8/engine_core/security/shadow/shadow_cli.py +258 -0
  196. ms8/engine_core/security/shadow/shadow_control_gate.py +164 -0
  197. ms8/engine_core/security/shadow/shadow_fs_guard.py +25 -0
  198. ms8/engine_core/security/shadow/shadow_guard.py +1563 -0
  199. ms8/engine_core/security/shadow/shadow_ledger.py +727 -0
  200. ms8/engine_core/security/shadow/shadow_locking.py +80 -0
  201. ms8/engine_core/security/shadow/shadow_manifest_guard.py +247 -0
  202. ms8/engine_core/security/shadow/shadow_permissions.py +83 -0
  203. ms8/engine_core/security/shadow/shadow_platform_log.py +20 -0
  204. ms8/engine_core/security/shadow/shadow_quarantine.py +33 -0
  205. ms8/engine_core/security/shadow/shadow_recovery.py +145 -0
  206. ms8/engine_core/security/shadow/shadow_recovery_guard.py +275 -0
  207. ms8/engine_core/security/shadow/shadow_schema.py +101 -0
  208. ms8/engine_core/security/shadow/shadow_seal.py +297 -0
  209. ms8/engine_core/security/shadow/shadow_tokens.py +53 -0
  210. ms8/engine_core/self_improvement.py +710 -0
  211. ms8/engine_core/semantic_search.py +289 -0
  212. ms8/engine_core/skill_github_discovery.py +546 -0
  213. ms8/engine_core/skill_marketplace.py +342 -0
  214. ms8/engine_core/skill_search_index.py +410 -0
  215. ms8/engine_core/skills.py +549 -0
  216. ms8/engine_core/sqlite_store.py +385 -0
  217. ms8/engine_core/sticky_prompt_templates.py +85 -0
  218. ms8/engine_core/subagents.py +237 -0
  219. ms8/engine_core/synthetic_memory.py +1444 -0
  220. ms8/engine_core/threshold_cli.py +98 -0
  221. ms8/engine_core/utils.py +74 -0
  222. ms8/engine_core/whoosh_search.py +213 -0
  223. ms8/engine_core/working_memory.py +286 -0
  224. ms8/lifecycle.py +245 -0
  225. ms8/onboarding.py +127 -0
  226. ms8/paths.py +71 -0
  227. ms8/record_policy.py +439 -0
  228. ms8/review_governance.py +232 -0
  229. ms8/runtime.py +2745 -0
  230. ms8/service.py +215 -0
  231. ms8/service_platform.py +545 -0
  232. ms8/shortcut.py +76 -0
  233. ms8/watch.py +170 -0
  234. ms8_macos-0.2.14.dist-info/METADATA +525 -0
  235. ms8_macos-0.2.14.dist-info/RECORD +239 -0
  236. ms8_macos-0.2.14.dist-info/WHEEL +5 -0
  237. ms8_macos-0.2.14.dist-info/entry_points.txt +2 -0
  238. ms8_macos-0.2.14.dist-info/licenses/LICENSE +674 -0
  239. ms8_macos-0.2.14.dist-info/top_level.txt +1 -0
ms8/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """MS8 package."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
# This wheel is distributed as "ms8-macos" while the lookup historically used
# "ms8" only, so probe both distribution names before falling back to the
# hard-coded version string.
for _dist_name in ("ms8", "ms8-macos"):
    try:
        __version__ = version(_dist_name)
        break
    except PackageNotFoundError:
        continue
else:  # pragma: no cover - source tree fallback
    __version__ = "0.2.14"
del _dist_name
ms8/__main__.py ADDED
@@ -0,0 +1,15 @@
1
+ """Module entrypoint for python -m ms8."""
2
+
3
+ import sys
4
+
5
+ from .cli import main
6
+
7
if __name__ == "__main__":
    # Run the CLI and keep its result as an integer exit status.
    exit_status = int(main())
    try:
        for stream in (sys.stdout, sys.stderr):
            stream.flush()
    except OSError:
        # Best-effort flush at process exit; do not mask unrelated exceptions.
        pass
    raise SystemExit(exit_status)
ms8/absorb/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """Authorized local document absorption for MS8."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .. import __version__ as __version__
6
+
7
+ __all__ = ["__version__"]
ms8/absorb/chunker.py ADDED
@@ -0,0 +1,44 @@
1
+ """Text chunking helpers for absorb."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import re
7
+
8
+
9
def estimate_tokens(text: str) -> int:
    """Return a rough token estimate for *text* (never less than 1).

    Each CJK ideograph (U+4E00-U+9FFF) and each ASCII word run counts as one
    token; every four remaining characters count as one more.
    """
    content = str(text or "")
    cjk_count = sum(1 for _ in re.finditer(r"[\u4e00-\u9fff]", content))
    word_runs = re.findall(r"[A-Za-z0-9_]+", content)
    word_chars = sum(map(len, word_runs))
    remainder = max(0, len(content) - cjk_count - word_chars)
    return max(1, cjk_count + len(word_runs) + remainder // 4)
15
+
16
+
17
def make_chunk_hash(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    payload = str(text or "").encode("utf-8")
    digest = hashlib.sha256()
    digest.update(payload)
    return digest.hexdigest()
19
+
20
+
21
def _slice_by_chars(text: str, max_tokens: int) -> int:
    """Convert a token budget into a character budget of at least 1.

    ``text`` is part of the signature but is not consulted.
    """
    # Approximate mixed-language token/character ratio conservatively.
    char_budget = 3 * max_tokens
    return char_budget if char_budget > 1 else 1
24
+
25
+
26
def split_text(text: str, max_tokens: int = 512, overlap_tokens: int = 64) -> list[str]:
    """Split *text* into overlapping character windows sized from a token budget.

    Args:
        text: Text to split; ``None`` or blank input yields an empty list.
        max_tokens: Approximate token budget per chunk.
        overlap_tokens: Approximate token count shared by neighbouring chunks.

    Returns:
        ``[stripped_text]`` when the token estimate fits ``max_tokens``,
        otherwise the list of non-empty, stripped character windows.
    """
    value = str(text or "").strip()
    if not value:
        return []
    if estimate_tokens(value) <= max_tokens:
        return [value]
    char_window = _slice_by_chars(value, max_tokens)
    char_overlap = _slice_by_chars(value, overlap_tokens)
    if char_overlap >= char_window:
        # An overlap as large as the window would keep ``start`` from ever
        # advancing (endless loop, unbounded list growth); degrade to half a
        # window so every iteration makes progress.
        char_overlap = char_window // 2
    chunks: list[str] = []
    total = len(value)
    start = 0
    while start < total:
        end = min(total, start + char_window)
        chunk = value[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= total:
            break
        # end == start + char_window here and char_overlap < char_window,
        # so the next start is strictly greater than the current one.
        start = end - char_overlap
    return chunks
ms8/absorb/cli.py ADDED
@@ -0,0 +1,241 @@
1
+ """CLI dispatch for ms8 absorb."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from .health import absorb_health_summary
10
+ from .incremental_processor import process_pending
11
+ from .kg import extract_absorb_knowledge_graph
12
+ from .repository import init_repository
13
+ from .reviewer import (
14
+ approve_all,
15
+ approve_chunk,
16
+ auto_submit_by_tier,
17
+ export_review_items,
18
+ list_review_chunks,
19
+ reject_all,
20
+ reject_chunk,
21
+ restore_rejected_chunk,
22
+ rollback_auto_writes,
23
+ submit_chunk,
24
+ )
25
+ from .scope import (
26
+ add_allowed_root,
27
+ add_exclude_pattern,
28
+ list_allowed_roots,
29
+ load_absorb_config,
30
+ remove_allowed_root,
31
+ set_auto_submit_summaries,
32
+ set_auto_write_tier,
33
+ )
34
+ from .search import search_chunks
35
+ from .spotlight_bootstrap import bootstrap_authorized_roots
36
+
37
+
38
def _print(payload: dict) -> int:
    """Print *payload* as indented JSON and return 0, or 1 when ``ok`` is falsy."""
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    print(rendered)
    succeeded = bool(payload.get("ok", True))
    if succeeded:
        return 0
    return 1
41
+
42
+
43
def _privacy_note() -> str:
    """Return the fixed privacy reminder text."""
    note = "Absorb indexes authorized local files only; main memory writes require explicit submit/autosubmit opt-in."
    return note
45
+
46
+
47
def _status_next_actions(summary: dict[str, Any]) -> list[str]:
    """Suggest follow-up commands from the counters in a status summary.

    With no authorized roots the only suggestion is to add one; otherwise
    review/export hints are listed first (when applicable), followed by the
    rescan, ingest and search commands.
    """

    def _count(key: str) -> int:
        return int(summary.get(key, 0) or 0)

    root_count = _count("authorized_roots")
    pending_count = _count("pending_review")
    quarantine_count = _count("quarantine")
    if root_count <= 0:
        return ["ms8 absorb add <directory>"]
    suggestions: list[str] = []
    if pending_count:
        suggestions.append("ms8 absorb review list")
    if quarantine_count:
        suggestions.append("ms8 absorb review export --include-quarantine")
    suggestions += ["ms8 absorb rescan", "ms8 absorb ingest", "ms8 absorb search <query> --pretty"]
    return suggestions
61
+
62
+
63
def _review_next_actions(items: list[dict[str, Any]]) -> list[str]:
    """Suggest review commands based on the first listed review item."""
    if not items:
        return ["ms8 absorb status"]
    head = items[0]
    chunk_ref = str(head.get("chunk_id", "") or "<chunk_id>")
    head_status = str(head.get("status", "") or "")
    if head_status == "QUARANTINED":
        return ["ms8 absorb review export --include-quarantine"]
    approve_hint = f"ms8 absorb review approve {chunk_ref}"
    reject_hint = f"ms8 absorb review reject {chunk_ref} --reason <reason>"
    return [approve_hint, reject_hint]
71
+
72
+
73
def _search_next_actions(query: str, matches: list[dict[str, Any]]) -> list[str]:
    """Suggest what to run after a search, depending on whether it matched."""
    if matches:
        return [f'ms8 ask "{query}"', "ms8 absorb review submit <chunk_id>"]
    # Nothing matched: propose refreshing the index and retrying the search.
    retry_hint = f'ms8 absorb search "{query}" --pretty'
    return ["ms8 absorb rescan", "ms8 absorb ingest", retry_hint]
77
+
78
+
79
def _with_next_actions(payload: dict[str, Any], actions: list[str]) -> dict[str, Any]:
    """Add ``next_actions`` to *payload* in place unless the key exists; return it."""
    if "next_actions" not in payload:
        payload["next_actions"] = actions
    return payload
82
+
83
+
84
def _print_pretty_search(query: str, matches: list[dict[str, Any]]) -> int:
    """Print search matches as a plain-text listing and return 0.

    The listing has a header, one numbered entry per match, and a trailing
    ``next_actions`` section taken from ``_search_next_actions``.
    """

    def _field(entry: dict[str, Any], key: str) -> str:
        return str(entry.get(key, "") or "")

    print("MS8_ABSORB_SEARCH")
    print(f"query={query}")
    print(f"matches={len(matches)}")
    for position, entry in enumerate(matches, start=1):
        location = _field(entry, "canonical_path")
        # Prefer the file name, then the raw path, then the chunk id.
        heading = Path(location).name or location or str(entry.get("chunk_id", ""))
        kind = _field(entry, "file_type")
        state = _field(entry, "status")
        risk_level = _field(entry, "risk_level")
        engine = _field(entry, "search_backend")
        score = entry.get("score", "")
        print("")
        print(f"{position}. {heading} {kind}".rstrip())
        print(f" status={state} risk={risk_level} backend={engine} score={score}")
        if location:
            print(f" path={location}")
        snippet = _field(entry, "text_preview").replace("\n", " ").strip()
        if snippet:
            print(f" preview={snippet[:220]}")
    print("")
    print("next_actions:")
    for suggestion in _search_next_actions(query, matches):
        print(f"- {suggestion}")
    return 0
109
+
110
+
111
def run_absorb_cli(args) -> int:
    """Dispatch a parsed ``ms8 absorb`` invocation and return its exit code.

    ``args`` is read by attribute access (``absorb_cmd`` plus per-command
    options); presumably an ``argparse.Namespace`` - TODO confirm against the
    parser definition.  Every branch emits its result through ``_print``
    (JSON on stdout, exit code 0 unless the payload's ``ok`` is falsy), except
    ``search --pretty`` which prints a text listing and returns 0.
    """
    # The repository is initialised on every invocation, before dispatching.
    init_repository()
    cmd = str(getattr(args, "absorb_cmd", "") or "")
    if cmd == "add":
        out = add_allowed_root(args.path, confirm_high_risk=bool(getattr(args, "confirm_high_risk", False)))
        out["privacy_note"] = _privacy_note()
        return _print(_with_next_actions(out, ["ms8 absorb rescan", "ms8 absorb ingest", "ms8 absorb start"]))
    if cmd == "remove":
        return _print(_with_next_actions(remove_allowed_root(args.path), ["ms8 absorb status"]))
    if cmd == "list":
        cfg = load_absorb_config()
        roots = list_allowed_roots()
        return _print(
            {
                "ok": True,
                "allowed_roots": roots,
                "exclude_patterns": cfg.get("exclude_patterns", []),
                "next_actions": ["ms8 absorb add <directory>"] if not roots else ["ms8 absorb rescan", "ms8 absorb ingest"],
            }
        )
    if cmd == "exclude":
        # Only the "add" sub-command is handled; anything else is an error payload.
        if str(getattr(args, "exclude_cmd", "") or "") == "add":
            return _print(add_exclude_pattern(args.pattern))
        return _print({"ok": False, "error": "choose exclude add"})
    if cmd == "rescan":
        return _print(bootstrap_authorized_roots())
    if cmd == "ingest":
        # The flag maps to True when set and to None (not False) when unset.
        submit = True if bool(getattr(args, "submit_summaries", False)) else None
        return _print(process_pending(submit_summaries=submit, limit=int(args.limit)))
    if cmd == "status":
        summary = absorb_health_summary()
        summary["counts"] = {"files": summary.get("files", {}), "chunks": summary.get("chunks", {})}
        summary["next_actions"] = _status_next_actions(summary)
        return _print(summary)
    if cmd == "review":
        subcmd = str(getattr(args, "review_cmd", "") or "")
        if subcmd == "approve":
            return _print(approve_chunk(args.chunk_id, submit=bool(getattr(args, "submit", False))))
        if subcmd == "reject":
            return _print(reject_chunk(args.chunk_id, reason=str(getattr(args, "reason", "") or "user_rejected")))
        if subcmd == "restore":
            return _print(restore_rejected_chunk(args.chunk_id))
        if subcmd == "submit":
            return _print(submit_chunk(args.chunk_id))
        if subcmd == "approve-all":
            return _print(
                approve_all(
                    risk=str(getattr(args, "risk", "") or ""),
                    limit=int(getattr(args, "limit", 50)),
                    apply=bool(getattr(args, "apply", False)),
                    submit=bool(getattr(args, "submit", False)),
                )
            )
        if subcmd == "reject-all":
            return _print(
                reject_all(
                    reason=str(getattr(args, "reason", "") or "bulk_rejected"),
                    risk=str(getattr(args, "risk", "") or ""),
                    limit=int(getattr(args, "limit", 50)),
                    apply=bool(getattr(args, "apply", False)),
                )
            )
        if subcmd == "export":
            return _print(export_review_items(limit=int(getattr(args, "limit", 100)), include_quarantine=bool(getattr(args, "include_quarantine", False))))
        # Any other (or missing) review sub-command lists the review queue;
        # pending items come before quarantined ones when choosing hints.
        items = list_review_chunks(limit=int(getattr(args, "limit", 50)))
        review_items = list(items.get("pending_review", []) or []) + list(items.get("quarantine", []) or [])
        return _print(_with_next_actions(items, _review_next_actions(review_items)))
    if cmd == "search":
        matches = search_chunks(args.query, limit=int(args.limit))
        if bool(getattr(args, "pretty", False)):
            return _print_pretty_search(args.query, matches)
        return _print({"ok": True, "query": args.query, "matches": matches, "next_actions": _search_next_actions(args.query, matches)})
    if cmd == "autosubmit":
        subcmd = str(getattr(args, "autosubmit_cmd", "") or "")
        if subcmd == "enable":
            return _print(set_auto_submit_summaries(True))
        if subcmd == "disable":
            return _print(set_auto_submit_summaries(False))
        if subcmd == "tier":
            return _print(set_auto_write_tier(str(getattr(args, "tier", "") or "")))
        if subcmd == "run":
            return _print(
                auto_submit_by_tier(
                    limit=int(getattr(args, "limit", 20)),
                    daily_cap=int(getattr(args, "daily_cap", 20)),
                    apply=bool(getattr(args, "apply", False)),
                )
            )
        if subcmd == "rollback":
            return _print(
                rollback_auto_writes(
                    since_hours=int(getattr(args, "since_hours", 1)),
                    limit=int(getattr(args, "limit", 100)),
                    apply=bool(getattr(args, "apply", False)),
                    source_system=str(getattr(args, "source_system", "absorb") or "absorb"),
                )
            )
        # Any other (or missing) autosubmit sub-command reports the current settings.
        cfg = load_absorb_config()
        return _print(
            {
                "ok": True,
                "auto_submit_summaries": bool(cfg.get("auto_submit_summaries", False)),
                "auto_write_tier": str(cfg.get("auto_write_tier", "OFF")),
            }
        )
    if cmd == "kg-extract":
        return _print(
            extract_absorb_knowledge_graph(
                limit=int(getattr(args, "limit", 50)),
                apply=bool(getattr(args, "apply", False)),
                force=bool(getattr(args, "force", False)),
            )
        )
    if cmd == "start":
        # Imported here rather than at module import time.
        from .fs_watcher import start_watch

        submit = True if bool(getattr(args, "submit_summaries", False)) else None
        out = start_watch(duration=getattr(args, "duration", None), submit_summaries=submit)
        roots = out.get("roots", [])
        # Missing counters in the watcher result are reported as 0.
        out["summary"] = (
            f"watched {len(roots)} root(s); "
            f"events={out.get('events', 0)} poll_scans={out.get('poll_scans', 0)} "
            f"processed={out.get('poll_processed', 0)}"
        )
        out["next_actions"] = ["ms8 absorb status", "ms8 absorb search <query> --pretty"]
        return _print(out)
    if cmd == "stop":
        from .fs_watcher import stop_watch

        return _print(stop_watch())
    # Unknown or missing sub-command: print the list of valid choices (exit code 1).
    return _print({"ok": False, "error": "choose add|remove|list|exclude|rescan|ingest|status|review|search|autosubmit|kg-extract|start|stop"})
ms8/absorb/fs_watcher.py ADDED
@@ -0,0 +1,176 @@
1
+ """Filesystem watcher for authorized absorb roots."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from .incremental_processor import process_delete, process_file, process_pending
11
+ from .repository import add_ingest_job, log_event, upsert_file_record
12
+ from .scope import DEFAULT_EXCLUDES, is_path_allowed, list_allowed_roots
13
+ from .spotlight_bootstrap import bootstrap_authorized_roots
14
+
15
# Path components that make the watcher skip a path; copied from the scope
# module's DEFAULT_EXCLUDES into a set for membership tests.
IGNORED_PARTS = set(DEFAULT_EXCLUDES)
# Logger named after this module.
logger = logging.getLogger(__name__)
17
+
18
+
19
def should_ignore_path(path: str | Path) -> bool:
    """Return True when the watcher must skip *path*.

    A path is skipped when any of its components is in ``IGNORED_PARTS``,
    when its final component starts with a dot, or when ``is_path_allowed``
    rejects it.
    """
    candidate = Path(path).expanduser()
    for component in candidate.parts:
        if component in IGNORED_PARTS:
            return True
    is_hidden = candidate.name.startswith(".")
    if is_hidden:
        return True
    allowed = is_path_allowed(candidate)
    return not allowed
26
+
27
+
28
def wait_until_file_stable(path: str | Path, *, checks: int = 3, interval: float = 0.2) -> bool:
    """Poll *path* until two consecutive size/mtime samples agree.

    Args:
        path: File to watch; ``~`` is expanded.
        checks: Maximum number of samples (at least one is always taken).
        interval: Seconds to sleep after each sample that did not settle.

    Returns:
        ``True`` as soon as two consecutive samples match, or after the last
        sample if the path is still a regular file; ``False`` when the path
        is missing, is not a regular file, or cannot be inspected.
    """
    import stat as stat_module

    p = Path(path).expanduser()
    last: tuple[int, float] | None = None
    for _ in range(max(1, checks)):
        # A single stat() replaces the former exists()/is_file()/stat() sequence:
        # the file may vanish between those calls, which used to raise
        # FileNotFoundError out of the watcher callback.
        try:
            info = p.stat()
        except OSError:
            return False
        if not stat_module.S_ISREG(info.st_mode):
            return False
        current = (info.st_size, info.st_mtime)
        if last is not None and current == last:
            return True
        last = current
        time.sleep(interval)
    try:
        return p.is_file()
    except OSError:
        return False
41
+
42
+
43
def event_to_file_record(event: Any) -> dict[str, Any]:
    """Build a file-record dict from a filesystem event.

    Reads ``src_path`` and ``event_type`` from *event* (defaulting to ``""``
    and ``"unknown"``).  The path is user-expanded and resolved; size and
    timestamps are taken from ``stat()`` and reported as 0 when the path
    cannot be inspected.
    """
    src = Path(getattr(event, "src_path", "")).expanduser().resolve()
    # stat() directly instead of exists()+stat(): the file can disappear in
    # between (temp files, delete events), which used to raise here.
    try:
        info = src.stat()
    except OSError:
        info = None
    return {
        "event_type": getattr(event, "event_type", "unknown"),
        "path": str(src),
        "canonical_path": str(src),
        "file_type": src.suffix.lower(),
        "size": info.st_size if info else 0,
        "mtime": info.st_mtime if info else 0,
        "ctime": info.st_ctime if info else 0,
        "source": "fs_watcher",
    }
56
+
57
+
58
def handle_event(event: Any, *, auto_ingest: bool = True, submit_summaries: bool | None = None) -> dict[str, Any]:
    """Process one filesystem event for the absorb watcher.

    Args:
        event: Event object carrying ``src_path`` and ``event_type``.
        auto_ingest: When true, hand the file to ``process_file`` right after
            queueing it and return that result.
        submit_summaries: Passed through to ``process_file``.

    Returns:
        The ``process_delete`` result for ``deleted`` events, an
        ``ignored`` / ``not_stable`` decision dict when the path is skipped,
        otherwise the ``process_file`` result or a ``queued`` decision dict.
    """
    record = event_to_file_record(event)
    event_type = str(record["event_type"])
    path = record["canonical_path"]
    if event_type == "deleted":
        return process_delete(path)
    if should_ignore_path(path):
        log_event("watch", path, "ignored", "outside_authorized_scope_or_excluded")
        return {"ok": False, "decision": "ignored", "record": record}
    if not wait_until_file_stable(path):
        return {"ok": False, "decision": "not_stable", "record": record}
    # The first snapshot was taken before waiting for the file to settle, so
    # its size/timestamps may describe a half-written file. Refresh them so
    # the stored record matches the file that is about to be parsed.
    fresh = event_to_file_record(event)
    for key in ("size", "mtime", "ctime"):
        record[key] = fresh[key]
    row = upsert_file_record(
        canonical_path=path,
        file_type=record["file_type"],
        size=int(record["size"]),
        mtime=float(record["mtime"]),
        ctime=float(record["ctime"]),
        status="READY_FOR_PARSE",
        source="fs_watcher",
    )
    add_ingest_job(row["file_id"], "parse", reason=f"watch:{event_type}")
    log_event("watch", path, "queued", event_type, file_id=row["file_id"])
    if auto_ingest:
        return process_file(path, submit_summaries=submit_summaries)
    return {"ok": True, "decision": "queued", "record": record, "file_id": row["file_id"]}
83
+
84
+
85
def start_watch(*, duration: float | None = None, submit_summaries: bool | None = None) -> dict[str, Any]:
    """Watch every authorized root in the foreground and ingest changes.

    Filesystem events are forwarded to ``handle_event``; in addition the
    roots are polled once at start, roughly every 5 seconds while running,
    and once more on shutdown.

    Args:
        duration: Seconds to run; ``None`` runs until interrupted (Ctrl-C).
        submit_summaries: Passed through to event handling and polling.

    Returns:
        A failure dict when no roots are authorized or ``watchdog`` is not
        installed; otherwise a ``stopped`` summary with the watched roots,
        elapsed seconds, forwarded event count and polling counters.
    """
    roots = [Path(p) for p in list_allowed_roots()]
    if not roots:
        return {"ok": False, "status": "no_authorized_roots", "reason": "run ms8 absorb add <dir> first"}
    try:
        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError:
        return {"ok": False, "status": "missing_dependency", "reason": "install ms8[absorb] for watchdog support"}

    events = 0

    class AbsorbHandler(FileSystemEventHandler):
        """Forward non-directory created/modified/deleted events."""

        def _forward(self, event: Any) -> None:
            nonlocal events
            if getattr(event, "is_directory", False):
                return
            events += 1
            handle_event(event, submit_summaries=submit_summaries)

        def on_created(self, event: Any) -> None:
            self._forward(event)

        def on_modified(self, event: Any) -> None:
            self._forward(event)

        def on_deleted(self, event: Any) -> None:
            self._forward(event)

    observer = Observer()
    handler = AbsorbHandler()
    for root in roots:
        observer.schedule(handler, str(root), recursive=True)
    observer.start()
    started_at = time.time()
    poll_scans = 0
    poll_processed = 0
    last_poll = started_at

    def _run_poll() -> None:
        # One scan + ingest pass over the authorized roots, with bookkeeping.
        nonlocal poll_scans, poll_processed
        polled = _poll_authorized_roots(submit_summaries=submit_summaries)
        poll_scans += 1
        poll_processed += int(polled.get("processed", 0) or 0)

    try:
        try:
            _run_poll()
            deadline = None if duration is None else started_at + max(0.0, float(duration))
            # Unbounded runs tick once per second; bounded runs tick faster so
            # the deadline is honoured more precisely.
            tick = 1 if deadline is None else 0.2
            while deadline is None or time.time() < deadline:
                time.sleep(tick)
                if time.time() - last_poll >= 5:
                    _run_poll()
                    last_poll = time.time()
        except KeyboardInterrupt:
            logger.info("absorb watcher interrupted by user")
        finally:
            try:
                _run_poll()
            except (OSError, ValueError, TypeError) as exc:
                log_event("watch", "", "poll_failed", str(exc))
    finally:
        # Always shut the observer thread down, even when the final poll
        # raises something unexpected or the user interrupts a second time.
        observer.stop()
        observer.join(timeout=5)
    return {
        "ok": True,
        "status": "stopped",
        "roots": [str(p) for p in roots],
        "duration": round(time.time() - started_at, 2),
        "events": events,
        "poll_scans": poll_scans,
        "poll_processed": poll_processed,
    }
157
+
158
+
159
def stop_watch() -> dict[str, Any]:
    """Return a static notice explaining how the watcher is stopped.

    Nothing is stopped here: the payload states that ``ms8 absorb start``
    runs in the foreground and names the command for the background service.
    """
    notice: dict[str, Any] = {"ok": True}
    notice["status"] = "foreground_only"
    notice["reason"] = "ms8 absorb start runs in the foreground and stops with Ctrl-C"
    notice["background_service_stop"] = "ms8 service absorb-remove"
    return notice
166
+
167
+
168
def _poll_authorized_roots(*, submit_summaries: bool | None = None, limit: int = 100) -> dict[str, Any]:
    """Rescan the authorized roots, ingest pending work, and summarise both.

    ``ok`` is true only when both the scan and the ingest report ``ok``;
    the counters default to 0 when absent or falsy.
    """
    scan_result = bootstrap_authorized_roots()
    ingest_result = process_pending(submit_summaries=submit_summaries, limit=limit)

    def _as_int(source: dict[str, Any], key: str) -> int:
        return int(source.get(key, 0) or 0)

    both_ok = bool(scan_result.get("ok", False)) and bool(ingest_result.get("ok", False))
    report: dict[str, Any] = {"ok": both_ok}
    report["discovered"] = _as_int(scan_result, "discovered")
    report["indexed"] = _as_int(scan_result, "indexed")
    report["processed"] = _as_int(ingest_result, "processed")
    return report
@@ -0,0 +1,144 @@
1
+ """Governance checks for absorb chunks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from .repository import quarantine_dir
12
+
13
# Named regexes for secret-like content; the key is the rule name.
SECRET_PATTERNS = dict(
    # PEM private-key header, with an optional RSA / OPENSSH / EC / ENCRYPTED tag.
    private_key=re.compile(r"-----BEGIN (?:RSA |OPENSSH |EC |ENCRYPTED )?PRIVATE KEY-----"),
    # "Bearer" followed by at least 20 token characters.
    bearer_token=re.compile(r"\bBearer\s+[A-Za-z0-9._\-]{20,}"),
    # Case-insensitive "api_key / secret / token / password" assignment with
    # a value of at least 8 non-whitespace characters.
    api_key=re.compile(r"(?i)\b(api[_-]?key|secret|token|password)\b\s*[:=]\s*[^\s]{8,}"),
    # 13 to 19 digits, optionally separated by spaces or hyphens.
    credit_card=re.compile(r"\b(?:\d[ -]*?){13,19}\b"),
)
19
# Named regexes for personal data; the key is the rule name.
PII_PATTERNS = dict(
    # local-part@domain.tld with a top-level domain of at least two letters.
    email=re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
    # Optional "+", then a run of digits, spaces and hyphens that starts and
    # ends on a digit (at least nine characters after the optional "+").
    phone=re.compile(r"\b(?:\+?\d[\d -]{7,}\d)\b"),
)
23
+
24
+
25
+ def _now() -> str:
26
+ return datetime.now(timezone.utc).isoformat()
27
+
28
+
29
def _redact(text: str) -> str:
    """Return a redacted preview of *text*, at most 300 characters long.

    Every match of every pattern in ``SECRET_PATTERNS`` and ``PII_PATTERNS``
    is replaced with ``[REDACTED_<RULE NAME>]``. Truncation happens after
    the substitutions. A falsy *text* yields an empty string.
    """
    preview = str(text or "")
    rules = dict(SECRET_PATTERNS)
    rules.update(PII_PATTERNS)
    for label, regex in rules.items():
        placeholder = f"[REDACTED_{label.upper()}]"
        preview = regex.sub(placeholder, preview)
    return preview[:300]
34
+
35
+
36
def run_absorb_governance(chunk: str, metadata: dict[str, Any]) -> dict[str, Any]:
    """Classify a chunk by the secret and PII patterns it matches.

    Args:
        chunk: Text to inspect; a falsy value is treated as an empty string.
        metadata: Accepted for the caller's convenience; not read here.

    Returns:
        A dict with ``decision``, ``risk_level``, ``matched_rule`` (comma
        separated rule names, empty when nothing matched),
        ``redacted_preview`` and ``reason``. Any secret match gives
        ``quarantine`` / ``high``; otherwise any PII match gives
        ``pending_review`` / ``medium``; otherwise ``local_index`` / ``low``.
    """
    body = str(chunk or "")

    def _matching(rules):
        # Names of the rules whose regex occurs anywhere in the chunk.
        return [label for label, regex in rules.items() if regex.search(body)]

    secrets_found = _matching(SECRET_PATTERNS)
    pii_found = _matching(PII_PATTERNS)

    # Secrets take precedence over PII.
    if secrets_found:
        decision, risk = "quarantine", "high"
        rule, reason = ",".join(secrets_found), "secret_or_financial_pattern"
    elif pii_found:
        decision, risk = "pending_review", "medium"
        rule, reason = ",".join(pii_found), "pii_requires_review"
    else:
        decision, risk = "local_index", "low"
        rule, reason = "", "low_risk"

    return {
        "decision": decision,
        "risk_level": risk,
        "matched_rule": rule,
        "redacted_preview": _redact(body),
        "reason": reason,
    }
63
+
64
+
65
def write_quarantine_metadata(
    *,
    file_id: str,
    chunk_index: int,
    source_path: str,
    content_hash: str,
    chunk_hash: str,
    governance: dict[str, Any],
) -> Path:
    """Write a JSON metadata file for one chunk into the quarantine directory.

    The directory returned by ``quarantine_dir()`` is created if needed and
    the file is named ``<file_id>_<chunk_index>.json``. The payload holds
    the source path, both hashes, a creation timestamp and the
    ``risk_level`` (stored as ``risk_type``), ``matched_rule``,
    ``redacted_preview``, ``decision`` and ``reason`` fields read from
    *governance*.

    Returns:
        Path of the written file.
    """
    quarantine_dir().mkdir(parents=True, exist_ok=True)
    destination = quarantine_dir() / f"{file_id}_{chunk_index}.json"

    record: dict[str, Any] = {
        "source_path": source_path,
        "content_hash": content_hash,
        "chunk_hash": chunk_hash,
    }
    record["risk_type"] = governance.get("risk_level", "unknown")
    for field in ("matched_rule", "redacted_preview"):
        record[field] = governance.get(field, "")
    record["created_at"] = _now()
    for field in ("decision", "reason"):
        record[field] = governance.get(field, "")

    serialized = json.dumps(record, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return destination
89
+
90
+
91
def submit_to_ms8_governed(summary_or_memory: str, metadata: dict[str, Any]) -> dict[str, Any]:
    """Submit a document summary through the public runtime write path.

    This remains opt-in from the CLI so absorb chunks do not flood main memory.

    Args:
        summary_or_memory: Text to store; it is stripped before writing.
        metadata: Absorb metadata to attach to the stored record. It is
            copied before ``source_system`` is added, so the caller's
            mapping is not modified.

    Returns:
        ``{"ok": False, "reason": "empty_summary"}`` when the stripped text
        is empty; otherwise ``{"ok": True, "record": ..., "metadata": ...}``
        where ``metadata`` is the object the caller passed in.
    """
    from ..runtime import ensure_runtime_dirs, write_memory

    summary = str(summary_or_memory or "").strip()
    if summary == "":
        return {"ok": False, "reason": "empty_summary"}

    absorb_meta = dict(metadata or {})
    absorb_meta.update(source_system="absorb")

    record = write_memory(summary, source="absorb")
    record_id = str(record.get("id", "") or "")
    if not record_id:
        return {"ok": True, "record": record, "metadata": metadata}

    # NOTE(review): the in-memory tagging below is assumed to run only when
    # the write returned an id, alongside the on-disk tagging; confirm.
    memories_file = ensure_runtime_dirs()["memories"]
    _tag_absorb_record(memories_file, record_id, absorb_meta)
    record.setdefault("meta", {})
    in_memory_meta = record["meta"]
    if isinstance(in_memory_meta, dict):
        in_memory_meta.update({"source_system": "absorb", "absorb": absorb_meta})
    return {"ok": True, "record": record, "metadata": metadata}
111
+
112
+
113
+ def _tag_absorb_record(records_file: Path, record_id: str, metadata: dict[str, Any]) -> bool:
114
+ """Tag an already-written main-memory record as absorb-originated.
115
+
116
+ The public runtime write API intentionally stays small (text/source only),
117
+ so absorb adds source metadata after the governed write succeeds.
118
+ """
119
+ if not records_file.exists():
120
+ return False
121
+ changed = False
122
+ lines: list[str] = []
123
+ for raw in records_file.read_text(encoding="utf-8", errors="ignore").splitlines():
124
+ if not raw.strip():
125
+ continue
126
+ try:
127
+ row = json.loads(raw)
128
+ except (TypeError, ValueError, json.JSONDecodeError):
129
+ lines.append(raw)
130
+ continue
131
+ if isinstance(row, dict) and str(row.get("id", "") or "") == record_id:
132
+ meta = row.setdefault("meta", {})
133
+ if not isinstance(meta, dict):
134
+ meta = {}
135
+ row["meta"] = meta
136
+ meta["source_system"] = "absorb"
137
+ meta["absorb"] = dict(metadata or {})
138
+ changed = True
139
+ lines.append(json.dumps(row, ensure_ascii=False) if isinstance(row, dict) else raw)
140
+ if changed:
141
+ tmp = records_file.with_suffix(records_file.suffix + ".absorb_tag_tmp")
142
+ tmp.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")
143
+ tmp.replace(records_file)
144
+ return changed