PyPI - ms8-macos - Versions diffs - 0.2.14__py3-none-any.whl - Mend

ms8-macos 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (239) hide show

ms8/__init__.py +8 -0
ms8/__main__.py +15 -0
ms8/absorb/__init__.py +7 -0
ms8/absorb/chunker.py +44 -0
ms8/absorb/cli.py +241 -0
ms8/absorb/fs_watcher.py +176 -0
ms8/absorb/governance.py +144 -0
ms8/absorb/health.py +61 -0
ms8/absorb/incremental_processor.py +187 -0
ms8/absorb/kg.py +126 -0
ms8/absorb/ocr.py +56 -0
ms8/absorb/parser.py +176 -0
ms8/absorb/project_memory/__init__.py +10 -0
ms8/absorb/project_memory/cli.py +206 -0
ms8/absorb/project_memory/generator.py +613 -0
ms8/absorb/project_memory/health.py +397 -0
ms8/absorb/project_memory/parser.py +67 -0
ms8/absorb/project_memory/repository.py +252 -0
ms8/absorb/project_memory/scanner.py +169 -0
ms8/absorb/project_memory/scope.py +366 -0
ms8/absorb/project_memory/search.py +205 -0
ms8/absorb/project_memory/submit.py +95 -0
ms8/absorb/project_memory/watch.py +282 -0
ms8/absorb/repository.py +508 -0
ms8/absorb/reviewer.py +353 -0
ms8/absorb/scope.py +184 -0
ms8/absorb/search.py +136 -0
ms8/absorb/spotlight_bootstrap.py +82 -0
ms8/agent_native/__init__.py +6 -0
ms8/agent_native/agent_cli.py +600 -0
ms8/agent_native/onboarding.py +295 -0
ms8/agent_native/permission.py +65 -0
ms8/agent_native/report.py +16 -0
ms8/agent_native/task_spec.py +12 -0
ms8/agent_native/task_templates.py +326 -0
ms8/app/__init__.py +5 -0
ms8/app/classifier/__init__.py +3 -0
ms8/app/classifier/context_builder.py +48 -0
ms8/app/classifier/hybrid_classifier.py +82 -0
ms8/app/classifier/llm_classifier.py +39 -0
ms8/app/classifier/rule_classifier.py +27 -0
ms8/app/classifier/threshold_manager.py +25 -0
ms8/app/config.py +152 -0
ms8/app/extractors/__init__.py +0 -0
ms8/app/extractors/action_extractor.py +66 -0
ms8/app/extractors/entity_extractor.py +121 -0
ms8/app/extractors/technical_extractor.py +9 -0
ms8/app/feedback/__init__.py +0 -0
ms8/app/feedback/feedback_service.py +47 -0
ms8/app/feedback/rule_optimizer.py +117 -0
ms8/app/integrations/__init__.py +0 -0
ms8/app/integrations/ollama_client.py +57 -0
ms8/app/main.py +12 -0
ms8/app/memory/__init__.py +0 -0
ms8/app/memory/indexer.py +230 -0
ms8/app/memory/models.py +9 -0
ms8/app/memory/repository.py +170 -0
ms8/app/memory/search.py +48 -0
ms8/app/observability/__init__.py +0 -0
ms8/app/observability/logger.py +44 -0
ms8/app/observability/metrics.py +14 -0
ms8/app/observability/trace.py +16 -0
ms8/app/pipeline/__init__.py +37 -0
ms8/app/pipeline/consistency.py +8 -0
ms8/app/pipeline/decision.py +19 -0
ms8/app/pipeline/dedupe.py +51 -0
ms8/app/pipeline/memory_admission_engine.py +242 -0
ms8/app/pipeline/memory_pipeline.py +414 -0
ms8/app/pipeline/quality_gate.py +28 -0
ms8/app/pipeline/risk_scoring.py +45 -0
ms8/app/review/__init__.py +0 -0
ms8/app/review/batch_review.py +112 -0
ms8/app/review/review_service.py +82 -0
ms8/app/rules/__init__.py +3 -0
ms8/app/rules/base.py +53 -0
ms8/app/rules/block_rules.py +177 -0
ms8/app/rules/category_rules.py +58 -0
ms8/app/rules/conflict_rules.py +72 -0
ms8/app/rules/dedupe_rules.py +27 -0
ms8/app/rules/extraction_rules.py +14 -0
ms8/app/rules/preprocess_rules.py +41 -0
ms8/app/rules/privacy_rules.py +99 -0
ms8/app/rules/registry.py +19 -0
ms8/app/rules/tag_rules.py +12 -0
ms8/app/schemas/__init__.py +0 -0
ms8/app/schemas/feedback_schema.py +16 -0
ms8/app/schemas/pipeline_schema.py +70 -0
ms8/app/schemas/review_schema.py +37 -0
ms8/ask.py +55 -0
ms8/cli.py +1956 -0
ms8/compression_governance.py +133 -0
ms8/connect/AGENTS.md +63 -0
ms8/connect/CONNECT_GUIDE.md +86 -0
ms8/connect/__init__.py +13 -0
ms8/connect/adapter_registry/__init__.py +4 -0
ms8/connect/adapter_registry/adapters.json +18 -0
ms8/connect/adapter_registry/registry.py +92 -0
ms8/connect/adapter_registry/scan_tools.py +23 -0
ms8/connect/config/mcp_config.yaml +2 -0
ms8/connect/integration_hooks/service_models.py +23 -0
ms8/connect/local_llm_adapter/__init__.py +1 -0
ms8/connect/local_llm_adapter/adapter_llm.py +54 -0
ms8/connect/mcp_server/__init__.py +13 -0
ms8/connect/mcp_server/mcp_server.py +392 -0
ms8/connect/mcp_server/memory_access_policy.py +87 -0
ms8/connect/mcp_server/memory_service_interface.py +736 -0
ms8/connect/mcp_server/stdio_server.py +330 -0
ms8/connect/profiles/README.md +29 -0
ms8/connect/scripts/__init__.py +2 -0
ms8/connect/scripts/apply_client_configs.py +160 -0
ms8/connect/scripts/bootstrap.py +542 -0
ms8/connect/scripts/client_config.py +504 -0
ms8/connect/scripts/common.py +118 -0
ms8/connect/scripts/connect.py +196 -0
ms8/connect/scripts/generate_client_configs.py +44 -0
ms8/connect/scripts/install_env.py +20 -0
ms8/connect/scripts/rollback_client_configs.py +108 -0
ms8/connect/scripts/scan_register.py +26 -0
ms8/connect/scripts/smoke_test.py +72 -0
ms8/connect/scripts/status.py +149 -0
ms8/connect/scripts/verify_client_configs.py +131 -0
ms8/dashboard.py +252 -0
ms8/demo.py +75 -0
ms8/doctor.py +791 -0
ms8/engine.py +724 -0
ms8/engine_core/__init__.py +10 -0
ms8/engine_core/admission_compat.py +78 -0
ms8/engine_core/agent_skills_standard.py +399 -0
ms8/engine_core/auto_memory.py +695 -0
ms8/engine_core/built_in_skills.py +526 -0
ms8/engine_core/config.py +1170 -0
ms8/engine_core/context_material.py +424 -0
ms8/engine_core/context_understanding.py +555 -0
ms8/engine_core/core.py +4974 -0
ms8/engine_core/enhanced_self_improvement.py +373 -0
ms8/engine_core/enhanced_subagents.py +684 -0
ms8/engine_core/expression_preference_profile.py +172 -0
ms8/engine_core/file_store.py +54 -0
ms8/engine_core/file_write_guard.py +108 -0
ms8/engine_core/git_utils.py +211 -0
ms8/engine_core/governance.py +168 -0
ms8/engine_core/knowledge_arbitration.py +113 -0
ms8/engine_core/knowledge_feedback.py +330 -0
ms8/engine_core/knowledge_graph.py +3271 -0
ms8/engine_core/knowledge_rules.py +54 -0
ms8/engine_core/learning.py +983 -0
ms8/engine_core/license.py +229 -0
ms8/engine_core/local_llm.py +964 -0
ms8/engine_core/maintenance/__init__.py +1 -0
ms8/engine_core/maintenance/self_check/__init__.py +10 -0
ms8/engine_core/maintenance/self_check/check_runner.py +405 -0
ms8/engine_core/maintenance/self_check/check_specs.py +3231 -0
ms8/engine_core/maintenance/self_check/reporter.py +884 -0
ms8/engine_core/maintenance/self_repair/__init__.py +13 -0
ms8/engine_core/maintenance/self_repair/repair_audit.py +151 -0
ms8/engine_core/maintenance/self_repair/repair_cli.py +74 -0
ms8/engine_core/maintenance/self_repair/repair_orchestrator.py +176 -0
ms8/engine_core/maintenance/self_repair/repair_policies.py +1245 -0
ms8/engine_core/maintenance/self_repair/repair_runner.py +1037 -0
ms8/engine_core/maintenance/self_repair/repair_schema.py +87 -0
ms8/engine_core/maintenance/self_repair/repair_validator.py +33 -0
ms8/engine_core/maintenance_manager.py +581 -0
ms8/engine_core/maintenance_policy.py +820 -0
ms8/engine_core/memory_blocks.py +110 -0
ms8/engine_core/memory_section_parser.py +96 -0
ms8/engine_core/meta_cognition.py +847 -0
ms8/engine_core/metrics_contract.py +58 -0
ms8/engine_core/monitoring.py +1117 -0
ms8/engine_core/pattern_recognition.py +562 -0
ms8/engine_core/policy_engine_iface.py +54 -0
ms8/engine_core/policy_engine_loader.py +213 -0
ms8/engine_core/policy_engine_open.py +286 -0
ms8/engine_core/priority_engine.py +217 -0
ms8/engine_core/record_gateway.py +46 -0
ms8/engine_core/response_mode_router.py +336 -0
ms8/engine_core/response_mode_types.py +77 -0
ms8/engine_core/security/__init__.py +21 -0
ms8/engine_core/security/cli.py +11 -0
ms8/engine_core/security/crypto_manager.py +6 -0
ms8/engine_core/security/encryption/__init__.py +15 -0
ms8/engine_core/security/encryption/cli.py +86 -0
ms8/engine_core/security/encryption/crypto_manager.py +267 -0
ms8/engine_core/security/encryption/file_crypto.py +61 -0
ms8/engine_core/security/encryption/key_manager.py +261 -0
ms8/engine_core/security/encryption/recovery.py +23 -0
ms8/engine_core/security/encryption/security_schema.py +55 -0
ms8/engine_core/security/file_crypto.py +6 -0
ms8/engine_core/security/key_manager.py +6 -0
ms8/engine_core/security/recovery.py +6 -0
ms8/engine_core/security/security_schema.py +6 -0
ms8/engine_core/security/shadow/__init__.py +10 -0
ms8/engine_core/security/shadow/shadow_audit.py +42 -0
ms8/engine_core/security/shadow/shadow_capacity_guard.py +66 -0
ms8/engine_core/security/shadow/shadow_checkpoint_guard.py +54 -0
ms8/engine_core/security/shadow/shadow_cli.py +258 -0
ms8/engine_core/security/shadow/shadow_control_gate.py +164 -0
ms8/engine_core/security/shadow/shadow_fs_guard.py +25 -0
ms8/engine_core/security/shadow/shadow_guard.py +1563 -0
ms8/engine_core/security/shadow/shadow_ledger.py +727 -0
ms8/engine_core/security/shadow/shadow_locking.py +80 -0
ms8/engine_core/security/shadow/shadow_manifest_guard.py +247 -0
ms8/engine_core/security/shadow/shadow_permissions.py +83 -0
ms8/engine_core/security/shadow/shadow_platform_log.py +20 -0
ms8/engine_core/security/shadow/shadow_quarantine.py +33 -0
ms8/engine_core/security/shadow/shadow_recovery.py +145 -0
ms8/engine_core/security/shadow/shadow_recovery_guard.py +275 -0
ms8/engine_core/security/shadow/shadow_schema.py +101 -0
ms8/engine_core/security/shadow/shadow_seal.py +297 -0
ms8/engine_core/security/shadow/shadow_tokens.py +53 -0
ms8/engine_core/self_improvement.py +710 -0
ms8/engine_core/semantic_search.py +289 -0
ms8/engine_core/skill_github_discovery.py +546 -0
ms8/engine_core/skill_marketplace.py +342 -0
ms8/engine_core/skill_search_index.py +410 -0
ms8/engine_core/skills.py +549 -0
ms8/engine_core/sqlite_store.py +385 -0
ms8/engine_core/sticky_prompt_templates.py +85 -0
ms8/engine_core/subagents.py +237 -0
ms8/engine_core/synthetic_memory.py +1444 -0
ms8/engine_core/threshold_cli.py +98 -0
ms8/engine_core/utils.py +74 -0
ms8/engine_core/whoosh_search.py +213 -0
ms8/engine_core/working_memory.py +286 -0
ms8/lifecycle.py +245 -0
ms8/onboarding.py +127 -0
ms8/paths.py +71 -0
ms8/record_policy.py +439 -0
ms8/review_governance.py +232 -0
ms8/runtime.py +2745 -0
ms8/service.py +215 -0
ms8/service_platform.py +545 -0
ms8/shortcut.py +76 -0
ms8/watch.py +170 -0
ms8_macos-0.2.14.dist-info/METADATA +525 -0
ms8_macos-0.2.14.dist-info/RECORD +239 -0
ms8_macos-0.2.14.dist-info/WHEEL +5 -0
ms8_macos-0.2.14.dist-info/entry_points.txt +2 -0
ms8_macos-0.2.14.dist-info/licenses/LICENSE +674 -0
ms8_macos-0.2.14.dist-info/top_level.txt +1 -0

ms8/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""MS8 package."""
+from importlib.metadata import PackageNotFoundError, version
+# This wheel is published as the ``ms8-macos`` distribution while importing as
+# ``ms8``; querying only "ms8" would always hit the literal fallback unless a
+# separate ``ms8`` distribution happens to be installed. Try both names.
+__version__ = "0.2.14"  # source tree fallback
+for _dist_name in ("ms8", "ms8-macos"):
+    try:
+        __version__ = version(_dist_name)
+    except PackageNotFoundError:  # pragma: no cover - not installed under this name
+        continue
+    break

ms8/__main__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Module entrypoint for python -m ms8."""
+import sys
+from .cli import main
+if __name__ == "__main__":
+    # Run the CLI first; its integer result becomes the process exit status.
+    exit_status = int(main())
+    try:
+        for stream in (sys.stdout, sys.stderr):
+            stream.flush()
+    except OSError:
+        # Flushing at exit is best-effort; only I/O errors are swallowed here.
+        pass
+    sys.exit(exit_status)

ms8/absorb/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Authorized local document absorption for MS8."""
+from __future__ import annotations
+from .. import __version__ as __version__
+__all__ = ["__version__"]

ms8/absorb/chunker.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Text chunking helpers for absorb."""
+from __future__ import annotations
+import hashlib
+import re
+def estimate_tokens(text: str) -> int:
+    """Return a rough token estimate for mixed CJK / Latin text (never below 1).
+    Each CJK ideograph (U+4E00..U+9FFF) and each ``[A-Za-z0-9_]+`` run counts as
+    one token; every four remaining characters count as one more.
+    """
+    value = str(text or "")
+    cjk = len(re.findall(r"[\u4e00-\u9fff]", value))
+    # Scan the word runs once and reuse them for both the run count and the
+    # number of characters they cover.
+    word_runs = re.findall(r"[A-Za-z0-9_]+", value)
+    word_chars = sum(len(run) for run in word_runs)
+    other = max(0, len(value) - cjk - word_chars)
+    return max(1, cjk + len(word_runs) + other // 4)
+def make_chunk_hash(text: str) -> str:
+    """Return the hex SHA-256 digest of the chunk's UTF-8 bytes; falsy input hashes as the empty string."""
+    payload = str(text or "").encode("utf-8")
+    digest = hashlib.sha256()
+    digest.update(payload)
+    return digest.hexdigest()
+def _slice_by_chars(text: str, max_tokens: int) -> int:
+    """Turn a token budget into a character budget: three characters per token, at least 1.
+    ``text`` is accepted but not consulted.
+    """
+    # Conservative approximation of the mixed-language token/character ratio.
+    char_budget = max_tokens * 3
+    return char_budget if char_budget > 1 else 1
+def split_text(text: str, max_tokens: int = 512, overlap_tokens: int = 64) -> list[str]:
+    """Split ``text`` into overlapping character windows sized from a token budget.
+    Returns ``[]`` for blank input and a single-element list when the estimated
+    token count already fits ``max_tokens``. Otherwise the stripped text is cut
+    into windows of ``_slice_by_chars(max_tokens)`` characters, each starting
+    ``char_overlap`` characters before the previous window ended; windows that
+    are empty after stripping are dropped.
+    """
+    value = str(text or "").strip()
+    if not value:
+        return []
+    if estimate_tokens(value) <= max_tokens:
+        return [value]
+    char_window = _slice_by_chars(value, max_tokens)
+    # The overlap must stay strictly smaller than the window: otherwise
+    # ``start`` never advances and the loop appends chunks forever
+    # (e.g. overlap_tokens >= max_tokens).
+    char_overlap = min(_slice_by_chars(value, overlap_tokens), char_window - 1)
+    chunks: list[str] = []
+    start = 0
+    while start < len(value):
+        end = min(len(value), start + char_window)
+        chunk = value[start:end].strip()
+        if chunk:
+            chunks.append(chunk)
+        if end >= len(value):
+            break
+        # end - char_overlap >= start + 1 because of the clamp above.
+        start = end - char_overlap
+    return chunks

ms8/absorb/cli.py ADDED Viewed

@@ -0,0 +1,241 @@
+"""CLI dispatch for ms8 absorb."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+from .health import absorb_health_summary
+from .incremental_processor import process_pending
+from .kg import extract_absorb_knowledge_graph
+from .repository import init_repository
+from .reviewer import (
+    approve_all,
+    approve_chunk,
+    auto_submit_by_tier,
+    export_review_items,
+    list_review_chunks,
+    reject_all,
+    reject_chunk,
+    restore_rejected_chunk,
+    rollback_auto_writes,
+    submit_chunk,
+)
+from .scope import (
+    add_allowed_root,
+    add_exclude_pattern,
+    list_allowed_roots,
+    load_absorb_config,
+    remove_allowed_root,
+    set_auto_submit_summaries,
+    set_auto_write_tier,
+)
+from .search import search_chunks
+from .spotlight_bootstrap import bootstrap_authorized_roots
+def _print(payload: dict) -> int:
+    """Write ``payload`` to stdout as indented JSON and return 0 when its ``ok`` flag (default true) is truthy, else 1."""
+    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
+    print(rendered)
+    if payload.get("ok", True):
+        return 0
+    return 1
+def _privacy_note() -> str:
+    """Return the fixed privacy notice attached to the ``absorb add`` output."""
+    note = "Absorb indexes authorized local files only; main memory writes require explicit submit/autosubmit opt-in."
+    return note
+def _status_next_actions(summary: dict[str, Any]) -> list[str]:
+    """Suggest follow-up commands for a status summary.
+    With no authorized roots the only suggestion is to add one; otherwise review
+    hints (when items are pending or quarantined) precede the rescan/ingest/search trio.
+    """
+    roots, pending, quarantine = (
+        int(summary.get(key, 0) or 0)
+        for key in ("authorized_roots", "pending_review", "quarantine")
+    )
+    if roots <= 0:
+        return ["ms8 absorb add <directory>"]
+    suggestions: list[str] = []
+    if pending:
+        suggestions.append("ms8 absorb review list")
+    if quarantine:
+        suggestions.append("ms8 absorb review export --include-quarantine")
+    return suggestions + ["ms8 absorb rescan", "ms8 absorb ingest", "ms8 absorb search <query> --pretty"]
+def _review_next_actions(items: list[dict[str, Any]]) -> list[str]:
+    """Suggest follow-up commands based on the first review item (or ``status`` when there is none)."""
+    if not items:
+        return ["ms8 absorb status"]
+    head = items[0]
+    chunk_ref = str(head.get("chunk_id", "") or "<chunk_id>")
+    state = str(head.get("status", "") or "")
+    # Quarantined items can only be exported, not approved or rejected, from here.
+    if state == "QUARANTINED":
+        return ["ms8 absorb review export --include-quarantine"]
+    approve_hint = f"ms8 absorb review approve {chunk_ref}"
+    reject_hint = f"ms8 absorb review reject {chunk_ref} --reason <reason>"
+    return [approve_hint, reject_hint]
+def _search_next_actions(query: str, matches: list[dict[str, Any]]) -> list[str]:
+    """Suggest follow-up commands for a search: ask/submit when there are hits, otherwise re-index and retry."""
+    if matches:
+        return [f'ms8 ask "{query}"', "ms8 absorb review submit <chunk_id>"]
+    retry_hint = f'ms8 absorb search "{query}" --pretty'
+    return ["ms8 absorb rescan", "ms8 absorb ingest", retry_hint]
+def _with_next_actions(payload: dict[str, Any], actions: list[str]) -> dict[str, Any]:
+    """Attach ``actions`` under ``next_actions`` unless the payload already has that key; return the same dict."""
+    if "next_actions" not in payload:
+        payload["next_actions"] = actions
+    return payload
+def _print_pretty_search(query: str, matches: list[dict[str, Any]]) -> int:
+    """Print search results as a human-readable listing followed by suggested next actions; always returns 0."""
+    def _field(row: dict[str, Any], key: str) -> str:
+        # Missing or falsy values are rendered as an empty string.
+        return str(row.get(key, "") or "")
+    print("MS8_ABSORB_SEARCH")
+    print(f"query={query}")
+    print(f"matches={len(matches)}")
+    for position, row in enumerate(matches, start=1):
+        location = _field(row, "canonical_path")
+        # Prefer the file name, then the full path, then the chunk id as the heading.
+        heading = Path(location).name or location or str(row.get("chunk_id", ""))
+        kind = _field(row, "file_type")
+        state = _field(row, "status")
+        risk_level = _field(row, "risk_level")
+        engine = _field(row, "search_backend")
+        relevance = row.get("score", "")
+        print("")
+        print(f"{position}. {heading} {kind}".rstrip())
+        print(f"   status={state} risk={risk_level} backend={engine} score={relevance}")
+        if location:
+            print(f"   path={location}")
+        snippet = _field(row, "text_preview").replace("\n", " ").strip()
+        if snippet:
+            # Previews are capped at 220 characters.
+            print(f"   preview={snippet[:220]}")
+    print("")
+    print("next_actions:")
+    for hint in _search_next_actions(query, matches):
+        print(f"- {hint}")
+    return 0
+def run_absorb_cli(args) -> int:
+    # Dispatch a parsed ``ms8 absorb`` invocation and return a process exit code.
+    # ``args`` is an argparse-style namespace; the sub-command is read from
+    # ``args.absorb_cmd``. Every branch except ``search --pretty`` prints a JSON
+    # payload through ``_print``, which returns 0 unless the payload's ``ok`` is falsy.
+    # The repository is initialised on every invocation, before any dispatch.
+    init_repository()
+    cmd = str(getattr(args, "absorb_cmd", "") or "")
+    # --- scope management: add / remove / list / exclude ---
+    if cmd == "add":
+        out = add_allowed_root(args.path, confirm_high_risk=bool(getattr(args, "confirm_high_risk", False)))
+        out["privacy_note"] = _privacy_note()
+        return _print(_with_next_actions(out, ["ms8 absorb rescan", "ms8 absorb ingest", "ms8 absorb start"]))
+    if cmd == "remove":
+        return _print(_with_next_actions(remove_allowed_root(args.path), ["ms8 absorb status"]))
+    if cmd == "list":
+        cfg = load_absorb_config()
+        roots = list_allowed_roots()
+        return _print(
+            {
+                "ok": True,
+                "allowed_roots": roots,
+                "exclude_patterns": cfg.get("exclude_patterns", []),
+                "next_actions": ["ms8 absorb add <directory>"] if not roots else ["ms8 absorb rescan", "ms8 absorb ingest"],
+            }
+        )
+    if cmd == "exclude":
+        # Only the "add" sub-command is handled; anything else is reported as an error.
+        if str(getattr(args, "exclude_cmd", "") or "") == "add":
+            return _print(add_exclude_pattern(args.pattern))
+        return _print({"ok": False, "error": "choose exclude add"})
+    # --- indexing: rescan / ingest / status ---
+    if cmd == "rescan":
+        return _print(bootstrap_authorized_roots())
+    if cmd == "ingest":
+        # The flag maps to True when set and to None (not False) when unset.
+        submit = True if bool(getattr(args, "submit_summaries", False)) else None
+        return _print(process_pending(submit_summaries=submit, limit=int(args.limit)))
+    if cmd == "status":
+        summary = absorb_health_summary()
+        # Mirror the files/chunks sections under a single "counts" key.
+        summary["counts"] = {"files": summary.get("files", {}), "chunks": summary.get("chunks", {})}
+        summary["next_actions"] = _status_next_actions(summary)
+        return _print(summary)
+    # --- review queue ---
+    if cmd == "review":
+        subcmd = str(getattr(args, "review_cmd", "") or "")
+        if subcmd == "approve":
+            return _print(approve_chunk(args.chunk_id, submit=bool(getattr(args, "submit", False))))
+        if subcmd == "reject":
+            return _print(reject_chunk(args.chunk_id, reason=str(getattr(args, "reason", "") or "user_rejected")))
+        if subcmd == "restore":
+            return _print(restore_rejected_chunk(args.chunk_id))
+        if subcmd == "submit":
+            return _print(submit_chunk(args.chunk_id))
+        if subcmd == "approve-all":
+            return _print(
+                approve_all(
+                    risk=str(getattr(args, "risk", "") or ""),
+                    limit=int(getattr(args, "limit", 50)),
+                    apply=bool(getattr(args, "apply", False)),
+                    submit=bool(getattr(args, "submit", False)),
+                )
+            )
+        if subcmd == "reject-all":
+            return _print(
+                reject_all(
+                    reason=str(getattr(args, "reason", "") or "bulk_rejected"),
+                    risk=str(getattr(args, "risk", "") or ""),
+                    limit=int(getattr(args, "limit", 50)),
+                    apply=bool(getattr(args, "apply", False)),
+                )
+            )
+        if subcmd == "export":
+            return _print(export_review_items(limit=int(getattr(args, "limit", 100)), include_quarantine=bool(getattr(args, "include_quarantine", False))))
+        # Any other (or missing) review sub-command lists the queue.
+        items = list_review_chunks(limit=int(getattr(args, "limit", 50)))
+        review_items = list(items.get("pending_review", []) or []) + list(items.get("quarantine", []) or [])
+        return _print(_with_next_actions(items, _review_next_actions(review_items)))
+    # --- search ---
+    if cmd == "search":
+        matches = search_chunks(args.query, limit=int(args.limit))
+        if bool(getattr(args, "pretty", False)):
+            return _print_pretty_search(args.query, matches)
+        return _print({"ok": True, "query": args.query, "matches": matches, "next_actions": _search_next_actions(args.query, matches)})
+    # --- auto-submit configuration and runs ---
+    if cmd == "autosubmit":
+        subcmd = str(getattr(args, "autosubmit_cmd", "") or "")
+        if subcmd == "enable":
+            return _print(set_auto_submit_summaries(True))
+        if subcmd == "disable":
+            return _print(set_auto_submit_summaries(False))
+        if subcmd == "tier":
+            return _print(set_auto_write_tier(str(getattr(args, "tier", "") or "")))
+        if subcmd == "run":
+            return _print(
+                auto_submit_by_tier(
+                    limit=int(getattr(args, "limit", 20)),
+                    daily_cap=int(getattr(args, "daily_cap", 20)),
+                    apply=bool(getattr(args, "apply", False)),
+                )
+            )
+        if subcmd == "rollback":
+            return _print(
+                rollback_auto_writes(
+                    since_hours=int(getattr(args, "since_hours", 1)),
+                    limit=int(getattr(args, "limit", 100)),
+                    apply=bool(getattr(args, "apply", False)),
+                    source_system=str(getattr(args, "source_system", "absorb") or "absorb"),
+                )
+            )
+        # Any other (or missing) autosubmit sub-command reports the current settings.
+        cfg = load_absorb_config()
+        return _print(
+            {
+                "ok": True,
+                "auto_submit_summaries": bool(cfg.get("auto_submit_summaries", False)),
+                "auto_write_tier": str(cfg.get("auto_write_tier", "OFF")),
+            }
+        )
+    # --- knowledge-graph extraction ---
+    if cmd == "kg-extract":
+        return _print(
+            extract_absorb_knowledge_graph(
+                limit=int(getattr(args, "limit", 50)),
+                apply=bool(getattr(args, "apply", False)),
+                force=bool(getattr(args, "force", False)),
+            )
+        )
+    # --- watcher ---
+    if cmd == "start":
+        # fs_watcher is imported only when the watcher commands are used.
+        from .fs_watcher import start_watch
+        submit = True if bool(getattr(args, "submit_summaries", False)) else None
+        out = start_watch(duration=getattr(args, "duration", None), submit_summaries=submit)
+        roots = out.get("roots", [])
+        # Missing counters in the watcher result are rendered as 0.
+        out["summary"] = (
+            f"watched {len(roots)} root(s); "
+            f"events={out.get('events', 0)} poll_scans={out.get('poll_scans', 0)} "
+            f"processed={out.get('poll_processed', 0)}"
+        )
+        out["next_actions"] = ["ms8 absorb status", "ms8 absorb search <query> --pretty"]
+        return _print(out)
+    if cmd == "stop":
+        from .fs_watcher import stop_watch
+        return _print(stop_watch())
+    # Unknown or missing sub-command: print the list of valid choices (exit code 1).
+    return _print({"ok": False, "error": "choose add|remove|list|exclude|rescan|ingest|status|review|search|autosubmit|kg-extract|start|stop"})

ms8/absorb/fs_watcher.py ADDED Viewed

@@ -0,0 +1,176 @@
+"""Filesystem watcher for authorized absorb roots."""
+from __future__ import annotations
+import logging
+import time
+from pathlib import Path
+from typing import Any
+from .incremental_processor import process_delete, process_file, process_pending
+from .repository import add_ingest_job, log_event, upsert_file_record
+from .scope import DEFAULT_EXCLUDES, is_path_allowed, list_allowed_roots
+from .spotlight_bootstrap import bootstrap_authorized_roots
+# Path components that make the watcher skip a path; a set copy of the scope module's DEFAULT_EXCLUDES.
+IGNORED_PARTS = set(DEFAULT_EXCLUDES)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+def should_ignore_path(path: str | Path) -> bool:
+    """Return True when the watcher must skip ``path``.
+    A path is skipped if any component is in ``IGNORED_PARTS``, if its final
+    component starts with a dot, or if ``is_path_allowed`` rejects it.
+    """
+    candidate = Path(path).expanduser()
+    has_excluded_part = not IGNORED_PARTS.isdisjoint(candidate.parts)
+    if has_excluded_part or candidate.name.startswith("."):
+        return True
+    allowed = is_path_allowed(candidate)
+    return not allowed
+def wait_until_file_stable(path: str | Path, *, checks: int = 3, interval: float = 0.2) -> bool:
+    """Poll ``path`` until two consecutive samples show the same size and mtime.
+    Takes up to ``max(1, checks)`` samples, sleeping ``interval`` seconds after
+    each one. Returns True as soon as a sample equals the previous one, and
+    False as soon as the path is not a regular file or can no longer be stat'ed.
+    If the samples never settle, the result is simply whether the path is still
+    a regular file once the last sleep has finished.
+    """
+    p = Path(path).expanduser()
+    last: tuple[int, float] | None = None
+    for _ in range(max(1, checks)):
+        if not p.is_file():
+            return False
+        try:
+            stat = p.stat()
+        except OSError:
+            # The file vanished (or became unreadable) between the is_file()
+            # check and the stat() call; report "not stable" instead of raising.
+            return False
+        current = (stat.st_size, stat.st_mtime)
+        if last is not None and current == last:
+            return True
+        last = current
+        time.sleep(interval)
+    return p.is_file()
+def event_to_file_record(event: Any) -> dict[str, Any]:
+    """Build a plain-dict file record from a watcher event.
+    Reads ``event.src_path`` (resolved to an absolute path) and
+    ``event.event_type`` (default ``"unknown"``). Size and timestamps are 0
+    when the path cannot be stat'ed, e.g. because it was deleted.
+    """
+    src = Path(getattr(event, "src_path", "")).expanduser().resolve()
+    try:
+        # A single stat() call instead of exists()-then-stat(): the file may
+        # disappear between the two calls and the resulting OSError would
+        # escape from the watcher callback.
+        stat = src.stat()
+    except OSError:
+        stat = None
+    return {
+        "event_type": getattr(event, "event_type", "unknown"),
+        "path": str(src),
+        "canonical_path": str(src),
+        "file_type": src.suffix.lower(),
+        "size": stat.st_size if stat else 0,
+        "mtime": stat.st_mtime if stat else 0,
+        "ctime": stat.st_ctime if stat else 0,
+        "source": "fs_watcher",
+    }
+def handle_event(event: Any, *, auto_ingest: bool = True, submit_summaries: bool | None = None) -> dict[str, Any]:
+    """Process one watcher event and return a result dict.
+    Deleted events go straight to ``process_delete``. Other events are dropped
+    when the path is ignored or does not become stable; otherwise the file is
+    recorded as READY_FOR_PARSE, a parse job is queued, and, when
+    ``auto_ingest`` is true, the file is processed immediately.
+    """
+    record = event_to_file_record(event)
+    event_type = str(record["event_type"])
+    path = record["canonical_path"]
+    if event_type == "deleted":
+        return process_delete(path)
+    if should_ignore_path(path):
+        log_event("watch", path, "ignored", "outside_authorized_scope_or_excluded")
+        return {"ok": False, "decision": "ignored", "record": record}
+    if not wait_until_file_stable(path):
+        return {"ok": False, "decision": "not_stable", "record": record}
+    # The first record was sampled before the stability wait; a file that was
+    # still being written has a different size/mtime by now, so sample again
+    # before persisting those values.
+    record = event_to_file_record(event)
+    row = upsert_file_record(
+        canonical_path=path,
+        file_type=record["file_type"],
+        size=int(record["size"]),
+        mtime=float(record["mtime"]),
+        ctime=float(record["ctime"]),
+        status="READY_FOR_PARSE",
+        source="fs_watcher",
+    )
+    add_ingest_job(row["file_id"], "parse", reason=f"watch:{event_type}")
+    log_event("watch", path, "queued", event_type, file_id=row["file_id"])
+    if auto_ingest:
+        return process_file(path, submit_summaries=submit_summaries)
+    return {"ok": True, "decision": "queued", "record": record, "file_id": row["file_id"]}
+def start_watch(*, duration: float | None = None, submit_summaries: bool | None = None) -> dict[str, Any]:
+    """Watch every authorized root in the foreground and ingest changes.
+    Runs until ``duration`` seconds have elapsed, or until interrupted with
+    Ctrl-C when ``duration`` is None. Besides reacting to watchdog events, the
+    roots are re-scanned once at start, at most every 5 seconds while running,
+    and once more on shutdown.
+    Returns an error dict (``ok`` False) when no roots are authorized or
+    watchdog is not installed; otherwise a summary with ``roots``, ``duration``,
+    ``events`` (non-directory events forwarded to ``handle_event``),
+    ``poll_scans`` and ``poll_processed``.
+    """
+    roots = [Path(p) for p in list_allowed_roots()]
+    if not roots:
+        return {"ok": False, "status": "no_authorized_roots", "reason": "run ms8 absorb add <dir> first"}
+    try:
+        from watchdog.events import FileSystemEventHandler
+        from watchdog.observers import Observer
+    except ImportError:
+        return {"ok": False, "status": "missing_dependency", "reason": "install ms8[absorb] for watchdog support"}
+    events = 0
+    poll_scans = 0
+    poll_processed = 0
+    class AbsorbHandler(FileSystemEventHandler):
+        def _forward(self, event: Any) -> None:
+            # Directory events are skipped; everything else is counted and handled.
+            nonlocal events
+            if getattr(event, "is_directory", False):
+                return
+            events += 1
+            handle_event(event, submit_summaries=submit_summaries)
+        def on_created(self, event: Any) -> None:
+            self._forward(event)
+        def on_modified(self, event: Any) -> None:
+            self._forward(event)
+        def on_deleted(self, event: Any) -> None:
+            self._forward(event)
+    def _poll_once() -> None:
+        # One scan+ingest pass; counters only advance when the pass did not raise.
+        nonlocal poll_scans, poll_processed
+        polled = _poll_authorized_roots(submit_summaries=submit_summaries)
+        poll_scans += 1
+        poll_processed += int(polled.get("processed", 0) or 0)
+    observer = Observer()
+    handler = AbsorbHandler()
+    for root in roots:
+        observer.schedule(handler, str(root), recursive=True)
+    observer.start()
+    started_at = time.time()
+    last_poll = started_at
+    try:
+        _poll_once()
+        # No deadline means "run until interrupted"; the sleep tick is 1s in
+        # that mode and 0.2s for timed runs so the deadline is hit closely.
+        deadline = None if duration is None else started_at + max(0.0, float(duration))
+        tick = 1 if duration is None else 0.2
+        while deadline is None or time.time() < deadline:
+            time.sleep(tick)
+            if time.time() - last_poll >= 5:
+                _poll_once()
+                last_poll = time.time()
+    except KeyboardInterrupt:
+        logger.info("absorb watcher interrupted by user")
+    finally:
+        try:
+            try:
+                _poll_once()
+            except (OSError, ValueError, TypeError) as exc:
+                log_event("watch", "", "poll_failed", str(exc))
+        finally:
+            # Stop the observer even if the final poll raised something that
+            # is not handled above.
+            observer.stop()
+            observer.join(timeout=5)
+    return {
+        "ok": True,
+        "status": "stopped",
+        "roots": [str(p) for p in roots],
+        "duration": round(time.time() - started_at, 2),
+        "events": events,
+        "poll_scans": poll_scans,
+        "poll_processed": poll_processed,
+    }
+def stop_watch() -> dict[str, Any]:
+    """Explain how to stop the watcher: it runs in the foreground, so there is nothing to signal from here."""
+    response: dict[str, Any] = {"ok": True}
+    response["status"] = "foreground_only"
+    response["reason"] = "ms8 absorb start runs in the foreground and stops with Ctrl-C"
+    response["background_service_stop"] = "ms8 service absorb-remove"
+    return response
+def _poll_authorized_roots(*, submit_summaries: bool | None = None, limit: int = 100) -> dict[str, Any]:
+    """Re-scan the authorized roots, ingest up to ``limit`` pending items, and summarise both steps."""
+    scan_result = bootstrap_authorized_roots()
+    ingest_result = process_pending(submit_summaries=submit_summaries, limit=limit)
+    def _as_int(source: dict[str, Any], key: str) -> int:
+        # Missing or falsy counters count as 0.
+        return int(source.get(key, 0) or 0)
+    # The poll is "ok" only when both the scan and the ingest step reported ok.
+    both_ok = bool(scan_result.get("ok", False)) and bool(ingest_result.get("ok", False))
+    return {
+        "ok": both_ok,
+        "discovered": _as_int(scan_result, "discovered"),
+        "indexed": _as_int(scan_result, "indexed"),
+        "processed": _as_int(ingest_result, "processed"),
+    }

ms8/absorb/governance.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""Governance checks for absorb chunks."""
+from __future__ import annotations
+import json
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from .repository import quarantine_dir
+# Patterns whose presence in a chunk is reported as a secret/financial hit.
+# Keys double as rule names and as the suffix of the [REDACTED_<NAME>] marker.
+SECRET_PATTERNS = {
+    # PEM-style private key header, with an optional RSA/OPENSSH/EC/ENCRYPTED qualifier.
+    "private_key": re.compile(r"-----BEGIN (?:RSA |OPENSSH |EC |ENCRYPTED )?PRIVATE KEY-----"),
+    # "Bearer " followed by at least 20 token characters.
+    "bearer_token": re.compile(r"\bBearer\s+[A-Za-z0-9._\-]{20,}"),
+    # Case-insensitive key name, then ':' or '=', then at least 8 non-space characters.
+    "api_key": re.compile(r"(?i)\b(api[_-]?key|secret|token|password)\b\s*[:=]\s*[^\s]{8,}"),
+    # Any run of 13-19 digits, optionally separated by spaces or hyphens. No checksum
+    # is applied here, so other long digit runs match as well.
+    "credit_card": re.compile(r"\b(?:\d[ -]*?){13,19}\b"),
+}
+# Patterns whose presence in a chunk is reported as a PII hit.
+PII_PATTERNS = {
+    "email": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
+    # Optional '+', then a digit-led run of digits, spaces, or hyphens ending in a digit
+    # (at least nine characters in total).
+    "phone": re.compile(r"\b(?:\+?\d[\d -]{7,}\d)\b"),
+}
+def _now() -> str:
+    """Return the current UTC time as a timezone-aware ISO-8601 string."""
+    current = datetime.now(tz=timezone.utc)
+    return current.isoformat()
+def _redact(text: str) -> str:
+    """Replace every secret/PII match with a ``[REDACTED_<NAME>]`` marker and return the first 300 characters."""
+    combined = dict(SECRET_PATTERNS)
+    combined.update(PII_PATTERNS)
+    redacted = str(text or "")
+    for rule_name, rule in combined.items():
+        marker = f"[REDACTED_{rule_name.upper()}]"
+        redacted = rule.sub(marker, redacted)
+    # Truncation happens after substitution, not before.
+    return redacted[:300]
+def run_absorb_governance(chunk: str, metadata: dict[str, Any]) -> dict[str, Any]:
+    """Classify a chunk as quarantine / pending_review / local_index.
+    Secret hits take precedence over PII hits; with neither, the chunk is low
+    risk. ``metadata`` is accepted but not consulted. The result always carries
+    ``decision``, ``risk_level``, ``matched_rule``, ``redacted_preview`` and ``reason``.
+    """
+    text = str(chunk or "")
+    def _hits(patterns: dict[str, Any]) -> list[str]:
+        return [rule_name for rule_name, rule in patterns.items() if rule.search(text)]
+    secret_hits = _hits(SECRET_PATTERNS)
+    pii_hits = _hits(PII_PATTERNS)
+    if secret_hits:
+        decision, risk, matched, reason = "quarantine", "high", ",".join(secret_hits), "secret_or_financial_pattern"
+    elif pii_hits:
+        decision, risk, matched, reason = "pending_review", "medium", ",".join(pii_hits), "pii_requires_review"
+    else:
+        decision, risk, matched, reason = "local_index", "low", "", "low_risk"
+    return {
+        "decision": decision,
+        "risk_level": risk,
+        "matched_rule": matched,
+        "redacted_preview": _redact(text),
+        "reason": reason,
+    }
+def write_quarantine_metadata(
+    *,
+    file_id: str,
+    chunk_index: int,
+    source_path: str,
+    content_hash: str,
+    chunk_hash: str,
+    governance: dict[str, Any],
+) -> Path:
+    """Write a JSON sidecar describing a quarantined chunk and return its path.
+    The file is named ``<file_id>_<chunk_index>.json`` inside the quarantine
+    directory, which is created if missing.
+    """
+    quarantine_dir().mkdir(parents=True, exist_ok=True)
+    target = quarantine_dir() / f"{file_id}_{chunk_index}.json"
+    payload: dict[str, Any] = {
+        "source_path": source_path,
+        "content_hash": content_hash,
+        "chunk_hash": chunk_hash,
+    }
+    # The governance risk level is stored under "risk_type".
+    payload["risk_type"] = governance.get("risk_level", "unknown")
+    for key in ("matched_rule", "redacted_preview"):
+        payload[key] = governance.get(key, "")
+    payload["created_at"] = _now()
+    for key in ("decision", "reason"):
+        payload[key] = governance.get(key, "")
+    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+    target.write_text(serialized, encoding="utf-8")
+    return target
+def submit_to_ms8_governed(summary_or_memory: str, metadata: dict[str, Any]) -> dict[str, Any]:
+    """Write a document summary to main memory via the runtime's ``write_memory``.
+    Blank text is rejected with ``empty_summary``. When the written row has an
+    id, the stored record and the returned row are both tagged as
+    absorb-originated. The caller's ``metadata`` is echoed back unchanged.
+    """
+    from ..runtime import ensure_runtime_dirs, write_memory
+    body = str(summary_or_memory or "").strip()
+    if not body:
+        return {"ok": False, "reason": "empty_summary"}
+    # Work on a copy so the caller's metadata dict is not mutated.
+    tagged_meta = dict(metadata or {})
+    tagged_meta["source_system"] = "absorb"
+    row = write_memory(body, source="absorb")
+    record_id = str(row.get("id", "") or "")
+    if record_id:
+        memories_file = ensure_runtime_dirs()["memories"]
+        _tag_absorb_record(memories_file, record_id, tagged_meta)
+        row_meta = row.setdefault("meta", {})
+        if isinstance(row_meta, dict):
+            row_meta.update(source_system="absorb", absorb=tagged_meta)
+    return {"ok": True, "record": row, "metadata": metadata}
+def _tag_absorb_record(records_file: Path, record_id: str, metadata: dict[str, Any]) -> bool:
+    """Tag an already-written JSONL record as absorb-originated.
+    Every line whose JSON object has ``id == record_id`` gets
+    ``meta.source_system = "absorb"`` and ``meta.absorb = metadata``. All other
+    lines are written back verbatim; blank lines are dropped. The file is
+    replaced via a temporary sibling only when something changed.
+    Returns True when at least one record was tagged, False otherwise
+    (including when the file does not exist).
+    """
+    if not records_file.exists():
+        return False
+    changed = False
+    lines: list[str] = []
+    # surrogateescape round-trips undecodable bytes instead of silently
+    # deleting them from unrelated records when the file is rewritten.
+    content = records_file.read_text(encoding="utf-8", errors="surrogateescape")
+    # Split on "\n" only: str.splitlines() also breaks on U+2028/U+2029/U+0085,
+    # which json.dumps(ensure_ascii=False) leaves unescaped inside string
+    # values, so a record containing one would be cut into two invalid lines.
+    for raw in content.split("\n"):
+        if not raw.strip():
+            continue
+        try:
+            row = json.loads(raw)
+        except (TypeError, ValueError):
+            # Unparseable lines are preserved as-is.
+            lines.append(raw)
+            continue
+        if isinstance(row, dict) and str(row.get("id", "") or "") == record_id:
+            meta = row.get("meta")
+            if not isinstance(meta, dict):
+                meta = {}
+                row["meta"] = meta
+            meta["source_system"] = "absorb"
+            meta["absorb"] = dict(metadata or {})
+            changed = True
+            lines.append(json.dumps(row, ensure_ascii=False))
+        else:
+            # Untouched records keep their exact original serialization.
+            lines.append(raw)
+    if changed:
+        tmp = records_file.with_suffix(records_file.suffix + ".absorb_tag_tmp")
+        tmp.write_text("\n".join(lines) + "\n", encoding="utf-8", errors="surrogateescape")
+        tmp.replace(records_file)
+    return changed