PyPI - handovergap - Versions diffs - 0.1.0__py3-none-any.whl - Mend

handovergap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

handovergap/__init__.py +8 -0
handovergap/cli.py +160 -0
handovergap/core/__init__.py +1 -0
handovergap/core/baselines.py +62 -0
handovergap/core/detector.py +79 -0
handovergap/core/evaluator.py +104 -0
handovergap/data/__init__.py +1 -0
handovergap/data/handover_gap_bench.json +434 -0
handovergap/data/handover_gap_bench_holdout.json +180 -0
handovergap/data/schema.sql +133 -0
handovergap/demo_app.py +277 -0
handovergap/schemas/__init__.py +21 -0
handovergap/schemas/models.py +70 -0
handovergap/slot_rules.py +65 -0
handovergap/store.py +43 -0
handovergap/stores/__init__.py +3 -0
handovergap/stores/tidb.py +139 -0
handovergap-0.1.0.dist-info/METADATA +219 -0
handovergap-0.1.0.dist-info/RECORD +22 -0
handovergap-0.1.0.dist-info/WHEEL +4 -0
handovergap-0.1.0.dist-info/entry_points.txt +2 -0
handovergap-0.1.0.dist-info/licenses/LICENSE +21 -0

handovergap/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""HandoverGap RAG public API."""
+from handovergap.core.detector import HandoverGapDetector
+from handovergap.core.evaluator import HandoverGapEvaluator
+from handovergap.store import InMemoryStore
+from handovergap.stores import TiDBStore
+# Public API of the package: exactly the four names imported above.
+__all__ = ["HandoverGapDetector", "HandoverGapEvaluator", "InMemoryStore", "TiDBStore"]

handovergap/cli.py ADDED Viewed

@@ -0,0 +1,160 @@
+from __future__ import annotations
+import subprocess
+import sys
+from importlib import resources
+import typer
+from rich.console import Console
+from rich.table import Table
+from handovergap.core.detector import HandoverGapDetector
+from handovergap.core.evaluator import HandoverGapEvaluator
+from handovergap.store import InMemoryStore
+from handovergap.stores import TiDBStore
+# Typer application object; the @app.command() functions in this module register its subcommands.
+app = typer.Typer(help="Detect tacit context gaps in handover-oriented RAG memories.")
+# Single Rich console shared by every command for output.
+# NOTE(review): width=160 presumably keeps the wide metrics table from wrapping — confirm.
+console = Console(width=160)
+def _build_detector() -> HandoverGapDetector:
+    return HandoverGapDetector(store=InMemoryStore.from_builtin_dataset())
+def _print_detection(result) -> None:
+    console.print("[bold]Memory:[/bold]")
+    console.print(result.memory)
+    console.print()
+    console.print("[bold]Detected Gaps:[/bold]")
+    if not result.gaps:
+        console.print("No high-risk tacit context gaps detected.")
+    for gap in result.gaps:
+        console.print(f"[{gap.severity}] {gap.gap_type}")
+        console.print(f"  {gap.description}")
+    console.print()
+    console.print("[bold]Clarification Questions:[/bold]")
+    if not result.questions:
+        console.print("No clarification questions needed.")
+    for index, question in enumerate(result.questions, start=1):
+        console.print(f"{index}. {question.question}")
+    console.print()
+    console.print(f"[bold]Transferability:[/bold] {result.transferability_status}")
+    console.print(f"[bold]Score:[/bold] {result.transferability_score:.2f}")
+@app.command()
+def demo() -> None:
+    """Run the built-in valid-but-non-transferable memory demo."""
+    result = _build_detector().detect(scenario_id="S001", successor_role="CS")
+    _print_detection(result)
+@app.command()
+def detect(
+    scenario: str = typer.Option(..., "--scenario", "-s", help="Built-in scenario id, e.g. S001."),
+    role: str = typer.Option(..., "--role", "-r", help="Successor role: CS, Engineer, or Sales."),
+) -> None:
+    """Detect role-conditioned tacit context gaps for one scenario."""
+    result = _build_detector().detect(scenario_id=scenario, successor_role=role)
+    _print_detection(result)
+@app.command()
+def evaluate(
+    compare: bool = typer.Option(False, "--compare", help="Compare HandoverGap with naive and hybrid baselines."),
+    dataset: str = typer.Option("mini", "--dataset", help="Built-in dataset: mini, holdout, or all."),
+    slot_profile: str = typer.Option(
+        "provided",
+        "--slot-profile",
+        help="Slot filling profile: provided, conservative, or optimistic.",
+    ),
+    stress_filling: bool = typer.Option(
+        False,
+        "--stress-filling",
+        help="Evaluate HandoverGap across provided, conservative, and optimistic slot filling profiles.",
+    ),
+) -> None:
+    """Evaluate on HandoverGapBench mini."""
+    if stress_filling:
+        rows = []
+        for profile in ["provided", "conservative", "optimistic"]:
+            evaluator = HandoverGapEvaluator(store=InMemoryStore.from_builtin_dataset(dataset), slot_profile=profile)
+            metrics = evaluator.evaluate_method("handovergap")
+            rows.append(metrics.model_copy(update={"method": f"handovergap/{profile}"}))
+        title = f"HandoverGapBench {dataset} / slot filling stress"
+    else:
+        evaluator = HandoverGapEvaluator(store=InMemoryStore.from_builtin_dataset(dataset), slot_profile=slot_profile)
+        rows = evaluator.compare() if compare else [evaluator.evaluate_method("handovergap")]
+        title = f"HandoverGapBench {dataset} / slot-profile={slot_profile}"
+    table = Table(title=title)
+    table.add_column("Method", no_wrap=True)
+    table.add_column("Scenarios", justify="right", no_wrap=True)
+    table.add_column("Tacit Gap Recall", justify="right", no_wrap=True)
+    table.add_column("Unsafe Transfer Prevention", justify="right", no_wrap=True)
+    table.add_column("Question Coverage", justify="right", no_wrap=True)
+    table.add_column("Safe Transfer Allowance", justify="right", no_wrap=True)
+    table.add_column("Blocked Precision", justify="right", no_wrap=True)
+    for metrics in rows:
+        table.add_row(
+            metrics.method,
+            str(metrics.scenarios),
+            f"{metrics.tacit_gap_recall:.2f}",
+            f"{metrics.unsafe_transfer_prevention:.2f}",
+            f"{metrics.question_coverage:.2f}",
+            f"{metrics.safe_transfer_allowance:.2f}",
+            f"{metrics.blocked_precision:.2f}",
+        )
+    console.print(table)
+@app.command()
+def schema(
+    dialect: str = typer.Option("tidb", "--dialect", help="Schema dialect to print. Only 'tidb' is bundled."),
+) -> None:
+    """Print the optional TiDB schema without requiring a TiDB runtime."""
+    if dialect.lower() != "tidb":
+        raise typer.BadParameter("Only --dialect tidb is currently supported.")
+    console.print(TiDBStore.schema_sql())
+@app.command()
+def serve(
+    port: int = typer.Option(8501, "--port", help="Port for the local Streamlit demo."),
+    host: str = typer.Option("127.0.0.1", "--host", help="Host for the local Streamlit demo."),
+) -> None:
+    """Launch the optional bilingual Streamlit comparison demo."""
+    try:
+        import streamlit  # noqa: F401
+    except ImportError:
+        console.print('Streamlit is optional. Install it with: pip install "handovergap[demo]"')
+        raise typer.Exit(code=1)
+    app_file = resources.files("handovergap").joinpath("demo_app.py")
+    command = [
+        sys.executable,
+        "-m",
+        "streamlit",
+        "run",
+        str(app_file),
+        "--server.address",
+        host,
+        "--server.port",
+        str(port),
+        "--browser.gatherUsageStats",
+        "false",
+    ]
+    raise typer.Exit(code=subprocess.call(command))
+@app.command()
+def init(path: str | None = typer.Argument(None, help="Reserved for future sample project creation.")) -> None:
+    """Show first-run guidance."""
+    target = path or "."
+    console.print(f"HandoverGap sample project target: {target}")
+    console.print("Try: handovergap demo")
+    console.print("Then: handovergap evaluate --compare")
+# Run the Typer app when this module is executed as a script rather than imported.
+if __name__ == "__main__":
+    app()

handovergap/core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Core detection and evaluation logic."""

handovergap/core/baselines.py ADDED Viewed

@@ -0,0 +1,62 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Protocol
+from handovergap.schemas import HandoverScenario
+@dataclass(frozen=True)
+class BaselinePrediction:
+    """Frozen record of what one method predicted for a single scenario."""
+    # Identifier of the method that produced this prediction (e.g. "naive_rag").
+    method: str
+    # Slot names the method flagged as gaps.
+    gap_slots: set[str]
+    # Slot names the method raised clarification questions for.
+    question_slots: set[str]
+    # True when the method blocks the transfer.
+    blocked: bool
+    # Free-text explanation of how the method arrives at its prediction.
+    rationale: str
+class BaselineMethod(Protocol):
+    """Structural interface for baseline methods (the value type of BASELINES)."""
+    # Method identifier; the visible implementations also use it as their BASELINES key.
+    method: str
+    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
+        """Return this method's prediction for one scenario."""
+        ...
+class NaiveRAGBaseline:
+    method = "naive_rag"
+    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
+        return BaselinePrediction(
+            method=self.method,
+            gap_slots=set(),
+            question_slots=set(),
+            blocked=False,
+            rationale="Returns the retrieved memory as an answer without checking transferability.",
+        )
+class HybridRAGBaseline:
+    method = "hybrid_rag"
+    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
+        slots = _first_explicit_risk_slots(scenario)
+        first_gap = scenario.gold_gaps[0] if scenario.gold_gaps else None
+        return BaselinePrediction(
+            method=self.method,
+            gap_slots=slots,
+            question_slots=slots,
+            blocked=bool(first_gap and first_gap.severity == "HIGH"),
+            rationale="Adds evidence context and can flag one explicit risk, but does not fill role-required slots.",
+        )
+# Registry of baseline instances keyed by their `method` identifier
+# ("naive_rag" and "hybrid_rag").
+BASELINES: dict[str, BaselineMethod] = {
+    NaiveRAGBaseline.method: NaiveRAGBaseline(),
+    HybridRAGBaseline.method: HybridRAGBaseline(),
+}
+def _first_explicit_risk_slots(scenario: HandoverScenario) -> set[str]:
+    if not scenario.gold_gaps:
+        return set()
+    return {scenario.gold_gaps[0].slot_name}

handovergap/core/detector.py ADDED Viewed

@@ -0,0 +1,79 @@
+from __future__ import annotations
+from handovergap.schemas import ClarificationQuestion, DetectionResult, HandoverGap
+from handovergap.slot_rules import GAP_TYPE_BY_SLOT, QUESTION_BY_SLOT, ROLE_REQUIRED_SLOTS
+from handovergap.store import InMemoryStore
+class HandoverGapDetector:
+    """Deterministic role-conditioned slot-gap detector for the MVP."""
+    def __init__(self, store: InMemoryStore):
+        self.store = store
+    def detect(self, scenario_id: str, successor_role: str) -> DetectionResult:
+        scenario = self.store.get_scenario(scenario_id=scenario_id, successor_role=successor_role)
+        return self.detect_scenario(scenario)
+    def detect_scenario(self, scenario) -> DetectionResult:
+        required_slots = ROLE_REQUIRED_SLOTS[scenario.successor_role]
+        missing_slots = [slot for slot in required_slots if slot not in scenario.provided_slots]
+        gaps = [
+            HandoverGap(
+                gap_type=GAP_TYPE_BY_SLOT.get(slot, f"{slot}_gap"),
+                slot_name=slot,
+                description=_describe_gap(slot),
+                severity=_severity_for_slot(slot),
+            )
+            for slot in missing_slots
+        ]
+        questions = [
+            ClarificationQuestion(slot_name=slot, question=QUESTION_BY_SLOT[slot])
+            for slot in missing_slots
+            if slot in QUESTION_BY_SLOT
+        ]
+        transferability_score = max(0.0, 1.0 - (len(missing_slots) / max(len(required_slots), 1)))
+        status = "blocked" if scenario.unsafe_transfer_label and missing_slots else "transferable"
+        if 0 < transferability_score < 0.75 and status != "blocked":
+            status = "needs_clarification"
+        return DetectionResult(
+            scenario_id=scenario.scenario_id,
+            successor_role=scenario.successor_role,
+            memory=scenario.memory,
+            handover_task=scenario.handover_task,
+            gaps=gaps,
+            questions=questions,
+            transferability_score=transferability_score,
+            transferability_status=status,
+        )
+def _severity_for_slot(slot: str) -> str:
+    if slot in {"authority", "communication_status", "fallback_plan", "escalation_path", "contract_impact"}:
+        return "HIGH"
+    return "MEDIUM"
+def _describe_gap(slot: str) -> str:
+    descriptions = {
+        "scope": "引き継ぎ先が適用範囲を判断するための情報が不足しています",
+        "communication_status": "関係者または顧客に説明済みか不明です",
+        "authority": "後任が回答または判断してよい範囲が不明です",
+        "fallback_plan": "想定外の場合の代替手段が不明です",
+        "escalation_path": "問題発生時の相談先またはエスカレーション先が不明です",
+        "customer_facing_wording": "外部向けに使ってよい説明文が不明です",
+        "rationale": "なぜその判断になったかが不明です",
+        "technical_constraint": "技術的制約または前提条件が不明です",
+        "implementation_scope": "実装対象と対象外の境界が不明です",
+        "trigger_for_reconsideration": "再検討が必要になる条件が不明です",
+        "related_issue": "関連するチケットや追跡先が不明です",
+        "failure_modes": "失敗パターンと観測方法が不明です",
+        "contract_impact": "契約や商談への影響が不明です",
+        "promise_boundary": "顧客に約束してよい範囲が不明です",
+        "customer_expectation": "顧客期待値が調整済みか不明です",
+        "timeline_confidence": "提示できる時期の確度が不明です",
+        "negotiation_status": "交渉状況と未合意点が不明です",
+    }
+    return descriptions.get(slot, f"{slot} が不足しています")

handovergap/core/evaluator.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+from handovergap.core.baselines import BASELINES, BaselinePrediction
+from handovergap.core.detector import HandoverGapDetector
+from handovergap.schemas import EvalMetrics
+from handovergap.store import InMemoryStore
+class HandoverGapEvaluator:
+    def __init__(self, store: InMemoryStore, slot_profile: str = "provided"):
+        self.store = store
+        self.slot_profile = slot_profile
+    def compare(self) -> list[EvalMetrics]:
+        return [
+            self.evaluate_method("naive_rag"),
+            self.evaluate_method("hybrid_rag"),
+            self.evaluate_method("handovergap"),
+        ]
+    def evaluate_method(self, method: str = "handovergap") -> EvalMetrics:
+        scenarios = self.store.list_scenarios()
+        total_gold_gaps = 0
+        detected_gold_gaps = 0
+        unsafe_total = 0
+        unsafe_blocked = 0
+        total_gold_questions = 0
+        covered_gold_questions = 0
+        safe_total = 0
+        safe_allowed = 0
+        blocked_total = 0
+        blocked_unsafe = 0
+        for scenario in scenarios:
+            profiled_scenario = _scenario_for_profile(scenario, self.slot_profile)
+            prediction = self._predict(method, profiled_scenario)
+            gold_slots = {gap.slot_name for gap in scenario.gold_gaps}
+            gold_question_slots = {question.slot_name for question in scenario.gold_questions}
+            total_gold_gaps += len(gold_slots)
+            detected_gold_gaps += len(gold_slots & prediction.gap_slots)
+            total_gold_questions += len(gold_question_slots)
+            covered_gold_questions += len(gold_question_slots & prediction.question_slots)
+            if scenario.unsafe_transfer_label:
+                unsafe_total += 1
+                if prediction.blocked:
+                    unsafe_blocked += 1
+            else:
+                safe_total += 1
+                if not prediction.blocked:
+                    safe_allowed += 1
+            if prediction.blocked:
+                blocked_total += 1
+                if scenario.unsafe_transfer_label:
+                    blocked_unsafe += 1
+        return EvalMetrics(
+            method=method,
+            scenarios=len(scenarios),
+            tacit_gap_recall=_ratio(detected_gold_gaps, total_gold_gaps),
+            unsafe_transfer_prevention=_ratio(unsafe_blocked, unsafe_total),
+            question_coverage=_ratio(covered_gold_questions, total_gold_questions),
+            safe_transfer_allowance=_ratio(safe_allowed, safe_total),
+            blocked_precision=_ratio(blocked_unsafe, blocked_total),
+        )
+    def _predict(self, method: str, scenario) -> BaselinePrediction:
+        if method in BASELINES:
+            return BASELINES[method].predict(scenario)
+        if method == "handovergap":
+            result = HandoverGapDetector(store=self.store).detect_scenario(scenario)
+            return BaselinePrediction(
+                method=method,
+                gap_slots={gap.slot_name for gap in result.gaps},
+                question_slots={question.slot_name for question in result.questions},
+                blocked=result.transferability_status == "blocked",
+                rationale=(
+                    "Performs role-conditioned slot filling and blocks unsafe transfer "
+                    "when required slots are missing."
+                ),
+            )
+        raise ValueError(f"Unknown evaluation method: {method}")
+def _ratio(numerator: int, denominator: int) -> float:
+    if denominator == 0:
+        return 0.0
+    return numerator / denominator
+def _scenario_for_profile(scenario, slot_profile: str):
+    if slot_profile == "provided":
+        return scenario
+    try:
+        provided_slots = scenario.slot_fill_profiles[slot_profile]
+    except KeyError as exc:
+        if not scenario.slot_fill_profiles:
+            return scenario
+        available = ", ".join(["provided", *scenario.slot_fill_profiles])
+        raise ValueError(
+            f"Scenario {scenario.scenario_id} has no slot fill profile '{slot_profile}'. Available: {available}"
+        ) from exc
+    return scenario.model_copy(update={"provided_slots": provided_slots})

handovergap/data/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Bundled synthetic benchmark and schema assets."""