handovergap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """HandoverGap RAG public API."""
2
+
3
+ from handovergap.core.detector import HandoverGapDetector
4
+ from handovergap.core.evaluator import HandoverGapEvaluator
5
+ from handovergap.store import InMemoryStore
6
+ from handovergap.stores import TiDBStore
7
+
8
# Names re-exported as the package's public API (each is imported above).
__all__ = ["HandoverGapDetector", "HandoverGapEvaluator", "InMemoryStore", "TiDBStore"]
handovergap/cli.py ADDED
@@ -0,0 +1,160 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import sys
5
+ from importlib import resources
6
+
7
+ import typer
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ from handovergap.core.detector import HandoverGapDetector
12
+ from handovergap.core.evaluator import HandoverGapEvaluator
13
+ from handovergap.store import InMemoryStore
14
+ from handovergap.stores import TiDBStore
15
+
16
# Typer application object; every ``@app.command()`` function in this module registers on it.
app = typer.Typer(help="Detect tacit context gaps in handover-oriented RAG memories.")
# Shared Rich console used for all CLI output, with a fixed width of 160 columns.
console = Console(width=160)
18
+
19
+
20
def _build_detector() -> HandoverGapDetector:
    """Create a detector whose store is the built-in in-memory dataset."""
    builtin_store = InMemoryStore.from_builtin_dataset()
    return HandoverGapDetector(store=builtin_store)
22
+
23
+
24
def _print_detection(result) -> None:
    """Render a detection result on the shared console.

    Output order: the memory text, the detected gaps (or a placeholder line
    when there are none), the clarification questions (or a placeholder
    line), and finally the transferability status and score.

    Lines that carry scenario data are printed with ``markup=False``.
    Otherwise Rich would interpret bracketed text inside the data (for
    example ``[draft]`` or ``[/x]``) as console markup, silently dropping it
    or raising a markup error.

    Args:
        result: Object exposing ``memory``, ``gaps``, ``questions``,
            ``transferability_status`` and ``transferability_score``.
    """
    console.print("[bold]Memory:[/bold]")
    console.print(result.memory, markup=False)
    console.print()

    console.print("[bold]Detected Gaps:[/bold]")
    if not result.gaps:
        console.print("No high-risk tacit context gaps detected.")
    for gap in result.gaps:
        # The "[SEVERITY]" prefix is literal text, not a style tag.
        console.print(f"[{gap.severity}] {gap.gap_type}", markup=False)
        console.print(f"  {gap.description}", markup=False)
    console.print()

    console.print("[bold]Clarification Questions:[/bold]")
    if not result.questions:
        console.print("No clarification questions needed.")
    for index, question in enumerate(result.questions, start=1):
        console.print(f"{index}. {question.question}", markup=False)
    console.print()

    console.print(f"[bold]Transferability:[/bold] {result.transferability_status}")
    console.print(f"[bold]Score:[/bold] {result.transferability_score:.2f}")
43
+
44
+
45
@app.command()
def demo() -> None:
    """Run the built-in valid-but-non-transferable memory demo."""
    # Fixed showcase: scenario "S001" seen from the "CS" successor role.
    detector = _build_detector()
    _print_detection(detector.detect(scenario_id="S001", successor_role="CS"))
50
+
51
+
52
@app.command()
def detect(
    scenario: str = typer.Option(..., "--scenario", "-s", help="Built-in scenario id, e.g. S001."),
    role: str = typer.Option(..., "--role", "-r", help="Successor role: CS, Engineer, or Sales."),
) -> None:
    """Detect role-conditioned tacit context gaps for one scenario."""
    # Both options are required; they are forwarded unchanged to the detector.
    detector = _build_detector()
    outcome = detector.detect(scenario_id=scenario, successor_role=role)
    _print_detection(outcome)
60
+
61
+
62
@app.command()
def evaluate(
    compare: bool = typer.Option(False, "--compare", help="Compare HandoverGap with naive and hybrid baselines."),
    dataset: str = typer.Option("mini", "--dataset", help="Built-in dataset: mini, holdout, or all."),
    slot_profile: str = typer.Option(
        "provided",
        "--slot-profile",
        help="Slot filling profile: provided, conservative, or optimistic.",
    ),
    stress_filling: bool = typer.Option(
        False,
        "--stress-filling",
        help="Evaluate HandoverGap across provided, conservative, and optimistic slot filling profiles.",
    ),
) -> None:
    """Evaluate on a built-in HandoverGapBench dataset."""
    # The docstring is the command's help text. It previously said "mini",
    # although --dataset also accepts "holdout" and "all".
    if stress_filling:
        # Stress mode evaluates only the "handovergap" method, once per
        # profile; --compare and --slot-profile are not consulted here.
        rows = []
        for profile in ["provided", "conservative", "optimistic"]:
            evaluator = HandoverGapEvaluator(store=InMemoryStore.from_builtin_dataset(dataset), slot_profile=profile)
            metrics = evaluator.evaluate_method("handovergap")
            # Relabel the row so the three profiles can be told apart.
            rows.append(metrics.model_copy(update={"method": f"handovergap/{profile}"}))
        title = f"HandoverGapBench {dataset} / slot filling stress"
    else:
        evaluator = HandoverGapEvaluator(store=InMemoryStore.from_builtin_dataset(dataset), slot_profile=slot_profile)
        rows = evaluator.compare() if compare else [evaluator.evaluate_method("handovergap")]
        title = f"HandoverGapBench {dataset} / slot-profile={slot_profile}"

    table = Table(title=title)
    table.add_column("Method", no_wrap=True)
    # All remaining columns are numeric and share the same layout options.
    for heading in (
        "Scenarios",
        "Tacit Gap Recall",
        "Unsafe Transfer Prevention",
        "Question Coverage",
        "Safe Transfer Allowance",
        "Blocked Precision",
    ):
        table.add_column(heading, justify="right", no_wrap=True)
    for metrics in rows:
        table.add_row(
            metrics.method,
            str(metrics.scenarios),
            f"{metrics.tacit_gap_recall:.2f}",
            f"{metrics.unsafe_transfer_prevention:.2f}",
            f"{metrics.question_coverage:.2f}",
            f"{metrics.safe_transfer_allowance:.2f}",
            f"{metrics.blocked_precision:.2f}",
        )
    console.print(table)
109
+
110
+
111
@app.command()
def schema(
    dialect: str = typer.Option("tidb", "--dialect", help="Schema dialect to print. Only 'tidb' is bundled."),
) -> None:
    """Print the optional TiDB schema without requiring a TiDB runtime."""
    # The dialect name is compared case-insensitively; anything else is rejected.
    requested = dialect.lower()
    if requested == "tidb":
        console.print(TiDBStore.schema_sql())
        return
    raise typer.BadParameter("Only --dialect tidb is currently supported.")
119
+
120
+
121
@app.command()
def serve(
    port: int = typer.Option(8501, "--port", help="Port for the local Streamlit demo."),
    host: str = typer.Option("127.0.0.1", "--host", help="Host for the local Streamlit demo."),
) -> None:
    """Launch the optional bilingual Streamlit comparison demo."""
    try:
        import streamlit  # noqa: F401
    except ImportError:
        # markup=False: with markup on, Rich treats "[demo]" as a style tag
        # and removes it, so the hint would lose the extra's name.
        console.print('Streamlit is optional. Install it with: pip install "handovergap[demo]"', markup=False)
        # "from None" keeps the ImportError out of the exit's exception context.
        raise typer.Exit(code=1) from None

    demo_resource = resources.files("handovergap").joinpath("demo_app.py")
    # as_file() yields a real filesystem path even when the package is not
    # stored as plain files; str() of a Traversable does not guarantee that.
    with resources.as_file(demo_resource) as app_path:
        command = [
            sys.executable,
            "-m",
            "streamlit",
            "run",
            str(app_path),
            "--server.address",
            host,
            "--server.port",
            str(port),
            "--browser.gatherUsageStats",
            "false",
        ]
        # Blocks until Streamlit exits, so the path stays valid for the whole run.
        exit_code = subprocess.call(command)
    # Propagate Streamlit's exit status as the CLI's own exit status.
    raise typer.Exit(code=exit_code)
148
+
149
+
150
@app.command()
def init(path: str | None = typer.Argument(None, help="Reserved for future sample project creation.")) -> None:
    """Show first-run guidance."""
    # No files are created; a missing or empty path is shown as ".".
    target = path or "."
    # markup=False: the path is user input, and bracketed parts such as
    # "[draft]" would otherwise be parsed as Rich markup and disappear.
    console.print(f"HandoverGap sample project target: {target}", markup=False)
    console.print("Try: handovergap demo")
    console.print("Then: handovergap evaluate --compare")
157
+
158
+
159
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
@@ -0,0 +1 @@
1
+ """Core detection and evaluation logic."""
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Protocol
5
+
6
+ from handovergap.schemas import HandoverScenario
7
+
8
+
9
@dataclass(frozen=True)
class BaselinePrediction:
    """Immutable record of what one method predicted for one scenario."""

    # NOTE(review): the generated __hash__ of a frozen dataclass hashes every
    # field, and sets are unhashable, so instances cannot be hashed.
    method: str  # identifier of the method that produced the prediction
    gap_slots: set[str]  # slot names the method flagged as gaps
    question_slots: set[str]  # slot names the method raised questions for
    blocked: bool  # whether the method blocked the transfer
    rationale: str  # free-text explanation of how the method behaves
16
+
17
+
18
class BaselineMethod(Protocol):
    """Structural interface for a baseline: a ``method`` name plus ``predict``."""

    method: str  # identifier of the baseline

    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
        """Return this baseline's prediction for ``scenario``."""
        ...
23
+
24
+
25
class NaiveRAGBaseline:
    """Baseline that never flags a gap, never asks a question, and never blocks."""

    method = "naive_rag"

    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
        """Return an empty, non-blocking prediction; ``scenario`` is not read."""
        explanation = "Returns the retrieved memory as an answer without checking transferability."
        # Two separate empty sets, so the two fields never share one object.
        flagged: set[str] = set()
        asked: set[str] = set()
        return BaselinePrediction(
            method=self.method,
            gap_slots=flagged,
            question_slots=asked,
            blocked=False,
            rationale=explanation,
        )
36
+
37
+
38
class HybridRAGBaseline:
    """Baseline that looks only at the first gold gap of a scenario."""

    method = "hybrid_rag"

    def predict(self, scenario: HandoverScenario) -> BaselinePrediction:
        """Flag the first gold gap's slot and block only when that gap is HIGH."""
        # The same set object serves as both the gap slots and the question slots.
        flagged = _first_explicit_risk_slots(scenario)
        leading_gap = scenario.gold_gaps[0] if scenario.gold_gaps else None
        if leading_gap:
            should_block = bool(leading_gap.severity == "HIGH")
        else:
            should_block = False
        explanation = "Adds evidence context and can flag one explicit risk, but does not fill role-required slots."
        return BaselinePrediction(
            method=self.method,
            gap_slots=flagged,
            question_slots=flagged,
            blocked=should_block,
            rationale=explanation,
        )
51
+
52
+
53
# Registry of baseline instances keyed by each class's ``method`` name.
BASELINES: dict[str, BaselineMethod] = {
    NaiveRAGBaseline.method: NaiveRAGBaseline(),
    HybridRAGBaseline.method: HybridRAGBaseline(),
}
57
+
58
+
59
def _first_explicit_risk_slots(scenario: HandoverScenario) -> set[str]:
    """Return a one-element set holding the first gold gap's slot name.

    An empty set is returned when the scenario has no gold gaps.
    """
    annotated_gaps = scenario.gold_gaps
    if annotated_gaps:
        return {annotated_gaps[0].slot_name}
    return set()
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from handovergap.schemas import ClarificationQuestion, DetectionResult, HandoverGap
4
+ from handovergap.slot_rules import GAP_TYPE_BY_SLOT, QUESTION_BY_SLOT, ROLE_REQUIRED_SLOTS
5
+ from handovergap.store import InMemoryStore
6
+
7
+
8
class HandoverGapDetector:
    """Deterministic role-conditioned slot-gap detector for the MVP."""

    def __init__(self, store: InMemoryStore):
        """Keep the scenario store that :meth:`detect` looks scenarios up in."""
        self.store = store

    def detect(self, scenario_id: str, successor_role: str) -> DetectionResult:
        """Fetch one scenario from the store and run :meth:`detect_scenario` on it."""
        scenario = self.store.get_scenario(scenario_id=scenario_id, successor_role=successor_role)
        return self.detect_scenario(scenario)

    def detect_scenario(self, scenario) -> DetectionResult:
        """Compare the role's required slots with the scenario's provided slots.

        Every required slot absent from ``scenario.provided_slots`` becomes a
        gap; a clarification question is added for each missing slot that has
        an entry in ``QUESTION_BY_SLOT``.

        The score is the fraction of required slots that are present (1.0
        when the role requires none). The status is:

        * ``"blocked"`` when the scenario is labelled unsafe and at least one
          slot is missing;
        * ``"needs_clarification"`` when not blocked and the score is below
          0.75, including a score of exactly 0.0;
        * ``"transferable"`` otherwise.

        Raises:
            KeyError: If ``scenario.successor_role`` has no entry in
                ``ROLE_REQUIRED_SLOTS``.
        """
        required_slots = ROLE_REQUIRED_SLOTS[scenario.successor_role]
        missing_slots = [slot for slot in required_slots if slot not in scenario.provided_slots]

        gaps = [
            HandoverGap(
                gap_type=GAP_TYPE_BY_SLOT.get(slot, f"{slot}_gap"),
                slot_name=slot,
                description=_describe_gap(slot),
                severity=_severity_for_slot(slot),
            )
            for slot in missing_slots
        ]
        questions = [
            ClarificationQuestion(slot_name=slot, question=QUESTION_BY_SLOT[slot])
            for slot in missing_slots
            if slot in QUESTION_BY_SLOT
        ]
        # max(..., 1) avoids dividing by zero when the role requires no slots.
        transferability_score = max(0.0, 1.0 - (len(missing_slots) / max(len(required_slots), 1)))
        if scenario.unsafe_transfer_label and missing_slots:
            status = "blocked"
        elif transferability_score < 0.75:
            # No lower bound: the earlier "0 < score" test let a scenario with
            # every required slot missing (score 0.0) pass as "transferable".
            status = "needs_clarification"
        else:
            status = "transferable"

        return DetectionResult(
            scenario_id=scenario.scenario_id,
            successor_role=scenario.successor_role,
            memory=scenario.memory,
            handover_task=scenario.handover_task,
            gaps=gaps,
            questions=questions,
            transferability_score=transferability_score,
            transferability_status=status,
        )
51
+
52
+
53
def _severity_for_slot(slot: str) -> str:
    """Return ``"HIGH"`` for the five high-risk slots and ``"MEDIUM"`` otherwise."""
    high_risk_slots = {
        "authority",
        "communication_status",
        "fallback_plan",
        "escalation_path",
        "contract_impact",
    }
    return "HIGH" if slot in high_risk_slots else "MEDIUM"
57
+
58
+
59
def _describe_gap(slot: str) -> str:
    """Return the description text for a missing slot.

    Slots without a dedicated description get a generic message built from
    the slot name.
    """
    known_descriptions = {
        "scope": "引き継ぎ先が適用範囲を判断するための情報が不足しています",
        "communication_status": "関係者または顧客に説明済みか不明です",
        "authority": "後任が回答または判断してよい範囲が不明です",
        "fallback_plan": "想定外の場合の代替手段が不明です",
        "escalation_path": "問題発生時の相談先またはエスカレーション先が不明です",
        "customer_facing_wording": "外部向けに使ってよい説明文が不明です",
        "rationale": "なぜその判断になったかが不明です",
        "technical_constraint": "技術的制約または前提条件が不明です",
        "implementation_scope": "実装対象と対象外の境界が不明です",
        "trigger_for_reconsideration": "再検討が必要になる条件が不明です",
        "related_issue": "関連するチケットや追跡先が不明です",
        "failure_modes": "失敗パターンと観測方法が不明です",
        "contract_impact": "契約や商談への影響が不明です",
        "promise_boundary": "顧客に約束してよい範囲が不明です",
        "customer_expectation": "顧客期待値が調整済みか不明です",
        "timeline_confidence": "提示できる時期の確度が不明です",
        "negotiation_status": "交渉状況と未合意点が不明です",
    }
    if slot in known_descriptions:
        return known_descriptions[slot]
    # Generic fallback for slots that have no dedicated text.
    return f"{slot} が不足しています"
@@ -0,0 +1,104 @@
1
+ from __future__ import annotations
2
+
3
+ from handovergap.core.baselines import BASELINES, BaselinePrediction
4
+ from handovergap.core.detector import HandoverGapDetector
5
+ from handovergap.schemas import EvalMetrics
6
+ from handovergap.store import InMemoryStore
7
+
8
+
9
class HandoverGapEvaluator:
    """Scores HandoverGap and the registered baselines over a scenario store."""

    def __init__(self, store: InMemoryStore, slot_profile: str = "provided"):
        """Remember the store to read from and the slot-fill profile to apply."""
        self.store = store
        self.slot_profile = slot_profile

    def compare(self) -> list[EvalMetrics]:
        """Evaluate ``naive_rag``, ``hybrid_rag`` and ``handovergap``, in that order."""
        return [self.evaluate_method(name) for name in ("naive_rag", "hybrid_rag", "handovergap")]

    def evaluate_method(self, method: str = "handovergap") -> EvalMetrics:
        """Run ``method`` on every scenario in the store and aggregate the metrics.

        Predictions are made on the scenario as rewritten for the configured
        slot profile; gold gaps, gold questions and the unsafe label are read
        from the scenario as stored.
        """
        all_scenarios = self.store.list_scenarios()
        gold_gap_count = gold_gap_hits = 0
        gold_question_count = gold_question_hits = 0
        unsafe_count = safe_count = 0
        safe_allowed_count = 0
        blocked_count = 0
        # Scenarios that are both unsafe and blocked; this single tally is the
        # numerator of unsafe-transfer prevention and of blocked precision.
        unsafe_and_blocked = 0

        for item in all_scenarios:
            prediction = self._predict(method, _scenario_for_profile(item, self.slot_profile))
            gold_slots = {gap.slot_name for gap in item.gold_gaps}
            gold_question_slots = {question.slot_name for question in item.gold_questions}

            gold_gap_count += len(gold_slots)
            gold_gap_hits += len(gold_slots & prediction.gap_slots)
            gold_question_count += len(gold_question_slots)
            gold_question_hits += len(gold_question_slots & prediction.question_slots)

            is_unsafe = bool(item.unsafe_transfer_label)
            was_blocked = bool(prediction.blocked)
            if was_blocked:
                blocked_count += 1
            if is_unsafe:
                unsafe_count += 1
                if was_blocked:
                    unsafe_and_blocked += 1
            else:
                safe_count += 1
                if not was_blocked:
                    safe_allowed_count += 1

        return EvalMetrics(
            method=method,
            scenarios=len(all_scenarios),
            tacit_gap_recall=_ratio(gold_gap_hits, gold_gap_count),
            unsafe_transfer_prevention=_ratio(unsafe_and_blocked, unsafe_count),
            question_coverage=_ratio(gold_question_hits, gold_question_count),
            safe_transfer_allowance=_ratio(safe_allowed_count, safe_count),
            blocked_precision=_ratio(unsafe_and_blocked, blocked_count),
        )

    def _predict(self, method: str, scenario) -> BaselinePrediction:
        """Produce a prediction for ``scenario`` using the named method.

        Raises:
            ValueError: If ``method`` is neither a registered baseline nor
                ``"handovergap"``.
        """
        # Registered baselines take precedence over the built-in detector.
        baseline = BASELINES.get(method)
        if baseline is not None:
            return baseline.predict(scenario)
        if method != "handovergap":
            raise ValueError(f"Unknown evaluation method: {method}")

        detection = HandoverGapDetector(store=self.store).detect_scenario(scenario)
        explanation = (
            "Performs role-conditioned slot filling and blocks unsafe transfer "
            "when required slots are missing."
        )
        return BaselinePrediction(
            method=method,
            gap_slots={gap.slot_name for gap in detection.gaps},
            question_slots={question.slot_name for question in detection.questions},
            blocked=detection.transferability_status == "blocked",
            rationale=explanation,
        )
84
+
85
+
86
def _ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator``, or ``0.0`` when the denominator is zero."""
    return numerator / denominator if denominator != 0 else 0.0
90
+
91
+
92
def _scenario_for_profile(scenario, slot_profile: str):
    """Return ``scenario`` with its provided slots swapped for a named profile.

    The ``"provided"`` profile returns the scenario unchanged, as does any
    profile requested from a scenario that defines no fill profiles.

    Raises:
        ValueError: If the scenario defines fill profiles but not the
            requested one.
    """
    if slot_profile == "provided":
        return scenario
    try:
        replacement_slots = scenario.slot_fill_profiles[slot_profile]
    except KeyError as exc:
        defined_profiles = scenario.slot_fill_profiles
        if defined_profiles:
            available = ", ".join(["provided", *defined_profiles])
            raise ValueError(
                f"Scenario {scenario.scenario_id} has no slot fill profile '{slot_profile}'. Available: {available}"
            ) from exc
        # No profiles defined at all: fall back to the scenario as stored.
        return scenario
    else:
        return scenario.model_copy(update={"provided_slots": replacement_slots})
@@ -0,0 +1 @@
1
+ """Bundled synthetic benchmark and schema assets."""