ummaya 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/README.md +2 -1
  2. package/npm-shrinkwrap.json +2 -2
  3. package/package.json +1 -1
  4. package/prompts/manifest.yaml +2 -2
  5. package/prompts/session_guidance_v1.md +3 -1
  6. package/prompts/system_v1.md +8 -7
  7. package/pyproject.toml +2 -7
  8. package/src/ummaya/context/builder.py +17 -11
  9. package/src/ummaya/engine/engine.py +27 -7
  10. package/src/ummaya/engine/query.py +20 -0
  11. package/src/ummaya/evidence/__init__.py +25 -0
  12. package/src/ummaya/evidence/__main__.py +7 -0
  13. package/src/ummaya/evidence/models.py +58 -0
  14. package/src/ummaya/evidence/runner.py +308 -0
  15. package/src/ummaya/evidence/task_registry.py +264 -0
  16. package/src/ummaya/ipc/frame_schema.py +47 -0
  17. package/src/ummaya/ipc/stdio.py +1349 -90
  18. package/src/ummaya/llm/client.py +132 -56
  19. package/src/ummaya/llm/reasoning.py +84 -0
  20. package/src/ummaya/tools/discovery_bridge.py +17 -1
  21. package/src/ummaya/tools/executor.py +32 -12
  22. package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
  23. package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
  24. package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
  25. package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
  26. package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
  27. package/src/ummaya/tools/location_adapters.py +8 -6
  28. package/src/ummaya/tools/manifest_metadata.py +16 -3
  29. package/src/ummaya/tools/mvp_surface.py +2 -2
  30. package/src/ummaya/tools/nmc/emergency_search.py +8 -6
  31. package/src/ummaya/tools/register_all.py +9 -0
  32. package/src/ummaya/tools/resolve_location.py +4 -4
  33. package/src/ummaya/tools/search.py +664 -18
  34. package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
  35. package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
  36. package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
  37. package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
  38. package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
  39. package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
  40. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
  41. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
  42. package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
  43. package/src/ummaya/tools/verify_canonical_map.py +21 -0
  44. package/tui/package.json +1 -2
  45. package/tui/src/QueryEngine.ts +4 -0
  46. package/tui/src/cli/handlers/auth.ts +1 -1
  47. package/tui/src/cli/handlers/mcp.tsx +3 -3
  48. package/tui/src/cli/print.ts +69 -18
  49. package/tui/src/cli/update.ts +13 -13
  50. package/tui/src/commands/copy/index.ts +1 -1
  51. package/tui/src/commands/cost/cost.ts +2 -2
  52. package/tui/src/commands/init-verifiers.ts +5 -5
  53. package/tui/src/commands/init.ts +30 -30
  54. package/tui/src/commands/insights.ts +43 -43
  55. package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
  56. package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
  57. package/tui/src/commands/install.tsx +5 -5
  58. package/tui/src/commands/mcp/addCommand.ts +5 -5
  59. package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
  60. package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
  61. package/tui/src/commands/reasoning/index.ts +13 -0
  62. package/tui/src/commands/reasoning/reasoning.tsx +177 -0
  63. package/tui/src/commands/thinkback/thinkback.tsx +3 -3
  64. package/tui/src/commands.ts +2 -0
  65. package/tui/src/components/Messages.tsx +2 -1
  66. package/tui/src/components/Spinner.tsx +2 -2
  67. package/tui/src/components/design-system/LoadingState.tsx +2 -2
  68. package/tui/src/ipc/codec.ts +26 -0
  69. package/tui/src/ipc/frames.generated.ts +398 -303
  70. package/tui/src/ipc/llmClient.ts +130 -51
  71. package/tui/src/ipc/llmTypes.ts +16 -1
  72. package/tui/src/ipc/schema/frame.schema.json +1 -3475
  73. package/tui/src/main.tsx +3 -0
  74. package/tui/src/query.ts +467 -2
  75. package/tui/src/screens/REPL.tsx +3 -3
  76. package/tui/src/services/api/claude.ts +54 -25
  77. package/tui/src/services/api/client.ts +33 -12
  78. package/tui/src/services/api/ummaya.ts +70 -16
  79. package/tui/src/skills/bundled/stuck.ts +12 -12
  80. package/tui/src/state/AppStateStore.ts +7 -0
  81. package/tui/src/tools/AdapterTool/AdapterTool.ts +590 -7
  82. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +43 -17
  83. package/tui/src/tools/LookupPrimitive/prompt.ts +7 -6
  84. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +40 -19
  85. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +25 -9
  86. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +25 -9
  87. package/tui/src/tools/_shared/citizenUserText.ts +49 -0
  88. package/tui/src/tools/_shared/directPublicDataGuard.ts +362 -0
  89. package/tui/src/tools/_shared/kmaAnalysisGuard.ts +197 -0
  90. package/tui/src/tools/_shared/kmaAviationGuard.ts +70 -0
  91. package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
  92. package/tui/src/tools/_shared/nmcAedGuard.ts +234 -0
  93. package/tui/src/tools/_shared/protectedCheckGuard.ts +207 -0
  94. package/tui/src/tools/_shared/rootPrimitiveInput.ts +67 -0
  95. package/tui/src/tools/_shared/textToolCallGuard.ts +91 -0
  96. package/tui/src/tools/_shared/toolChoiceRepair.ts +866 -0
  97. package/tui/src/utils/attachments.ts +1 -1
  98. package/tui/src/utils/kExaoneReasoning.ts +138 -0
  99. package/tui/src/utils/messages.ts +1 -0
  100. package/tui/src/utils/multiToolLayout.ts +13 -0
  101. package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
  102. package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
  103. package/tui/src/utils/settings/applySettingsChange.ts +4 -0
  104. package/tui/src/utils/settings/types.ts +9 -3
  105. package/tui/src/utils/stats.ts +1 -1
  106. package/uv.lock +1 -15
  107. package/assets/copilot-gate-logo.svg +0 -58
  108. package/assets/govon-logo.svg +0 -40
  109. package/src/ummaya/eval/__init__.py +0 -5
  110. package/src/ummaya/eval/retrieval.py +0 -713
  111. package/tui/src/utils/messageStream.ts +0 -186
@@ -0,0 +1,308 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Evidence Fabric v2 dataset runner.
3
+
4
+ The runner is intentionally local and deterministic. It validates scenario
5
+ contracts and emits a typed RunEvidence document without calling live public
6
+ service channels, LLM providers, or observability backends.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ from collections.abc import Mapping, Sequence
13
+ from pathlib import Path
14
+ from typing import Literal, cast
15
+
16
+ import yaml
17
+ from pydantic import BaseModel, ConfigDict, Field, ValidationError
18
+
19
+ from ummaya.evidence.models import EvidenceGate, RunEvidence
20
+ from ummaya.evidence.task_registry import EvidenceDatasetRef, load_task_registry
21
+
22
# Repository root: the fourth ancestor directory of this module's resolved path.
_REPO_ROOT = Path(__file__).resolve().parents[3]

# Defaults shared by ``run_dataset`` and the CLI entrypoint.
_DEFAULT_SCENARIO_PATH = _REPO_ROOT.joinpath(
    "evidence/scenarios/national_ax_citizen_requests_v1.yaml"
)
_DEFAULT_TASK_REGISTRY_PATH = _REPO_ROOT.joinpath("evidence/registry.yaml")
_DEFAULT_DATASET_REF = "ummaya/national-ax-core@local"
26
+ _BANNED_MODEL_VISIBLE_KEYS = frozenset(
27
+ {
28
+ "adapter_id",
29
+ "tool_id",
30
+ "expected_tool_id",
31
+ "fixture_refs",
32
+ "fixture_ref",
33
+ "current_adapter_id",
34
+ }
35
+ )
36
+ _REQUIRED_DOMAINS = frozenset(
37
+ {
38
+ "tax",
39
+ "civil_affairs",
40
+ "payments",
41
+ "utilities",
42
+ "identity",
43
+ "welfare",
44
+ "healthcare",
45
+ "housing",
46
+ "mobility",
47
+ "business",
48
+ "labor",
49
+ "education",
50
+ "safety",
51
+ "immigration",
52
+ "legal",
53
+ "personal_data",
54
+ }
55
+ )
56
+
57
+
58
class EvidenceContractError(ValueError):
    """Signal that a scenario dataset breaks the Evidence Fabric contract.

    Subclasses ``ValueError`` so callers that already handle value errors
    keep working.
    """
60
+
61
+
62
class ExpectedStep(BaseModel):
    """A single expected step of a scenario's public-service loop.

    Instances are immutable and unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Name of the primitive expected at this step.
    primitive: str
    # Free-text statement of why the step is expected.
    purpose: str
69
+
70
+
71
class PermissionRequirements(BaseModel):
    """Permission requirements declared for one citizen scenario.

    Instances are immutable; extra fields are kept rather than rejected.
    """

    model_config = ConfigDict(extra="allow", frozen=True)

    # Required field; the accepted values are not constrained here.
    identity_assurance: str
    # Both lists default to an empty tuple when omitted.
    user_confirmations: tuple[str, ...] = Field(default_factory=tuple)
    sensitive_data: tuple[str, ...] = Field(default_factory=tuple)
79
+
80
+
81
class Scenario(BaseModel):
    """The smallest scenario shape Evidence Fabric v2 relies on.

    Instances are immutable; extra fields in the source data are kept.
    """

    model_config = ConfigDict(extra="allow", frozen=True)

    id: str
    # Falls back to "P2" when the scenario does not state a priority.
    priority: str = "P2"
    # Compared against the dataset's ``coverage_domains`` in ``_build_gates``.
    lifecycle_domain: str
    request_ko: str
    expected_ax_chain: tuple[ExpectedStep, ...]
    permission_requirements: PermissionRequirements
92
+
93
+
94
class ScenarioDataset(BaseModel):
    """A versioned dataset of citizen-demand scenarios.

    Instances are immutable; extra top-level fields are kept.
    """

    model_config = ConfigDict(extra="allow", frozen=True)

    version: int
    # Matched against the task registry's dataset_id in ``_resolve_task_dataset``.
    dataset_id: str
    # Domains the dataset declares as covered; checked in ``_build_gates``.
    coverage_domains: tuple[str, ...]
    scenarios: tuple[Scenario, ...]
103
+
104
+
105
def _load_yaml_mapping(path: Path) -> Mapping[str, object]:
    """Read a YAML file and return its top-level mapping.

    Args:
        path: Location of the scenario dataset file.

    Returns:
        The parsed top-level mapping.

    Raises:
        EvidenceContractError: If ``path`` is not a regular file, the content
            is not valid YAML, or the document root is not a mapping.
    """
    # ``is_file`` (rather than ``exists``) keeps a directory from reaching
    # ``read_text`` and failing with an unrelated OSError.
    if not path.is_file():
        raise EvidenceContractError(f"scenario dataset not found: {path}")
    try:
        loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    except yaml.YAMLError as exc:
        # Report syntax errors through the module's own error type so callers
        # only need to handle EvidenceContractError.
        raise EvidenceContractError(
            f"scenario dataset is not valid YAML: {path}: {exc}"
        ) from exc
    if not isinstance(loaded, Mapping):
        raise EvidenceContractError(f"scenario dataset must be a mapping: {path}")
    return cast(Mapping[str, object], loaded)
112
+
113
+
114
+ def _find_banned_keys(value: object, path: str = "$") -> tuple[str, ...]:
115
+ if isinstance(value, Mapping):
116
+ hits: list[str] = []
117
+ for key, nested in value.items():
118
+ key_text = str(key)
119
+ nested_path = f"{path}.{key_text}"
120
+ if key_text in _BANNED_MODEL_VISIBLE_KEYS:
121
+ hits.append(nested_path)
122
+ hits.extend(_find_banned_keys(nested, nested_path))
123
+ return tuple(hits)
124
+ if isinstance(value, Sequence) and not isinstance(value, str | bytes | bytearray):
125
+ hits = []
126
+ for index, nested in enumerate(value):
127
+ hits.extend(_find_banned_keys(nested, f"{path}[{index}]"))
128
+ return tuple(hits)
129
+ return ()
130
+
131
+
132
def _parse_dataset(path: Path) -> ScenarioDataset:
    """Load, screen, and validate the scenario dataset stored at ``path``.

    The raw mapping is first scanned for banned implementation keys and only
    then validated against ``ScenarioDataset``.

    Raises:
        EvidenceContractError: If banned keys are present or validation fails.
    """
    payload = _load_yaml_mapping(path)

    leaked_keys = _find_banned_keys(payload)
    if leaked_keys:
        joined = ", ".join(leaked_keys)
        raise EvidenceContractError(
            "model-visible scenario dataset contains banned implementation keys: " + joined
        )

    try:
        dataset = ScenarioDataset.model_validate(payload)
    except ValidationError as exc:
        raise EvidenceContractError(str(exc)) from exc
    return dataset
144
+
145
+
146
def _gate(
    name: Literal["contract", "scenario", "observability", "adversarial", "ux", "live_canary"],
    status: Literal["pass", "fail", "skip"],
    summary: str,
    check_ids: tuple[str, ...],
) -> EvidenceGate:
    """Build one ``EvidenceGate`` from positional arguments."""
    gate = EvidenceGate(
        name=name,
        status=status,
        summary=summary,
        check_ids=check_ids,
    )
    return gate
153
+
154
+
155
def _build_gates(dataset: ScenarioDataset) -> tuple[EvidenceGate, ...]:
    """Produce the six evidence gates for a validated dataset.

    Only the ``scenario`` gate is computed from the dataset: it fails when a
    required domain is missing from ``coverage_domains`` or when a scenario
    uses a domain the dataset does not declare. The other gates carry fixed
    statuses.
    """
    declared = set(dataset.coverage_domains)
    required_gaps = tuple(sorted(_REQUIRED_DOMAINS - declared))
    used_by_scenarios = {scenario.lifecycle_domain for scenario in dataset.scenarios}
    undeclared = tuple(sorted(used_by_scenarios - declared))

    scenario_status: Literal["pass", "fail"]
    if required_gaps or undeclared:
        scenario_status = "fail"
        scenario_summary = "missing coverage: " + ", ".join(required_gaps + undeclared)
    else:
        scenario_status = "pass"
        scenario_summary = "all required citizen infrastructure domains are covered"

    contract_gate = _gate(
        "contract",
        "pass",
        "dataset is versioned, typed, and free of model-visible implementation keys",
        ("dataset-schema", "task-registry", "no-adapter-leakage"),
    )
    scenario_gate = _gate(
        "scenario",
        scenario_status,
        scenario_summary,
        ("coverage-domains", "scenario-shape"),
    )
    observability_gate = _gate(
        "observability",
        "pass",
        "RunEvidence carries trace join keys for OTEL/Langfuse correlation",
        ("trace-join-keys",),
    )
    adversarial_gate = _gate(
        "adversarial",
        "pass",
        "adapter IDs, fixture references, and expected tool IDs are rejected before scoring",
        ("reward-hack-surface", "hidden-implementation-leakage"),
    )
    ux_gate = _gate(
        "ux",
        "skip",
        "UX frame artifacts are attached by interactive runners, not by dataset validation",
        ("ux-artifact-slot",),
    )
    live_canary_gate = _gate(
        "live_canary",
        "skip",
        "live public-service checks are manual-only and excluded from CI",
        ("no-live-ci",),
    )

    return (
        contract_gate,
        scenario_gate,
        observability_gate,
        adversarial_gate,
        ux_gate,
        live_canary_gate,
    )
208
+
209
+
210
+ def _resolve_repo_path(path: Path) -> Path:
211
+ return path if path.is_absolute() else _REPO_ROOT / path
212
+
213
+
214
def _resolve_task_dataset(
    *,
    dataset: ScenarioDataset,
    scenario_path: Path,
    task_registry_path: Path | None,
    dataset_ref: str,
) -> tuple[str | None, EvidenceDatasetRef | None]:
    """Look up ``dataset_ref`` in the task registry and cross-check it.

    Returns ``(None, None)`` when no registry path is given. Otherwise the
    registry entry must agree with the run on both ``dataset_id`` and the
    scenario file location.

    Raises:
        EvidenceContractError: If the registry entry disagrees with the run.
    """
    if task_registry_path is None:
        return None, None

    registry = load_task_registry(task_registry_path)
    registered = registry.resolve_dataset(dataset_ref)

    ids_agree = registered.dataset_id == dataset.dataset_id
    if not ids_agree:
        raise EvidenceContractError(
            f"task registry dataset_id {registered.dataset_id!r} does not match "
            f"scenario dataset_id {dataset.dataset_id!r}"
        )

    # Both sides go through the same helper before being compared.
    registry_target = _resolve_repo_path(registered.scenario_path)
    run_target = _resolve_repo_path(scenario_path)
    if registry_target != run_target:
        raise EvidenceContractError(
            f"task registry scenario_path {registered.scenario_path} does not match "
            f"run scenario_path {scenario_path}"
        )

    return registry.registry_id, registered
236
+
237
+
238
def run_dataset(
    *,
    scenario_path: Path = _DEFAULT_SCENARIO_PATH,
    source_ref: str = "local",
    task_registry_path: Path | None = _DEFAULT_TASK_REGISTRY_PATH,
    dataset_ref: str = _DEFAULT_DATASET_REF,
) -> RunEvidence:
    """Validate a scenario dataset and summarise it as a ``RunEvidence``.

    Args:
        scenario_path: Scenario dataset file to validate.
        source_ref: Label copied into the evidence document.
        task_registry_path: Task registry to cross-check against; ``None``
            skips the registry and leaves the task fields empty.
        dataset_ref: Dataset ref looked up in the task registry.

    Returns:
        The evidence document, including the gates from ``_build_gates``.
    """
    dataset = _parse_dataset(scenario_path)
    registry_id, registered = _resolve_task_dataset(
        dataset=dataset,
        scenario_path=scenario_path,
        task_registry_path=task_registry_path,
        dataset_ref=dataset_ref,
    )

    resolved_ref: str | None
    task_ids: tuple[str, ...]
    if registered:
        resolved_ref = registered.ref
        task_count = len(registered.tasks)
        task_ids = tuple(task.task_id for task in registered.tasks)
    else:
        resolved_ref = None
        task_count = 0
        task_ids = ()

    scenario_ids = tuple(scenario.id for scenario in dataset.scenarios)
    return RunEvidence(
        source_ref=source_ref,
        dataset_id=dataset.dataset_id,
        task_registry_id=registry_id,
        dataset_ref=resolved_ref,
        task_count=task_count,
        task_ids=task_ids,
        scenario_count=len(dataset.scenarios),
        scenario_ids=scenario_ids,
        gates=_build_gates(dataset),
    )
265
+
266
+
267
def main() -> None:
    """Run the dataset check from the command line (`python -m ummaya.evidence`).

    Parses the CLI options, runs ``run_dataset``, and writes the resulting
    evidence document as indented JSON to ``--out``, creating parent
    directories as needed.
    """
    cli = argparse.ArgumentParser(prog="python -m ummaya.evidence")
    cli.add_argument(
        "--scenarios",
        type=Path,
        default=_DEFAULT_SCENARIO_PATH,
        help="Path to the Evidence Fabric scenario dataset.",
    )
    cli.add_argument(
        "--source-ref",
        default="local",
        help="Source revision or label recorded in the evidence document.",
    )
    cli.add_argument(
        "--task-registry",
        type=Path,
        default=_DEFAULT_TASK_REGISTRY_PATH,
        help="Path to the Harbor-style Evidence Fabric task registry.",
    )
    cli.add_argument(
        "--dataset-ref",
        default=_DEFAULT_DATASET_REF,
        help="Harbor-style dataset ref to resolve from the task registry.",
    )
    cli.add_argument(
        "--out",
        type=Path,
        default=Path(".evidence/run.json"),
        help="Output JSON path.",
    )
    options = cli.parse_args()

    evidence = run_dataset(
        scenario_path=options.scenarios,
        source_ref=options.source_ref,
        task_registry_path=options.task_registry,
        dataset_ref=options.dataset_ref,
    )

    destination: Path = options.out
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(evidence.model_dump_json(indent=2), encoding="utf-8")
@@ -0,0 +1,264 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Harbor-style task registry for Evidence Fabric v2.
3
+
4
+ The registry mirrors Harbor's task boundary: a task has an instruction,
5
+ metadata/configuration, and a verifier script. UMMAYA keeps execution local and
6
+ deterministic; this module only resolves and validates task definitions.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import tomllib
12
+ from collections.abc import Mapping, Sequence
13
+ from pathlib import Path
14
+ from typing import Literal, cast
15
+
16
+ import yaml
17
+ from pydantic import BaseModel, ConfigDict, Field, ValidationError
18
+
19
# Repository root: the fourth ancestor directory of this module's resolved
# path. Relative registry and task paths are anchored here by ``_read_path``.
_REPO_ROOT = Path(__file__).resolve().parents[3]
20
+ _BANNED_MODEL_VISIBLE_KEYS = frozenset(
21
+ {
22
+ "adapter_id",
23
+ "tool_id",
24
+ "expected_tool_id",
25
+ "fixture_refs",
26
+ "fixture_ref",
27
+ "current_adapter_id",
28
+ }
29
+ )
30
+
31
+
32
class TaskRegistryError(ValueError):
    """Signal that an Evidence Fabric task registry or task file is invalid.

    Subclasses ``ValueError`` so callers that already handle value errors
    keep working.
    """
34
+
35
+
36
class EvidenceTask(BaseModel):
    """A single Harbor-style evidence task after resolution.

    Instances are immutable and unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    task_id: str
    # Task directory plus the instruction and verifier files found inside it.
    task_dir: Path
    instruction_path: Path
    verifier_path: Path
    description: str
    # Must equal the owning dataset's id (checked in ``_load_dataset_ref``).
    dataset_id: str
    keywords: tuple[str, ...] = Field(default_factory=tuple)
    # Environment settings; internet access is off unless stated otherwise.
    environment_os: Literal["linux", "windows"] = "linux"
    allow_internet: bool = False
    verifier_timeout_sec: float = 120.0
51
+
52
+
53
class EvidenceDatasetRef(BaseModel):
    """One dataset entry resolved from the local task registry.

    Instances are immutable and unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Lookup key used by ``EvidenceTaskRegistry.resolve_dataset``.
    ref: str
    dataset_id: str
    scenario_path: Path
    tasks: tuple[EvidenceTask, ...]
62
+
63
+
64
class EvidenceTaskRegistry(BaseModel):
    """The task registry after all datasets and tasks have been resolved.

    Instances are immutable and unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    version: int
    registry_id: str
    datasets: tuple[EvidenceDatasetRef, ...]

    def resolve_dataset(self, ref: str) -> EvidenceDatasetRef:
        """Find the dataset whose ``ref`` equals the given Harbor-style ref.

        The first matching entry wins.

        Raises:
            TaskRegistryError: If no dataset carries that ref.
        """
        match = next((entry for entry in self.datasets if entry.ref == ref), None)
        if match is None:
            raise TaskRegistryError(f"dataset ref not found in task registry: {ref}")
        return match
80
+
81
+
82
def _load_yaml_mapping(path: Path) -> Mapping[str, object]:
    """Read a YAML file and return its top-level mapping.

    Args:
        path: Location of the task registry file.

    Returns:
        The parsed top-level mapping.

    Raises:
        TaskRegistryError: If ``path`` is not a regular file, the content is
            not valid YAML, or the document root is not a mapping.
    """
    # ``is_file`` (rather than ``exists``) keeps a directory from reaching
    # ``read_text`` and failing with an unrelated OSError.
    if not path.is_file():
        raise TaskRegistryError(f"task registry not found: {path}")
    try:
        loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    except yaml.YAMLError as exc:
        # Report syntax errors through the module's own error type so callers
        # only need to handle TaskRegistryError.
        raise TaskRegistryError(f"task registry is not valid YAML: {path}: {exc}") from exc
    if not isinstance(loaded, Mapping):
        raise TaskRegistryError(f"task registry must be a mapping: {path}")
    return cast(Mapping[str, object], loaded)
89
+
90
+
91
+ def _find_banned_keys(value: object, path: str = "$") -> tuple[str, ...]:
92
+ if isinstance(value, Mapping):
93
+ hits: list[str] = []
94
+ for key, nested in value.items():
95
+ key_text = str(key)
96
+ nested_path = f"{path}.{key_text}"
97
+ if key_text in _BANNED_MODEL_VISIBLE_KEYS:
98
+ hits.append(nested_path)
99
+ hits.extend(_find_banned_keys(nested, nested_path))
100
+ return tuple(hits)
101
+ if isinstance(value, Sequence) and not isinstance(value, str | bytes | bytearray):
102
+ hits = []
103
+ for index, nested in enumerate(value):
104
+ hits.extend(_find_banned_keys(nested, f"{path}[{index}]"))
105
+ return tuple(hits)
106
+ return ()
107
+
108
+
109
+ def _read_path(path: Path) -> Path:
110
+ return path if path.is_absolute() else _REPO_ROOT / path
111
+
112
+
113
+ def _require_existing_files(paths: tuple[Path, ...]) -> None:
114
+ for required in paths:
115
+ if not _read_path(required).exists():
116
+ raise TaskRegistryError(f"task file missing: {required}")
117
+
118
+
119
+ def _require_mapping(value: object, label: str) -> Mapping[str, object]:
120
+ if not isinstance(value, Mapping):
121
+ raise TaskRegistryError(f"{label} must be a mapping")
122
+ return cast(Mapping[str, object], value)
123
+
124
+
125
+ def _require_non_empty_str(value: object, label: str) -> str:
126
+ if not isinstance(value, str) or not value:
127
+ raise TaskRegistryError(f"{label} must be a non-empty string")
128
+ return value
129
+
130
+
131
+ def _require_sequence(value: object, label: str) -> Sequence[object]:
132
+ if not isinstance(value, Sequence) or isinstance(value, str | bytes | bytearray):
133
+ raise TaskRegistryError(f"{label} must be a list")
134
+ return cast(Sequence[object], value)
135
+
136
+
137
+ def _optional_float(value: object, label: str, default: float) -> float:
138
+ if value is None:
139
+ return default
140
+ if isinstance(value, bool) or not isinstance(value, str | int | float):
141
+ raise TaskRegistryError(f"{label} must be a number")
142
+ return float(value)
143
+
144
+
145
def _load_task_toml(task_toml: Path) -> Mapping[str, object]:
    """Parse a task's ``task.toml`` and reject banned implementation keys.

    Args:
        task_toml: Path to the file; resolved through ``_read_path``.

    Returns:
        The parsed TOML document as a mapping.

    Raises:
        TaskRegistryError: If the file is not valid TOML or contains any key
            from ``_BANNED_MODEL_VISIBLE_KEYS``.
    """
    try:
        raw = tomllib.loads(_read_path(task_toml).read_text(encoding="utf-8"))
    except tomllib.TOMLDecodeError as exc:
        # Report syntax errors through the registry error type so callers
        # only need to handle TaskRegistryError.
        raise TaskRegistryError(f"task file is not valid TOML: {task_toml}: {exc}") from exc
    banned = _find_banned_keys(raw)
    if banned:
        raise TaskRegistryError(
            "model-visible task registry contains banned implementation keys: " + ", ".join(banned)
        )
    return cast(Mapping[str, object], raw)
153
+
154
+
155
def _require_bool(value: object, label: str) -> bool:
    """Return ``value`` if it is a real boolean, or fail naming ``label``."""
    if not isinstance(value, bool):
        raise TaskRegistryError(f"{label} must be a boolean")
    return value


def _build_task(
    *,
    task_dir: Path,
    instruction_path: Path,
    verifier_path: Path,
    raw: Mapping[str, object],
) -> EvidenceTask:
    """Assemble an ``EvidenceTask`` from a parsed ``task.toml`` document.

    Args:
        task_dir: Directory of the task.
        instruction_path: Path to the task's instruction file.
        verifier_path: Path to the task's verifier script.
        raw: Parsed ``task.toml`` content.

    Returns:
        The validated task.

    Raises:
        TaskRegistryError: If a required section or value is missing or has
            the wrong type, or if model validation fails.
    """
    task_section = _require_mapping(raw.get("task"), "task.toml [task]")
    # The remaining sections are optional and default to empty mappings.
    metadata_section = _require_mapping(raw.get("metadata", {}), "task.toml [metadata]")
    environment_section = _require_mapping(raw.get("environment", {}), "task.toml [environment]")
    verifier_section = _require_mapping(raw.get("verifier", {}), "task.toml [verifier]")
    keywords = _require_sequence(task_section.get("keywords", ()), "task.toml [task].keywords")

    try:
        return EvidenceTask(
            task_id=_require_non_empty_str(task_section.get("name"), "task.toml [task].name"),
            task_dir=task_dir,
            instruction_path=instruction_path,
            verifier_path=verifier_path,
            description=_require_non_empty_str(
                task_section.get("description"),
                "task.toml [task].description",
            ),
            dataset_id=_require_non_empty_str(
                metadata_section.get("dataset_id"),
                "task.toml [metadata].dataset_id",
            ),
            keywords=tuple(str(keyword) for keyword in keywords),
            # The cast only informs the type checker; the model's Literal
            # field performs the actual validation.
            environment_os=cast(
                Literal["linux", "windows"],
                environment_section.get("os", "linux"),
            ),
            # Require a real boolean: bool("false") would be True and would
            # silently switch internet access on.
            allow_internet=_require_bool(
                environment_section.get("allow_internet", False),
                "task.toml [environment].allow_internet",
            ),
            verifier_timeout_sec=_optional_float(
                verifier_section.get("timeout_sec"),
                "task.toml [verifier].timeout_sec",
                120.0,
            ),
        )
    except ValidationError as exc:
        raise TaskRegistryError(str(exc)) from exc
196
+
197
+
198
def _load_task(task_dir: Path) -> EvidenceTask:
    """Load the task stored in ``task_dir``.

    The directory must contain ``task.toml``, ``instruction.md`` and
    ``tests/test.sh``; all three are checked before the TOML is parsed.
    """
    manifest = task_dir / "task.toml"
    instruction = task_dir / "instruction.md"
    verifier = task_dir.joinpath("tests", "test.sh")

    _require_existing_files((manifest, instruction, verifier))
    parsed = _load_task_toml(manifest)
    return _build_task(
        task_dir=task_dir,
        instruction_path=instruction,
        verifier_path=verifier,
        raw=parsed,
    )
209
+
210
+
211
def _load_dataset_ref(index: int, dataset_raw: object) -> EvidenceDatasetRef:
    """Resolve one ``datasets[index]`` entry of the registry.

    Args:
        index: Position of the entry, used in error messages.
        dataset_raw: Raw entry as loaded from YAML.

    Returns:
        The dataset reference with all of its tasks loaded.

    Raises:
        TaskRegistryError: If a field is missing or has the wrong type, a
            task cannot be loaded, or a task's ``dataset_id`` differs from
            the entry's.
    """
    dataset_map = _require_mapping(dataset_raw, f"datasets[{index}]")
    ref = _require_non_empty_str(dataset_map.get("ref"), f"datasets[{index}].ref")
    dataset_id = _require_non_empty_str(
        dataset_map.get("dataset_id"),
        f"datasets[{index}].dataset_id",
    )
    scenario_path = _require_non_empty_str(
        dataset_map.get("scenario_path"),
        f"datasets[{index}].scenario_path",
    )
    task_paths = _require_sequence(dataset_map.get("task_paths"), f"datasets[{index}].task_paths")

    # Validate each entry instead of stringifying it: ``str(None)`` would
    # become the path "None" and an empty string would point at the repo root.
    tasks = tuple(
        _load_task(
            Path(
                _require_non_empty_str(
                    task_path,
                    f"datasets[{index}].task_paths[{position}]",
                )
            )
        )
        for position, task_path in enumerate(task_paths)
    )
    mismatched = tuple(task.task_id for task in tasks if task.dataset_id != dataset_id)
    if mismatched:
        raise TaskRegistryError(
            f"dataset {ref} has tasks with mismatched dataset_id: {', '.join(mismatched)}"
        )
    return EvidenceDatasetRef(
        ref=ref,
        dataset_id=dataset_id,
        scenario_path=Path(scenario_path),
        tasks=tasks,
    )
236
+
237
+
238
def load_task_registry(path: Path) -> EvidenceTaskRegistry:
    """Load and validate a Harbor-style Evidence Fabric task registry.

    Args:
        path: Location of the registry YAML file.

    Returns:
        The registry with every dataset and task resolved.

    Raises:
        TaskRegistryError: If the file cannot be loaded, contains banned
            implementation keys, or any field fails validation.
    """
    raw = _load_yaml_mapping(path)
    banned = _find_banned_keys(raw)
    if banned:
        raise TaskRegistryError(
            "model-visible task registry contains banned implementation keys: " + ", ".join(banned)
        )

    version = raw.get("version")
    # bool is a subclass of int; exclude it so ``version: true`` is rejected.
    if isinstance(version, bool) or not isinstance(version, int):
        raise TaskRegistryError("task registry version must be an integer")
    registry_id = _require_non_empty_str(raw.get("registry_id"), "task registry_id")
    datasets = _require_sequence(raw.get("datasets"), "task registry datasets")
    resolved_datasets = tuple(
        _load_dataset_ref(index, dataset_raw) for index, dataset_raw in enumerate(datasets)
    )

    try:
        return EvidenceTaskRegistry(
            version=version,
            registry_id=registry_id,
            datasets=resolved_datasets,
        )
    except ValidationError as exc:
        raise TaskRegistryError(str(exc)) from exc
@@ -56,6 +56,7 @@ _ROLE_KIND_ALLOW_LIST: dict[str, frozenset[str]] = {
56
56
  # Spec 1978 ADR-0001 — tools-aware chat request from TUI
57
57
  "chat_request": frozenset({"tui"}),
58
58
  "assistant_chunk": frozenset({"backend", "llm"}),
59
+ "progress_event": frozenset({"backend"}),
59
60
  "tool_call": frozenset({"backend", "tool"}),
60
61
  "tool_result": frozenset({"backend", "tool"}),
61
62
  "coordinator_phase": frozenset({"backend"}),
@@ -385,6 +386,10 @@ class ChatRequestFrame(_BaseFrame):
385
386
  le=1.0,
386
387
  description="Nucleus sampling threshold.",
387
388
  )
389
+ reasoning_mode: Literal["fast", "balanced", "deep", "diagnostic", "auto"] | None = Field(
390
+ default=None,
391
+ description="K-EXAONE/FriendliAI reasoning policy for this assistant turn.",
392
+ )
388
393
 
389
394
  @model_validator(mode="after")
390
395
  def _v_tool_message_integrity(self) -> ChatRequestFrame:
@@ -449,6 +454,46 @@ class AssistantChunkFrame(_BaseFrame):
449
454
  done: bool = Field(description="True if this is the terminal chunk for this message_id.")
450
455
 
451
456
 
457
+ # ---------------------------------------------------------------------------
458
+ # Arm: progress_event (UMMAYA query-loop painting — deterministic and safe)
459
+ # ---------------------------------------------------------------------------
460
+
461
+
462
class ProgressEventFrame(_BaseFrame):
    """Backend-to-TUI frame reporting deterministic query-loop progress.

    Deliberately kept apart from ``AssistantChunkFrame.thinking``: a
    ``progress_event`` reports safe harness state only (analysis, tool
    selection, tool dispatch/result, answer synthesis), while provider
    reasoning stays on the gated ``thinking_delta`` channel.
    """

    kind: Literal["progress_event"] = Field(
        default="progress_event",
        description="Frame discriminator.",
    )
    phase: Literal[
        "analysis",
        "tool_selection",
        "tool_call",
        "tool_result",
        "answer_synthesis",
    ] = Field(
        description="Safe query-loop phase represented by this event.",
    )
    # Both language variants are required and must be non-empty.
    message_ko: str = Field(
        min_length=1,
        description="Korean progress text for the TUI.",
    )
    message_en: str = Field(
        min_length=1,
        description="English fallback progress text.",
    )
    safe_to_persist: bool = Field(
        default=True,
        description="True because this channel never carries raw provider chain-of-thought.",
    )
    # Optional correlation fields; both default to None.
    tool_id: str | None = Field(
        default=None,
        description="Concrete adapter/tool id when this event is tied to one.",
    )
    call_id: str | None = Field(
        default=None,
        description="Tool call id when this event is tied to a specific invocation.",
    )
495
+
496
+
452
497
  # ---------------------------------------------------------------------------
453
498
  # Arm: tool_call (Spec 287 baseline — arguments changed from Any to dict[str, object])
454
499
  # ---------------------------------------------------------------------------
@@ -1436,6 +1481,7 @@ IPCFrame = Annotated[
1436
1481
  UserInputFrame
1437
1482
  | ChatRequestFrame
1438
1483
  | AssistantChunkFrame
1484
+ | ProgressEventFrame
1439
1485
  | ToolCallFrame
1440
1486
  | ToolResultFrame
1441
1487
  | CoordinatorPhaseFrame
@@ -1505,6 +1551,7 @@ __all__ = [
1505
1551
  "ToolDefinition",
1506
1552
  "ToolDefinitionFunction",
1507
1553
  "AssistantChunkFrame",
1554
+ "ProgressEventFrame",
1508
1555
  "ToolCallFrame",
1509
1556
  "ToolResultFrame",
1510
1557
  "ToolResultEnvelope",