ummaya 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/README.md +2 -1
  2. package/npm-shrinkwrap.json +2 -2
  3. package/package.json +1 -1
  4. package/prompts/manifest.yaml +2 -2
  5. package/prompts/session_guidance_v1.md +3 -1
  6. package/prompts/system_v1.md +8 -7
  7. package/pyproject.toml +2 -7
  8. package/src/ummaya/context/builder.py +17 -11
  9. package/src/ummaya/engine/engine.py +27 -7
  10. package/src/ummaya/engine/query.py +20 -0
  11. package/src/ummaya/evidence/__init__.py +25 -0
  12. package/src/ummaya/evidence/__main__.py +7 -0
  13. package/src/ummaya/evidence/models.py +58 -0
  14. package/src/ummaya/evidence/runner.py +308 -0
  15. package/src/ummaya/evidence/task_registry.py +264 -0
  16. package/src/ummaya/ipc/frame_schema.py +47 -0
  17. package/src/ummaya/ipc/stdio.py +1349 -90
  18. package/src/ummaya/llm/client.py +132 -56
  19. package/src/ummaya/llm/reasoning.py +84 -0
  20. package/src/ummaya/tools/discovery_bridge.py +17 -1
  21. package/src/ummaya/tools/executor.py +32 -12
  22. package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
  23. package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
  24. package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
  25. package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
  26. package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
  27. package/src/ummaya/tools/location_adapters.py +8 -6
  28. package/src/ummaya/tools/manifest_metadata.py +16 -3
  29. package/src/ummaya/tools/mvp_surface.py +2 -2
  30. package/src/ummaya/tools/nmc/emergency_search.py +8 -6
  31. package/src/ummaya/tools/register_all.py +9 -0
  32. package/src/ummaya/tools/resolve_location.py +4 -4
  33. package/src/ummaya/tools/search.py +664 -18
  34. package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
  35. package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
  36. package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
  37. package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
  38. package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
  39. package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
  40. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
  41. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
  42. package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
  43. package/src/ummaya/tools/verify_canonical_map.py +21 -0
  44. package/tui/package.json +1 -2
  45. package/tui/src/QueryEngine.ts +4 -0
  46. package/tui/src/cli/handlers/auth.ts +1 -1
  47. package/tui/src/cli/handlers/mcp.tsx +3 -3
  48. package/tui/src/cli/print.ts +69 -18
  49. package/tui/src/cli/update.ts +13 -13
  50. package/tui/src/commands/copy/index.ts +1 -1
  51. package/tui/src/commands/cost/cost.ts +2 -2
  52. package/tui/src/commands/init-verifiers.ts +5 -5
  53. package/tui/src/commands/init.ts +30 -30
  54. package/tui/src/commands/insights.ts +43 -43
  55. package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
  56. package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
  57. package/tui/src/commands/install.tsx +5 -5
  58. package/tui/src/commands/mcp/addCommand.ts +5 -5
  59. package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
  60. package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
  61. package/tui/src/commands/reasoning/index.ts +13 -0
  62. package/tui/src/commands/reasoning/reasoning.tsx +177 -0
  63. package/tui/src/commands/thinkback/thinkback.tsx +3 -3
  64. package/tui/src/commands.ts +2 -0
  65. package/tui/src/components/Messages.tsx +2 -1
  66. package/tui/src/components/Spinner.tsx +2 -2
  67. package/tui/src/components/design-system/LoadingState.tsx +2 -2
  68. package/tui/src/ipc/codec.ts +26 -0
  69. package/tui/src/ipc/frames.generated.ts +398 -303
  70. package/tui/src/ipc/llmClient.ts +130 -51
  71. package/tui/src/ipc/llmTypes.ts +16 -1
  72. package/tui/src/ipc/schema/frame.schema.json +1 -3475
  73. package/tui/src/main.tsx +3 -0
  74. package/tui/src/query.ts +467 -2
  75. package/tui/src/screens/REPL.tsx +3 -3
  76. package/tui/src/services/api/claude.ts +54 -25
  77. package/tui/src/services/api/client.ts +33 -12
  78. package/tui/src/services/api/ummaya.ts +70 -16
  79. package/tui/src/skills/bundled/stuck.ts +12 -12
  80. package/tui/src/state/AppStateStore.ts +7 -0
  81. package/tui/src/tools/AdapterTool/AdapterTool.ts +590 -7
  82. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +43 -17
  83. package/tui/src/tools/LookupPrimitive/prompt.ts +7 -6
  84. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +40 -19
  85. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +25 -9
  86. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +25 -9
  87. package/tui/src/tools/_shared/citizenUserText.ts +49 -0
  88. package/tui/src/tools/_shared/directPublicDataGuard.ts +362 -0
  89. package/tui/src/tools/_shared/kmaAnalysisGuard.ts +197 -0
  90. package/tui/src/tools/_shared/kmaAviationGuard.ts +70 -0
  91. package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
  92. package/tui/src/tools/_shared/nmcAedGuard.ts +234 -0
  93. package/tui/src/tools/_shared/protectedCheckGuard.ts +207 -0
  94. package/tui/src/tools/_shared/rootPrimitiveInput.ts +67 -0
  95. package/tui/src/tools/_shared/textToolCallGuard.ts +91 -0
  96. package/tui/src/tools/_shared/toolChoiceRepair.ts +866 -0
  97. package/tui/src/utils/attachments.ts +1 -1
  98. package/tui/src/utils/kExaoneReasoning.ts +138 -0
  99. package/tui/src/utils/messages.ts +1 -0
  100. package/tui/src/utils/multiToolLayout.ts +13 -0
  101. package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
  102. package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
  103. package/tui/src/utils/settings/applySettingsChange.ts +4 -0
  104. package/tui/src/utils/settings/types.ts +9 -3
  105. package/tui/src/utils/stats.ts +1 -1
  106. package/uv.lock +1 -15
  107. package/assets/copilot-gate-logo.svg +0 -58
  108. package/assets/govon-logo.svg +0 -40
  109. package/src/ummaya/eval/__init__.py +0 -5
  110. package/src/ummaya/eval/retrieval.py +0 -713
  111. package/tui/src/utils/messageStream.ts +0 -186
package/README.md CHANGED
@@ -161,7 +161,8 @@ This is not a separate safety pitch. It is part of how `check` and `send` work:
161
161
  UMMAYA currently uses `LGAI-EXAONE/K-EXAONE-236B-A23B` through FriendliAI Serverless for LLM responses.
162
162
 
163
163
  - Model: [K-EXAONE-236B-A23B](https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B)
164
- - Thinking channel: `UMMAYA_K_EXAONE_THINKING` default `false`; set it to `true` only for reasoning-channel diagnostics or benchmark runs.
164
+ - Reasoning mode: `/reasoning` or `UMMAYA_K_EXAONE_REASONING_MODE` selects `fast`, `balanced`, `deep`, `diagnostic`, or `auto`.
165
+ - Thinking channel: `UMMAYA_K_EXAONE_THINKING` default `false` remains as a legacy compatibility flag; set `/reasoning deep` or `UMMAYA_K_EXAONE_REASONING_MODE=deep` for reasoning-channel diagnostics or benchmark runs.
165
166
  - Model license: [K-EXAONE AI Model License Agreement](https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B/blob/main/LICENSE)
166
167
  - Project license: [Apache License 2.0](LICENSE)
167
168
 
package/npm-shrinkwrap.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "ummaya",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "ummaya",
9
- "version": "0.2.2",
9
+ "version": "0.2.4",
10
10
  "license": "Apache-2.0",
11
11
  "dependencies": {
12
12
  "@alcalzone/ansi-tokenize": "^0.3.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ummaya",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "Conversational multi-agent harness for Korean public-service channels",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
package/prompts/manifest.yaml CHANGED
@@ -6,9 +6,9 @@ entries:
6
6
  path: compact_v1.md
7
7
  - prompt_id: session_guidance_v1
8
8
  version: 1
9
- sha256: 063e92b54886d624a80170ae8c5f62498d2b7817daa2743db6753279578902b1
9
+ sha256: 5464ca763790332b2d6a3969151b43723ec51cc43bdeaed09074f7a0a1f62f98
10
10
  path: session_guidance_v1.md
11
11
  - prompt_id: system_v1
12
12
  version: 1
13
- sha256: 0611f086efeaa50986521b9ded4bb30adf628cd51898dce340efc8a1ebec920c
13
+ sha256: f70d0e9a95a85c501f426c6d1376bbc953767ba474be2213f946f30b6b7e53aa
14
14
  path: system_v1.md
package/prompts/session_guidance_v1.md CHANGED
@@ -1,3 +1,5 @@
1
- When the citizen's message names a district, neighborhood, landmark, or address, invoke the geocoding tool before any tool that takes an administrative code. Do not fill administrative region codes from memory; pass them only after a geocoding tool has produced them in this session. In UMMAYA's active tool surface, "geocoding tool" means the locate primitive and its registered locate adapters. Also call locate when the citizen names a campus, station, POI, or nearby-search anchor before any downstream adapter that needs coordinates, KMA nx/ny, administrative codes, or region names. Choose a registered locate adapter from <available_adapters>: use kakao_keyword_search for POIs and named places, kakao_address_search for structured road/jibun addresses, kakao_coord_to_region after coordinates when a region-mode adapter needs q0/q1, and JUSO/SGIS adapters only when their schema better matches the required identifier. Do not fill coordinates, KMA grids, administrative codes, or region names from memory; pass only values produced by a locate adapter in this session. When the citizen's request matches a registered adapter's purpose (accident statistics, emergency rooms, hospitals, weather observations, forecast data, welfare eligibility, authentication, submissions, etc.), invoke the appropriate primitive and adapter to fetch or perform the authoritative action; do not answer factual public-service queries from parametric memory. For current/today weather, call kma_current_observation with nx/ny from locate before final prose; call kma_forecast_fetch as a companion when the citizen also needs today's later forecast. For emergency-room search near a named place, first call locate with {"tool_id":"kakao_keyword_search","params":{"query":"<citizen-place-text>"}}, then call locate with {"tool_id":"kakao_coord_to_region","params":{"lat":"<locate lat>","lon":"<locate lon>"}} if the NMC adapter needs q0/q1 region-mode parameters. 
Always use tools for location-based factual queries even when you recognize the place name. Never call a tool with an empty or whitespace-only argument value.
1
+ Prefer concrete adapter function calls when a registered adapter is loaded in the current tools[] list. A concrete adapter function is named by its tool_id and accepts exactly the schema fields from <available_adapters>; do not wrap {"tool_id": "...", "params": {...}} inside that concrete function. Use legacy root wrappers locate/find/check/send only when the concrete adapter function is not loaded and only the root primitive is available. When the citizen's message names a district, neighborhood, landmark, or address, invoke the geocoding tool before any tool that takes an administrative code. Do not fill administrative region codes from memory; pass them only after a geocoding tool has produced them in this session. In the active concrete tool surface, that geocoding tool should be the registered locate adapter function itself. Use kakao_keyword_search({"query":"<citizen-place-text>"}) for POIs and named places, kakao_address_search({"query":"<citizen-address>"}) for structured road/jibun addresses, kakao_coord_to_region({"lat":<locate lat>,"lon":<locate lon>}) after coordinates when a region-mode adapter needs q0/q1, and JUSO/SGIS adapters only when their schema better matches the required identifier. Do not fill coordinates, KMA grids, administrative codes, or region names from memory; pass only values produced by a locate adapter in this session. When the citizen's request matches a registered adapter's purpose (accident statistics, emergency rooms, hospitals, weather observations, forecast data, welfare eligibility, authentication, submissions, etc.), invoke the appropriate concrete adapter to fetch or perform the authoritative action; do not answer factual public-service queries from parametric memory. For current/today non-aviation weather, call kma_current_observation with nx/ny from locate before final prose; call kma_forecast_fetch as a companion when the citizen also needs today's later forecast. 
Aviation exception: when the citizen names METAR, SPECI, AMOS, RVR, runway, airport aviation weather, or a concrete KMA APIHub aviation adapter from <available_adapters>, do not route to kma_current_observation and do not call locate unless the selected adapter schema explicitly requires coordinates. Use the aviation adapter from <available_adapters> directly, usually kma_apihub_url_air_metar_decoded({"org":"K","help":1}) for Gimhae/RKPK decoded METAR. For emergency-room search near a named place, first call kakao_keyword_search({"query":"<citizen-place-text>"}), then call kakao_coord_to_region({"lat":<locate lat>,"lon":<locate lon>}) if the NMC adapter needs q0/q1 region-mode parameters. For collapse, unconsciousness, cardiac-arrest, or AED-relevant wording such as 사람이 쓰러졌어, after emergency-room search also call nmc_aed_site_locate before final prose when that adapter is surfaced; if AED returns no data or an upstream error, report that result with 119 guidance instead of substituting ER data. Always use tools for location-based factual queries even when you recognize the place name. Never call a tool with an empty or whitespace-only argument value.
2
+
3
+ Treat ordinary airport flight-operation wording as aviation weather intent even when the citizen does not say METAR, AMOS, or 항공기상. Examples include 비행기, 항공편, 비행편, 이륙, 착륙, 결항, 지연, 운항, 뜰 만한가, flight, takeoff, landing, delay, or cancellation near an airport name. For those turns, prefer the KMA APIHub aviation adapter in <available_adapters> over locate plus kma_current_observation.
2
4
 
3
5
  Before each tool call, compare the citizen request with the selected adapter's input schema in <available_adapters>. Preserve every explicit constraint from the citizen request as a matching parameter when the schema exposes one: requested result count, radius or distance wording, date/time, institution type, category, specialty, keyword, administrative region, and other filter fields. Do not make a broad unfiltered call when the request is narrower and the adapter has an optional field for that narrowing. After a tool_result, verify that the returned collection still matches the citizen's stated constraints before writing final prose; if it does not, call the same selected adapter again with corrected schema-valid params instead of answering from the broad result.
package/prompts/system_v1.md CHANGED
@@ -38,8 +38,8 @@
38
38
 
39
39
  <tool_usage>
40
40
  <primitives>
41
- - `locate(tool_id, params)`등록된 locate 어댑터를 골라 위치 / 주소 / / 관공서 좌표 + 행정동 + POI 반환. 예: `locate(tool_id="kakao_keyword_search", params={"query":"동아대학교 승학캠퍼스"})`.
42
- - `find(tool_id, params)`외부 도메인 API 조회 도구 (기상청, HIRA, KOROAD 등). 백엔드가 사용자 발화 시점에 BM25 로 후보 어댑터를 사전 선별해 `<available_adapters>` 섹션에 inject 합니다 — LLM 은 그 목록에서 tool_id 골라 fetch 만 호출. `mode="search"` 는 backend internal 기능이므로 LLM 이 직접 호출 금지.
41
+ - **Concrete adapter first** tools[] 안에 concrete adapter function 있으면 function 이름은 `tool_id` 입니다. function adapter schema 필드만 받습니다. 예: `kakao_keyword_search({"query":"동아대학교 승학캠퍼스"})`, `kma_current_observation({"base_date":"YYYYMMDD","base_time":"HH00","nx":97,"ny":74})`. concrete adapter function 에 `{"tool_id": "...", "params": {...}}` 를 넣지 마십시오.
42
+ - **Legacy root wrappers** concrete adapter function 로드되지 않고 root primitive 있을 때만 `locate({"tool_id":"kakao_keyword_search","params":{"query":"동아대학교 승학캠퍼스"}})` 또는 `find({"tool_id":"kma_current_observation","params":{...}})` 형식을 사용합니다. `mode="search"` 는 backend internal 기능이므로 LLM 이 직접 호출 금지.
43
43
  - `check(tool_id, params)` — 인증 ceremony. `params = {scope_list, purpose_ko, purpose_en, session_id?}`. 반환 = `DelegationContext` (또는 any_id_sso 의 경우 `IdentityAssertion`).
44
44
  - `send(tool_id, params)` — OPAQUE-도메인 행정 모듈 호출. `params` 에 `delegation_context` (check 반환) + 어댑터별 payload. 접수번호 반환.
45
45
  **Public-data boundary**: 공개자료 `find` 조회가 성공했고 시민 발화에 인증/본인확인/동의/신청/제출/납부/신고 요구가 없으면 다음 turn 은 최종 답변입니다. 공개 의약품, 채용공고, 통계, 요금, 수질, 시설 목록 같은 read-only 결과를 "검증"하려고 `check` 를 추가 호출하지 마십시오.
@@ -77,14 +77,14 @@
77
77
  | 마이데이터 인증 / 거래내역 동의 | `mock_verify_mydata` | (해당 어댑터) | `mock_submit_module_public_mydata_action` |
78
78
  | 공동·금융 통합 (default) | `mock_verify_gongdong_injeungseo` | (선택) | (선택) |
79
79
  | 통합 SSO / Any-ID 로그인 | `mock_verify_module_any_id_sso` | (선택) | (호출 금지 — IdentityAssertion 만 반환) |
80
- **3-step chain**: (1) check(tool_id, params={scope_list, purpose_ko, purpose_en, session_id?}) → DelegationContext. (2) find(tool_id, params={delegation_context}) 선택. (3) send(tool_id, params={delegation_context, ...}) → 접수번호.
80
+ **3-step chain**: (1) check 계열 인증 adapter → DelegationContext. (2) 필요한 경우 concrete lookup adapter를 schema 필드로 직접 호출. (3) send 계열 제출 adapter → 접수번호.
81
81
  **Send payload contract (fail-closed)**: send 호출은 항상 최상위 `tool_id` 와 `params` 를 모두 포함해야 합니다. `send(params={...})` 만 호출하거나, `tool_id` 없이 도메인 필드를 최상위에 펼친 send 호출은 절대 금지입니다. send `params` 는 해당 어댑터의 Pydantic input_schema 와 정확히 일치해야 합니다. 시민 발화에 이미 있는 필수 payload 필드(예: `minwon_type`, `applicant_name`, `delivery_method`, `session_id`)는 첫 send 호출에 모두 포함하십시오. `delegation_context` 는 check 반환값 전체를 `delegation_context` 한 필드 아래에만 넣고, `token`, `citizen_did`, `purpose_ko`, `purpose_en`, `scope`, `mode`, `_mode` 같은 내부 필드를 send params 최상위로 펼치거나 복사하지 마십시오. send schema 에 `session_id` 가 있으면 직전 check `params` 와 send `params` 에 같은 `session_id` 값을 넣어야 합니다. 시민이 세션 ID 를 명시했다면 check `params` 에도 `session_id` 를 직접 포함하십시오. `params.session_context.session_id` 형태로 중첩하지 마십시오. send 결과가 `status="succeeded"` 이거나 "제출이 접수되었습니다" 로 반환되면 같은 요청을 다시 send 하지 말고, 즉시 final answer 를 작성하십시오.
82
82
  **Mock PII minimization defaults**: 홈택스 mock 흐름에서 시민에게 주민등록번호 앞 6자리, 총소득액, 실제 세무 식별자를 되묻지 마십시오. mock 조회의 `resident_id_prefix` 는 시민이 명시하지 않으면 synthetic fixture 값 `"000000"` 을 사용합니다. 연도가 없으면 직전 귀속연도(예: 2026년에 실행 중이면 2025)를 사용합니다. mock 종합소득세 send 의 `total_income_krw` 가 없으면 synthetic fixture 값 `42000000` 을 사용하되, final answer 에 실제 세무자료처럼 표현하지 마십시오.
83
83
  **Tool choice override**: 시민 발화가 `마이데이터`를 포함하면 check 도구는 항상 `mock_verify_mydata` 입니다. `mock_verify_module_modid` 는 홈택스/모바일ID family에만 사용하고 마이데이터 동의에는 사용하지 마십시오. 시민 발화가 `간편인증`을 포함하면 `mock_verify_ganpyeon_injeung` 을 사용하고, 정부24 민원/등본/발급 문맥이 아닌 한 `mock_verify_module_simple_auth` 를 사용하지 마십시오.
84
84
  **Identity/scope fixed values**: 모바일 신분증 본인확인 scope_list 는 정확히 `["check:mobile_id.identity"]` 입니다. `find:identity.info`, `find:identity.check` 같은 alias 를 만들지 마십시오. 간편인증 로그인 scope_list 는 정확히 `["check:ganpyeon.identity"]` 입니다. `mock_verify_module_any_id_sso`, `find:admin_service.permission_check`, `send:admin_service.permission_management` 로 대체하지 마십시오.
85
85
  **Worked example** — 시민: "종합소득세 신고해줘"
86
86
  1. `check(tool_id="mock_verify_module_modid", params={"scope_list": ["find:hometax.simplified", "send:hometax.tax-return"], "purpose_ko": "종합소득세 신고", "purpose_en": "Comprehensive income tax filing"})`
87
- 2. `find(tool_id="mock_lookup_module_hometax_simplified", params={"year": 2025, "resident_id_prefix": "000000"})`
87
+ 2. `mock_lookup_module_hometax_simplified({"year": 2025, "resident_id_prefix": "000000"})`
88
88
  3. `send(tool_id="mock_submit_module_hometax_taxreturn", params={"delegation_context": <ctx>, "tax_year": 2025, "income_type": "종합소득", "total_income_krw": 42000000, "session_id": "HOMETAX-TAXRETURN-SESSION-001"})` → `접수번호: hometax-YYYY-MM-DD-RX-XXXXX`
89
89
  **Worked example** — 시민: "마이데이터 동의 상태 확인하고 필요한 공공 마이데이터 제공 동의까지 진행해줘"
90
90
  1. `check(tool_id="mock_verify_mydata", params={"scope_list": ["send:public_mydata.action"], "purpose_ko": "공공 마이데이터 제공 동의", "purpose_en": "Public MyData consent action", "session_id": "MYDATA-ACTION-SESSION-001"})`
@@ -93,7 +93,7 @@
93
93
  1. `check(tool_id="mock_verify_module_simple_auth", params={"scope_list": ["send:gov24.minwon"], "purpose_ko": "주민등록등본 발급 민원 신청", "purpose_en": "Gov24 resident registration certificate civil petition", "session_id": "GOV24-MINWON-SESSION-001"})`
94
94
  2. `send(tool_id="mock_submit_module_gov24_minwon", params={"delegation_context": <ctx>, "minwon_type": "주민등록등본", "applicant_name": "홍길동", "delivery_method": "online", "session_id": "GOV24-MINWON-SESSION-001"})` → `접수번호: gov24-YYYY-MM-DD-MW-XXXXXXXX`
95
95
  **Worked example** — 시민: "한부모가족 아동양육비 지원을 신청해줘"
96
- 1. `find(tool_id="mohw_welfare_eligibility_search", params={"search_wrd": "한부모가족 아동양육비", "trgter_indvdl_array": "060", "onap_psblt_yn": "Y"})`
96
+ 1. `mohw_welfare_eligibility_search({"search_wrd": "한부모가족 아동양육비", "trgter_indvdl_array": "060", "onap_psblt_yn": "Y"})`
97
97
  2. `check(tool_id="mock_verify_mydata", params={"scope_list": ["send:mydata.welfare_application"], "purpose_ko": "한부모가족 아동양육비 지원 신청", "purpose_en": "Single-parent family child support application"})`
98
98
  3. `send(tool_id="mock_welfare_application_submit_v1", params={"applicant_id": "DI-...", "benefit_code": "WLF00001068", "application_type": "new", "household_size": 2, "delegation_context": <ctx>})` → `접수번호: MOCK-WA-...`
99
99
  **금지 패턴 (이미 위 규칙에서 명시한 것 외)**: 검색 결과 빈 후 "어댑터가 없습니다" 또는 "어댑터 ID 를 알려주세요" 답변 — 위 매핑 표가 답입니다. 시민에게 hometax.go.kr / gov.kr 직접 접속 안내 (chain 시도 전). 같은 find search 를 다른 query 로 재시도 — 첫 search 가 비었으면 즉시 매핑 표 사용. `mock_verify_module_any_id_sso` 뒤에 send chain — IdentityAssertion 만 반환합니다. 복지 급여 신청은 MyData send tier 이므로 `mock_verify_mydata`만 사용하고 Any-ID SSO를 사용하지 마십시오. 복지 신청 check scope_list 에 `find:mohw.welfare_eligibility_search` 또는 `send:mock.welfare_application_submit_v1` 를 넣지 말고 정확히 `["send:mydata.welfare_application"]` 만 사용하십시오.
@@ -133,8 +133,9 @@ Use available tools when the citizen's request requires live data lookup.
133
133
  **절대 금지 (fabrication patterns — 위반 시 시민 안전 침해):** 도구 실패 후 "기존 정보로는…", "일반적으로…", "참고로…", "통계상…" 으로 시작하는 어떤 구체 데이터도 출력 금지. **숫자·이름·주소·전화·URL·날짜·좌표 0개**. 도구가 0건 반환했는데 LLM 학습 데이터의 병원 이름·소방서 통계·복지 서비스명·bokjiro.go.kr URL 을 보충하는 행위 금지 — 학습 데이터의 servId / wlfareInfoId 는 stale (출시 후 변경됨), fabricate 시 시민이 잘못된 service detail link 클릭. "도구는 실패했지만 제가 알기로는…" / "도구 결과는 없지만 일반적으로…" 류의 hedging fabrication 금지. 도구 응답에 없는 단위 (예: "약 X km", "대략 Y건", "보통 Z명") 의 어림 추정 금지 — 통계는 호출이 실패하면 *답변 자체가 없어야 함*.
134
134
  **이유**: 의료·응급·교통·119 구급·복지 보조금 도메인의 fabricated 답변은 시민 misinformation 으로 이어집니다. 잘못된 병원 번호는 응급 상황에서 골든타임 손실, 잘못된 wlfareInfoId 는 잘못된 보조금 신청 페이지로 이동, fabricated 119 통계는 정부 행정 도구 신뢰 붕괴. 도구가 실패하면 *모른다고 솔직히 말하는 것이 정답*입니다 — "정확한 정보는 [공식 채널] 에서 확인" 형식 강제.
135
135
  **Dependent 도구는 직렬로 호출.** 선행 도구 결과 (예: locate 의 좌표) 가 후속 도구 (예: kma_forecast_fetch 의 lat/lon) 의 인자에 필요하면 같은 turn 에 두 도구 동시 emit 금지 — 선행 결과 받은 다음 turn 에서 후속 호출.
136
- **[CRITICAL — 주소 존재 여부를 산문으로 판단 금지]** 시민이 "근처/주변/주소/역/동/구/시" 등 위치 기반 요청을 하면 주소가 가짜처럼 보이거나 불완전해 보여도 먼저 `locate(tool_id="kakao_keyword_search", params={"query":"<citizen location>"})` 또는 구조화 주소일 때 `locate(tool_id="kakao_address_search", params={"query":"<citizen address>"})` 를 호출하십시오. "실제 주소가 아닌 것 같습니다" 같은 판단은 도구의 `not_found` 결과를 받은 뒤에만 말할 수 있습니다.
137
- **[CRITICAL — locate 단독 종결 금지 · 시민 안전 directive]** `locate` 호출 후 좌표 / 행정동 코드 / POI 만 받고 답변 turn 으로 종결하면 시민 fabrication 위험. 좌표만 받아서 날씨 / 병원 / 응급실 / 사고 / 119 / 복지 데이터를 답변에 포함하는 행위 = 100% 학습데이터 추측 (실측 없음). **locate 결과 받은 다음 turn 은 반드시 `find(tool_id="<adapter>", params={lat:<resolved>, lon:<resolved>, ...})` 호출**. `<adapter>` 는 `<available_adapters>` 블록에서 선택. 예: 날씨 → `kma_current_observation` / 병원 → `hira_hospital_search` / 응급실 → `nmc_emergency_search` / 사고다발지 → `koroad_accident_hazard_search`. locate 만 두번/세번 반복 호출 후 답변 종결도 금지 — 첫 호출에서 좌표 받았으면 다음은 find. 백엔드 chain gate 가 답변 turn 에 후속 find 누락을 detect 하면 turn reject + 강제 retry — 즉시 fabricate 시도하지 말고 find 호출.
136
+ **[CRITICAL — 주소 존재 여부를 산문으로 판단 금지]** 시민이 "근처/주변/주소/역/동/구/시" 등 위치 기반 요청을 하면 주소가 가짜처럼 보이거나 불완전해 보여도 먼저 concrete adapter `kakao_keyword_search({"query":"<citizen location>"})` 또는 구조화 주소일 때 `kakao_address_search({"query":"<citizen address>"})` 를 호출하십시오. concrete function 이 로드되지 않은 legacy 경로에서만 `locate({"tool_id":"kakao_keyword_search","params":{"query":"<citizen location>"}})` 를 사용합니다. "실제 주소가 아닌 것 같습니다" 같은 판단은 도구의 `not_found` 결과를 받은 뒤에만 말할 수 있습니다.
137
+ **[CRITICAL — locate 단독 종결 금지 · 시민 안전 directive]** locate 계열 호출 후 좌표 / 행정동 코드 / POI 만 받고 답변 turn 으로 종결하면 시민 fabrication 위험. 좌표만 받아서 날씨 / 병원 / 응급실 / 사고 / 119 / 복지 데이터를 답변에 포함하는 행위 = 100% 학습데이터 추측 (실측 없음). **locate 결과 받은 다음 turn 은 반드시 `<adapter>({lat:<resolved>, lon:<resolved>, ...})` 같은 concrete adapter 호출**. `<adapter>` 는 `<available_adapters>` 블록에서 선택. 예: 날씨 → `kma_current_observation` / 병원 → `hira_hospital_search` / 응급실 → `nmc_emergency_search` / 사고다발지 → `koroad_accident_hazard_search`. locate 만 두번/세번 반복 호출 후 답변 종결도 금지 — 첫 호출에서 좌표 받았으면 다음은 concrete find adapter. 백엔드 chain gate 가 답변 turn 에 후속 find 누락을 detect 하면 turn reject + 강제 retry — 즉시 fabricate 시도하지 말고 후속 adapter를 호출.
138
+ **[CRITICAL — collapse/AED chain]** 시민이 "사람이 쓰러졌어", 의식 없음, 심정지, 호흡 없음, AED/자동심장충격기/제세동기처럼 collapse·cardiac-arrest 상황을 말하면 응급실(`nmc_emergency_search`)만으로 종결하지 마십시오. `<available_adapters>`에 `nmc_aed_site_locate`가 있으면 응급실 조회 후 AED 조회도 호출한 다음 최종 답변합니다. AED 결과가 NO_DATA/upstream error 여도 그 실패를 119 안내와 함께 설명하고, 응급실 결과를 AED 결과처럼 대체하지 마십시오.
138
139
  </turn_order>
139
140
 
140
141
  <output_style>
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ummaya"
3
- version = "0.2.2"
3
+ version = "0.2.4"
4
4
  description = "Conversational multi-agent platform for Korean public APIs"
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -87,11 +87,6 @@ dev = [
87
87
  "pip-audit>=2.10.0",
88
88
  "vulture>=2.16",
89
89
  "pip-licenses>=5.0",
90
- # Spec 2521 — TUI Layer 5 (asciinema cast → per-frame text snapshot).
91
- # Dev-only: replays asciicast v2/v3 byte streams through a real VT-100
92
- # emulator so LLM agents can grep deterministic cell-grid text frames
93
- # instead of OCR'ing PNG keyframes. See AGENTS.md § TUI verification.
94
- "pyte>=0.8.2",
95
90
  ]
96
91
  # spec 026 FR-C09 — Langfuse Prompt Management is an OPT-IN integration.
97
92
  # It must never be a core runtime dependency (AGENTS.md hard rule: no new
@@ -314,7 +309,7 @@ min_confidence = 80
314
309
 
315
310
  [tool.commitizen]
316
311
  name = "cz_conventional_commits"
317
- version = "0.2.2"
312
+ version = "0.2.4"
318
313
  tag_format = "v$version"
319
314
 
320
315
  # PyTorch CPU-only wheel for Docker image size discipline (SC-1: ≤ 2 GB).
package/src/ummaya/context/builder.py CHANGED
@@ -20,6 +20,8 @@ import json
20
20
  import logging
21
21
  from typing import TYPE_CHECKING
22
22
 
23
+ from ummaya.context.attachments import AttachmentCollector
24
+ from ummaya.context.budget import BudgetEstimator
23
25
  from ummaya.context.compact_models import CompactionConfig, CompactionResult
24
26
  from ummaya.context.models import (
25
27
  AssembledContext,
@@ -57,6 +59,10 @@ class ContextBuilder:
57
59
  self._registry = registry
58
60
  self._compaction_config = compaction_config
59
61
  self._assembler = SystemPromptAssembler()
62
+ self._attachment_collector = AttachmentCollector(config=self._config)
63
+ self._budget_estimator = BudgetEstimator()
64
+ self._core_tool_defs_cache_key: tuple[str, ...] | None = None
65
+ self._core_tool_defs_cache: list[dict[str, object]] = []
60
66
 
61
67
  # Cached assembled ChatMessage (set on first build_system_message() call).
62
68
  self._system_message: ChatMessage | None = None
@@ -106,11 +112,7 @@ class ContextBuilder:
106
112
  ``ContextLayer(role='user', layer_name='turn_attachment', content=…)``
107
113
  or ``None`` when no attachment content exists.
108
114
  """
109
- # Import here to avoid circular imports between builder and attachments.
110
- from ummaya.context.attachments import AttachmentCollector # noqa: PLC0415
111
-
112
- collector = AttachmentCollector(config=self._config)
113
- collected = collector.collect(state=state, api_health=api_health)
115
+ collected = self._attachment_collector.collect(state=state, api_health=api_health)
114
116
  if collected is None:
115
117
  return None
116
118
  return ContextLayer(role="user", layer_name="turn_attachment", content=collected)
@@ -161,15 +163,12 @@ class ContextBuilder:
161
163
  tool_definitions = self._build_tool_definitions(state)
162
164
 
163
165
  # --- Budget (US4) ---
164
- from ummaya.context.budget import BudgetEstimator # noqa: PLC0415
165
-
166
- estimator = BudgetEstimator()
167
166
  assembled_no_budget = AssembledContext(
168
167
  system_layer=system_layer,
169
168
  turn_attachment=turn_attachment,
170
169
  tool_definitions=tool_definitions,
171
170
  )
172
- budget = estimator.estimate(
171
+ budget = self._budget_estimator.estimate(
173
172
  context=assembled_no_budget,
174
173
  hard_limit=hard_limit,
175
174
  soft_limit=int(hard_limit * 0.80),
@@ -232,8 +231,15 @@ class ContextBuilder:
232
231
  if self._registry is None:
233
232
  return []
234
233
 
235
- # Core prefix (deterministic, sorted by id — FR-004)
236
- core_defs = self._registry.export_core_tools_openai()
234
+ # Core prefix (deterministic, sorted by id — FR-004). Core tool
235
+ # schemas are stable across turns, so cache the expensive Pydantic JSON
236
+ # schema export and invalidate only when the active core id set changes.
237
+ core_tools = self._registry.core_tools()
238
+ core_cache_key = tuple(tool.id for tool in core_tools)
239
+ if core_cache_key != self._core_tool_defs_cache_key:
240
+ self._core_tool_defs_cache = [tool.to_openai_tool() for tool in core_tools]
241
+ self._core_tool_defs_cache_key = core_cache_key
242
+ core_defs = self._core_tool_defs_cache
237
243
 
238
244
  # Situational suffix (dynamic, sorted by id — FR-004)
239
245
  situational_defs: list[dict[str, object]] = []
package/src/ummaya/engine/engine.py CHANGED
@@ -29,6 +29,8 @@ if TYPE_CHECKING:
29
29
 
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
+ _INTERNAL_CONTEXT_TOOL_IDS = frozenset({"find", "locate", "check", "send", "search_tools"})
33
+
32
34
  _LOCATION_DEPENDENT_SCHEMA_KEYS = frozenset(
33
35
  {
34
36
  "adm_cd",
@@ -54,9 +56,24 @@ def _schema_requires_location_resolution(
54
56
  ) -> bool:
55
57
  """Return True when an adapter schema needs prior locate output."""
56
58
 
57
- return _contains_location_dependent_key(input_schema_json) or _contains_location_dependent_key(
59
+ return _contains_location_dependent_key(
58
60
  required_params
59
- )
61
+ ) or _schema_required_fields_contain_location_key(input_schema_json)
62
+
63
+
64
+ def _schema_required_fields_contain_location_key(value: object) -> bool:
65
+ """Return True when JSON Schema required fields demand locate-derived data."""
66
+
67
+ if isinstance(value, dict):
68
+ required = value.get("required")
69
+ if _contains_location_dependent_key(required):
70
+ return True
71
+ return any(
72
+ _schema_required_fields_contain_location_key(nested) for nested in value.values()
73
+ )
74
+ if isinstance(value, list):
75
+ return any(_schema_required_fields_contain_location_key(item) for item in value)
76
+ return False
60
77
 
61
78
 
62
79
  def _contains_location_dependent_key(value: object) -> bool:
@@ -302,7 +319,7 @@ class QueryEngine:
302
319
  continue
303
320
  if candidate.score <= 0:
304
321
  continue
305
- if tool.is_core or tool.ministry == "UMMAYA":
322
+ if tool.id in _INTERNAL_CONTEXT_TOOL_IDS:
306
323
  continue
307
324
  primitive = candidate.primitive if isinstance(candidate.primitive, str) else None
308
325
  requires_location = _schema_requires_location_resolution(
@@ -342,10 +359,13 @@ class QueryEngine:
342
359
  [
343
360
  "<available_adapters>",
344
361
  "Use these adapter candidates for this citizen request. "
345
- "Call the function named exactly as tool_id with that adapter's "
346
- "schema arguments. Do not wrap adapter calls in root primitives "
347
- "such as find({tool_id, params}), locate({tool_id, params}), "
348
- "check({tool_id, params}), or send({tool_id, params}). "
362
+ "The model-facing function name is the concrete tool_id shown "
363
+ "below when that function is present in tools[]. Call the "
364
+ "concrete adapter directly with exactly the input_schema_json "
365
+ "fields. Do not wrap tool_id/params inside a concrete adapter "
366
+ "call. The root primitives (find, locate, check, send) are "
367
+ "legacy compatibility wrappers only when a concrete adapter "
368
+ "function is not loaded. "
349
369
  "Do not call locate just because the citizen text contains a "
350
370
  "city/province name; treat that as the dataset/filter term. "
351
371
  "Call locate only when the selected adapter schema requires "
@@ -424,6 +424,11 @@ async def _dispatch_root_primitive(
424
424
  error=f"{primitive} cannot target itself.",
425
425
  error_type="validation",
426
426
  )
427
+ params = _normalize_root_primitive_adapter_params(
428
+ primitive=primitive,
429
+ target_tool_id=target_tool_id,
430
+ params=params,
431
+ )
427
432
 
428
433
  request_id = tc.id or f"{primitive}-call"
429
434
  if primitive == "find":
@@ -452,6 +457,21 @@ async def _dispatch_root_primitive(
452
457
  return ToolResult(tool_id=primitive, success=True, data=data)
453
458
 
454
459
 
460
+ def _normalize_root_primitive_adapter_params(
461
+ *,
462
+ primitive: str,
463
+ target_tool_id: str,
464
+ params: dict[str, object],
465
+ ) -> dict[str, object]:
466
+ """Remove wrapper metadata accidentally duplicated inside adapter params."""
467
+ nested_tool_id = params.get("tool_id")
468
+ if nested_tool_id == target_tool_id:
469
+ return {key: value for key, value in params.items() if key != "tool_id"}
470
+ if target_tool_id == primitive and isinstance(nested_tool_id, str):
471
+ return {key: value for key, value in params.items() if key != "tool_id"}
472
+ return params
473
+
474
+
455
475
  async def _dispatch_concrete_adapter(
456
476
  tc: ToolCall,
457
477
  primitive: str,
@@ -0,0 +1,25 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Evidence Fabric v2 public API."""
3
+
4
+ from ummaya.evidence.models import EvidenceGate, EvidenceStatus, RunEvidence
5
+ from ummaya.evidence.runner import EvidenceContractError, run_dataset
6
+ from ummaya.evidence.task_registry import (
7
+ EvidenceDatasetRef,
8
+ EvidenceTask,
9
+ EvidenceTaskRegistry,
10
+ TaskRegistryError,
11
+ load_task_registry,
12
+ )
13
+
14
+ __all__ = [
15
+ "EvidenceDatasetRef",
16
+ "EvidenceContractError",
17
+ "EvidenceGate",
18
+ "EvidenceStatus",
19
+ "EvidenceTask",
20
+ "EvidenceTaskRegistry",
21
+ "RunEvidence",
22
+ "TaskRegistryError",
23
+ "load_task_registry",
24
+ "run_dataset",
25
+ ]
@@ -0,0 +1,7 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Command line entrypoint for Evidence Fabric v2."""
3
+
4
+ from ummaya.evidence.runner import main
5
+
6
+ if __name__ == "__main__":
7
+ main()
@@ -0,0 +1,58 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Typed Evidence Fabric v2 models."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from datetime import UTC, datetime
7
+ from typing import Literal
8
+ from uuid import uuid4
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field
11
+
12
+ EvidenceStatus = Literal["pass", "fail", "skip"]
13
+ EvidenceGateName = Literal[
14
+ "contract",
15
+ "scenario",
16
+ "observability",
17
+ "adversarial",
18
+ "ux",
19
+ "live_canary",
20
+ ]
21
+
22
+
23
+ class EvidenceGate(BaseModel):
24
+ """One scored verification gate in a run evidence document."""
25
+
26
+ model_config = ConfigDict(frozen=True, extra="forbid")
27
+
28
+ name: EvidenceGateName
29
+ status: EvidenceStatus
30
+ summary: str
31
+ check_ids: tuple[str, ...] = Field(default_factory=tuple)
32
+
33
+
34
+ class RunEvidence(BaseModel):
35
+ """Top-level immutable evidence document emitted by the v2 runner."""
36
+
37
+ model_config = ConfigDict(frozen=True, extra="forbid")
38
+
39
+ schema_version: Literal["evidence.v2"] = "evidence.v2"
40
+ run_id: str = Field(default_factory=lambda: f"ev-{uuid4()}")
41
+ created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
42
+ source_ref: str
43
+ dataset_id: str
44
+ task_registry_id: str | None = None
45
+ dataset_ref: str | None = None
46
+ task_count: int = 0
47
+ task_ids: tuple[str, ...] = Field(default_factory=tuple)
48
+ scenario_count: int
49
+ scenario_ids: tuple[str, ...]
50
+ gates: tuple[EvidenceGate, ...]
51
+ trace_join_keys: tuple[str, ...] = (
52
+ "scenario_id",
53
+ "trace_id",
54
+ "correlation_id",
55
+ "prompt_manifest_hash",
56
+ "tool_catalog_hash",
57
+ "frame_hash",
58
+ )