ummaya 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
- package/prompts/manifest.yaml +2 -2
- package/prompts/session_guidance_v1.md +3 -1
- package/prompts/system_v1.md +8 -7
- package/pyproject.toml +2 -7
- package/src/ummaya/context/builder.py +17 -11
- package/src/ummaya/engine/engine.py +27 -7
- package/src/ummaya/engine/query.py +20 -0
- package/src/ummaya/evidence/__init__.py +25 -0
- package/src/ummaya/evidence/__main__.py +7 -0
- package/src/ummaya/evidence/models.py +58 -0
- package/src/ummaya/evidence/runner.py +308 -0
- package/src/ummaya/evidence/task_registry.py +264 -0
- package/src/ummaya/ipc/frame_schema.py +47 -0
- package/src/ummaya/ipc/stdio.py +1349 -90
- package/src/ummaya/llm/client.py +132 -56
- package/src/ummaya/llm/reasoning.py +84 -0
- package/src/ummaya/tools/discovery_bridge.py +17 -1
- package/src/ummaya/tools/executor.py +32 -12
- package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
- package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
- package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
- package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
- package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
- package/src/ummaya/tools/location_adapters.py +8 -6
- package/src/ummaya/tools/manifest_metadata.py +16 -3
- package/src/ummaya/tools/mvp_surface.py +2 -2
- package/src/ummaya/tools/nmc/emergency_search.py +8 -6
- package/src/ummaya/tools/register_all.py +9 -0
- package/src/ummaya/tools/resolve_location.py +4 -4
- package/src/ummaya/tools/search.py +664 -18
- package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
- package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
- package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
- package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
- package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
- package/src/ummaya/tools/verify_canonical_map.py +21 -0
- package/tui/package.json +1 -2
- package/tui/src/QueryEngine.ts +4 -0
- package/tui/src/cli/handlers/auth.ts +1 -1
- package/tui/src/cli/handlers/mcp.tsx +3 -3
- package/tui/src/cli/print.ts +69 -18
- package/tui/src/cli/update.ts +13 -13
- package/tui/src/commands/copy/index.ts +1 -1
- package/tui/src/commands/cost/cost.ts +2 -2
- package/tui/src/commands/init-verifiers.ts +5 -5
- package/tui/src/commands/init.ts +30 -30
- package/tui/src/commands/insights.ts +43 -43
- package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
- package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
- package/tui/src/commands/install.tsx +5 -5
- package/tui/src/commands/mcp/addCommand.ts +5 -5
- package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
- package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
- package/tui/src/commands/reasoning/index.ts +13 -0
- package/tui/src/commands/reasoning/reasoning.tsx +177 -0
- package/tui/src/commands/thinkback/thinkback.tsx +3 -3
- package/tui/src/commands.ts +2 -0
- package/tui/src/components/Messages.tsx +2 -1
- package/tui/src/components/Spinner.tsx +2 -2
- package/tui/src/components/design-system/LoadingState.tsx +2 -2
- package/tui/src/ipc/codec.ts +26 -0
- package/tui/src/ipc/frames.generated.ts +398 -303
- package/tui/src/ipc/llmClient.ts +130 -51
- package/tui/src/ipc/llmTypes.ts +16 -1
- package/tui/src/ipc/schema/frame.schema.json +1 -3475
- package/tui/src/main.tsx +3 -0
- package/tui/src/query.ts +467 -2
- package/tui/src/screens/REPL.tsx +3 -3
- package/tui/src/services/api/claude.ts +54 -25
- package/tui/src/services/api/client.ts +33 -12
- package/tui/src/services/api/ummaya.ts +70 -16
- package/tui/src/skills/bundled/stuck.ts +12 -12
- package/tui/src/state/AppStateStore.ts +7 -0
- package/tui/src/tools/AdapterTool/AdapterTool.ts +590 -7
- package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +43 -17
- package/tui/src/tools/LookupPrimitive/prompt.ts +7 -6
- package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +40 -19
- package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +25 -9
- package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +25 -9
- package/tui/src/tools/_shared/citizenUserText.ts +49 -0
- package/tui/src/tools/_shared/directPublicDataGuard.ts +362 -0
- package/tui/src/tools/_shared/kmaAnalysisGuard.ts +197 -0
- package/tui/src/tools/_shared/kmaAviationGuard.ts +70 -0
- package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
- package/tui/src/tools/_shared/nmcAedGuard.ts +234 -0
- package/tui/src/tools/_shared/protectedCheckGuard.ts +207 -0
- package/tui/src/tools/_shared/rootPrimitiveInput.ts +67 -0
- package/tui/src/tools/_shared/textToolCallGuard.ts +91 -0
- package/tui/src/tools/_shared/toolChoiceRepair.ts +866 -0
- package/tui/src/utils/attachments.ts +1 -1
- package/tui/src/utils/kExaoneReasoning.ts +138 -0
- package/tui/src/utils/messages.ts +1 -0
- package/tui/src/utils/multiToolLayout.ts +13 -0
- package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
- package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
- package/tui/src/utils/settings/applySettingsChange.ts +4 -0
- package/tui/src/utils/settings/types.ts +9 -3
- package/tui/src/utils/stats.ts +1 -1
- package/uv.lock +1 -15
- package/assets/copilot-gate-logo.svg +0 -58
- package/assets/govon-logo.svg +0 -40
- package/src/ummaya/eval/__init__.py +0 -5
- package/src/ummaya/eval/retrieval.py +0 -713
- package/tui/src/utils/messageStream.ts +0 -186
package/README.md
CHANGED
|
@@ -161,7 +161,8 @@ This is not a separate safety pitch. It is part of how `check` and `send` work:
|
|
|
161
161
|
UMMAYA currently uses `LGAI-EXAONE/K-EXAONE-236B-A23B` through FriendliAI Serverless for LLM responses.
|
|
162
162
|
|
|
163
163
|
- Model: [K-EXAONE-236B-A23B](https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B)
|
|
164
|
-
-
|
|
164
|
+
- Reasoning mode: `/reasoning` or `UMMAYA_K_EXAONE_REASONING_MODE` selects `fast`, `balanced`, `deep`, `diagnostic`, or `auto`.
|
|
165
|
+
- Thinking channel: `UMMAYA_K_EXAONE_THINKING` default `false` remains as a legacy compatibility flag; set `/reasoning deep` or `UMMAYA_K_EXAONE_REASONING_MODE=deep` for reasoning-channel diagnostics or benchmark runs.
|
|
165
166
|
- Model license: [K-EXAONE AI Model License Agreement](https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B/blob/main/LICENSE)
|
|
166
167
|
- Project license: [Apache License 2.0](LICENSE)
|
|
167
168
|
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ummaya",
|
|
3
|
-
"version": "0.2.2",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "ummaya",
|
|
9
|
-
"version": "0.2.2",
|
|
9
|
+
"version": "0.2.4",
|
|
10
10
|
"license": "Apache-2.0",
|
|
11
11
|
"dependencies": {
|
|
12
12
|
"@alcalzone/ansi-tokenize": "^0.3.0",
|
package/package.json
CHANGED
package/prompts/manifest.yaml
CHANGED
|
@@ -6,9 +6,9 @@ entries:
|
|
|
6
6
|
path: compact_v1.md
|
|
7
7
|
- prompt_id: session_guidance_v1
|
|
8
8
|
version: 1
|
|
9
|
-
sha256:
|
|
9
|
+
sha256: 5464ca763790332b2d6a3969151b43723ec51cc43bdeaed09074f7a0a1f62f98
|
|
10
10
|
path: session_guidance_v1.md
|
|
11
11
|
- prompt_id: system_v1
|
|
12
12
|
version: 1
|
|
13
|
-
sha256:
|
|
13
|
+
sha256: f70d0e9a95a85c501f426c6d1376bbc953767ba474be2213f946f30b6b7e53aa
|
|
14
14
|
path: system_v1.md
|
package/prompts/session_guidance_v1.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
When the citizen's message names a district, neighborhood, landmark, or address, invoke the geocoding tool before any tool that takes an administrative code. Do not fill administrative region codes from memory; pass them only after a geocoding tool has produced them in this session. In
|
|
1
|
+
Prefer concrete adapter function calls when a registered adapter is loaded in the current tools[] list. A concrete adapter function is named by its tool_id and accepts exactly the schema fields from <available_adapters>; do not wrap {"tool_id": "...", "params": {...}} inside that concrete function. Use legacy root wrappers locate/find/check/send only when the concrete adapter function is not loaded and only the root primitive is available. When the citizen's message names a district, neighborhood, landmark, or address, invoke the geocoding tool before any tool that takes an administrative code. Do not fill administrative region codes from memory; pass them only after a geocoding tool has produced them in this session. In the active concrete tool surface, that geocoding tool should be the registered locate adapter function itself. Use kakao_keyword_search({"query":"<citizen-place-text>"}) for POIs and named places, kakao_address_search({"query":"<citizen-address>"}) for structured road/jibun addresses, kakao_coord_to_region({"lat":<locate lat>,"lon":<locate lon>}) after coordinates when a region-mode adapter needs q0/q1, and JUSO/SGIS adapters only when their schema better matches the required identifier. Do not fill coordinates, KMA grids, administrative codes, or region names from memory; pass only values produced by a locate adapter in this session. When the citizen's request matches a registered adapter's purpose (accident statistics, emergency rooms, hospitals, weather observations, forecast data, welfare eligibility, authentication, submissions, etc.), invoke the appropriate concrete adapter to fetch or perform the authoritative action; do not answer factual public-service queries from parametric memory. For current/today non-aviation weather, call kma_current_observation with nx/ny from locate before final prose; call kma_forecast_fetch as a companion when the citizen also needs today's later forecast. 
Aviation exception: when the citizen names METAR, SPECI, AMOS, RVR, runway, airport aviation weather, or a concrete KMA APIHub aviation adapter from <available_adapters>, do not route to kma_current_observation and do not call locate unless the selected adapter schema explicitly requires coordinates. Use the aviation adapter from <available_adapters> directly, usually kma_apihub_url_air_metar_decoded({"org":"K","help":1}) for Gimhae/RKPK decoded METAR. For emergency-room search near a named place, first call kakao_keyword_search({"query":"<citizen-place-text>"}), then call kakao_coord_to_region({"lat":<locate lat>,"lon":<locate lon>}) if the NMC adapter needs q0/q1 region-mode parameters. For collapse, unconsciousness, cardiac-arrest, or AED-relevant wording such as 사람이 쓰러졌어, after emergency-room search also call nmc_aed_site_locate before final prose when that adapter is surfaced; if AED returns no data or an upstream error, report that result with 119 guidance instead of substituting ER data. Always use tools for location-based factual queries even when you recognize the place name. Never call a tool with an empty or whitespace-only argument value.
|
|
2
|
+
|
|
3
|
+
Treat ordinary airport flight-operation wording as aviation weather intent even when the citizen does not say METAR, AMOS, or 항공기상. Examples include 비행기, 항공편, 비행편, 이륙, 착륙, 결항, 지연, 운항, 뜰 만한가, flight, takeoff, landing, delay, or cancellation near an airport name. For those turns, prefer the KMA APIHub aviation adapter in <available_adapters> over locate plus kma_current_observation.
|
|
2
4
|
|
|
3
5
|
Before each tool call, compare the citizen request with the selected adapter's input schema in <available_adapters>. Preserve every explicit constraint from the citizen request as a matching parameter when the schema exposes one: requested result count, radius or distance wording, date/time, institution type, category, specialty, keyword, administrative region, and other filter fields. Do not make a broad unfiltered call when the request is narrower and the adapter has an optional field for that narrowing. After a tool_result, verify that the returned collection still matches the citizen's stated constraints before writing final prose; if it does not, call the same selected adapter again with corrected schema-valid params instead of answering from the broad result.
|
package/prompts/system_v1.md
CHANGED
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
|
|
39
39
|
<tool_usage>
|
|
40
40
|
<primitives>
|
|
41
|
-
-
|
|
42
|
-
-
|
|
41
|
+
- **Concrete adapter first** — tools[] 안에 concrete adapter function 이 있으면 function 이름은 `tool_id` 입니다. 그 function 은 adapter schema 필드만 받습니다. 예: `kakao_keyword_search({"query":"동아대학교 승학캠퍼스"})`, `kma_current_observation({"base_date":"YYYYMMDD","base_time":"HH00","nx":97,"ny":74})`. concrete adapter function 에 `{"tool_id": "...", "params": {...}}` 를 넣지 마십시오.
|
|
42
|
+
- **Legacy root wrappers** — concrete adapter function 이 로드되지 않고 root primitive 만 있을 때만 `locate({"tool_id":"kakao_keyword_search","params":{"query":"동아대학교 승학캠퍼스"}})` 또는 `find({"tool_id":"kma_current_observation","params":{...}})` 형식을 사용합니다. `mode="search"` 는 backend internal 기능이므로 LLM 이 직접 호출 금지.
|
|
43
43
|
- `check(tool_id, params)` — 인증 ceremony. `params = {scope_list, purpose_ko, purpose_en, session_id?}`. 반환 = `DelegationContext` (또는 any_id_sso 의 경우 `IdentityAssertion`).
|
|
44
44
|
- `send(tool_id, params)` — OPAQUE-도메인 행정 모듈 호출. `params` 에 `delegation_context` (check 반환) + 어댑터별 payload. 접수번호 반환.
|
|
45
45
|
**Public-data boundary**: 공개자료 `find` 조회가 성공했고 시민 발화에 인증/본인확인/동의/신청/제출/납부/신고 요구가 없으면 다음 turn 은 최종 답변입니다. 공개 의약품, 채용공고, 통계, 요금, 수질, 시설 목록 같은 read-only 결과를 "검증"하려고 `check` 를 추가 호출하지 마십시오.
|
|
@@ -77,14 +77,14 @@
|
|
|
77
77
|
| 마이데이터 인증 / 거래내역 동의 | `mock_verify_mydata` | (해당 어댑터) | `mock_submit_module_public_mydata_action` |
|
|
78
78
|
| 공동·금융 통합 (default) | `mock_verify_gongdong_injeungseo` | (선택) | (선택) |
|
|
79
79
|
| 통합 SSO / Any-ID 로그인 | `mock_verify_module_any_id_sso` | (선택) | (호출 금지 — IdentityAssertion 만 반환) |
|
|
80
|
-
**3-step chain**: (1) check
|
|
80
|
+
**3-step chain**: (1) check 계열 인증 adapter → DelegationContext. (2) 필요한 경우 concrete lookup adapter를 schema 필드로 직접 호출. (3) send 계열 제출 adapter → 접수번호.
|
|
81
81
|
**Send payload contract (fail-closed)**: send 호출은 항상 최상위 `tool_id` 와 `params` 를 모두 포함해야 합니다. `send(params={...})` 만 호출하거나, `tool_id` 없이 도메인 필드를 최상위에 펼친 send 호출은 절대 금지입니다. send `params` 는 해당 어댑터의 Pydantic input_schema 와 정확히 일치해야 합니다. 시민 발화에 이미 있는 필수 payload 필드(예: `minwon_type`, `applicant_name`, `delivery_method`, `session_id`)는 첫 send 호출에 모두 포함하십시오. `delegation_context` 는 check 반환값 전체를 `delegation_context` 한 필드 아래에만 넣고, `token`, `citizen_did`, `purpose_ko`, `purpose_en`, `scope`, `mode`, `_mode` 같은 내부 필드를 send params 최상위로 펼치거나 복사하지 마십시오. send schema 에 `session_id` 가 있으면 직전 check `params` 와 send `params` 에 같은 `session_id` 값을 넣어야 합니다. 시민이 세션 ID 를 명시했다면 check `params` 에도 `session_id` 를 직접 포함하십시오. `params.session_context.session_id` 형태로 중첩하지 마십시오. send 결과가 `status="succeeded"` 이거나 "제출이 접수되었습니다" 로 반환되면 같은 요청을 다시 send 하지 말고, 즉시 final answer 를 작성하십시오.
|
|
82
82
|
**Mock PII minimization defaults**: 홈택스 mock 흐름에서 시민에게 주민등록번호 앞 6자리, 총소득액, 실제 세무 식별자를 되묻지 마십시오. mock 조회의 `resident_id_prefix` 는 시민이 명시하지 않으면 synthetic fixture 값 `"000000"` 을 사용합니다. 연도가 없으면 직전 귀속연도(예: 2026년에 실행 중이면 2025)를 사용합니다. mock 종합소득세 send 의 `total_income_krw` 가 없으면 synthetic fixture 값 `42000000` 을 사용하되, final answer 에 실제 세무자료처럼 표현하지 마십시오.
|
|
83
83
|
**Tool choice override**: 시민 발화가 `마이데이터`를 포함하면 check 도구는 항상 `mock_verify_mydata` 입니다. `mock_verify_module_modid` 는 홈택스/모바일ID family에만 사용하고 마이데이터 동의에는 사용하지 마십시오. 시민 발화가 `간편인증`을 포함하면 `mock_verify_ganpyeon_injeung` 을 사용하고, 정부24 민원/등본/발급 문맥이 아닌 한 `mock_verify_module_simple_auth` 를 사용하지 마십시오.
|
|
84
84
|
**Identity/scope fixed values**: 모바일 신분증 본인확인 scope_list 는 정확히 `["check:mobile_id.identity"]` 입니다. `find:identity.info`, `find:identity.check` 같은 alias 를 만들지 마십시오. 간편인증 로그인 scope_list 는 정확히 `["check:ganpyeon.identity"]` 입니다. `mock_verify_module_any_id_sso`, `find:admin_service.permission_check`, `send:admin_service.permission_management` 로 대체하지 마십시오.
|
|
85
85
|
**Worked example** — 시민: "종합소득세 신고해줘"
|
|
86
86
|
1. `check(tool_id="mock_verify_module_modid", params={"scope_list": ["find:hometax.simplified", "send:hometax.tax-return"], "purpose_ko": "종합소득세 신고", "purpose_en": "Comprehensive income tax filing"})`
|
|
87
|
-
2. `
|
|
87
|
+
2. `mock_lookup_module_hometax_simplified({"year": 2025, "resident_id_prefix": "000000"})`
|
|
88
88
|
3. `send(tool_id="mock_submit_module_hometax_taxreturn", params={"delegation_context": <ctx>, "tax_year": 2025, "income_type": "종합소득", "total_income_krw": 42000000, "session_id": "HOMETAX-TAXRETURN-SESSION-001"})` → `접수번호: hometax-YYYY-MM-DD-RX-XXXXX`
|
|
89
89
|
**Worked example** — 시민: "마이데이터 동의 상태 확인하고 필요한 공공 마이데이터 제공 동의까지 진행해줘"
|
|
90
90
|
1. `check(tool_id="mock_verify_mydata", params={"scope_list": ["send:public_mydata.action"], "purpose_ko": "공공 마이데이터 제공 동의", "purpose_en": "Public MyData consent action", "session_id": "MYDATA-ACTION-SESSION-001"})`
|
|
@@ -93,7 +93,7 @@
|
|
|
93
93
|
1. `check(tool_id="mock_verify_module_simple_auth", params={"scope_list": ["send:gov24.minwon"], "purpose_ko": "주민등록등본 발급 민원 신청", "purpose_en": "Gov24 resident registration certificate civil petition", "session_id": "GOV24-MINWON-SESSION-001"})`
|
|
94
94
|
2. `send(tool_id="mock_submit_module_gov24_minwon", params={"delegation_context": <ctx>, "minwon_type": "주민등록등본", "applicant_name": "홍길동", "delivery_method": "online", "session_id": "GOV24-MINWON-SESSION-001"})` → `접수번호: gov24-YYYY-MM-DD-MW-XXXXXXXX`
|
|
95
95
|
**Worked example** — 시민: "한부모가족 아동양육비 지원을 신청해줘"
|
|
96
|
-
1. `
|
|
96
|
+
1. `mohw_welfare_eligibility_search({"search_wrd": "한부모가족 아동양육비", "trgter_indvdl_array": "060", "onap_psblt_yn": "Y"})`
|
|
97
97
|
2. `check(tool_id="mock_verify_mydata", params={"scope_list": ["send:mydata.welfare_application"], "purpose_ko": "한부모가족 아동양육비 지원 신청", "purpose_en": "Single-parent family child support application"})`
|
|
98
98
|
3. `send(tool_id="mock_welfare_application_submit_v1", params={"applicant_id": "DI-...", "benefit_code": "WLF00001068", "application_type": "new", "household_size": 2, "delegation_context": <ctx>})` → `접수번호: MOCK-WA-...`
|
|
99
99
|
**금지 패턴 (이미 위 규칙에서 명시한 것 외)**: 검색 결과 빈 후 "어댑터가 없습니다" 또는 "어댑터 ID 를 알려주세요" 답변 — 위 매핑 표가 답입니다. 시민에게 hometax.go.kr / gov.kr 직접 접속 안내 (chain 시도 전). 같은 find search 를 다른 query 로 재시도 — 첫 search 가 비었으면 즉시 매핑 표 사용. `mock_verify_module_any_id_sso` 뒤에 send chain — IdentityAssertion 만 반환합니다. 복지 급여 신청은 MyData send tier 이므로 `mock_verify_mydata`만 사용하고 Any-ID SSO를 사용하지 마십시오. 복지 신청 check scope_list 에 `find:mohw.welfare_eligibility_search` 또는 `send:mock.welfare_application_submit_v1` 를 넣지 말고 정확히 `["send:mydata.welfare_application"]` 만 사용하십시오.
|
|
@@ -133,8 +133,9 @@ Use available tools when the citizen's request requires live data lookup.
|
|
|
133
133
|
**절대 금지 (fabrication patterns — 위반 시 시민 안전 침해):** 도구 실패 후 "기존 정보로는…", "일반적으로…", "참고로…", "통계상…" 으로 시작하는 어떤 구체 데이터도 출력 금지. **숫자·이름·주소·전화·URL·날짜·좌표 0개**. 도구가 0건 반환했는데 LLM 학습 데이터의 병원 이름·소방서 통계·복지 서비스명·bokjiro.go.kr URL 을 보충하는 행위 금지 — 학습 데이터의 servId / wlfareInfoId 는 stale (출시 후 변경됨), fabricate 시 시민이 잘못된 service detail link 클릭. "도구는 실패했지만 제가 알기로는…" / "도구 결과는 없지만 일반적으로…" 류의 hedging fabrication 금지. 도구 응답에 없는 단위 (예: "약 X km", "대략 Y건", "보통 Z명") 의 어림 추정 금지 — 통계는 호출이 실패하면 *답변 자체가 없어야 함*.
|
|
134
134
|
**이유**: 의료·응급·교통·119 구급·복지 보조금 도메인의 fabricated 답변은 시민 misinformation 으로 이어집니다. 잘못된 병원 번호는 응급 상황에서 골든타임 손실, 잘못된 wlfareInfoId 는 잘못된 보조금 신청 페이지로 이동, fabricated 119 통계는 정부 행정 도구 신뢰 붕괴. 도구가 실패하면 *모른다고 솔직히 말하는 것이 정답*입니다 — "정확한 정보는 [공식 채널] 에서 확인" 형식 강제.
|
|
135
135
|
**Dependent 도구는 직렬로 호출.** 선행 도구 결과 (예: locate 의 좌표) 가 후속 도구 (예: kma_forecast_fetch 의 lat/lon) 의 인자에 필요하면 같은 turn 에 두 도구 동시 emit 금지 — 선행 결과 받은 다음 turn 에서 후속 호출.
|
|
136
|
-
**[CRITICAL — 주소 존재 여부를 산문으로 판단 금지]** 시민이 "근처/주변/주소/역/동/구/시" 등 위치 기반 요청을 하면 주소가 가짜처럼 보이거나 불완전해 보여도 먼저 `
|
|
137
|
-
**[CRITICAL — locate 단독 종결 금지 · 시민 안전 directive]**
|
|
136
|
+
**[CRITICAL — 주소 존재 여부를 산문으로 판단 금지]** 시민이 "근처/주변/주소/역/동/구/시" 등 위치 기반 요청을 하면 주소가 가짜처럼 보이거나 불완전해 보여도 먼저 concrete adapter `kakao_keyword_search({"query":"<citizen location>"})` 또는 구조화 주소일 때 `kakao_address_search({"query":"<citizen address>"})` 를 호출하십시오. concrete function 이 로드되지 않은 legacy 경로에서만 `locate({"tool_id":"kakao_keyword_search","params":{"query":"<citizen location>"}})` 를 사용합니다. "실제 주소가 아닌 것 같습니다" 같은 판단은 도구의 `not_found` 결과를 받은 뒤에만 말할 수 있습니다.
|
|
137
|
+
**[CRITICAL — locate 단독 종결 금지 · 시민 안전 directive]** locate 계열 호출 후 좌표 / 행정동 코드 / POI 만 받고 답변 turn 으로 종결하면 시민 fabrication 위험. 좌표만 받아서 날씨 / 병원 / 응급실 / 사고 / 119 / 복지 데이터를 답변에 포함하는 행위 = 100% 학습데이터 추측 (실측 없음). **locate 결과 받은 다음 turn 은 반드시 `<adapter>({lat:<resolved>, lon:<resolved>, ...})` 같은 concrete adapter 호출**. `<adapter>` 는 `<available_adapters>` 블록에서 선택. 예: 날씨 → `kma_current_observation` / 병원 → `hira_hospital_search` / 응급실 → `nmc_emergency_search` / 사고다발지 → `koroad_accident_hazard_search`. locate 만 두번/세번 반복 호출 후 답변 종결도 금지 — 첫 호출에서 좌표 받았으면 다음은 concrete find adapter. 백엔드 chain gate 가 답변 turn 에 후속 find 누락을 detect 하면 turn reject + 강제 retry — 즉시 fabricate 시도하지 말고 후속 adapter를 호출.
|
|
138
|
+
**[CRITICAL — collapse/AED chain]** 시민이 "사람이 쓰러졌어", 의식 없음, 심정지, 호흡 없음, AED/자동심장충격기/제세동기처럼 collapse·cardiac-arrest 상황을 말하면 응급실(`nmc_emergency_search`)만으로 종결하지 마십시오. `<available_adapters>`에 `nmc_aed_site_locate`가 있으면 응급실 조회 후 AED 조회도 호출한 다음 최종 답변합니다. AED 결과가 NO_DATA/upstream error 여도 그 실패를 119 안내와 함께 설명하고, 응급실 결과를 AED 결과처럼 대체하지 마십시오.
|
|
138
139
|
</turn_order>
|
|
139
140
|
|
|
140
141
|
<output_style>
|
package/pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "ummaya"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.2.4"
|
|
4
4
|
description = "Conversational multi-agent platform for Korean public APIs"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "Apache-2.0"
|
|
@@ -87,11 +87,6 @@ dev = [
|
|
|
87
87
|
"pip-audit>=2.10.0",
|
|
88
88
|
"vulture>=2.16",
|
|
89
89
|
"pip-licenses>=5.0",
|
|
90
|
-
# Spec 2521 — TUI Layer 5 (asciinema cast → per-frame text snapshot).
|
|
91
|
-
# Dev-only: replays asciicast v2/v3 byte streams through a real VT-100
|
|
92
|
-
# emulator so LLM agents can grep deterministic cell-grid text frames
|
|
93
|
-
# instead of OCR'ing PNG keyframes. See AGENTS.md § TUI verification.
|
|
94
|
-
"pyte>=0.8.2",
|
|
95
90
|
]
|
|
96
91
|
# spec 026 FR-C09 — Langfuse Prompt Management is an OPT-IN integration.
|
|
97
92
|
# It must never be a core runtime dependency (AGENTS.md hard rule: no new
|
|
@@ -314,7 +309,7 @@ min_confidence = 80
|
|
|
314
309
|
|
|
315
310
|
[tool.commitizen]
|
|
316
311
|
name = "cz_conventional_commits"
|
|
317
|
-
version = "0.2.2"
|
|
312
|
+
version = "0.2.4"
|
|
318
313
|
tag_format = "v$version"
|
|
319
314
|
|
|
320
315
|
# PyTorch CPU-only wheel for Docker image size discipline (SC-1: ≤ 2 GB).
|
package/src/ummaya/context/builder.py
CHANGED
|
@@ -20,6 +20,8 @@ import json
|
|
|
20
20
|
import logging
|
|
21
21
|
from typing import TYPE_CHECKING
|
|
22
22
|
|
|
23
|
+
from ummaya.context.attachments import AttachmentCollector
|
|
24
|
+
from ummaya.context.budget import BudgetEstimator
|
|
23
25
|
from ummaya.context.compact_models import CompactionConfig, CompactionResult
|
|
24
26
|
from ummaya.context.models import (
|
|
25
27
|
AssembledContext,
|
|
@@ -57,6 +59,10 @@ class ContextBuilder:
|
|
|
57
59
|
self._registry = registry
|
|
58
60
|
self._compaction_config = compaction_config
|
|
59
61
|
self._assembler = SystemPromptAssembler()
|
|
62
|
+
self._attachment_collector = AttachmentCollector(config=self._config)
|
|
63
|
+
self._budget_estimator = BudgetEstimator()
|
|
64
|
+
self._core_tool_defs_cache_key: tuple[str, ...] | None = None
|
|
65
|
+
self._core_tool_defs_cache: list[dict[str, object]] = []
|
|
60
66
|
|
|
61
67
|
# Cached assembled ChatMessage (set on first build_system_message() call).
|
|
62
68
|
self._system_message: ChatMessage | None = None
|
|
@@ -106,11 +112,7 @@ class ContextBuilder:
|
|
|
106
112
|
``ContextLayer(role='user', layer_name='turn_attachment', content=…)``
|
|
107
113
|
or ``None`` when no attachment content exists.
|
|
108
114
|
"""
|
|
109
|
-
|
|
110
|
-
from ummaya.context.attachments import AttachmentCollector # noqa: PLC0415
|
|
111
|
-
|
|
112
|
-
collector = AttachmentCollector(config=self._config)
|
|
113
|
-
collected = collector.collect(state=state, api_health=api_health)
|
|
115
|
+
collected = self._attachment_collector.collect(state=state, api_health=api_health)
|
|
114
116
|
if collected is None:
|
|
115
117
|
return None
|
|
116
118
|
return ContextLayer(role="user", layer_name="turn_attachment", content=collected)
|
|
@@ -161,15 +163,12 @@ class ContextBuilder:
|
|
|
161
163
|
tool_definitions = self._build_tool_definitions(state)
|
|
162
164
|
|
|
163
165
|
# --- Budget (US4) ---
|
|
164
|
-
from ummaya.context.budget import BudgetEstimator # noqa: PLC0415
|
|
165
|
-
|
|
166
|
-
estimator = BudgetEstimator()
|
|
167
166
|
assembled_no_budget = AssembledContext(
|
|
168
167
|
system_layer=system_layer,
|
|
169
168
|
turn_attachment=turn_attachment,
|
|
170
169
|
tool_definitions=tool_definitions,
|
|
171
170
|
)
|
|
172
|
-
budget = estimator.estimate(
|
|
171
|
+
budget = self._budget_estimator.estimate(
|
|
173
172
|
context=assembled_no_budget,
|
|
174
173
|
hard_limit=hard_limit,
|
|
175
174
|
soft_limit=int(hard_limit * 0.80),
|
|
@@ -232,8 +231,15 @@ class ContextBuilder:
|
|
|
232
231
|
if self._registry is None:
|
|
233
232
|
return []
|
|
234
233
|
|
|
235
|
-
# Core prefix (deterministic, sorted by id — FR-004)
|
|
236
|
-
|
|
234
|
+
# Core prefix (deterministic, sorted by id — FR-004). Core tool
|
|
235
|
+
# schemas are stable across turns, so cache the expensive Pydantic JSON
|
|
236
|
+
# schema export and invalidate only when the active core id set changes.
|
|
237
|
+
core_tools = self._registry.core_tools()
|
|
238
|
+
core_cache_key = tuple(tool.id for tool in core_tools)
|
|
239
|
+
if core_cache_key != self._core_tool_defs_cache_key:
|
|
240
|
+
self._core_tool_defs_cache = [tool.to_openai_tool() for tool in core_tools]
|
|
241
|
+
self._core_tool_defs_cache_key = core_cache_key
|
|
242
|
+
core_defs = self._core_tool_defs_cache
|
|
237
243
|
|
|
238
244
|
# Situational suffix (dynamic, sorted by id — FR-004)
|
|
239
245
|
situational_defs: list[dict[str, object]] = []
|
package/src/ummaya/engine/engine.py
CHANGED
|
@@ -29,6 +29,8 @@ if TYPE_CHECKING:
|
|
|
29
29
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
31
31
|
|
|
32
|
+
_INTERNAL_CONTEXT_TOOL_IDS = frozenset({"find", "locate", "check", "send", "search_tools"})
|
|
33
|
+
|
|
32
34
|
_LOCATION_DEPENDENT_SCHEMA_KEYS = frozenset(
|
|
33
35
|
{
|
|
34
36
|
"adm_cd",
|
|
@@ -54,9 +56,24 @@ def _schema_requires_location_resolution(
|
|
|
54
56
|
) -> bool:
|
|
55
57
|
"""Return True when an adapter schema needs prior locate output."""
|
|
56
58
|
|
|
57
|
-
return _contains_location_dependent_key(
|
|
59
|
+
return _contains_location_dependent_key(
|
|
58
60
|
required_params
|
|
59
|
-
)
|
|
61
|
+
) or _schema_required_fields_contain_location_key(input_schema_json)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _schema_required_fields_contain_location_key(value: object) -> bool:
|
|
65
|
+
"""Return True when JSON Schema required fields demand locate-derived data."""
|
|
66
|
+
|
|
67
|
+
if isinstance(value, dict):
|
|
68
|
+
required = value.get("required")
|
|
69
|
+
if _contains_location_dependent_key(required):
|
|
70
|
+
return True
|
|
71
|
+
return any(
|
|
72
|
+
_schema_required_fields_contain_location_key(nested) for nested in value.values()
|
|
73
|
+
)
|
|
74
|
+
if isinstance(value, list):
|
|
75
|
+
return any(_schema_required_fields_contain_location_key(item) for item in value)
|
|
76
|
+
return False
|
|
60
77
|
|
|
61
78
|
|
|
62
79
|
def _contains_location_dependent_key(value: object) -> bool:
|
|
@@ -302,7 +319,7 @@ class QueryEngine:
|
|
|
302
319
|
continue
|
|
303
320
|
if candidate.score <= 0:
|
|
304
321
|
continue
|
|
305
|
-
if tool.
|
|
322
|
+
if tool.id in _INTERNAL_CONTEXT_TOOL_IDS:
|
|
306
323
|
continue
|
|
307
324
|
primitive = candidate.primitive if isinstance(candidate.primitive, str) else None
|
|
308
325
|
requires_location = _schema_requires_location_resolution(
|
|
@@ -342,10 +359,13 @@ class QueryEngine:
|
|
|
342
359
|
[
|
|
343
360
|
"<available_adapters>",
|
|
344
361
|
"Use these adapter candidates for this citizen request. "
|
|
345
|
-
"
|
|
346
|
-
"
|
|
347
|
-
"
|
|
348
|
-
"
|
|
362
|
+
"The model-facing function name is the concrete tool_id shown "
|
|
363
|
+
"below when that function is present in tools[]. Call the "
|
|
364
|
+
"concrete adapter directly with exactly the input_schema_json "
|
|
365
|
+
"fields. Do not wrap tool_id/params inside a concrete adapter "
|
|
366
|
+
"call. The root primitives (find, locate, check, send) are "
|
|
367
|
+
"legacy compatibility wrappers only when a concrete adapter "
|
|
368
|
+
"function is not loaded. "
|
|
349
369
|
"Do not call locate just because the citizen text contains a "
|
|
350
370
|
"city/province name; treat that as the dataset/filter term. "
|
|
351
371
|
"Call locate only when the selected adapter schema requires "
|
package/src/ummaya/engine/query.py
CHANGED
|
@@ -424,6 +424,11 @@ async def _dispatch_root_primitive(
|
|
|
424
424
|
error=f"{primitive} cannot target itself.",
|
|
425
425
|
error_type="validation",
|
|
426
426
|
)
|
|
427
|
+
params = _normalize_root_primitive_adapter_params(
|
|
428
|
+
primitive=primitive,
|
|
429
|
+
target_tool_id=target_tool_id,
|
|
430
|
+
params=params,
|
|
431
|
+
)
|
|
427
432
|
|
|
428
433
|
request_id = tc.id or f"{primitive}-call"
|
|
429
434
|
if primitive == "find":
|
|
@@ -452,6 +457,21 @@ async def _dispatch_root_primitive(
|
|
|
452
457
|
return ToolResult(tool_id=primitive, success=True, data=data)
|
|
453
458
|
|
|
454
459
|
|
|
460
|
+
def _normalize_root_primitive_adapter_params(
|
|
461
|
+
*,
|
|
462
|
+
primitive: str,
|
|
463
|
+
target_tool_id: str,
|
|
464
|
+
params: dict[str, object],
|
|
465
|
+
) -> dict[str, object]:
|
|
466
|
+
"""Remove wrapper metadata accidentally duplicated inside adapter params."""
|
|
467
|
+
nested_tool_id = params.get("tool_id")
|
|
468
|
+
if nested_tool_id == target_tool_id:
|
|
469
|
+
return {key: value for key, value in params.items() if key != "tool_id"}
|
|
470
|
+
if target_tool_id == primitive and isinstance(nested_tool_id, str):
|
|
471
|
+
return {key: value for key, value in params.items() if key != "tool_id"}
|
|
472
|
+
return params
|
|
473
|
+
|
|
474
|
+
|
|
455
475
|
async def _dispatch_concrete_adapter(
|
|
456
476
|
tc: ToolCall,
|
|
457
477
|
primitive: str,
|
package/src/ummaya/evidence/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Evidence Fabric v2 public API."""
|
|
3
|
+
|
|
4
|
+
from ummaya.evidence.models import EvidenceGate, EvidenceStatus, RunEvidence
|
|
5
|
+
from ummaya.evidence.runner import EvidenceContractError, run_dataset
|
|
6
|
+
from ummaya.evidence.task_registry import (
|
|
7
|
+
EvidenceDatasetRef,
|
|
8
|
+
EvidenceTask,
|
|
9
|
+
EvidenceTaskRegistry,
|
|
10
|
+
TaskRegistryError,
|
|
11
|
+
load_task_registry,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"EvidenceDatasetRef",
|
|
16
|
+
"EvidenceContractError",
|
|
17
|
+
"EvidenceGate",
|
|
18
|
+
"EvidenceStatus",
|
|
19
|
+
"EvidenceTask",
|
|
20
|
+
"EvidenceTaskRegistry",
|
|
21
|
+
"RunEvidence",
|
|
22
|
+
"TaskRegistryError",
|
|
23
|
+
"load_task_registry",
|
|
24
|
+
"run_dataset",
|
|
25
|
+
]
|
package/src/ummaya/evidence/models.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
"""Typed Evidence Fabric v2 models."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from typing import Literal
|
|
8
|
+
from uuid import uuid4
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
EvidenceStatus = Literal["pass", "fail", "skip"]
|
|
13
|
+
EvidenceGateName = Literal[
|
|
14
|
+
"contract",
|
|
15
|
+
"scenario",
|
|
16
|
+
"observability",
|
|
17
|
+
"adversarial",
|
|
18
|
+
"ux",
|
|
19
|
+
"live_canary",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EvidenceGate(BaseModel):
|
|
24
|
+
"""One scored verification gate in a run evidence document."""
|
|
25
|
+
|
|
26
|
+
model_config = ConfigDict(frozen=True, extra="forbid")
|
|
27
|
+
|
|
28
|
+
name: EvidenceGateName
|
|
29
|
+
status: EvidenceStatus
|
|
30
|
+
summary: str
|
|
31
|
+
check_ids: tuple[str, ...] = Field(default_factory=tuple)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RunEvidence(BaseModel):
|
|
35
|
+
"""Top-level immutable evidence document emitted by the v2 runner."""
|
|
36
|
+
|
|
37
|
+
model_config = ConfigDict(frozen=True, extra="forbid")
|
|
38
|
+
|
|
39
|
+
schema_version: Literal["evidence.v2"] = "evidence.v2"
|
|
40
|
+
run_id: str = Field(default_factory=lambda: f"ev-{uuid4()}")
|
|
41
|
+
created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
|
|
42
|
+
source_ref: str
|
|
43
|
+
dataset_id: str
|
|
44
|
+
task_registry_id: str | None = None
|
|
45
|
+
dataset_ref: str | None = None
|
|
46
|
+
task_count: int = 0
|
|
47
|
+
task_ids: tuple[str, ...] = Field(default_factory=tuple)
|
|
48
|
+
scenario_count: int
|
|
49
|
+
scenario_ids: tuple[str, ...]
|
|
50
|
+
gates: tuple[EvidenceGate, ...]
|
|
51
|
+
trace_join_keys: tuple[str, ...] = (
|
|
52
|
+
"scenario_id",
|
|
53
|
+
"trace_id",
|
|
54
|
+
"correlation_id",
|
|
55
|
+
"prompt_manifest_hash",
|
|
56
|
+
"tool_catalog_hash",
|
|
57
|
+
"frame_hash",
|
|
58
|
+
)
|