devlyn-cli 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/devlyn.js +1 -0
- package/optional-skills/better-auth-setup/SKILL.md +222 -11
- package/optional-skills/better-auth-setup/references/proxy-gotchas.md +148 -0
- package/optional-skills/better-auth-setup/references/proxy-setup.md +284 -0
- package/optional-skills/dokkit/ANALYSIS.md +198 -0
- package/optional-skills/dokkit/COMMANDS.md +365 -0
- package/optional-skills/dokkit/DOCX-XML.md +76 -0
- package/optional-skills/dokkit/EXPORT.md +102 -0
- package/optional-skills/dokkit/FILLING.md +377 -0
- package/optional-skills/dokkit/HWPX-XML.md +73 -0
- package/optional-skills/dokkit/IMAGE-SOURCING.md +127 -0
- package/optional-skills/dokkit/INGESTION.md +65 -0
- package/optional-skills/dokkit/SKILL.md +153 -0
- package/optional-skills/dokkit/STATE.md +60 -0
- package/optional-skills/dokkit/references/docx-field-patterns.md +151 -0
- package/optional-skills/dokkit/references/docx-structure.md +58 -0
- package/optional-skills/dokkit/references/field-detection-patterns.md +130 -0
- package/optional-skills/dokkit/references/hwpx-field-patterns.md +461 -0
- package/optional-skills/dokkit/references/hwpx-structure.md +159 -0
- package/optional-skills/dokkit/references/image-opportunity-heuristics.md +121 -0
- package/optional-skills/dokkit/references/image-xml-patterns.md +338 -0
- package/optional-skills/dokkit/references/section-image-interleaving.md +346 -0
- package/optional-skills/dokkit/references/section-range-detection.md +118 -0
- package/optional-skills/dokkit/references/state-schema.md +143 -0
- package/optional-skills/dokkit/references/supported-formats.md +67 -0
- package/optional-skills/dokkit/scripts/compile_hwpx.py +134 -0
- package/optional-skills/dokkit/scripts/detect_fields.py +301 -0
- package/optional-skills/dokkit/scripts/detect_fields_hwpx.py +286 -0
- package/optional-skills/dokkit/scripts/export_pdf.py +99 -0
- package/optional-skills/dokkit/scripts/parse_hwpx.py +185 -0
- package/optional-skills/dokkit/scripts/parse_image_with_gemini.py +159 -0
- package/optional-skills/dokkit/scripts/parse_xlsx.py +98 -0
- package/optional-skills/dokkit/scripts/source_images.py +365 -0
- package/optional-skills/dokkit/scripts/validate_docx.py +142 -0
- package/optional-skills/dokkit/scripts/validate_hwpx.py +281 -0
- package/optional-skills/dokkit/scripts/validate_state.py +132 -0
- package/package.json +1 -1
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# Reverse Proxy Architecture — Complete Setup Guide
|
|
2
|
+
|
|
3
|
+
When the frontend (e.g., Next.js on Cloudflare Workers) is a separate application that proxies auth requests to the backend (e.g., Hono on Fly.io), several non-obvious configurations are required for OAuth, cookies, and redirects to work correctly.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
1. [Architecture Overview](#architecture-overview)
|
|
8
|
+
2. [Proxy Route Configuration](#proxy-route-configuration)
|
|
9
|
+
3. [Forwarding Headers](#forwarding-headers)
|
|
10
|
+
4. [Location Header Rewriting](#location-header-rewriting)
|
|
11
|
+
5. [Dynamic baseURL with allowedHosts](#dynamic-baseurl-with-allowedhosts)
|
|
12
|
+
6. [Frontend Middleware Cookie Detection](#frontend-middleware-cookie-detection)
|
|
13
|
+
7. [OAuth Provider Configuration](#oauth-provider-configuration)
|
|
14
|
+
8. [CSP Configuration](#csp-configuration)
|
|
15
|
+
9. [Edge Runtime (Cloudflare Workers)](#edge-runtime-cloudflare-workers)
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Architecture Overview
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
Browser (your-frontend.com)
|
|
23
|
+
|
|
|
24
|
+
|-- POST /api/auth/sign-in/social --> Proxy --> Backend /auth/sign-in/social
|
|
25
|
+
| |
|
|
26
|
+
| v
|
|
27
|
+
| Sets state cookie
|
|
28
|
+
| Returns redirect to Google
|
|
29
|
+
|
|
|
30
|
+
|-- Browser redirects to Google OAuth
|
|
31
|
+
|
|
|
32
|
+
|-- Google redirects to callback URL
|
|
33
|
+
| (MUST go through proxy, not directly to backend)
|
|
34
|
+
|
|
|
35
|
+
|-- GET /auth/callback/google --> Proxy --> Backend /auth/callback/google
|
|
36
|
+
| |
|
|
37
|
+
| v
|
|
38
|
+
| Reads state cookie (same domain!)
|
|
39
|
+
| Creates session
|
|
40
|
+
| Sets session cookie
|
|
41
|
+
| Redirects to /dashboard
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
The critical insight: **OAuth callbacks MUST go through the same proxy as the initial request**, so cookies (state and session) are on the same domain.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Proxy Route Configuration
|
|
49
|
+
|
|
50
|
+
Two proxy routes are needed — one for API calls, one for OAuth callbacks:
|
|
51
|
+
|
|
52
|
+
| Frontend Route | Backend Route | Purpose |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| `/api/auth/*` | `/auth/*` | Auth-client API calls (browser SDK) |
|
|
55
|
+
| `/auth/*` | `/auth/*` | OAuth callbacks (provider redirects here) |
|
|
56
|
+
|
|
57
|
+
The auth-client (browser) sends requests to `/api/auth/*`. OAuth callbacks arrive at `/auth/*` because Better Auth constructs callback URLs from `{baseURL}{basePath}/callback/{provider}`, and the derived baseURL from `allowedHosts` is the frontend origin.
|
|
58
|
+
|
|
59
|
+
### Next.js API Route Example
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
// src/app/api/auth/[...path]/route.ts
|
|
63
|
+
import { NextRequest, NextResponse } from "next/server";
|
|
64
|
+
|
|
65
|
+
const API_URL = process.env.API_URL; // e.g., "https://backend.fly.dev"
|
|
66
|
+
|
|
67
|
+
async function proxyRequest(request: NextRequest) {
|
|
68
|
+
const url = new URL(request.url);
|
|
69
|
+
const authPath = url.pathname.replace("/api/auth", "/auth");
|
|
70
|
+
const targetUrl = `${API_URL}${authPath}${url.search}`;
|
|
71
|
+
|
|
72
|
+
const headers = new Headers(request.headers);
|
|
73
|
+
|
|
74
|
+
// Strip then set forwarding headers (see next section)
|
|
75
|
+
headers.delete("x-forwarded-for");
|
|
76
|
+
headers.delete("x-forwarded-host");
|
|
77
|
+
headers.delete("x-forwarded-proto");
|
|
78
|
+
headers.delete("x-real-ip");
|
|
79
|
+
headers.delete("cf-connecting-ip");
|
|
80
|
+
headers.set("x-forwarded-host", url.host);
|
|
81
|
+
headers.set("x-forwarded-proto", url.protocol.replace(":", ""));
|
|
82
|
+
|
|
83
|
+
const response = await fetch(targetUrl, {
|
|
84
|
+
method: request.method,
|
|
85
|
+
headers,
|
|
86
|
+
body: request.method !== "GET" && request.method !== "HEAD"
|
|
87
|
+
? await request.text()
|
|
88
|
+
: undefined,
|
|
89
|
+
redirect: "manual",
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
const responseHeaders = new Headers(response.headers);
|
|
93
|
+
|
|
94
|
+
// Rewrite Location headers (see section below)
|
|
95
|
+
const location = responseHeaders.get("location");
|
|
96
|
+
if (location && API_URL && location.startsWith(API_URL)) {
|
|
97
|
+
const frontendOrigin = url.origin;
|
|
98
|
+
responseHeaders.set("location", location.replace(API_URL, frontendOrigin));
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return new NextResponse(response.body, {
|
|
102
|
+
status: response.status,
|
|
103
|
+
headers: responseHeaders,
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
export const GET = proxyRequest;
|
|
108
|
+
export const POST = proxyRequest;
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
// src/app/auth/[...path]/route.ts
|
|
113
|
+
// Same handler — reuse for OAuth callbacks
|
|
114
|
+
export { GET, POST } from "../../api/auth/[...path]/route";
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Forwarding Headers
|
|
120
|
+
|
|
121
|
+
The proxy MUST set `X-Forwarded-Host` and `X-Forwarded-Proto` so Better Auth knows the real origin. This is how Better Auth constructs the correct OAuth callback URL.
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// Strip user-provided headers to prevent spoofing
|
|
125
|
+
headers.delete("x-forwarded-for");
|
|
126
|
+
headers.delete("x-forwarded-host");
|
|
127
|
+
headers.delete("x-forwarded-proto");
|
|
128
|
+
headers.delete("x-real-ip");
|
|
129
|
+
headers.delete("cf-connecting-ip");
|
|
130
|
+
|
|
131
|
+
// Then set correct values from the actual request
|
|
132
|
+
headers.set("x-forwarded-host", url.host);
|
|
133
|
+
headers.set("x-forwarded-proto", url.protocol.replace(":", ""));
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
**Why both strip AND set?** Stripping prevents clients from spoofing these headers. Setting them tells the backend the true origin of the request. The proxy is the trusted boundary — it's the only thing that knows the real origin.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Location Header Rewriting
|
|
141
|
+
|
|
142
|
+
When the backend returns a redirect (302), the `Location` header points to the backend's origin. The proxy must rewrite it to the frontend:
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
const location = responseHeaders.get("location");
|
|
146
|
+
if (location && API_URL && location.startsWith(API_URL)) {
|
|
147
|
+
const frontendOrigin = new URL(request.url).origin;
|
|
148
|
+
responseHeaders.set("location", location.replace(API_URL, frontendOrigin));
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Without this, redirects after OAuth callback would send the user to the backend domain instead of the frontend.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Dynamic baseURL with allowedHosts
|
|
157
|
+
|
|
158
|
+
**DO NOT** use a static `baseURL` string in the backend's Better Auth config. It breaks the proxy architecture because:
|
|
159
|
+
|
|
160
|
+
- A static baseURL caches on first request (often a health check with `Host: backend.internal`), permanently setting the wrong origin
|
|
161
|
+
- `trustedProxyHeaders: true` alone does NOT work when `baseURL` is set — the static value takes precedence
|
|
162
|
+
- The `BETTER_AUTH_URL` env var also overrides forwarded headers (it's checked before headers in the priority chain)
|
|
163
|
+
|
|
164
|
+
**The correct approach** — use `allowedHosts` with a `fallback`:
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
export const auth = betterAuth({
|
|
168
|
+
baseURL: {
|
|
169
|
+
allowedHosts: [
|
|
170
|
+
"your-frontend.com", // Frontend domain (via proxy)
|
|
171
|
+
"your-backend.fly.dev", // Backend domain (direct access)
|
|
172
|
+
"localhost", // Local development
|
|
173
|
+
"*.fly.dev", // Platform internal routing
|
|
174
|
+
],
|
|
175
|
+
fallback: process.env.BETTER_AUTH_URL, // For health checks / unmatched hosts
|
|
176
|
+
},
|
|
177
|
+
basePath: "/auth",
|
|
178
|
+
|
|
179
|
+
advanced: {
|
|
180
|
+
// Required for allowedHosts to read X-Forwarded-Host from the proxy
|
|
181
|
+
trustedProxyHeaders: true,
|
|
182
|
+
},
|
|
183
|
+
// ...
|
|
184
|
+
});
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**How allowedHosts works internally:**
|
|
188
|
+
1. Reads `X-Forwarded-Host` header (set by proxy), falls back to `Host` header
|
|
189
|
+
2. Validates against the allowedHosts list (supports wildcards like `*.fly.dev`)
|
|
190
|
+
3. Constructs baseURL per-request (not cached!)
|
|
191
|
+
4. If no match, uses `fallback`
|
|
192
|
+
5. OAuth callback URL = `{derived-baseURL}{basePath}/callback/{provider}`
|
|
193
|
+
|
|
194
|
+
**The `getBaseURL()` priority chain** (why `trustedProxyHeaders` alone isn't enough):
|
|
195
|
+
1. Static `baseURL` string (if set) — **always wins**
|
|
196
|
+
2. `BETTER_AUTH_URL` environment variable — **checked before headers**
|
|
197
|
+
3. `X-Forwarded-Host` / `X-Forwarded-Proto` headers — only reached if 1 and 2 are absent
|
|
198
|
+
4. Request URL — last resort
|
|
199
|
+
|
|
200
|
+
`allowedHosts` bypasses this entire priority chain by explicitly reading the forwarded headers and constructing the URL per-request.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Frontend Middleware Cookie Detection
|
|
205
|
+
|
|
206
|
+
In production with HTTPS, Better Auth adds a `__Secure-` prefix to cookie names. Your frontend middleware MUST check for both:
|
|
207
|
+
|
|
208
|
+
```typescript
|
|
209
|
+
const SESSION_COOKIES = [
|
|
210
|
+
"__Secure-better-auth.session_token", // Production (HTTPS)
|
|
211
|
+
"better-auth.session_token", // Development (HTTP)
|
|
212
|
+
];
|
|
213
|
+
|
|
214
|
+
const hasSession = SESSION_COOKIES.some(
|
|
215
|
+
(name) => !!request.cookies.get(name)?.value,
|
|
216
|
+
);
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
**Why this happens:** When `baseURL` is a dynamic config object (allowedHosts), Better Auth can't determine the protocol at initialization time. In production (`NODE_ENV=production`), it defaults to secure cookies with the `__Secure-` prefix. If your middleware only checks for `better-auth.session_token`, it will never find the cookie and will redirect authenticated users to sign-in.
|
|
220
|
+
|
|
221
|
+
This is the most insidious gotcha in the proxy architecture — OAuth completes successfully, session is created, cookie is set, but the frontend doesn't recognize it.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## OAuth Provider Configuration
|
|
226
|
+
|
|
227
|
+
### Google Console Settings
|
|
228
|
+
|
|
229
|
+
- **Authorized JavaScript origins**: `https://your-frontend.com`
|
|
230
|
+
- **Authorized redirect URIs**: `https://your-frontend.com/auth/callback/google`
|
|
231
|
+
|
|
232
|
+
The redirect URI uses `/auth/callback/google` (NOT `/api/auth/callback/google`) because Better Auth constructs it from `{baseURL}{basePath}/callback/google`, and the derived baseURL from `allowedHosts` is the frontend origin.
|
|
233
|
+
|
|
234
|
+
### Google Console Propagation
|
|
235
|
+
|
|
236
|
+
Changes can take anywhere from 5 minutes to a few hours to propagate. If you get `redirect_uri_mismatch` immediately after updating, wait and retry before changing configuration.
|
|
237
|
+
|
|
238
|
+
### Backend Social Provider Config
|
|
239
|
+
|
|
240
|
+
```typescript
|
|
241
|
+
socialProviders: {
|
|
242
|
+
google: {
|
|
243
|
+
clientId: process.env.GOOGLE_CLIENT_ID!,
|
|
244
|
+
clientSecret: process.env.GOOGLE_CLIENT_SECRET!,
|
|
245
|
+
},
|
|
246
|
+
// Add more as needed:
|
|
247
|
+
// github: { clientId: ..., clientSecret: ... },
|
|
248
|
+
},
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## CSP Configuration
|
|
254
|
+
|
|
255
|
+
If using Content Security Policy headers on the frontend, add required domains:
|
|
256
|
+
|
|
257
|
+
```typescript
|
|
258
|
+
const cspDirectives = [
|
|
259
|
+
"default-src 'self'",
|
|
260
|
+
`script-src 'self' 'unsafe-inline' https://static.cloudflareinsights.com`,
|
|
261
|
+
"style-src 'self' 'unsafe-inline'",
|
|
262
|
+
"img-src 'self' data: https://lh3.googleusercontent.com", // Google avatars
|
|
263
|
+
"connect-src 'self'",
|
|
264
|
+
"font-src 'self'",
|
|
265
|
+
"frame-ancestors 'none'",
|
|
266
|
+
];
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Edge Runtime (Cloudflare Workers)
|
|
272
|
+
|
|
273
|
+
If deploying the frontend to Cloudflare Workers, the middleware must use the correct runtime:
|
|
274
|
+
|
|
275
|
+
```typescript
|
|
276
|
+
// Next.js 16
|
|
277
|
+
export const runtime = "experimental-edge";
|
|
278
|
+
// NOT "edge" — Next.js 16 requires "experimental-edge"
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
For earlier Next.js versions:
|
|
282
|
+
```typescript
|
|
283
|
+
export const runtime = "edge";
|
|
284
|
+
```
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Analysis Knowledge
|
|
2
|
+
|
|
3
|
+
Template analysis patterns and field detection strategies for the dokkit-analyzer agent. Covers field identification, confidence scoring, and the analysis output schema.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Field Detection Strategy](#field-detection-strategy)
|
|
8
|
+
- [Section Detection](#section-detection)
|
|
9
|
+
- [Cross-Language Mapping](#cross-language-mapping)
|
|
10
|
+
- [Confidence Scoring](#confidence-scoring)
|
|
11
|
+
- [Analysis Output Format](#analysis-output-format)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Field Detection Strategy
|
|
16
|
+
|
|
17
|
+
Detect ALL fillable locations in a template. Fields appear in these patterns:
|
|
18
|
+
|
|
19
|
+
### 1. Placeholder Text
|
|
20
|
+
- `{{field_name}}` or `<<field_name>>` — explicit placeholders
|
|
21
|
+
- `[field_name]` or `(field_name)` — bracket patterns
|
|
22
|
+
- `___` (underscores) — blank line indicators
|
|
23
|
+
- `...` (dots) — fill-in indicators
|
|
24
|
+
|
|
25
|
+
### 2. Empty Table Cells
|
|
26
|
+
In form-like documents (especially Korean templates):
|
|
27
|
+
- A label cell (e.g., "Name") with an adjacent empty cell = fill target
|
|
28
|
+
- Pattern: `[Label Cell] [Empty Cell]`
|
|
29
|
+
|
|
30
|
+
### 3. Instruction Text
|
|
31
|
+
Text telling the user what to enter:
|
|
32
|
+
- "(enter name here)", "(type your answer)"
|
|
33
|
+
- Korean: "(날짜를 입력하세요)", "(내용을 기재)"
|
|
34
|
+
- These should be REPLACED with the actual value
|
|
35
|
+
|
|
36
|
+
### 4. Form Controls (DOCX only)
|
|
37
|
+
- Content controls (`w:sdt`) with explicit placeholder values
|
|
38
|
+
- Legacy form fields (`w:fldChar`)
|
|
39
|
+
|
|
40
|
+
### 5. Underline Runs
|
|
41
|
+
Runs styled with underline containing only spaces or underscores:
|
|
42
|
+
- Indicates a blank line for handwriting
|
|
43
|
+
- In digital filling, replace with the value
|
|
44
|
+
|
|
45
|
+
### 6. Image Fields
|
|
46
|
+
Fields requiring an image rather than text:
|
|
47
|
+
- `{{사진}}`, `{{photo}}`, `<<signature>>` — image placeholder text
|
|
48
|
+
- Existing `<w:drawing>` (DOCX) or `<hp:pic>` (HWPX) in table cells
|
|
49
|
+
- Empty cells adjacent to cells with image keywords
|
|
50
|
+
|
|
51
|
+
**Image keywords** (Korean): 사진, 증명사진, 여권사진, 로고, 서명, 날인, 도장, 직인
|
|
52
|
+
**Image keywords** (English): Photo, Picture, Logo, Signature, Stamp, Seal, Image, Portrait
|
|
53
|
+
|
|
54
|
+
**Classification** (`image_type`): `photo`, `logo`, `signature`, or `figure`
|
|
55
|
+
|
|
56
|
+
### 7. Writing Tip Boxes (작성 팁)
|
|
57
|
+
Standalone 1x1 tables with DASH borders containing guidance text:
|
|
58
|
+
- HWPX: `rowCnt="1"`, `colCnt="1"` with `※` text
|
|
59
|
+
- DOCX: Single `<w:tr>/<w:tc>` with dashed borders
|
|
60
|
+
- Often styled in red (#FF0000)
|
|
61
|
+
|
|
62
|
+
Detect as `field_type: "tip_box"` with `action: "delete"`.
|
|
63
|
+
|
|
64
|
+
**Container types**:
|
|
65
|
+
- `"standalone"` — top-level 1x1 table between other content
|
|
66
|
+
- `"nested"` — inside `<hp:subList>` within a fill-target cell; include `parent_field_id`
|
|
67
|
+
|
|
68
|
+
**`has_formatting` flag**: For mapped fields where `mapped_value` is >100 chars and contains markdown syntax (`**bold**`, `## heading`, `- bullet`, `1. numbered`), set `has_formatting: true`.
|
|
69
|
+
|
|
70
|
+
## Section Detection
|
|
71
|
+
|
|
72
|
+
Group fields into logical sections:
|
|
73
|
+
1. Use document headings (H1, H2) as section boundaries
|
|
74
|
+
2. In table-based forms, use spanning header rows
|
|
75
|
+
3. In Korean templates, look for: "인적사항", "학력", "경력", "자격증"
|
|
76
|
+
4. If no clear sections, use "General" as default
|
|
77
|
+
|
|
78
|
+
## Cross-Language Mapping
|
|
79
|
+
|
|
80
|
+
Common Korean-English field equivalents:
|
|
81
|
+
|
|
82
|
+
| Korean | English |
|
|
83
|
+
|--------|---------|
|
|
84
|
+
| 성명 / 이름 | Name / Full Name |
|
|
85
|
+
| 생년월일 | Date of Birth |
|
|
86
|
+
| 주소 | Address |
|
|
87
|
+
| 전화번호 / 연락처 | Phone / Contact |
|
|
88
|
+
| 이메일 | Email |
|
|
89
|
+
| 학력 | Education |
|
|
90
|
+
| 경력 | Work Experience |
|
|
91
|
+
| 자격증 | Certifications |
|
|
92
|
+
| 직위 / 직책 | Position / Title |
|
|
93
|
+
| 회사명 | Company Name |
|
|
94
|
+
| 기간 | Period / Duration |
|
|
95
|
+
|
|
96
|
+
## Confidence Scoring
|
|
97
|
+
|
|
98
|
+
### High Confidence
|
|
99
|
+
- Exact label match between source and template field
|
|
100
|
+
- Unambiguous data (one clear value in sources)
|
|
101
|
+
- Same-language label match
|
|
102
|
+
|
|
103
|
+
### Medium Confidence
|
|
104
|
+
- Semantic match (different wording, same meaning)
|
|
105
|
+
- Cross-language match (Korean-English)
|
|
106
|
+
- Multiple candidate values in sources
|
|
107
|
+
- Partial data match
|
|
108
|
+
|
|
109
|
+
### Low Confidence
|
|
110
|
+
- Indirect inference (value derived from context)
|
|
111
|
+
- Ambiguous mapping (could match multiple fields)
|
|
112
|
+
- Best guess from limited data
|
|
113
|
+
|
|
114
|
+
## Analysis Output Format
|
|
115
|
+
|
|
116
|
+
Write to `.dokkit/analysis.json`:
|
|
117
|
+
|
|
118
|
+
```json
|
|
119
|
+
{
|
|
120
|
+
"template": {
|
|
121
|
+
"file_path": "...",
|
|
122
|
+
"file_type": "docx|hwpx",
|
|
123
|
+
"display_name": "..."
|
|
124
|
+
},
|
|
125
|
+
"sections": [
|
|
126
|
+
{
|
|
127
|
+
"name": "Section Name",
|
|
128
|
+
"fields": [
|
|
129
|
+
{
|
|
130
|
+
"id": "field_001",
|
|
131
|
+
"label": "Field Label",
|
|
132
|
+
"field_type": "placeholder_text|empty_cell|underline|form_control|instruction_text|image|tip_box|section_content|table_content",
|
|
133
|
+
"xml_path": {
|
|
134
|
+
"file": "word/document.xml",
|
|
135
|
+
"element_path": "body/tbl[0]/tr[1]/tc[2]/p[0]/r[0]",
|
|
136
|
+
"namespaced_path": "w:body/w:tbl[0]/w:tr[1]/w:tc[2]/w:p[0]/w:r[0]"
|
|
137
|
+
},
|
|
138
|
+
"pattern": "{{name}}",
|
|
139
|
+
"current_content": "{{name}}",
|
|
140
|
+
"mapped_value": "John Doe",
|
|
141
|
+
"source": "resume.pdf",
|
|
142
|
+
"source_location": "key_value_pairs.Name",
|
|
143
|
+
"confidence": "high",
|
|
144
|
+
"has_formatting": false
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"id": "field_015",
|
|
148
|
+
"label": "tip box label",
|
|
149
|
+
"field_type": "tip_box",
|
|
150
|
+
"action": "delete",
|
|
151
|
+
"container": "standalone",
|
|
152
|
+
"xml_path": { "file": "...", "element_path": "...", "namespaced_path": "..." },
|
|
153
|
+
"pattern": "(tip box: 1x1 table)",
|
|
154
|
+
"current_content": "※ 작성 팁: ...",
|
|
155
|
+
"mapped_value": null,
|
|
156
|
+
"confidence": "high"
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
"id": "field_020",
|
|
160
|
+
"label": "사진",
|
|
161
|
+
"field_type": "image",
|
|
162
|
+
"image_type": "photo",
|
|
163
|
+
"xml_path": { "file": "...", "element_path": "...", "namespaced_path": "..." },
|
|
164
|
+
"pattern": "(empty cell, image label)",
|
|
165
|
+
"current_content": "",
|
|
166
|
+
"image_source": "ingested",
|
|
167
|
+
"image_file": ".dokkit/sources/photo.jpg",
|
|
168
|
+
"dimensions": { "width_emu": 1260000, "height_emu": 1620000 },
|
|
169
|
+
"confidence": "high"
|
|
170
|
+
}
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
],
|
|
174
|
+
"summary": {
|
|
175
|
+
"total_fields": 22,
|
|
176
|
+
"mapped": 18,
|
|
177
|
+
"unmapped": 4,
|
|
178
|
+
"high_confidence": 15,
|
|
179
|
+
"medium_confidence": 2,
|
|
180
|
+
"low_confidence": 1,
|
|
181
|
+
"image_fields": 2,
|
|
182
|
+
"image_fields_sourced": 1,
|
|
183
|
+
"image_fields_pending": 1,
|
|
184
|
+
"tip_boxes": 3
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Critical Rules for Analysis Output
|
|
190
|
+
|
|
191
|
+
- For `table_content` fields that are pre-filled from source: set `mapped_value: null` with `action: "preserve"`. NEVER set `mapped_value` to a placeholder string — the filler treats any non-null `mapped_value` as literal data and will destroy the table.
|
|
192
|
+
- For `image` fields: search `.dokkit/sources/` for matching images first. Set `image_source: "ingested"` if found, or leave `image_file: null` (pending).
|
|
193
|
+
- For `section_content` fields: scan for visual enhancement opportunities (max 3 per field, max 12 total). Record with `generation_prompt`, `dimensions`, `status: "pending"`.
|
|
194
|
+
|
|
195
|
+
## References
|
|
196
|
+
|
|
197
|
+
See `references/field-detection-patterns.md` for advanced detection heuristics (9 DOCX + 6 HWPX).
|
|
198
|
+
See `references/image-opportunity-heuristics.md` for AI image opportunity detection in section content.
|