devlyn-cli 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/bin/devlyn.js +1 -0
  2. package/optional-skills/better-auth-setup/SKILL.md +222 -11
  3. package/optional-skills/better-auth-setup/references/proxy-gotchas.md +148 -0
  4. package/optional-skills/better-auth-setup/references/proxy-setup.md +284 -0
  5. package/optional-skills/dokkit/ANALYSIS.md +198 -0
  6. package/optional-skills/dokkit/COMMANDS.md +365 -0
  7. package/optional-skills/dokkit/DOCX-XML.md +76 -0
  8. package/optional-skills/dokkit/EXPORT.md +102 -0
  9. package/optional-skills/dokkit/FILLING.md +377 -0
  10. package/optional-skills/dokkit/HWPX-XML.md +73 -0
  11. package/optional-skills/dokkit/IMAGE-SOURCING.md +127 -0
  12. package/optional-skills/dokkit/INGESTION.md +65 -0
  13. package/optional-skills/dokkit/SKILL.md +153 -0
  14. package/optional-skills/dokkit/STATE.md +60 -0
  15. package/optional-skills/dokkit/references/docx-field-patterns.md +151 -0
  16. package/optional-skills/dokkit/references/docx-structure.md +58 -0
  17. package/optional-skills/dokkit/references/field-detection-patterns.md +130 -0
  18. package/optional-skills/dokkit/references/hwpx-field-patterns.md +461 -0
  19. package/optional-skills/dokkit/references/hwpx-structure.md +159 -0
  20. package/optional-skills/dokkit/references/image-opportunity-heuristics.md +121 -0
  21. package/optional-skills/dokkit/references/image-xml-patterns.md +338 -0
  22. package/optional-skills/dokkit/references/section-image-interleaving.md +346 -0
  23. package/optional-skills/dokkit/references/section-range-detection.md +118 -0
  24. package/optional-skills/dokkit/references/state-schema.md +143 -0
  25. package/optional-skills/dokkit/references/supported-formats.md +67 -0
  26. package/optional-skills/dokkit/scripts/compile_hwpx.py +134 -0
  27. package/optional-skills/dokkit/scripts/detect_fields.py +301 -0
  28. package/optional-skills/dokkit/scripts/detect_fields_hwpx.py +286 -0
  29. package/optional-skills/dokkit/scripts/export_pdf.py +99 -0
  30. package/optional-skills/dokkit/scripts/parse_hwpx.py +185 -0
  31. package/optional-skills/dokkit/scripts/parse_image_with_gemini.py +159 -0
  32. package/optional-skills/dokkit/scripts/parse_xlsx.py +98 -0
  33. package/optional-skills/dokkit/scripts/source_images.py +365 -0
  34. package/optional-skills/dokkit/scripts/validate_docx.py +142 -0
  35. package/optional-skills/dokkit/scripts/validate_hwpx.py +281 -0
  36. package/optional-skills/dokkit/scripts/validate_state.py +132 -0
  37. package/package.json +1 -1
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ """Compile an HWPX document from its unpacked working directory.
3
+
4
+ Usage:
5
+ python compile_hwpx.py <work_dir> <output.hwpx> [--reference <original.hwpx>]
6
+
7
+ Critical: mimetype must be the first file in the ZIP and stored uncompressed.
8
+ When --reference is given, preserves the original ZIP's file ordering and
9
+ per-file compression types (STORED vs DEFLATED). New files not present in the
10
+ reference are appended at the end with DEFLATED compression.
11
+ """
12
+
13
+ import os
14
+ import sys
15
+ import zipfile
16
+ from pathlib import Path
17
+
18
+
19
def compile_hwpx(work_dir: str, output_path: str, reference_zip: str | None = None) -> str:
    """Build an HWPX archive from the unpacked tree rooted at ``work_dir``.

    Args:
        work_dir: Directory holding the unpacked HWPX contents.
        output_path: Where the archive is written; missing parent
            directories are created.
        reference_zip: Optional path to an original archive. When truthy, the
            reference-aware packer is used; otherwise the default packer runs.

    Returns:
        The output path as a string.
    """
    src_root = Path(work_dir)
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Archive names ("/"-separated, relative to the work dir) of every file
    # except ``mimetype`` and ``*.bak`` backups.
    all_work_files: set[str] = {
        os.path.relpath(os.path.join(dirpath, name), src_root).replace(os.sep, "/")
        for dirpath, _subdirs, names in os.walk(src_root)
        for name in names
        if name != "mimetype" and not name.endswith(".bak")
    }

    if reference_zip:
        _compile_with_reference(src_root, target, reference_zip, all_work_files)
    else:
        _compile_default(src_root, target, all_work_files)

    # Re-open the result and warn if mimetype did not end up first.
    with zipfile.ZipFile(target, 'r') as archive:
        entries = archive.namelist()
    if entries and entries[0] != "mimetype":
        print("Warning: mimetype is not the first entry in the archive", file=sys.stderr)

    size = target.stat().st_size
    print(f"Compiled: {target} ({size:,} bytes)", file=sys.stderr)
    return str(target)
49
+
50
+
51
def _compile_with_reference(work: Path, out: Path, reference_zip: str, all_work_files: set[str]) -> None:
    """Compile preserving the reference ZIP's file order and compression types.

    Entry order in the output:
      1. ``mimetype`` (always STORED). Taken from ``work`` when present,
         otherwise copied from the reference archive; a warning is printed
         if neither has one.
      2. Every other reference entry, in reference order and with its
         reference compression type. Content comes from ``work`` when the
         file exists there, otherwise from the reference archive.
      3. Names in ``all_work_files`` that were not in the reference, sorted,
         with DEFLATED compression.

    Args:
        work: Root of the unpacked working directory.
        out: Path of the archive to create (overwritten).
        reference_zip: Path to the original archive used as the template.
        all_work_files: "/"-separated archive names of the files in ``work``
            that should be packaged.
    """
    added: set[str] = set()
    # Both archives are context-managed so the reference handle is released
    # even when writing fails part-way through.
    with zipfile.ZipFile(reference_zip) as ref_zip, zipfile.ZipFile(out, 'w') as zf:
        ref_infos = ref_zip.infolist()
        ref_by_name = {info.filename: info for info in ref_infos}

        # 1. mimetype first, stored
        mimetype_path = work / "mimetype"
        if mimetype_path.exists():
            zf.write(str(mimetype_path), "mimetype", compress_type=zipfile.ZIP_STORED)
            added.add("mimetype")
        elif "mimetype" in ref_by_name:
            # Work dir lost its mimetype: reuse the reference's rather than
            # emitting an archive without one.
            info = zipfile.ZipInfo("mimetype", date_time=ref_by_name["mimetype"].date_time)
            info.compress_type = zipfile.ZIP_STORED
            zf.writestr(info, ref_zip.read("mimetype"))
            added.add("mimetype")
        else:
            print("Warning: mimetype file not found", file=sys.stderr)

        # 2. Files from reference in original order with original compression
        for ref_info in ref_infos:
            filename = ref_info.filename
            # mimetype was handled above; also skip names already written so a
            # reference with duplicate entries does not produce duplicates.
            if filename == "mimetype" or filename in added:
                continue
            file_path = work / filename
            if file_path.exists():
                zf.write(str(file_path), filename, compress_type=ref_info.compress_type)
            else:
                # Fall back to original content, keeping its timestamp and
                # attributes instead of the ZipInfo defaults.
                info = zipfile.ZipInfo(filename, date_time=ref_info.date_time)
                info.compress_type = ref_info.compress_type
                info.external_attr = ref_info.external_attr
                zf.writestr(info, ref_zip.read(filename))
            added.add(filename)

        # 3. New files not in reference (BinData images etc.)
        for arcname in sorted(all_work_files - added):
            zf.write(str(work / arcname), arcname, compress_type=zipfile.ZIP_DEFLATED)
85
+
86
+
87
def _compile_default(work: Path, out: Path, all_work_files: set[str]) -> None:
    """Compile with default ordering (mimetype first, rest alphabetical, all deflated).

    The files to package are taken from ``all_work_files`` instead of walking
    ``work`` again: the output archive already exists on disk while entries
    are being written, so a second walk would pick the archive up as one of
    its own members whenever ``out`` lives inside ``work``.

    Ordering matches a sorted top-down directory walk: within each directory
    its files come first (sorted by name), followed by its subdirectories
    (sorted by name), recursively.

    Args:
        work: Root of the unpacked working directory.
        out: Path of the archive to create (overwritten).
        all_work_files: "/"-separated archive names, relative to ``work``.
    """

    def walk_order(arcname: str) -> tuple:
        # Tag directory components with 1 and the leaf file with 0 so that,
        # at any level, plain files sort ahead of subdirectories.
        *parents, leaf = arcname.split("/")
        return tuple((1, part) for part in parents) + ((0, leaf),)

    out_resolved = out.resolve()
    with zipfile.ZipFile(out, 'w') as zf:
        # mimetype MUST be first and uncompressed
        mimetype_path = work / "mimetype"
        if mimetype_path.exists():
            zf.write(str(mimetype_path), "mimetype", compress_type=zipfile.ZIP_STORED)
        else:
            print("Warning: mimetype file not found", file=sys.stderr)

        # All other files with compression
        for arcname in sorted(all_work_files, key=walk_order):
            leaf = arcname.rsplit("/", 1)[-1]
            # Same exclusions the directory walk applied, in case the caller
            # passes an unfiltered set.
            if leaf == "mimetype" or leaf.endswith(".bak"):
                continue
            file_path = work / arcname
            # Never add the archive being written to itself.
            if file_path.resolve() == out_resolved:
                continue
            zf.write(str(file_path), arcname, compress_type=zipfile.ZIP_DEFLATED)
106
+
107
+
108
def main():
    """Command-line entry point: parse ``sys.argv`` and compile the archive.

    Exits with status 1 when fewer than two positional arguments are given,
    when ``--reference`` has no value after it, or when the work directory
    is not a directory.
    """
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: python compile_hwpx.py <work_dir> <output.hwpx> [--reference <original.hwpx>]",
              file=sys.stderr)
        sys.exit(1)

    work_dir, output_path = argv[1], argv[2]

    # Locate the optional flag anywhere on the command line.
    try:
        flag_pos = argv.index("--reference")
    except ValueError:
        flag_pos = None

    reference_zip = None
    if flag_pos is not None:
        if flag_pos + 1 >= len(argv):
            print("Error: --reference requires a path argument", file=sys.stderr)
            sys.exit(1)
        reference_zip = argv[flag_pos + 1]

    if not Path(work_dir).is_dir():
        print(f"Error: Not a directory: {work_dir}", file=sys.stderr)
        sys.exit(1)

    compile_hwpx(work_dir, output_path, reference_zip)
131
+
132
+
133
+ if __name__ == "__main__":
134
+ main()
@@ -0,0 +1,301 @@
1
+ #!/usr/bin/env python3
2
+ """Detect fillable fields in a DOCX document.xml file.
3
+
4
+ Usage:
5
+ python detect_fields.py <path-to-document.xml>
6
+
7
+ Output:
8
+ JSON array of detected fields to stdout.
9
+ """
10
+
11
+ import json
12
+ import re
13
+ import sys
14
+ import xml.etree.ElementTree as ET
15
+ from pathlib import Path
16
+
17
+
18
# XML namespace URIs keyed by the prefix used throughout this module.
NS = dict(
    w="http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    a="http://schemas.openxmlformats.org/drawingml/2006/main",
    pic="http://schemas.openxmlformats.org/drawingml/2006/picture",
    r="http://schemas.openxmlformats.org/officeDocument/2006/relationships",
)
25
+
26
# Keywords whose presence in a label marks the field as an image field
# (Korean first, then English).
IMAGE_KEYWORDS_KO = [
    "사진",
    "증명사진",
    "여권사진",
    "로고",
    "서명",
    "날인",
    "도장",
    "직인",
]
IMAGE_KEYWORDS_EN = [
    "photo",
    "picture",
    "logo",
    "signature",
    "stamp",
    "seal",
    "image",
    "portrait",
]
IMAGE_KEYWORDS = [*IMAGE_KEYWORDS_KO, *IMAGE_KEYWORDS_EN]

# Keyword -> image_type classifier. Insertion order is significant: lookups
# scan the mapping front to back and stop at the first keyword that matches.
_IMAGE_TYPE_GROUPS = (
    ("photo", ("사진", "증명사진", "여권사진", "photo", "picture", "portrait", "image")),
    ("logo", ("로고", "logo")),
    ("signature", ("서명", "날인", "stamp", "seal", "도장", "직인")),
)
IMAGE_TYPE_MAP = {
    keyword: image_type
    for image_type, keywords in _IMAGE_TYPE_GROUPS
    for keyword in keywords
}
39
+
40
+
41
def get_text(elem) -> str:
    """Return the concatenated text of every ``w:t`` node at or below ``elem``.

    Nodes whose text is empty or missing contribute nothing.
    """
    text_tag = "{%s}t" % NS["w"]
    return "".join(node.text for node in elem.iter(text_tag) if node.text)
48
+
49
+
50
def _classify_image_type(text: str) -> str:
    """Map ``text`` to an image type: photo, logo, signature or figure.

    The lower-cased, stripped text is checked against the keywords of
    ``IMAGE_TYPE_MAP`` in mapping order; the first keyword contained in it
    decides the type. ``"figure"`` is returned when none matches.
    """
    haystack = text.lower().strip()
    return next(
        (kind for keyword, kind in IMAGE_TYPE_MAP.items() if keyword in haystack),
        "figure",
    )
57
+
58
+
59
def _is_image_keyword(text: str) -> bool:
    """Return True when the lower-cased text contains any entry of ``IMAGE_KEYWORDS``."""
    haystack = text.lower().strip()
    for keyword in IMAGE_KEYWORDS:
        if keyword in haystack:
            return True
    return False
63
+
64
+
65
def detect_placeholder_text(root) -> list[dict]:
    """Collect text placeholders written as ``{{label}}``, ``<<label>>`` or ``[label]``.

    Each paragraph's text is scanned for the three forms. Placeholders whose
    label contains an image keyword are left out here; the image detector
    reports those. ``xml_path`` is ``p[i]`` where ``i`` is the paragraph's
    position among all ``w:p`` elements under ``root``.
    """
    token_re = re.compile(r"\{\{([^}]+)\}\}|<<([^>]+)>>|\[([^\]]+)\]")
    paragraph_tag = "{%s}p" % NS["w"]

    found = []
    for index, paragraph in enumerate(root.iter(paragraph_tag)):
        for hit in token_re.finditer(get_text(paragraph)):
            curly, angled, square = hit.groups()
            label = curly or angled or square
            if _is_image_keyword(label):
                continue
            found.append({
                "label": label.strip(),
                "field_type": "placeholder_text",
                "pattern": hit.group(0),
                "xml_path": f"p[{index}]",
            })
    return found
84
+
85
+
86
def detect_empty_table_cells(root) -> list[dict]:
    """Report empty table cells that directly follow a short label cell.

    A cell qualifies when the cell before it in the same row has non-empty
    text shorter than 50 characters that is not an image keyword, and the
    cell itself has no text. Rows and cells are gathered with recursive
    iteration, so cells of a table nested inside a row are seen as part of
    that row as well.
    """
    w = NS["w"]
    table_tag = "{%s}tbl" % w
    row_tag = "{%s}tr" % w
    cell_tag = "{%s}tc" % w

    results = []
    for ti, table in enumerate(root.iter(table_tag)):
        for ri, row in enumerate(table.iter(row_tag)):
            cell_texts = [get_text(cell).strip() for cell in row.iter(cell_tag)]
            # Walk neighbouring (label, candidate) pairs left to right.
            for ci, (label_text, next_text) in enumerate(zip(cell_texts, cell_texts[1:])):
                if not label_text or next_text or len(label_text) >= 50:
                    continue
                # Image labels are reported by the image detector instead.
                if _is_image_keyword(label_text):
                    continue
                results.append({
                    "label": label_text,
                    "field_type": "empty_cell",
                    "pattern": "(empty cell)",
                    "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci + 1}]",
                })
    return results
108
+
109
+
110
def detect_underline_fields(root) -> list[dict]:
    """Find underline-only runs (blank line placeholders).

    A run is reported when its run properties contain a ``w:u`` element
    (other than ``w:val="none"``, which switches underlining off) and the
    text of its first ``w:t`` child is non-empty but consists solely of
    whitespace and underscores.

    The label is the text of the paragraph that directly contains the run,
    with the blank text removed; ``"underline_field"`` is used when the run
    is not a direct child of a paragraph or nothing else remains.
    ``xml_path`` is ``r[i]`` where ``i`` is the run's position among all
    ``w:r`` elements under ``root``.
    """
    w = NS["w"]
    run_tag = "{%s}r" % w
    props_tag = "{%s}rPr" % w
    underline_tag = "{%s}u" % w
    text_tag = "{%s}t" % w
    val_attr = "{%s}val" % w

    # Map each direct child of a paragraph to that paragraph once, instead of
    # rescanning every paragraph for every matching run.
    paragraph_of = {
        child: paragraph
        for paragraph in root.iter("{%s}p" % w)
        for child in paragraph
    }

    fields = []
    for i, r in enumerate(root.iter(run_tag)):
        rPr = r.find(props_tag)
        if rPr is None:
            continue
        u = rPr.find(underline_tag)
        if u is None or u.get(val_attr) == "none":
            continue

        t = r.find(text_tag)
        if t is None or not t.text:
            continue
        # Anything left after removing blanks/underscores means real content.
        if t.text.strip().strip(" _"):
            continue

        label = "underline_field"
        parent_p = paragraph_of.get(r)
        if parent_p is not None:
            # NOTE: str.replace drops every occurrence of the blank text in
            # the paragraph, not only this run's.
            clean = get_text(parent_p).replace(t.text, "").strip()
            if clean:
                label = clean

        fields.append({
            "label": label,
            "field_type": "underline",
            "pattern": "(underline)",
            "xml_path": f"r[{i}]",
        })
    return fields
148
+
149
+
150
def detect_content_controls(root) -> list[dict]:
    """List structured document tags (``w:sdt``) that carry properties.

    The label is the ``w:val`` of the ``w:alias`` child of ``w:sdtPr`` when
    that child exists, otherwise the ``w:val`` of ``w:tag``; ``"unknown"``
    is used when neither element, or the attribute, is present. Tags without
    a ``w:sdtPr`` child are skipped but still counted in the ``sdt[i]`` index.
    """
    w = NS["w"]
    val_attr = "{%s}val" % w

    controls = []
    for index, sdt in enumerate(root.iter("{%s}sdt" % w)):
        props = sdt.find("{%s}sdtPr" % w)
        if props is None:
            continue

        # Prefer the alias; fall back to the tag.
        named = props.find("{%s}alias" % w)
        if named is None:
            named = props.find("{%s}tag" % w)
        label = "unknown" if named is None else named.get(val_attr, "unknown")

        controls.append({
            "label": label,
            "field_type": "form_control",
            "pattern": "(content control)",
            "xml_path": f"sdt[{index}]",
        })
    return controls
176
+
177
+
178
def detect_image_fields(root) -> list[dict]:
    """Detect image placeholders in a DOCX document.

    Results are appended in three passes, in this order:
      1. Placeholder text (``{{...}}``, ``<<...>>``, ``[...]``) whose label
         contains an image keyword.
      2. Table cells that contain a ``w:drawing`` element. The label comes
         from the previous cell if it holds an image keyword, else from the
         next cell if that one does, else it is ``"image_placeholder"``.
      3. Text-free cells without a drawing that directly follow a cell whose
         text contains an image keyword.
    """
    w = NS["w"]
    paragraph_tag = "{%s}p" % w
    table_tag = "{%s}tbl" % w
    row_tag = "{%s}tr" % w
    cell_tag = "{%s}tc" % w
    drawing_tag = "{%s}drawing" % w

    token_re = re.compile(
        r"\{\{([^}]+)\}\}|<<([^>]+)>>|\[([^\]]+)\]"
    )

    def contains_drawing(cell) -> bool:
        # True when at least one w:drawing sits anywhere inside the cell.
        return next(cell.iter(drawing_tag), None) is not None

    found = []

    # Pass 1: image placeholder text ({{photo}}, <<signature>>, etc.)
    for pi, paragraph in enumerate(root.iter(paragraph_tag)):
        for hit in token_re.finditer(get_text(paragraph)):
            label = hit.group(1) or hit.group(2) or hit.group(3)
            if not _is_image_keyword(label):
                continue
            found.append({
                "label": label.strip(),
                "field_type": "image",
                "image_type": _classify_image_type(label),
                "pattern": hit.group(0),
                "xml_path": f"p[{pi}]",
            })

    # Pass 2: existing <w:drawing> placeholders in table cells
    for ti, table in enumerate(root.iter(table_tag)):
        for ri, row in enumerate(table.iter(row_tag)):
            cells = list(row.iter(cell_tag))
            for ci, cell in enumerate(cells):
                if not contains_drawing(cell):
                    continue
                # Try the left neighbour, then the right one, for a label.
                label_text = get_text(cells[ci - 1]).strip() if ci > 0 else ""
                if not _is_image_keyword(label_text) and ci + 1 < len(cells):
                    label_text = get_text(cells[ci + 1]).strip()
                if not _is_image_keyword(label_text):
                    label_text = "image_placeholder"

                found.append({
                    "label": label_text,
                    "field_type": "image",
                    "image_type": _classify_image_type(label_text),
                    "pattern": "(existing drawing)",
                    "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci}]",
                })

    # Pass 3: empty cells adjacent to image-keyword labels
    for ti, table in enumerate(root.iter(table_tag)):
        for ri, row in enumerate(table.iter(row_tag)):
            cells = list(row.iter(cell_tag))
            for ci, (label_cell, target_cell) in enumerate(zip(cells, cells[1:])):
                label_text = get_text(label_cell).strip()
                if not _is_image_keyword(label_text):
                    continue
                if get_text(target_cell).strip():
                    continue
                # Cells that already hold a drawing were reported in pass 2.
                if contains_drawing(target_cell):
                    continue
                found.append({
                    "label": label_text,
                    "field_type": "image",
                    "image_type": _classify_image_type(label_text),
                    "pattern": "(empty cell, image label)",
                    "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci + 1}]",
                })

    return found
250
+
251
+
252
def detect_instruction_text(root) -> list[dict]:
    """Collect parenthesised instructions such as ``(enter name here)``.

    A match is a parenthesised span containing one of the instruction verbs
    (English verbs are matched case-insensitively). The label is the match
    with leading and trailing parentheses stripped; ``xml_path`` is ``p[i]``
    for the paragraph the match was found in.
    """
    instruction_re = re.compile(
        r"\(.*?(?:enter|type|input|write|fill|입력|기재|작성).*?\)",
        re.IGNORECASE
    )
    paragraph_tag = "{%s}p" % NS["w"]

    return [
        {
            "label": hit.group(0).strip("()"),
            "field_type": "instruction_text",
            "pattern": hit.group(0),
            "xml_path": f"p[{index}]",
        }
        for index, paragraph in enumerate(root.iter(paragraph_tag))
        for hit in instruction_re.finditer(get_text(paragraph))
    ]
270
+
271
+
272
def main():
    """Command-line entry point: detect fields and print them as JSON.

    Expects exactly one argument, the path to ``document.xml``. On success a
    JSON array of field dicts, each with a sequential ``id`` of the form
    ``field_001``, is written to stdout.

    Exits with status 1 when the argument count is wrong (usage on stderr),
    or when the file is missing, unreadable or not well-formed XML (a JSON
    object with an ``error`` key on stdout).
    """
    if len(sys.argv) != 2:
        print("Usage: python detect_fields.py <document.xml>", file=sys.stderr)
        sys.exit(1)

    path = Path(sys.argv[1])
    if not path.exists():
        print(json.dumps({"error": f"File not found: {path}"}))
        sys.exit(1)

    # Report parse/read failures in the same JSON shape as a missing file
    # instead of letting a traceback escape.
    try:
        root = ET.parse(path).getroot()
    except (ET.ParseError, OSError) as exc:
        print(json.dumps({"error": f"Cannot parse {path}: {exc}"}, ensure_ascii=False))
        sys.exit(1)

    # Detector order determines field order and therefore the assigned IDs.
    detectors = (
        detect_placeholder_text,
        detect_empty_table_cells,
        detect_underline_fields,
        detect_content_controls,
        detect_instruction_text,
        detect_image_fields,
    )
    all_fields = [field for detect in detectors for field in detect(root)]

    # Assign IDs
    for number, field in enumerate(all_fields, start=1):
        field["id"] = f"field_{number:03d}"

    print(json.dumps(all_fields, ensure_ascii=False, indent=2))
298
+
299
+
300
+ if __name__ == "__main__":
301
+ main()