devlyn-cli 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,819 @@
1
+ """
2
+ fill_docx.py — python-docx based document filler for Dokkit v4
3
+
4
+ Usage: python fill_docx.py <template.docx> <analysis.json> <fill_content.json> <output.docx>
5
+
6
+ Handles:
7
+ - Tip box removal (작성요령 tables)
8
+ - Instruction text removal (blue ※ paragraphs + blue text inside table cells)
9
+ - Overview table filling (수요기업 개요)
10
+ - Section content insertion with proper formatting
11
+ - Page breaks between major chapters
12
+ - All text forced to black
13
+ """
14
+
15
+ import sys
16
+ import os
17
+ import json
18
+ import re
19
+ import copy
20
+ from docx import Document
21
+ from docx.oxml.ns import qn
22
+ from docx.oxml import OxmlElement
23
+
24
# WordprocessingML main namespace URI. Nothing in the visible part of this
# file references this constant; element names are resolved through qn().
W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
25
+
26
+
27
+ # ── Helpers ──────────────────────────────────────────────────────────────
28
+
29
def remove_element(el):
    """Detach *el* from its parent; do nothing when it has no parent."""
    parent = el.getparent()
    if parent is None:
        return
    parent.remove(el)
33
+
34
+
35
def get_text(el):
    """Return the concatenated text of every ``w:t`` element found under *el*."""
    pieces = []
    for node in el.iter(qn('w:t')):
        # A text node without content reports None; treat it as empty.
        pieces.append(node.text or '')
    return ''.join(pieces)
37
+
38
+
39
def set_cell_text(cell_el, text):
    """Replace the text of a table cell with *text*.

    The run properties of the first run in the cell that has a ``w:rPr`` are
    reused for the new run, with every ``w:color`` forced to black and every
    ``w:i`` (italic) removed. All runs that are direct children of the cell's
    direct-child paragraphs are deleted, then a single new run holding *text*
    is appended to the first paragraph (a paragraph is created when the cell
    has none).

    Args:
        cell_el: the ``w:tc`` element to rewrite.
        text: the new cell text. ``None`` is written as an empty string and
            any other non-string value (e.g. a number loaded from JSON) is
            converted with ``str()`` instead of raising ``TypeError``.
    """
    text = '' if text is None else str(text)

    # Remember the formatting of the first run that actually carries an rPr.
    first_rPr = None
    for r in cell_el.iter(qn('w:r')):
        rPr = r.find(qn('w:rPr'))
        if rPr is not None:
            first_rPr = copy.deepcopy(rPr)
            break

    # Clear the existing runs; the (now empty) paragraphs themselves stay.
    for p in cell_el.findall(qn('w:p')):
        for r in p.findall(qn('w:r')):
            p.remove(r)

    first_p = cell_el.find(qn('w:p'))
    if first_p is None:
        first_p = OxmlElement('w:p')
        cell_el.append(first_p)

    r = OxmlElement('w:r')
    if first_rPr is not None:
        # Force black color
        for c in first_rPr.findall(qn('w:color')):
            c.set(qn('w:val'), '000000')
        # Remove italic
        for i in first_rPr.findall(qn('w:i')):
            first_rPr.remove(i)
        r.append(first_rPr)

    t = OxmlElement('w:t')
    # Keep leading/trailing spaces of the text intact.
    t.set(qn('xml:space'), 'preserve')
    t.text = text
    r.append(t)
    first_p.append(r)
75
+
76
+
77
+ # ── Detection ────────────────────────────────────────────────────────────
78
+
79
def is_tip_box(tbl):
    """Return True when the table text contains one of the tip-box markers."""
    content = get_text(tbl)
    return any(marker in content for marker in ('작성요령', '작성 요령'))
82
+
83
+
84
def has_blue_color(el):
    """Return True when any ``w:color`` under *el* is 0000FF or 0000FFFF (case-insensitive)."""
    blue_values = ('0000FF', '0000FFFF')
    return any(
        node.get(qn('w:val'), '').upper() in blue_values
        for node in el.iter(qn('w:color'))
    )
90
+
91
+
92
def is_instruction_para(p):
    """Return True for a non-empty paragraph that starts with '※' or contains blue text."""
    stripped = get_text(p).strip()
    # Empty paragraphs are never treated as instructions, whatever their color.
    return bool(stripped) and (stripped.startswith('※') or has_blue_color(p))
101
+
102
+
103
def is_chapter_header(tbl):
    """Return True when the stripped table text begins with a Roman numeral Ⅰ–Ⅴ."""
    leading = get_text(tbl).strip()
    return re.match(r'^[ⅠⅡⅢⅣⅤ]', leading) is not None
106
+
107
+
108
def get_chapter_num(tbl):
    """Map the leading Roman numeral (Ⅰ–Ⅴ) of the table text to 1–5; 0 when absent."""
    numerals = {'Ⅰ': 1, 'Ⅱ': 2, 'Ⅲ': 3, 'Ⅳ': 4, 'Ⅴ': 5}
    first_char = get_text(tbl).strip()[:1]
    # An empty text yields '' here, which is not a key and falls back to 0.
    return numerals.get(first_char, 0)
114
+
115
+
116
+ # ── Formatting ───────────────────────────────────────────────────────────
117
+
118
def extract_body_ppr(body):
    """Return a copy of the first body paragraph's pPr that has a positive left indent.

    Only direct children of *body* that are ``w:p`` elements are inspected.

    Args:
        body: the document body element to scan.

    Returns:
        A deep copy of the matching ``w:pPr`` element, or ``None`` when no
        paragraph has a ``w:ind`` whose ``w:left`` is a positive integer.
    """
    p_tag = qn('w:p')
    for child in body:
        if child.tag != p_tag:
            continue
        pPr = child.find(qn('w:pPr'))
        if pPr is None:
            continue
        ind = pPr.find(qn('w:ind'))
        if ind is None:
            continue
        try:
            left = int(ind.get(qn('w:left'), '0'))
        except ValueError:
            # The attribute may hold a non-integer measure (e.g. "0.5in");
            # skip this paragraph rather than abort the whole run.
            continue
        if left > 0:
            return copy.deepcopy(pPr)
    return None
130
+
131
+
132
def make_clean_rpr(font_name='맑은 고딕', font_size='20'):
    """Build a fresh ``w:rPr``: given font on ascii/eastAsia/hAnsi, black color, given size.

    Children are appended in the order rFonts, color, sz, szCs.
    """
    rPr = OxmlElement('w:rPr')

    fonts = OxmlElement('w:rFonts')
    for slot in ('w:ascii', 'w:eastAsia', 'w:hAnsi'):
        fonts.set(qn(slot), font_name)
    rPr.append(fonts)

    valued_children = (
        ('w:color', '000000'),
        ('w:sz', font_size),
        ('w:szCs', font_size),
    )
    for tag, value in valued_children:
        node = OxmlElement(tag)
        node.set(qn('w:val'), value)
        rPr.append(node)

    return rPr
150
+
151
+
152
def create_para(text, pPr_tpl, rPr_tpl, bold=False):
    """Create a ``w:p`` holding one run with *text*, based on the given templates.

    Args:
        text: the paragraph text.
        pPr_tpl: paragraph-properties element to copy, or ``None`` for no pPr.
            Any ``w:rPr`` directly inside the copy is dropped.
        rPr_tpl: run-properties element to copy, or ``None`` to start from an
            empty ``w:rPr`` (previously ``None`` raised ``AttributeError``).
        bold: when true the run is made bold; otherwise any ``w:b`` is removed.

    Returns:
        The new ``w:p`` element (not attached to any document).
    """
    p = OxmlElement('w:p')

    if pPr_tpl is not None:
        pPr = copy.deepcopy(pPr_tpl)
        for inner in pPr.findall(qn('w:rPr')):
            pPr.remove(inner)
        p.append(pPr)

    r = OxmlElement('w:r')
    rPr = copy.deepcopy(rPr_tpl) if rPr_tpl is not None else OxmlElement('w:rPr')
    # Force black
    for c in rPr.findall(qn('w:color')):
        c.set(qn('w:val'), '000000')
    # Remove italic
    for i in rPr.findall(qn('w:i')):
        rPr.remove(i)

    existing_b = rPr.find(qn('w:b'))
    if bold:
        if existing_b is None:
            rPr.insert(0, OxmlElement('w:b'))
        else:
            # A template <w:b w:val="0"/> switches bold OFF; dropping the
            # attribute turns the element back into a plain "bold on" flag.
            existing_b.attrib.pop(qn('w:val'), None)
    elif existing_b is not None:
        rPr.remove(existing_b)
    r.append(rPr)

    t = OxmlElement('w:t')
    # Keep leading/trailing spaces of the text intact.
    t.set(qn('xml:space'), 'preserve')
    t.text = text
    r.append(t)
    p.append(r)
    return p
184
+
185
+
186
def content_to_paras(text, pPr_tpl, rPr_tpl):
    """Turn a newline-separated content string into a list of ``w:p`` elements.

    Each line is stripped. A blank line becomes an empty paragraph. A line
    wrapped in ``[...]`` or ``**...**`` loses the wrapper and is rendered bold.
    A line starting with '- ' or '• ' is emitted with a '• ' bullet prefix.
    """
    result = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        make_bold = False
        if line:
            if line.startswith('[') and line.endswith(']'):
                line, make_bold = line[1:-1], True
            elif line.startswith('**') and line.endswith('**'):
                line, make_bold = line[2:-2], True
            if line.startswith(('- ', '• ')):
                line = '• ' + line[2:]
        result.append(create_para(line, pPr_tpl, rPr_tpl, bold=make_bold))
    return result
205
+
206
+
207
+ # ── Field ID lookup ──────────────────────────────────────────────────────
208
+
209
+ def _get_fid(field):
210
+ """Get field ID from either 'id' or 'field_id' key."""
211
+ return field.get('id') or field.get('field_id', '')
212
+
213
+
214
+ def _find_field_id(analysis, field_type, label_keyword):
215
+ """Search analysis.json for a field matching type and label keyword.
216
+
217
+ Returns the field ID, or None if not found.
218
+ Supports both flat fields[] array and nested sections[].fields[] formats.
219
+ """
220
+ # Flat format: analysis.fields[]
221
+ for field in analysis.get('fields', []):
222
+ ft = field.get('type') or field.get('field_type', '')
223
+ if ft != field_type:
224
+ continue
225
+ label = field.get('label', '')
226
+ if label_keyword in label:
227
+ return _get_fid(field)
228
+ # Nested format: analysis.sections[].fields[]
229
+ for section in analysis.get('sections', []):
230
+ for field in section.get('fields', []):
231
+ ft = field.get('type') or field.get('field_type', '')
232
+ if ft != field_type:
233
+ continue
234
+ label = field.get('label', '')
235
+ if label_keyword in label:
236
+ return _get_fid(field)
237
+ return None
238
+
239
+
240
+ # ── Staff/schedule table filling from fill_content.json ─────────────────
241
+
242
def _fill_staff_table(tbl, staff_data, fill_content):
    """Fill a row-oriented table (e.g. the staff table) from prepared row data.

    The first table row is treated as the header and left untouched. Data row
    *n* is written into table row *n + 1*, cell by cell; surplus values and
    surplus data rows are ignored. Table rows after the last data row are
    cleared.

    Args:
        tbl: the ``w:tbl`` element to fill.
        staff_data: dict with a 'rows' key holding a list of row lists.
            Entries that are not lists are skipped (their table row is left
            as it is). A null cell value is written as an empty cell rather
            than the literal text "None".
        fill_content: unused; kept for call compatibility.

    Returns:
        The number of table rows that were written, or 0 when *staff_data* is
        empty or not a dict.
    """
    if not staff_data or not isinstance(staff_data, dict):
        return 0
    rows_data = staff_data.get('rows') or []
    rows = tbl.findall(qn('w:tr'))
    filled = 0
    # rows[1:] skips the header row; zip stops at the shorter of the two.
    for target_row, row_data in zip(rows[1:], rows_data):
        if not isinstance(row_data, list):
            continue
        cells = target_row.findall(qn('w:tc'))
        for cell, val in zip(cells, row_data):
            set_cell_text(cell, '' if val is None else str(val))
        filled += 1
    # Clear remaining rows
    for leftover in rows[len(rows_data) + 1:]:
        for cell in leftover.findall(qn('w:tc')):
            set_cell_text(cell, '')
    return filled
268
+
269
+
270
+ # ── Main ─────────────────────────────────────────────────────────────────
271
+
272
def _as_text(value):
    """Return *value* as cell text: '' for None, otherwise ``str(value)``."""
    return '' if value is None else str(value)


def _has_page_or_section_break(p):
    """Return True when paragraph *p* holds a page break or section properties."""
    for br in p.iter(qn('w:br')):
        if br.get(qn('w:type')) == 'page':
            return True
    return next(p.iter(qn('w:sectPr')), None) is not None


def _starts_next_heading(text):
    """Return True for text opening with a '가. ' or '1. ' style heading prefix."""
    return bool(re.match(r'^[가-힣]\.\s', text) or re.match(r'^\d+\.\s', text))


def _replace_following(children, index, new_paras, window, stops_at):
    """Replace the placeholder paragraphs after ``children[index]`` with *new_paras*.

    Siblings ``children[index + 1 : index + window]`` are scanned in order and
    collected for removal until one of these is met: a non-paragraph element,
    a non-empty paragraph for which ``stops_at(text)`` is true, or a paragraph
    carrying a page break / section properties (those must survive, otherwise
    the page breaks inserted before chapter tables would be deleted again).
    The collected paragraphs are removed and *new_paras* are inserted right
    after ``children[index]``, in order.
    """
    p_tag = qn('w:p')
    doomed = []
    for nxt in children[index + 1:min(index + window, len(children))]:
        if nxt.tag != p_tag:
            break
        text = get_text(nxt).strip()
        if text and stops_at(text):
            break
        if _has_page_or_section_break(nxt):
            break
        doomed.append(nxt)
    for old in doomed:
        remove_element(old)
    ref = children[index]
    for para in new_paras:
        ref.addnext(para)
        ref = para


def _remove_tip_boxes(body):
    """Step 1: remove every top-level table recognised as a tip box; return the count."""
    removed = 0
    for tbl in body.findall(qn('w:tbl')):
        if is_tip_box(tbl):
            remove_element(tbl)
            removed += 1
    return removed


def _remove_instruction_paras(body):
    """Step 2: remove top-level instruction paragraphs; return the count."""
    removed = 0
    for p in body.findall(qn('w:p')):
        if is_instruction_para(p):
            remove_element(p)
            removed += 1
    return removed


def _clean_table_cells(body):
    """Step 3: strip nested tables and instruction runs from top-level table cells.

    Returns:
        ``(cleaned_paragraphs, removed_nested_tables, overview_tbl)`` where
        *overview_tbl* is the last top-level table whose text contains both
        '구분' and '상세 내용', or ``None``.
    """
    cleaned = 0
    nested_removed = 0
    overview_tbl = None
    for tbl in body.findall(qn('w:tbl')):
        text = get_text(tbl)
        if '구분' in text and '상세 내용' in text:
            overview_tbl = tbl

        # Collect nested tables first and remove them afterwards, so cells
        # are not modified while they are being walked.
        nested_tables = []
        for row in tbl.findall(qn('w:tr')):
            for tc in row.findall(qn('w:tc')):
                nested_tables.extend((tc, nested) for nested in tc.findall(qn('w:tbl')))
                # Also clean ※ text in cell paragraphs
                for p in tc.findall(qn('w:p')):
                    if get_text(p).strip().startswith(('※', '·※', '* ')):
                        for r in p.findall(qn('w:r')):
                            p.remove(r)
                        cleaned += 1

        for tc, nested in nested_tables:
            tc.remove(nested)
            nested_removed += 1
    return cleaned, nested_removed, overview_tbl


def _fill_overview_table(overview_tbl, fill_content):
    """Step 4: fill the overview table by label keyword; return the number of cells set."""
    # Map cell label keywords → fill_content key
    keyword_map = {
        '사업(과제)명': 'overview_사업명',
        '사업(과제)개요': 'overview_사업개요',
        '핵심역량': 'overview_핵심역량',
        '시장현황': 'overview_시장현황',
        '데이터': 'overview_데이터필요성',
    }
    rows = overview_tbl.findall(qn('w:tr'))
    filled = 0
    for row in rows:
        cells = row.findall(qn('w:tc'))
        if len(cells) < 3:
            continue
        # Check cell[1] (label column) for keyword matches
        label_text = get_text(cells[1]).strip().replace(' ', '')
        for keyword, content_key in keyword_map.items():
            if keyword.replace(' ', '') not in label_text:
                continue
            content = fill_content.get(content_key, '')
            if content and isinstance(content, str):
                set_cell_text(cells[2], content)
                filled += 1
            break  # only the first matching keyword is considered per row

    # Fill image captions (row 7, cells 2 and 3)
    if len(rows) > 7:
        cells7 = rows[7].findall(qn('w:tc'))
        captions = (
            (2, 'overview_이미지_caption_left'),
            (3, 'overview_이미지_caption_right'),
        )
        for cell_index, key in captions:
            caption = fill_content.get(key, '')
            if caption and len(cells7) > cell_index:
                set_cell_text(cells7[cell_index], _as_text(caption))
                filled += 1
    return filled


def _force_black(body):
    """Step 5: set every w:color that is not 000000/auto/empty to 000000; return the count."""
    changed = 0
    for color in body.iter(qn('w:color')):
        val = color.get(qn('w:val'), '').upper()
        if val not in ('000000', 'AUTO', ''):
            color.set(qn('w:val'), '000000')
            changed += 1
    return changed


def _add_chapter_page_breaks(body):
    """Step 6: insert a page-break paragraph before each chapter table (Ⅰ included)."""
    added = 0
    for tbl in body.findall(qn('w:tbl')):
        if is_chapter_header(tbl) and get_chapter_num(tbl) >= 1:
            break_para = OxmlElement('w:p')
            pPr = OxmlElement('w:pPr')
            r = OxmlElement('w:r')
            br = OxmlElement('w:br')
            br.set(qn('w:type'), 'page')
            r.append(br)
            break_para.append(pPr)
            break_para.append(r)
            tbl.addprevious(break_para)
            added += 1
    return added


def _build_heading_entries(analysis):
    """Build ``(match_keys, field_id)`` pairs for every section_content field.

    Analysis headings may use prefixed names like "Ⅱ.1.가 사업(과제) 목적"
    while the document paragraphs just have "가. 사업(과제) 목적", so several
    variants of each heading are generated as match keys. The analysis data
    itself is not modified.
    """
    # Copy the list: extending the original would alter analysis['fields'].
    all_fields = list(analysis.get('fields') or [])
    for section in analysis.get('sections') or []:
        all_fields.extend(section.get('fields') or [])
        # Also treat the section itself as a field if it has field_type
        if section.get('field_type') in ('section_content', 'fill_conditional', 'fill_optional'):
            entry = section
            if 'id' not in section and 'field_id' not in section:
                # Use section_id as the field id (on a copy of the section)
                entry = dict(section, id=section.get('section_id', ''))
            all_fields.append(entry)

    entries = []
    for field in all_fields:
        ft = field.get('type') or field.get('field_type', '')
        if ft != 'section_content':
            continue
        fid = _get_fid(field)
        label = field.get('label') or ''
        cp = field.get('content_plan') or {}
        heading = cp.get('section_heading', label) or ''

        keys = {heading, label}
        # Strip common prefixes: "Ⅱ.1.가 " → "가 사업..." or "Ⅳ.가 " → "가 ..."
        stripped = re.sub(r'^[ⅠⅡⅢⅣⅤ]\.\d*\.?', '', heading).strip()
        if stripped:
            keys.add(stripped)
        # Extract "가. XXX" or "가 XXX" pattern from the heading
        m = re.search(r'([가-힣])[.\s]\s*(.+)', heading)
        if m:
            letter = m.group(1)
            desc = m.group(2).strip()
            keys.add(f'{letter}. {desc}')
            keys.add(f'{letter} {desc}')
            keys.add(desc)
            # Also add versions with parenthetical suffix stripped
            desc_short = re.sub(r'\s*\(.*$', '', desc).strip()
            if desc_short and desc_short != desc:
                keys.add(f'{letter}. {desc_short}')
                keys.add(desc_short)
        # Also extract content after all prefixes (numbers, roman, Korean letters)
        content_only = re.sub(r'^[ⅠⅡⅢⅣⅤ\.\d\s]*[가-힣][\.\s]\s*', '', heading).strip()
        if content_only and len(content_only) >= 3:
            keys.add(content_only)
            # Strip parenthetical suffix here too
            co_short = re.sub(r'\s*\(.*$', '', content_only).strip()
            if co_short and len(co_short) >= 3:
                keys.add(co_short)
        # Drop empty and very short keys
        keys = {k.strip() for k in keys if k.strip() and len(k.strip()) >= 3}
        entries.append((keys, fid))
    return entries


def _fill_sections(body, heading_entries, fill_content, body_pPr, body_rPr):
    """Step 7: insert section content after matching heading paragraphs.

    Returns:
        The set of field IDs that were filled.
    """
    children = list(body)
    p_tag = qn('w:p')
    filled_fids = set()
    for i, child in enumerate(children):
        # `children` is a snapshot: paragraphs removed while filling an
        # earlier section are still listed but detached; never match those.
        if child.tag != p_tag or child.getparent() is None:
            continue
        text = get_text(child).strip()
        if not text:
            continue
        # Skip numbered parent headings (e.g. "1. 사업(과제) 개요", "4. 기대효과")
        # These are parent-level, not sub-section headings where content goes
        if re.match(r'^\d+\.\s', text):
            continue

        # Longest key contained in the paragraph text wins (spaces ignored).
        squeezed = text.replace(' ', '')
        matched_fid = None
        best_key_len = 0
        for keys, fid in heading_entries:
            if fid in filled_fids:
                continue
            for key in keys:
                if len(key) > best_key_len and key.replace(' ', '') in squeezed:
                    matched_fid = fid
                    best_key_len = len(key)
        if not matched_fid:
            continue

        content = fill_content.get(matched_fid, '')
        if not content or not isinstance(content, str):
            continue

        new_paras = content_to_paras(content, body_pPr, body_rPr)
        # Stop at next heading (가., 나., 다., 라., 마., 1., 2., etc.)
        _replace_following(children, i, new_paras, 80, _starts_next_heading)
        filled_fids.add(matched_fid)
        print(f" Filled {matched_fid}: {len(content)}c after '{text[:40]}'")
    return filled_fids


def _fill_fallback_sections(body, fill_content, filled_fids, body_pPr, body_rPr):
    """Step 7b: fill the combined data section and the Ⅴ chapter section.

    *filled_fids* is updated in place. Returns the number of sections filled.
    """
    count = 0
    p_tag = qn('w:p')

    # Combine sub-fields for "나. 데이터 상품 및 활용 서비스 필요성"
    combined_data_fids = ['field_030', 'field_031', 'field_032', 'field_033', 'field_034']
    if not any(fid in filled_fids for fid in combined_data_fids):
        parts = [fill_content.get(fid, '') for fid in combined_data_fids]
        combined_content = '\n\n'.join(
            part for part in parts if part and isinstance(part, str)
        )
        if combined_content:
            children = list(body)
            for i, child in enumerate(children):
                if child.tag != p_tag:
                    continue
                text = get_text(child).strip()
                if '데이터 상품' in text and '필요성' in text:
                    new_paras = content_to_paras(combined_content, body_pPr, body_rPr)
                    _replace_following(children, i, new_paras, 80, _starts_next_heading)
                    count += 1
                    filled_fids.update(combined_data_fids)
                    print(f" Filled combined data fields: {len(combined_content)}c after '{text[:40]}'")
                    break

    # Fill Ⅴ. 기타 section — content goes after chapter header table
    # Find field ID for 기타 section (could be field_067, section_Ⅴ_기타, etc.)
    kita_fid = None
    for candidate, value in fill_content.items():
        if '기타' in candidate or 'Ⅴ' in candidate:
            if candidate not in filled_fids and isinstance(value, str):
                kita_fid = candidate
                break
    if kita_fid:
        children = list(body)
        for i, child in enumerate(children):
            if child.tag == qn('w:tbl') and is_chapter_header(child) and get_chapter_num(child) == 5:
                content = fill_content[kita_fid]
                new_paras = content_to_paras(content, body_pPr, body_rPr)
                # Paragraphs longer than 20 characters end the replaceable run.
                _replace_following(children, i, new_paras, 20, lambda text: len(text) > 20)
                count += 1
                filled_fids.add(kita_fid)
                print(f" Filled {kita_fid}: {len(content)}c after Ⅴ chapter header")
                break
    return count


def _fill_budget_table(body, fill_content):
    """Step 8a: fill the check and amount rows of the first suitable budget table."""
    cb_keys = ['budget_기획설계_check', 'budget_구매_check', 'budget_수집생성_check', 'budget_가공_check', 'budget_분석_check']
    amt_keys = ['budget_기획설계_amount', 'budget_구매_amount', 'budget_수집생성_amount', 'budget_가공_amount', 'budget_분석_amount', 'budget_계_amount']
    for tbl in body.findall(qn('w:tbl')):
        text = get_text(tbl)
        if not all(word in text for word in ('기획', '설계', '구매', '분석')):
            continue
        rows = tbl.findall(qn('w:tr'))
        if len(rows) < 3:
            continue
        # Row 1: checkboxes, row 2: amounts; column 0 is the label cell.
        for row, keys in ((rows[1], cb_keys), (rows[2], amt_keys)):
            cells = row.findall(qn('w:tc'))
            for target_ci, key in enumerate(keys, start=1):
                val = fill_content.get(key, '')
                if val and target_ci < len(cells):
                    set_cell_text(cells[target_ci], _as_text(val))
        print(f"Step 8a: Filled budget table ({len(cb_keys)} checks + {len(amt_keys)} amounts)")
        break


def _table_payload(fill_content, analysis, named_key, label_keywords, col_order=None):
    """Look up table data by named key, then by analysis field ID, and normalise it.

    A list is wrapped as ``{'rows': ...}``. When *col_order* is given and the
    list holds dicts, each dict is converted to a row list in that column
    order (keys are also tried with '_' replaced by ' ').
    """
    data = fill_content.get(named_key)
    if data is None:
        fid = None
        for keyword in label_keywords:
            fid = _find_field_id(analysis, 'table_content', keyword)
            if fid:
                break
        data = fill_content.get(fid) if fid else None
    if isinstance(data, list):
        rows = data
        if col_order and data and isinstance(data[0], dict):
            rows = [
                [d.get(k, d.get(k.replace('_', ' '), '')) for k in col_order]
                if isinstance(d, dict) else d
                for d in data
            ]
        data = {'rows': rows}
    return data


def _fill_schedule_table(body, sched_data):
    """Step 8b: fill the first table containing '세부 업무' and '수행내용'."""
    for tbl in body.findall(qn('w:tbl')):
        text = get_text(tbl)
        if not ('세부 업무' in text and '수행내용' in text):
            continue
        rows = tbl.findall(qn('w:tr'))
        if sched_data and isinstance(sched_data, dict):
            sched_rows = sched_data.get('rows') or []
            # Data rows start after the first row whose text contains 'M'
            # (the month header); default to the row after the first one.
            data_start = 1
            for ri, row in enumerate(rows):
                if 'M' in get_text(row):
                    data_start = ri + 1
                    break
            for row, row_data in zip(rows[data_start:], sched_rows):
                if not isinstance(row_data, dict):
                    continue  # unsupported row shape; leave the table row as is
                cells = row.findall(qn('w:tc'))
                # Support both 'marks' (string array) and 'schedule' (bool array) formats
                marks = row_data.get('marks') or []
                if not marks:
                    marks = ['●' if m else '' for m in row_data.get('schedule') or []]
                weight = row_data.get('비중', '')
                if len(cells) >= 2:
                    set_cell_text(cells[0], _as_text(row_data.get('세부업무', '')))
                    set_cell_text(cells[1], _as_text(row_data.get('수행내용', '')))
                # Month mark columns start at index 2
                for ci, mark in enumerate(marks, start=2):
                    if ci < len(cells) - 1:  # -1 to not overwrite weight col
                        set_cell_text(cells[ci], _as_text(mark))
                # Weight (last cell)
                if cells and weight:
                    set_cell_text(cells[-1], _as_text(weight))
            # Clear remaining data rows
            for row in rows[data_start + len(sched_rows):]:
                for cell in row.findall(qn('w:tc')):
                    set_cell_text(cell, '')
            print(f"Step 8b: Filled schedule table ({len(sched_rows)} rows)")
        else:
            print("Step 8b: Schedule field not found in fill_content — skipped")
        break


def _headcount_text(raw, dict_key):
    """Return a headcount value as text with a ' 명' suffix, or '' when empty."""
    value = raw.get(dict_key, raw.get('value', '')) if isinstance(raw, dict) else raw
    text = str(value) if value else ''
    # Add '명' suffix if not already present
    if text and not text.endswith('명'):
        text = f'{text} 명'
    return text


def _fill_headcount_tables(body, analysis, fill_content):
    """Step 8c: fill current/planned headcount cells in every matching table."""
    # Try named keys first, then analysis field IDs
    emp_fid = 'headcount_재직인원'
    hire_fid = 'headcount_추가고용'
    if emp_fid not in fill_content:
        emp_fid = _find_field_id(analysis, 'empty_cell', '재직') or ''
    if hire_fid not in fill_content:
        hire_fid = _find_field_id(analysis, 'empty_cell', '고용') or ''
    for tbl in body.findall(qn('w:tbl')):
        text = get_text(tbl)
        if not ('재직(소속)인원' in text and '추가 고용계획' in text):
            continue
        rows = tbl.findall(qn('w:tr'))
        if not rows:
            continue
        cells = rows[0].findall(qn('w:tc'))
        if len(cells) < 4:
            continue
        v1 = _headcount_text(fill_content.get(emp_fid, '') if emp_fid else '', '재직인원')
        v2 = _headcount_text(fill_content.get(hire_fid, '') if hire_fid else '', '추가고용계획')
        if v1:
            set_cell_text(cells[1], v1)
        if v2:
            set_cell_text(cells[3], v2)
        print(f"Step 8c: Filled employee count ({emp_fid}={v1}, {hire_fid}={v2})")


def _fill_row_table(body, markers, data, fill_content, filled_msg, skipped_msg):
    """Fill the first table whose text contains all *markers* via ``_fill_staff_table``.

    *filled_msg* is a format string receiving ``count``; *skipped_msg* is
    printed when nothing was written.
    """
    for tbl in body.findall(qn('w:tbl')):
        text = get_text(tbl)
        if all(marker in text for marker in markers):
            count = _fill_staff_table(tbl, data, fill_content)
            print(filled_msg.format(count=count) if count else skipped_msg)
            break


def main():
    """Command-line entry point: fill a .docx template and save the result.

    Reads ``<template> <analysis.json> <fill_content.json> <output>`` from
    ``sys.argv``; prints the usage line and exits with status 1 when fewer
    than four arguments are given. Progress is reported on stdout, one line
    per processing step.
    """
    if len(sys.argv) < 5:
        print("Usage: python fill_docx.py <template> <analysis.json> <fill_content.json> <output>")
        sys.exit(1)

    template_path, analysis_path, content_path, output_path = sys.argv[1:5]

    print(f"Loading: {template_path}")
    doc = Document(template_path)
    body = doc.element.body

    with open(analysis_path, 'r', encoding='utf-8') as f:
        analysis = json.load(f)
    with open(content_path, 'r', encoding='utf-8') as f:
        fill_content = json.load(f)

    # ── Extract formatting templates ──
    body_pPr = extract_body_ppr(body)
    body_rPr = make_clean_rpr()  # Always use clean black rPr — no gray leaks

    # Compare with None: truth-testing an lxml element checks for children.
    if body_pPr is not None:
        ind = body_pPr.find(qn('w:ind'))
        print(f" pPr indent: {ind.get(qn('w:left'), '?') if ind is not None else 'none'}")
    print(" rPr: 맑은 고딕 sz=20 color=000000 (clean)")

    # ── Steps 1-3: strip guidance material ──
    tip_count = _remove_tip_boxes(body)
    print(f"Step 1: Removed {tip_count} tip boxes")

    instr_count = _remove_instruction_paras(body)
    print(f"Step 2: Removed {instr_count} instruction paragraphs")

    cell_clean_count, nested_tbl_count, overview_tbl = _clean_table_cells(body)
    print(f"Step 3: Cleaned {cell_clean_count} instruction texts + {nested_tbl_count} nested tables")

    # ── Step 4: Fill overview table — keyword matching ──
    if overview_tbl is not None:
        overview_filled = _fill_overview_table(overview_tbl, fill_content)
        print(f"Step 4: Filled {overview_filled} overview table cells")
    else:
        print("Step 4: Overview table not found")

    # ── Step 5: Force ALL colors to black ──
    color_count = _force_black(body)
    print(f"Step 5: Forced {color_count} non-black colors to black")

    # ── Step 6: Page breaks before chapter tables ──
    pb_count = _add_chapter_page_breaks(body)
    print(f"Step 6: Added {pb_count} page breaks")

    # ── Step 7: Fill section content ──
    heading_entries = _build_heading_entries(analysis)
    filled_fids = _fill_sections(body, heading_entries, fill_content, body_pPr, body_rPr)
    print(f"Step 7: Filled {len(filled_fids)} sections")

    # ── Step 7b: Fallback for unfilled sections ──
    fallback_count = _fill_fallback_sections(body, fill_content, filled_fids, body_pPr, body_rPr)
    if fallback_count:
        print(f"Step 7b: Filled {fallback_count} fallback sections")

    # ── Step 8: Tables filled from fill_content.json ──
    _fill_budget_table(body, fill_content)

    sched_data = _table_payload(fill_content, analysis, 'schedule_table', ('추진일정', '일정'))
    _fill_schedule_table(body, sched_data)

    _fill_headcount_tables(body, analysis, fill_content)

    staff_data = _table_payload(
        fill_content, analysis, 'team_table', ('참여인력', '팀구성'),
        col_order=['순번', '직급', '성명', '주요_담당업무', '경력_및_학력', '채용연월', '참여율'],
    )
    _fill_row_table(
        body, ('순번', '직급', '성명', '참여율'), staff_data, fill_content,
        "Step 8d: Filled staff table ({count} members)",
        "Step 8d: Staff field not found in fill_content — skipped",
    )

    hire_tbl_data = _table_payload(
        fill_content, analysis, 'hiring_table', ('추가', '고용계획'),
        col_order=['순번', '주요_담당업무', '요구_경력_학력', '채용시기'],
    )
    _fill_row_table(
        body, ('주요 담당업무', '요구되는', '채용시기'), hire_tbl_data, fill_content,
        "Step 8e: Filled hiring table ({count} positions)",
        "Step 8e: Hiring field not found in fill_content — skipped",
    )

    # ── Save ──
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    doc.save(output_path)
    size = os.path.getsize(output_path)
    print(f"\nOutput: {output_path} ({size:,} bytes)")
816
+
817
+
818
# Run the filler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()