@panda-agent/panda-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +267 -258
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,233 @@
1
+ ---
2
+ name: xlsx
3
+ version: 1.0.1
4
+ description: "Use this skill any time a spreadsheet file is the primary input or output. This means any task where the user wants to: open, read, edit, or fix an existing .xlsx, .xlsm, .csv, or .tsv file (e.g., adding columns, computing formulas, formatting, charting, cleaning messy data); create a new spreadsheet from scratch or from other data sources; or convert between tabular file formats. Trigger especially when the user references a spreadsheet file by name or path — even casually (like \"the xlsx in my downloads\") — and wants something done to it or produced from it. Also trigger for cleaning or restructuring messy tabular data files (malformed rows, misplaced headers, junk data) into proper spreadsheets. The deliverable must be a spreadsheet file. Do NOT trigger when the primary deliverable is a Word document, HTML report, standalone Python script, database pipeline, or Google Sheets API integration, even if tabular data is involved."
5
+ description_zh: "当电子表格文件是主要输入或输出时使用此技能。包括:打开、读取、编辑或修复现有的 .xlsx、.xlsm、.csv 或 .tsv 文件(如添加列、计算公式、格式化、图表、清洗数据);从零或其他数据源创建新电子表格;在表格文件格式之间转换。当用户提及电子表格文件名或路径时触发——即使是随意提及(如\"下载文件夹里的 xlsx\")——并希望对其进行操作或生成电子表格。也适用于将混乱的表格数据文件(格式错误的行、错位的表头、垃圾数据)清理重组为规范的电子表格。交付物必须是电子表格文件。当主要交付物是 Word 文档、HTML 报告、独立 Python 脚本、数据库管道或 Google Sheets API 集成时,即使涉及表格数据也不要触发。"
6
+ license: Proprietary. LICENSE.txt has complete terms
7
+ ---
8
+
9
+ # Spreadsheet Creation, Editing, and Analysis
10
+
11
+ You have access to multiple tools and workflows for working with `.xlsx` files — from reading and analysing data, through programmatic creation and editing, to formula recalculation and error checking.
12
+
13
+ ## Tooling Primer
14
+
15
+ | Library | Best for |
16
+ |---------|----------|
17
+ | **pandas** | Bulk data manipulation, statistical analysis, quick CSV↔XLSX conversion |
18
+ | **openpyxl** | Cell-level formatting, Excel formulas, charts, conditional formatting |
19
+
20
+ ## ⛔ CRITICAL — Use Formulas, Never Hardcode Calculations
21
+
22
+ > **🚨 MANDATORY RULE — ZERO EXCEPTIONS 🚨**
23
+ >
24
+ > **Every computed value must be an Excel formula, not a Python-calculated literal.**
25
+ > This keeps the workbook dynamic and self-updating.
26
+ > Violations will produce stale, non-updating spreadsheets.
27
+
28
+ ```python
29
+ # ── WRONG — baking Python results into cells ──
30
+ total = df['Sales'].sum()
31
+ ws['B10'] = total # static 5000
32
+
33
+ growth = (df.iloc[-1]['Revenue'] - df.iloc[0]['Revenue']) / df.iloc[0]['Revenue']
34
+ ws['C5'] = growth # static 0.15
35
+
36
+ avg = sum(vals) / len(vals)
37
+ ws['D20'] = avg # static 42.5
38
+ ```
39
+
40
+ ```python
41
+ # ── RIGHT — let Excel do the maths ──
42
+ ws['B10'] = '=SUM(B2:B9)'
43
+ ws['C5'] = '=(C4-C2)/C2'
44
+ ws['D20'] = '=AVERAGE(D2:D19)'
45
+ ```
46
+
47
+ This rule applies to **all** calculations — sums, ratios, percentages, differences, etc.
48
+
49
+ ## Step-by-Step Workflow
50
+
51
+ 1. **Pick a library** — pandas for data; openpyxl for formatting / formulas.
52
+ 2. **Open or create** the workbook.
53
+ 3. **Modify** — add/edit data, formulas, and styles.
54
+ 4. **Save** to disk.
55
+ 5. **Recalculate** (mandatory when formulas are present):
56
+ ```bash
57
+ python scripts/recalc.py output.xlsx
58
+ ```
59
+ 6. **Inspect the JSON output** and fix any errors:
60
+ - `status: "errors_found"` → see `error_summary` for types and locations.
61
+ - Common errors: `#REF!` (bad reference), `#DIV/0!` (zero denominator), `#VALUE!` (type mismatch), `#NAME?` (unknown function).
62
+
63
+ ## Reading & Analysing Data
64
+
65
+ ```python
66
+ import pandas as pd
67
+
68
+ df = pd.read_excel('file.xlsx') # first sheet
69
+ sheets = pd.read_excel('file.xlsx', sheet_name=None) # all sheets → dict
70
+
71
+ df.head(); df.info(); df.describe() # quick overview
72
+
73
+ df.to_excel('result.xlsx', index=False) # write back
74
+ ```
75
+
76
+ ## Building New Workbooks
77
+
78
+ ```python
79
+ from openpyxl import Workbook
80
+ from openpyxl.styles import Font, PatternFill, Alignment
81
+
82
+ wb = Workbook()
83
+ ws = wb.active
84
+
85
+ ws['A1'] = 'Header'
86
+ ws.append(['Row', 'of', 'data'])
87
+ ws['B2'] = '=SUM(A1:A10)'
88
+
89
+ ws['A1'].font = Font(bold=True, color='FF0000')
90
+ ws['A1'].fill = PatternFill('solid', start_color='FFFF00')
91
+ ws['A1'].alignment = Alignment(horizontal='center')
92
+ ws.column_dimensions['A'].width = 20
93
+
94
+ wb.save('output.xlsx')
95
+ ```
96
+
97
+ ## Modifying Existing Files
98
+
99
+ ```python
100
+ from openpyxl import load_workbook
101
+
102
+ wb = load_workbook('existing.xlsx')
103
+ ws = wb.active # or wb['SheetName']
104
+
105
+ for name in wb.sheetnames:
106
+ print("Sheet: {}".format(name))
107
+
108
+ ws['A1'] = 'Updated'
109
+ ws.insert_rows(2)
110
+ ws.delete_cols(3)
111
+
112
+ extra = wb.create_sheet('Extra')
113
+ extra['A1'] = 'New data'
114
+
115
+ wb.save('modified.xlsx')
116
+ ```
117
+
118
+ ## Formula Recalculation
119
+
120
+ Workbooks produced by openpyxl contain formula *strings* but no cached results. Use the bundled helper to populate those values:
121
+
122
+ ```bash
123
+ python scripts/recalc.py <excel_file> [timeout_seconds]
124
+ ```
125
+
126
+ **What it does:**
127
+ - Deploys a LibreOffice Basic macro (first run only)
128
+ - Invokes LibreOffice headless to recalculate every formula
129
+ - Scans all cells for Excel error markers
130
+ - Emits structured JSON
131
+
132
+ **Prerequisite:** LibreOffice must be installed. The helper handles first-run configuration automatically, including sandboxed environments where Unix sockets are restricted (via `scripts/office/soffice.py`).
133
+
134
+ ### Interpreting the Output
135
+
136
+ ```json
137
+ {
138
+ "status": "success",
139
+ "total_errors": 0,
140
+ "total_formulas": 42,
141
+ "error_summary": {}
142
+ }
143
+ ```
144
+
145
+ When `status` is `"errors_found"`, `error_summary` lists each error type with count and cell locations (up to 20 per type).
146
+
147
+ ---
148
+
149
+ # Output Quality Standards
150
+
151
+ ## General — All Workbooks
152
+
153
+ | Area | Requirement |
154
+ |------|-------------|
155
+ | **Typography** | Use a single professional typeface (Arial, Times New Roman, …) throughout, unless the user specifies otherwise |
156
+ | **Error-free delivery** | Zero formula errors — no `#REF!`, `#DIV/0!`, `#VALUE!`, `#N/A`, `#NAME?` |
157
+ | **Template fidelity** | When editing an existing file, study and replicate its formatting conventions exactly; never impose a different style on an already-patterned workbook |
158
+
159
+ ## Financial Models
160
+
161
+ ### Colour Conventions (override only when the user or template says otherwise)
162
+
163
+ | Element | RGB | Hex | Meaning |
164
+ |---------|-----|-----|---------|
165
+ | Blue text | `(0,0,255)` | `#0000FF` | Hard-coded inputs / scenario toggles |
166
+ | Black text | `(0,0,0)` | `#000000` | All formulas and calculated values |
167
+ | Green text | `(0,128,0)` | `#008000` | Cross-sheet references within the same workbook |
168
+ | Red text | `(255,0,0)` | `#FF0000` | External links to other files |
169
+ | Yellow fill | `(255,255,0)` | `#FFFF00` | Key assumptions or cells requiring review |
170
+
171
+ ### Number Formatting
172
+
173
+ | Data Type | Excel Format Code | Display Example | Notes |
174
+ |-----------|-------------------|-----------------|-------|
175
+ | Calendar years | _(plain text)_ | `2024` | Never `2,024` — no thousands separator |
176
+ | Currency | `$#,##0` | `$1,250` | Always label units in headers, e.g. _Revenue ($mm)_ |
177
+ | Zero values | `$#,##0;($#,##0);-` | `-` | Custom three-section format |
178
+ | Percentages | `0.0%` | `12.5%` | One decimal place |
179
+ | Multiples | `0.0x` | `3.2x` | For EV/EBITDA, P/E, etc. |
180
+ | Negatives | `#,##0;(#,##0)` | `(123)` | Parenthesised, never `-123`; the second section applies to negatives only |
181
+
182
+ ### Formula Best Practices
183
+
184
+ - **Centralise assumptions** — growth rates, margins, multiples belong in labelled assumption cells; formulas should reference those cells, not embed literals.
185
+ ```
186
+ =B5*(1+$B$6) ✓
187
+ =B5*1.05 ✗
188
+ ```
189
+ - **Prevent errors** — verify references, check range boundaries, confirm consistent formulas across projection periods, test edge cases.
190
+ - **No circular references** — unless explicitly designed and documented.
191
+ - **Document hard-codes** — add a cell comment or adjacent note:
192
+ `Source: Company 10-K, FY2024, Page 45, Revenue Note, [SEC EDGAR URL]`
193
+
194
+ ## Formula Verification Checklist
195
+
196
+ - Spot-check 2–3 references before building the full model.
197
+ - Confirm column mapping (column 64 → BL, not BK).
198
+ - Remember row offsets (Excel rows are 1-based: 0-based DataFrame index 5 = Excel row 6, or row 7 when row 1 holds the header).
199
+ - Guard against `NaN` — use `pd.notna()`.
200
+ - Test far-right columns (FY data often sits in column 50+).
201
+ - Handle multiple matches — search all occurrences, not just the first.
202
+
203
+ ### Testing Strategy
204
+
205
+ 1. Validate formulas on a small range first.
206
+ 2. Verify every referenced cell exists.
207
+ 3. Include zero, negative, and very large values.
208
+
209
+ ## Code Style
210
+
211
+ When generating Python that manipulates spreadsheets:
212
+ - Keep code concise — no verbose names, no gratuitous comments.
213
+ - Skip unnecessary `print()` calls.
214
+
215
+ For the workbook itself:
216
+ - Comment cells that contain complex formulas or key assumptions.
217
+ - Cite data sources for every hard-coded figure.
218
+ - Add section headers and notes for major model blocks.
219
+
220
+ ## Library Tips
221
+
222
+ ### openpyxl
223
+
224
+ - Indices are **1-based** — `(row=1, column=1)` is cell A1.
225
+ - `data_only=True` reads cached values; **warning:** saving afterward strips all formulas permanently.
226
+ - For large files: `read_only=True` (reading) or `write_only=True` (writing).
227
+ - Formulas are stored as strings and require `scripts/recalc.py` to populate values.
228
+
229
+ ### pandas
230
+
231
+ - Specify dtypes to avoid inference surprises: `pd.read_excel('f.xlsx', dtype={'id': str})`.
232
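+ - Limit columns on large files: `usecols=['id', 'date', 'amount']` (a list selects by header name; to select by Excel column letter pass a string such as `usecols='A,C,E'`).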
233
+ - Parse dates explicitly: `parse_dates=['date_column']`.
@@ -0,0 +1 @@
1
+ # helpers sub-package (intentionally empty)
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env python3
2
+ # ──────────────────────────────────────────────────────────────────
3
+ # Coalesce adjacent <w:r> elements sharing identical <w:rPr> in DOCX.
4
+ #
5
+ # Pre-processing steps that enable merging:
6
+ # 1. Strip all proofErr elements (spell/grammar markers)
7
+ # 2. Remove rsid* attributes from runs (revision metadata)
8
+ #
9
+ # After merging, adjacent <w:t> children inside the same run are
10
+ # concatenated into a single text node.
11
+ # ──────────────────────────────────────────────────────────────────
12
+
13
+ from pathlib import Path
14
+
15
+ import defusedxml.minidom
16
+
17
+
18
def merge_runs(input_dir: str) -> tuple[int, str]:
    """Coalesce mergeable runs in ``<input_dir>/word/document.xml`` in place.

    Returns a ``(count, message)`` pair.  ``count`` is the number of runs
    that were folded into a preceding run; on any failure it is ``0`` and
    ``message`` starts with ``"Error: "``.
    """
    xml_path = Path(input_dir).joinpath("word", "document.xml")
    if not xml_path.exists():
        return 0, "Error: {} not found".format(xml_path)

    try:
        dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
        root = dom.documentElement

        # Remove the markup that would otherwise keep equal runs apart.
        _purge_by_tag(root, "proofErr")
        _drop_rsid_attrs(root)

        # Every distinct parent of a run is processed exactly once.
        containers = set()
        for run in _query_tag(root, "r"):
            containers.add(run.parentNode)

        total = sum(_coalesce_in_container(box) for box in containers)

        xml_path.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as err:
        # Best-effort helper: report the failure instead of raising.
        return 0, "Error: {}".format(err)

    return total, "Merged {} runs".format(total)
45
+
46
+
47
+ # ──────────────────────────────────────────────────────────────────
48
+ # DOM traversal helpers
49
+ # ──────────────────────────────────────────────────────────────────
50
+
51
def _query_tag(root, local_name: str) -> list:
    """Collect, in document order, every element under *root* (inclusive)
    whose local name is *local_name*.

    The name compared is ``localName`` when set, otherwise ``tagName``; a
    ``prefix:local_name`` spelling is accepted as well.
    """
    suffix = ":{}".format(local_name)
    found = []
    pending = [root]

    while pending:
        node = pending.pop()
        if node.nodeType != node.ELEMENT_NODE:
            continue
        name = node.localName or node.tagName
        if name == local_name or name.endswith(suffix):
            found.append(node)
        # Push children reversed so the first child is visited first.
        pending.extend(reversed(node.childNodes))

    return found
66
+
67
+
68
def _child_by_tag(parent, local_name: str):
    """Find the first direct child element of *parent* named *local_name*.

    Only immediate children are inspected; returns ``None`` when no child
    matches.
    """
    suffix = ":{}".format(local_name)
    elements = (kid for kid in parent.childNodes
                if kid.nodeType == kid.ELEMENT_NODE)
    for elem in elements:
        name = elem.localName or elem.tagName
        if name == local_name or name.endswith(suffix):
            return elem
    return None
77
+
78
+
79
def _children_by_tag(parent, local_name: str) -> list:
    """List the direct child elements of *parent* named *local_name*.

    Children are returned in document order; nested descendants are not
    considered.
    """
    suffix = ":{}".format(local_name)
    matches = []
    for kid in parent.childNodes:
        if kid.nodeType != kid.ELEMENT_NODE:
            continue
        name = kid.localName or kid.tagName
        if name == local_name or name.endswith(suffix):
            matches.append(kid)
    return matches
87
+
88
+
89
def _only_whitespace_between(a, b) -> bool:
    """Tell whether *b* follows *a* with nothing significant in between.

    Walks forward from *a* over its following siblings.  Any element, or
    any text node containing non-whitespace, met before *b* makes the
    answer ``False``; so does running out of siblings without meeting *b*.
    """
    node = a.nextSibling
    while node is not None and node is not b:
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling
    # The loop stops either on *b* (adjacent) or at the end of the siblings.
    return node is not None
101
+
102
+
103
+ # ──────────────────────────────────────────────────────────────────
104
+ # Cleanup passes
105
+ # ──────────────────────────────────────────────────────────────────
106
+
107
def _purge_by_tag(root, local_name: str):
    """Detach every element under *root* whose local name is *local_name*."""
    # _query_tag returns a fresh list, so removing while looping is safe.
    for victim in _query_tag(root, local_name):
        owner = victim.parentNode
        if owner is None:
            continue
        owner.removeChild(victim)
112
+
113
+
114
def _drop_rsid_attrs(root):
    """Remove every attribute whose name contains "rsid" (case-insensitive)
    from all run elements under *root*."""
    for run in _query_tag(root, "r"):
        # Snapshot the names first; the attribute map changes on removal.
        names = [
            attr.name
            for attr in run.attributes.values()
            if "rsid" in attr.name.lower()
        ]
        for name in names:
            run.removeAttribute(name)
120
+
121
+
122
+ # ──────────────────────────────────────────────────────────────────
123
+ # Core merging logic
124
+ # ──────────────────────────────────────────────────────────────────
125
+
126
def _tag_is_run(nd) -> bool:
    """Report whether *nd* is a run element (local name ``r``)."""
    name = nd.localName or nd.tagName
    if name == "r":
        return True
    return name.endswith(":r")
129
+
130
+
131
def _next_elem(nd):
    """Return the closest following sibling of *nd* that is an element,
    or ``None`` when only non-element nodes (or nothing) follow."""
    sib = nd.nextSibling
    while sib is not None and sib.nodeType != sib.ELEMENT_NODE:
        sib = sib.nextSibling
    return sib
139
+
140
+
141
def _next_run_sibling(nd):
    """Return the next following sibling of *nd* that is a run element,
    skipping any other nodes; ``None`` when there is no such sibling."""
    candidate = nd.nextSibling
    while candidate is not None:
        is_element = candidate.nodeType == candidate.ELEMENT_NODE
        if is_element and _tag_is_run(candidate):
            break
        candidate = candidate.nextSibling
    return candidate
149
+
150
+
151
def _first_run_child(container):
    """Return the first direct child of *container* that is a run element,
    or ``None`` when it has none."""
    return next(
        (
            kid
            for kid in container.childNodes
            if kid.nodeType == kid.ELEMENT_NODE and _tag_is_run(kid)
        ),
        None,
    )
157
+
158
+
159
def _runs_compatible(a, b) -> bool:
    """Decide whether runs *a* and *b* carry the same run properties.

    Two runs match when neither has an ``rPr`` child, or when both have
    one and the two serialise to identical XML.
    """
    left = _child_by_tag(a, "rPr")
    right = _child_by_tag(b, "rPr")
    if left is None or right is None:
        # Compatible only if the property block is missing on both sides.
        return left is None and right is None
    return left.toxml() == right.toxml()
166
+
167
+
168
def _absorb_run(dst, src):
    """Append every element child of *src*, except its ``rPr``, to *dst*.

    Non-element children of *src* (text, comments) are left where they
    are; moved elements keep their relative order.
    """
    movable = []
    for kid in src.childNodes:
        if kid.nodeType == kid.ELEMENT_NODE:
            name = kid.localName or kid.tagName
            if not (name == "rPr" or name.endswith(":rPr")):
                movable.append(kid)

    # appendChild detaches the node from *src* before attaching it to *dst*.
    for kid in movable:
        dst.appendChild(kid)
177
+
178
+
179
def _squash_text_nodes(run):
    """Concatenate adjacent ``<w:t>`` children of *run* into one element.

    Two text elements are merged when only whitespace-only text (or
    nothing) separates them.  The later element's text is appended to the
    earlier one and the later element is removed from *run*.

    After each merge the surviving element's ``xml:space`` attribute is
    brought in line with its new content: it is set to ``"preserve"`` when
    the combined text starts or ends with whitespace of any kind (space,
    tab, newline, ...), and removed otherwise.  Checking only for a
    literal space would drop the attribute from text that begins or ends
    with a tab or newline, allowing that whitespace to be trimmed by the
    consuming application.
    """
    t_nodes = _children_by_tag(run, "t")

    # Walk from the end so each merge folds into an element that is still
    # present in the list when the preceding pair is examined.
    for idx in range(len(t_nodes) - 1, 0, -1):
        cur, prev = t_nodes[idx], t_nodes[idx - 1]
        if not _only_whitespace_between(prev, cur):
            continue

        txt_prev = prev.firstChild.data if prev.firstChild else ""
        txt_cur = cur.firstChild.data if cur.firstChild else ""
        combined = txt_prev + txt_cur

        if prev.firstChild:
            prev.firstChild.data = combined
        else:
            prev.appendChild(run.ownerDocument.createTextNode(combined))

        # Any leading/trailing whitespace is significant, not just " ".
        if combined != combined.strip():
            prev.setAttribute("xml:space", "preserve")
        elif prev.hasAttribute("xml:space"):
            prev.removeAttribute("xml:space")

        run.removeChild(cur)
205
+
206
+
207
def _coalesce_in_container(container) -> int:
    """Fold each group of compatible neighbouring runs in *container* into
    the first run of the group and return how many runs were folded away.

    A run is absorbed only when it is the very next element sibling of the
    current run and `_runs_compatible` accepts the pair.  Once a run can
    absorb no more, its text elements are squashed and processing moves on
    to the next run sibling.
    """
    merged = 0
    anchor = _first_run_child(container)

    while anchor is not None:
        follower = _next_elem(anchor)
        while (
            follower is not None
            and _tag_is_run(follower)
            and _runs_compatible(anchor, follower)
        ):
            _absorb_run(anchor, follower)
            container.removeChild(follower)
            merged += 1
            # The anchor has a new neighbour now; look again.
            follower = _next_elem(anchor)

        _squash_text_nodes(anchor)
        anchor = _next_run_sibling(anchor)

    return merged
@@ -0,0 +1,198 @@
1
+ #!/usr/bin/env python3
2
+ # ──────────────────────────────────────────────────────────────────
3
+ # Merge adjacent tracked-change wrappers (<w:ins> / <w:del>) when they
4
+ # share the same author. Reduces visual clutter in heavily-redlined
5
+ # DOCX documents without altering semantics.
6
+ #
7
+ # Constraints:
8
+ # • Only merges elements of the *same* tag (ins↔ins, del↔del)
9
+ # • Author must match (timestamps are ignored)
10
+ # • Only merges truly adjacent elements (whitespace-only gap allowed)
11
+ # ──────────────────────────────────────────────────────────────────
12
+
13
+ import xml.etree.ElementTree as ET
14
+ import zipfile
15
+ from pathlib import Path
16
+
17
+ import defusedxml.minidom
18
+
19
+ _WML_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
20
+
21
+
22
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent tracked changes in ``word/document.xml``.

    Args:
        input_dir: Directory of an unpacked document; the file processed is
            ``<input_dir>/word/document.xml``.

    Returns:
        A ``(count, message)`` pair.  On success *count* is the number of
        merges performed and the file has been rewritten in place as UTF-8.
        If the file is missing, or any exception occurs while processing,
        *count* is 0 and *message* starts with ``"Error: "``.
    """
    xml_file = Path(input_dir).joinpath("word", "document.xml")
    if not xml_file.exists():
        return 0, "Error: {} not found".format(xml_file)

    try:
        source = xml_file.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(source)
        root = dom.documentElement

        # All "p" elements first, then all "tc" elements.
        holders = _collect_elements(root, "p")
        holders = holders + _collect_elements(root, "tc")

        # Per holder: insertions are merged before deletions.
        merged = sum(
            _coalesce_tracked(holder, kind)
            for holder in holders
            for kind in ("ins", "del")
        )

        xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as err:
        # Failures are reported through the return value, never raised.
        return 0, "Error: {}".format(err)

    return merged, "Simplified {} tracked changes".format(merged)
43
+
44
+
45
+ # ──────────────────────────────────────────────────────────────────
46
+ # Internal helpers
47
+ # ──────────────────────────────────────────────────────────────────
48
+
49
def _coalesce_tracked(container, kind: str) -> int:
    """Merge neighbouring tracked-change children of *container*.

    Args:
        container: Node whose direct element children are examined.
        kind: Local tag name to merge, e.g. ``"ins"`` or ``"del"``.

    Returns:
        How many elements were folded into their predecessor and removed.
    """
    candidates = []
    for child in container.childNodes:
        if child.nodeType == child.ELEMENT_NODE and _matches_tag(child, kind):
            candidates.append(child)

    folded = 0
    i = 1
    while i < len(candidates):
        keeper, follower = candidates[i - 1], candidates[i]
        if not _same_author_adjacent(keeper, follower):
            i += 1
            continue

        # Fold the follower into the keeper and stay at the same index so
        # the keeper is compared against its new neighbour next.
        _move_children(keeper, follower)
        container.removeChild(follower)
        del candidates[i]
        folded += 1

    return folded
71
+
72
+
73
+ def _matches_tag(nd, local: str) -> bool:
74
+ tag = nd.localName or nd.tagName
75
+ return tag == local or tag.endswith(":{}".format(local))
76
+
77
+
78
+ def _author_of(elem) -> str:
79
+ """Extract the w:author attribute value from a tracked-change element."""
80
+ val = elem.getAttribute("w:author")
81
+ if val:
82
+ return val
83
+ for attr in elem.attributes.values():
84
+ if attr.localName == "author" or attr.name.endswith(":author"):
85
+ return attr.value
86
+ return ""
87
+
88
+
89
def _same_author_adjacent(a, b) -> bool:
    """Decide whether *a* and *b* may be merged.

    True when both elements report the same author and, walking the
    siblings after *a* until *b* (or the end of the sibling list), no
    element and no text node with non-whitespace content is encountered.
    """
    if _author_of(a) != _author_of(b):
        return False

    node = a.nextSibling
    while not (node is None or node is b):
        kind = node.nodeType
        if kind == node.ELEMENT_NODE:
            return False
        # Whitespace-only text is tolerated; real text blocks the merge.
        if kind == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling
    return True
101
+
102
+
103
+ def _move_children(dst, src):
104
+ """Transplant every child of *src* into *dst*."""
105
+ while src.firstChild is not None:
106
+ kid = src.firstChild
107
+ src.removeChild(kid)
108
+ dst.appendChild(kid)
109
+
110
+
111
+ def _collect_elements(root, local: str) -> list:
112
+ """Depth-first collection of elements matching *local*."""
113
+ out = []
114
+
115
+ def _dfs(nd):
116
+ if nd.nodeType == nd.ELEMENT_NODE:
117
+ tag = nd.localName or nd.tagName
118
+ if tag == local or tag.endswith(":{}".format(local)):
119
+ out.append(nd)
120
+ for ch in nd.childNodes:
121
+ _dfs(ch)
122
+
123
+ _dfs(root)
124
+ return out
125
+
126
+
127
+ # ──────────────────────────────────────────────────────────────────
128
+ # Author-analysis utilities (used by pack.py / infer_author)
129
+ # ──────────────────────────────────────────────────────────────────
130
+
131
def get_tracked_change_authors(xml_path: Path) -> dict[str, int]:
    """Tally tracked changes per author in the XML file at *xml_path*.

    Every ``w:ins`` and ``w:del`` element carrying a non-empty ``w:author``
    attribute counts once for that author.

    Returns:
        Mapping of author name to number of tracked changes.  Empty when
        the file does not exist or is not well-formed XML.
    """
    if not xml_path.exists():
        return {}

    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError:
        return {}

    prefixes = {"w": _WML_NS}
    author_attr = "{{{}}}author".format(_WML_NS)

    tally: dict[str, int] = {}
    # Insertions are scanned before deletions.
    for kind in ("ins", "del"):
        for node in root.findall(".//w:{}".format(kind), prefixes):
            name = node.get(author_attr)
            if not name:
                continue
            tally[name] = tally.get(name, 0) + 1
    return tally
150
+
151
+
152
+ def _get_authors_from_docx(docx: Path) -> dict[str, int]:
153
+ """Extract author counts from a packed .docx without full unpacking."""
154
+ try:
155
+ with zipfile.ZipFile(docx, "r") as zf:
156
+ if "word/document.xml" not in zf.namelist():
157
+ return {}
158
+ with zf.open("word/document.xml") as fh:
159
+ parsed = ET.parse(fh)
160
+ ns = {"w": _WML_NS}
161
+ attr_key = "{{{}}}author".format(_WML_NS)
162
+ counts: dict[str, int] = {}
163
+ for tag in ("ins", "del"):
164
+ for el in parsed.getroot().findall(".//w:{}".format(tag), ns):
165
+ who = el.get(attr_key)
166
+ if who:
167
+ counts[who] = counts.get(who, 0) + 1
168
+ return counts
169
+ except (zipfile.BadZipFile, ET.ParseError):
170
+ return {}
171
+
172
+
173
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which author added tracked changes to the modified document.

    Per-author tracked-change counts from
    ``<modified_dir>/word/document.xml`` are compared with those from
    *original_docx*; an author is a candidate when their count increased.

    Args:
        modified_dir: Directory holding the unpacked, modified document.
        original_docx: Packed original document used as the baseline.
        default: Name returned when no candidate can be identified.

    Returns:
        The single author whose count increased, or *default* when the
        modified document has no authored tracked changes or no author's
        count increased.

    Raises:
        ValueError: If more than one author's count increased.
    """
    after = get_tracked_change_authors(modified_dir / "word" / "document.xml")
    if not after:
        return default

    before = _get_authors_from_docx(original_docx)

    # Keep only authors with more changes than in the original document.
    gained: dict[str, int] = {}
    for name, seen in after.items():
        extra = seen - before.get(name, 0)
        if extra > 0:
            gained[name] = extra

    if len(gained) > 1:
        raise ValueError(
            "Multiple authors added new changes: {}. "
            "Cannot infer which author to validate.".format(gained)
        )

    if gained:
        return next(iter(gained))
    return default