dimcode-darwin-x64 0.1.2-beta.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/bin/dimcode +0 -0
  2. package/package.json +1 -1
  3. package/bin/runtime/sandbox/dim-sandbox-runner +0 -0
  4. package/bin/runtime/sandbox/manifest.json +0 -15
  5. package/bin/skills-assets/deep-investigate/SKILL.md +0 -101
  6. package/bin/skills-assets/deep-investigate/references/prompts.md +0 -75
  7. package/bin/skills-assets/deep-investigate/references/templates.md +0 -73
  8. package/bin/skills-assets/deep-investigate/references/thinking-tools.md +0 -36
  9. package/bin/skills-assets/docs-sprint/SKILL.md +0 -73
  10. package/bin/skills-assets/docs-sprint/agents/openai.yaml +0 -4
  11. package/bin/skills-assets/docs-sprint/references/contract-discipline.md +0 -30
  12. package/bin/skills-assets/docs-sprint/references/delivery-plan.md +0 -162
  13. package/bin/skills-assets/docs-sprint/references/documentation-system.md +0 -109
  14. package/bin/skills-assets/docs-sprint/references/ui-layout.md +0 -73
  15. package/bin/skills-assets/docs-sprint/references/worktree-guide.md +0 -45
  16. package/bin/skills-assets/docx/SKILL.md +0 -273
  17. package/bin/skills-assets/docx/assets/styles/academic_styles.xml +0 -250
  18. package/bin/skills-assets/docx/assets/styles/corporate_styles.xml +0 -284
  19. package/bin/skills-assets/docx/assets/styles/default_styles.xml +0 -449
  20. package/bin/skills-assets/docx/assets/xsd/aesthetic-rules.xsd +0 -470
  21. package/bin/skills-assets/docx/assets/xsd/business-rules.xsd +0 -130
  22. package/bin/skills-assets/docx/assets/xsd/common-types.xsd +0 -159
  23. package/bin/skills-assets/docx/assets/xsd/wml-subset.xsd +0 -589
  24. package/bin/skills-assets/docx/references/cjk_typography.md +0 -357
  25. package/bin/skills-assets/docx/references/cjk_university_template_guide.md +0 -184
  26. package/bin/skills-assets/docx/references/comments_guide.md +0 -191
  27. package/bin/skills-assets/docx/references/design_good_bad_examples.md +0 -829
  28. package/bin/skills-assets/docx/references/design_principles.md +0 -819
  29. package/bin/skills-assets/docx/references/openxml_element_order.md +0 -308
  30. package/bin/skills-assets/docx/references/openxml_encyclopedia_part1.md +0 -4061
  31. package/bin/skills-assets/docx/references/openxml_encyclopedia_part2.md +0 -2820
  32. package/bin/skills-assets/docx/references/openxml_encyclopedia_part3.md +0 -3381
  33. package/bin/skills-assets/docx/references/openxml_namespaces.md +0 -82
  34. package/bin/skills-assets/docx/references/openxml_units.md +0 -72
  35. package/bin/skills-assets/docx/references/scenario_a_create.md +0 -284
  36. package/bin/skills-assets/docx/references/scenario_b_edit_content.md +0 -295
  37. package/bin/skills-assets/docx/references/scenario_c_apply_template.md +0 -456
  38. package/bin/skills-assets/docx/references/track_changes_guide.md +0 -200
  39. package/bin/skills-assets/docx/references/troubleshooting.md +0 -506
  40. package/bin/skills-assets/docx/references/typography_guide.md +0 -294
  41. package/bin/skills-assets/docx/references/xsd_validation_guide.md +0 -158
  42. package/bin/skills-assets/docx/scripts/doc_to_docx.sh +0 -40
  43. package/bin/skills-assets/docx/scripts/docx_preview.sh +0 -37
  44. package/bin/skills-assets/docx/scripts/dotnet/Docx.Cli/Docx.Cli.csproj +0 -19
  45. package/bin/skills-assets/docx/scripts/dotnet/Docx.Cli/Program.cs +0 -18
  46. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/AnalyzeCommand.cs +0 -147
  47. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/ApplyTemplateCommand.cs +0 -322
  48. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/CreateCommand.cs +0 -324
  49. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/DiffCommand.cs +0 -155
  50. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/EditContentCommand.cs +0 -487
  51. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/FixOrderCommand.cs +0 -108
  52. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/MergeRunsCommand.cs +0 -122
  53. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/ValidateCommand.cs +0 -107
  54. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Docx.Core.csproj +0 -15
  55. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/CommentSynchronizer.cs +0 -169
  56. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/ElementOrder.cs +0 -80
  57. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/NamespaceConstants.cs +0 -42
  58. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/RunMerger.cs +0 -81
  59. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/StyleAnalyzer.cs +0 -81
  60. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/TrackChangesHelper.cs +0 -99
  61. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/UnitConverter.cs +0 -23
  62. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples.cs +0 -1832
  63. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch1.cs +0 -910
  64. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch2.cs +0 -999
  65. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch3.cs +0 -1048
  66. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch4.cs +0 -1038
  67. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/CharacterFormattingSamples.cs +0 -1020
  68. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/DocumentCreationSamples.cs +0 -1121
  69. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/FieldAndTocSamples.cs +0 -624
  70. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/FootnoteAndCommentSamples.cs +0 -675
  71. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/HeaderFooterSamples.cs +0 -838
  72. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ImageSamples.cs +0 -917
  73. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ListAndNumberingSamples.cs +0 -826
  74. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ParagraphFormattingSamples.cs +0 -1199
  75. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/StyleSystemSamples.cs +0 -1487
  76. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/TableSamples.cs +0 -1163
  77. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/TrackChangesSamples.cs +0 -595
  78. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/CjkHelper.cs +0 -39
  79. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/FontDefaults.cs +0 -24
  80. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/PageSizes.cs +0 -20
  81. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/BusinessRuleValidator.cs +0 -224
  82. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/GateCheckValidator.cs +0 -148
  83. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/ValidationResult.cs +0 -23
  84. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/XsdValidator.cs +0 -69
  85. package/bin/skills-assets/docx/scripts/dotnet/Docx.slnx +0 -4
  86. package/bin/skills-assets/docx/scripts/env_check.sh +0 -196
  87. package/bin/skills-assets/docx/scripts/setup.ps1 +0 -274
  88. package/bin/skills-assets/docx/scripts/setup.sh +0 -504
  89. package/bin/skills-assets/pdf/README.md +0 -222
  90. package/bin/skills-assets/pdf/SKILL.md +0 -191
  91. package/bin/skills-assets/pdf/design/design.md +0 -381
  92. package/bin/skills-assets/pdf/scripts/cover.py +0 -1579
  93. package/bin/skills-assets/pdf/scripts/fill_inspect.py +0 -200
  94. package/bin/skills-assets/pdf/scripts/fill_write.py +0 -242
  95. package/bin/skills-assets/pdf/scripts/make.sh +0 -491
  96. package/bin/skills-assets/pdf/scripts/merge.py +0 -112
  97. package/bin/skills-assets/pdf/scripts/palette.py +0 -521
  98. package/bin/skills-assets/pdf/scripts/reformat_parse.py +0 -374
  99. package/bin/skills-assets/pdf/scripts/render_body.py +0 -1052
  100. package/bin/skills-assets/pdf/scripts/render_cover.js +0 -111
  101. package/bin/skills-assets/pptx-generator/SKILL.md +0 -248
  102. package/bin/skills-assets/pptx-generator/references/design-system.md +0 -392
  103. package/bin/skills-assets/pptx-generator/references/editing.md +0 -162
  104. package/bin/skills-assets/pptx-generator/references/pitfalls.md +0 -112
  105. package/bin/skills-assets/pptx-generator/references/pptxgenjs.md +0 -420
  106. package/bin/skills-assets/pptx-generator/references/slide-types.md +0 -413
  107. package/bin/skills-assets/skill-creator/SKILL.md +0 -368
  108. package/bin/skills-assets/skill-creator/agents/openai.yaml +0 -5
  109. package/bin/skills-assets/skill-creator/assets/skill-creator-small.svg +0 -3
  110. package/bin/skills-assets/skill-creator/assets/skill-creator.png +0 -0
  111. package/bin/skills-assets/skill-creator/license.txt +0 -202
  112. package/bin/skills-assets/skill-creator/references/openai_yaml.md +0 -49
  113. package/bin/skills-assets/skill-creator/scripts/generate_openai_yaml.py +0 -226
  114. package/bin/skills-assets/skill-creator/scripts/init_skill.py +0 -397
  115. package/bin/skills-assets/skill-creator/scripts/quick_validate.py +0 -101
  116. package/bin/skills-assets/skill-installer/LICENSE.txt +0 -202
  117. package/bin/skills-assets/skill-installer/SKILL.md +0 -58
  118. package/bin/skills-assets/skill-installer/agents/openai.yaml +0 -5
  119. package/bin/skills-assets/skill-installer/assets/skill-installer-small.svg +0 -3
  120. package/bin/skills-assets/skill-installer/assets/skill-installer.png +0 -0
  121. package/bin/skills-assets/skill-installer/scripts/github_utils.py +0 -21
  122. package/bin/skills-assets/skill-installer/scripts/install-skill-from-github.py +0 -308
  123. package/bin/skills-assets/skill-installer/scripts/list-skills.py +0 -107
  124. package/bin/skills-assets/xlsx/SKILL.md +0 -137
  125. package/bin/skills-assets/xlsx/references/create.md +0 -691
  126. package/bin/skills-assets/xlsx/references/edit.md +0 -684
  127. package/bin/skills-assets/xlsx/references/fix.md +0 -37
  128. package/bin/skills-assets/xlsx/references/format.md +0 -768
  129. package/bin/skills-assets/xlsx/references/ooxml-cheatsheet.md +0 -231
  130. package/bin/skills-assets/xlsx/references/read-analyze.md +0 -97
  131. package/bin/skills-assets/xlsx/references/validate.md +0 -772
  132. package/bin/skills-assets/xlsx/scripts/formula_check.py +0 -422
  133. package/bin/skills-assets/xlsx/scripts/libreoffice_recalc.py +0 -248
  134. package/bin/skills-assets/xlsx/scripts/shared_strings_builder.py +0 -163
  135. package/bin/skills-assets/xlsx/scripts/style_audit.py +0 -575
  136. package/bin/skills-assets/xlsx/scripts/xlsx_add_column.py +0 -395
  137. package/bin/skills-assets/xlsx/scripts/xlsx_insert_row.py +0 -274
  138. package/bin/skills-assets/xlsx/scripts/xlsx_pack.py +0 -87
  139. package/bin/skills-assets/xlsx/scripts/xlsx_reader.py +0 -362
  140. package/bin/skills-assets/xlsx/scripts/xlsx_shift_rows.py +0 -396
  141. package/bin/skills-assets/xlsx/scripts/xlsx_unpack.py +0 -130
  142. package/bin/skills-assets/xlsx/templates/minimal_xlsx/[Content_Types].xml +0 -9
  143. package/bin/skills-assets/xlsx/templates/minimal_xlsx/_rels/.rels +0 -6
  144. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/_rels/workbook.xml.rels +0 -19
  145. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/sharedStrings.xml +0 -33
  146. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/styles.xml +0 -160
  147. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/workbook.xml +0 -30
  148. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/worksheets/sheet1.xml +0 -70
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- """
4
- xlsx_pack.py — Pack a working directory back into a valid xlsx file.
5
-
6
- Usage:
7
- python3 xlsx_pack.py <source_dir> <output.xlsx>
8
-
9
- Requirements:
10
- - source_dir must contain [Content_Types].xml at its root
11
- - All XML files are re-validated for well-formedness before packing
12
-
13
- The resulting xlsx is a valid ZIP archive with correct OOXML structure.
14
- """
15
-
16
- import sys
17
- import os
18
- import zipfile
19
- import xml.etree.ElementTree as ET
20
-
21
-
22
def validate_xml_files(source_dir: str) -> list[str]:
    """Return a description of every XML part under *source_dir* that fails to parse.

    Walks ``source_dir`` recursively and parses each file whose name ends in
    ``.xml`` or ``.rels`` with ``xml.etree.ElementTree``.

    Args:
        source_dir: Root of the unpacked xlsx working directory.

    Returns:
        One ``"<relative path>: <parse error>"`` string per malformed file,
        in sorted traversal order; an empty list when every file parses.
    """
    bad = []
    for dirpath, dirnames, filenames in os.walk(source_dir):
        # Sorting in place makes os.walk descend in a stable order instead of
        # whatever order the filesystem happens to return.
        dirnames.sort()
        for fname in sorted(filenames):
            # str.endswith accepts a tuple, so one call covers both suffixes.
            if not fname.endswith((".xml", ".rels")):
                continue
            fpath = os.path.join(dirpath, fname)
            try:
                ET.parse(fpath)
            except ET.ParseError as e:
                rel = os.path.relpath(fpath, source_dir)
                bad.append(f"{rel}: {e}")
    return bad
35
-
36
-
37
def pack(source_dir: str, xlsx_path: str) -> None:
    """Zip the working directory *source_dir* into the xlsx file *xlsx_path*.

    Checks that ``source_dir`` exists and has ``[Content_Types].xml`` at its
    root, checks every XML part for well-formedness via
    ``validate_xml_files``, then writes all files into a DEFLATE-compressed
    ZIP archive. Progress is printed to stdout; errors go to stderr.

    Args:
        source_dir: Directory holding the unpacked xlsx parts.
        xlsx_path: Destination path of the archive; an existing file is
            overwritten.

    Raises:
        SystemExit: With exit code 1 when the directory is missing,
            ``[Content_Types].xml`` is absent, or any XML part fails to parse.
    """
    if not os.path.isdir(source_dir):
        print(f"ERROR: Directory not found: {source_dir}", file=sys.stderr)
        sys.exit(1)

    content_types = os.path.join(source_dir, "[Content_Types].xml")
    if not os.path.isfile(content_types):
        print(
            f"ERROR: Missing [Content_Types].xml in {source_dir}\n"
            " This file is required at the root of every valid xlsx package.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Validate XML well-formedness before packing
    print("Validating XML files...")
    bad_files = validate_xml_files(source_dir)
    if bad_files:
        print("ERROR: The following files have XML parse errors:", file=sys.stderr)
        for b in bad_files:
            print(f" {b}", file=sys.stderr)
        print(
            "\nFix all XML errors before packing. "
            "A malformed xlsx cannot be opened by Excel or LibreOffice.",
            file=sys.stderr,
        )
        sys.exit(1)

    print("✓ All XML files are well-formed")

    # Resolved once so the archive being written can be recognised if the
    # caller placed the output inside source_dir.
    output_real = os.path.realpath(xlsx_path)
    file_count = 0

    with zipfile.ZipFile(xlsx_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for dirpath, dirnames, filenames in os.walk(source_dir):
            # Stable traversal order gives a reproducible member order.
            dirnames.sort()
            for fname in sorted(filenames):
                fpath = os.path.join(dirpath, fname)
                # Never add the half-written output archive to itself.
                if os.path.realpath(fpath) == output_real:
                    continue
                z.write(fpath, os.path.relpath(fpath, source_dir))
                # Count what was actually written rather than pre-walking.
                file_count += 1

    size = os.path.getsize(xlsx_path)
    print(f"Packed {file_count} files → '{xlsx_path}' ({size:,} bytes)")
    print("\nNext step: run formula_check.py to validate formulas:")
    print(f" python3 formula_check.py {xlsx_path}")
81
-
82
-
83
# Script entry point: expects exactly two positional arguments.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Usage errors go to stderr, like every other error this script prints.
        print("Usage: xlsx_pack.py <source_dir> <output.xlsx>", file=sys.stderr)
        sys.exit(1)
    pack(sys.argv[1], sys.argv[2])
@@ -1,362 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: MIT
3
- """
4
- xlsx_reader.py — Structure discovery and data analysis tool for Excel/CSV files.
5
-
6
- Usage:
7
- python3 xlsx_reader.py <file> # full structure report
8
- python3 xlsx_reader.py <file> --sheet Sales # analyze one sheet
9
- python3 xlsx_reader.py <file> --json # machine-readable output
10
- python3 xlsx_reader.py <file> --quality # data quality audit only
11
-
12
- Supports: .xlsx, .xlsm, .csv, .tsv
13
- Does NOT modify the source file in any way.
14
-
15
- Exit codes:
16
- 0 — success
17
- 1 — file not found / unsupported format / encoding failure
18
- """
19
-
20
- import sys
21
- import json
22
- import argparse
23
- from pathlib import Path
24
-
25
-
26
- # ---------------------------------------------------------------------------
27
- # Format detection and loading
28
- # ---------------------------------------------------------------------------
29
-
30
- def detect_and_load(file_path: str, sheet_name_filter: str | None = None) -> dict:
31
- """
32
- Load file into {sheet_name: DataFrame} dict.
33
- CSV/TSV files are mapped to a single-key dict using the file stem as key.
34
-
35
- Raises ValueError for unsupported formats or encoding failures.
36
- """
37
- try:
38
- import pandas as pd
39
- except ImportError:
40
- raise RuntimeError(
41
- "pandas is not installed. Run: pip install pandas openpyxl"
42
- )
43
-
44
- path = Path(file_path)
45
- if not path.exists():
46
- raise FileNotFoundError(f"File not found: {file_path}")
47
-
48
- suffix = path.suffix.lower()
49
-
50
- if suffix in (".xlsx", ".xlsm"):
51
- target = sheet_name_filter if sheet_name_filter else None
52
- result = pd.read_excel(file_path, sheet_name=target)
53
- # pd.read_excel with sheet_name=None returns dict; with a name, returns DataFrame
54
- if isinstance(result, dict):
55
- return result
56
- else:
57
- return {sheet_name_filter: result}
58
-
59
- elif suffix in (".csv", ".tsv"):
60
- sep = "\t" if suffix == ".tsv" else ","
61
- encodings = ["utf-8-sig", "gbk", "utf-8", "latin-1"]
62
- last_error = None
63
- for enc in encodings:
64
- try:
65
- import pandas as pd
66
- df = pd.read_csv(file_path, sep=sep, encoding=enc)
67
- df._reader_encoding = enc # attach metadata (non-standard, for reporting)
68
- return {path.stem: df}
69
- except (UnicodeDecodeError, Exception) as e:
70
- last_error = e
71
- continue
72
- raise ValueError(
73
- f"Cannot decode {file_path}. Tried encodings: {encodings}. "
74
- f"Last error: {last_error}"
75
- )
76
-
77
- elif suffix == ".xls":
78
- raise ValueError(
79
- ".xls is a legacy binary format not supported by this tool. "
80
- "Please open the file in Excel and save as .xlsx, then retry."
81
- )
82
-
83
- else:
84
- raise ValueError(
85
- f"Unsupported file format: {suffix}. "
86
- "Supported formats: .xlsx, .xlsm, .csv, .tsv"
87
- )
88
-
89
-
90
- # ---------------------------------------------------------------------------
91
- # Structure discovery
92
- # ---------------------------------------------------------------------------
93
-
94
def explore_structure(sheets: dict) -> dict:
    """Describe the layout of every sheet.

    Args:
        sheets: Mapping of sheet name to DataFrame.

    Returns:
        Mapping of sheet name to a dict with the keys ``shape`` (row and
        column counts), ``columns``, ``dtypes`` (dtype names as strings),
        ``null_columns`` (count and percentage, only for columns that have
        nulls) and ``preview`` (the first five rows as records).
    """
    overview = {}
    for name, frame in sheets.items():
        # Guard the percentage against division by zero on empty sheets.
        denominator = max(len(frame), 1)

        nulls_by_column = {}
        for column, missing in frame.isnull().sum().items():
            if missing > 0:
                nulls_by_column[column] = {
                    "count": int(missing),
                    "pct": round(missing / denominator * 100, 1),
                }

        n_rows, n_cols = frame.shape
        overview[name] = {
            "shape": {"rows": n_rows, "cols": n_cols},
            "columns": list(frame.columns),
            "dtypes": {column: str(kind) for column, kind in frame.dtypes.items()},
            "null_columns": nulls_by_column,
            "preview": frame.head(5).to_dict(orient="records"),
        }
    return overview
115
-
116
-
117
- # ---------------------------------------------------------------------------
118
- # Data quality audit
119
- # ---------------------------------------------------------------------------
120
-
121
def audit_quality(sheets: dict) -> dict:
    """Collect data quality findings for every sheet.

    Checks performed per sheet, in this order: null values per column,
    fully duplicated rows, object columns that are only partly numeric,
    year-like numeric columns stored as float, and IQR outliers in numeric
    columns.

    Args:
        sheets: Mapping of sheet name to DataFrame.

    Returns:
        Mapping of sheet name to a list of finding dicts. Every finding has
        a ``type`` and a human-readable ``note``; most also carry the
        ``column`` and/or a ``count``.
    """
    import pandas as pd

    findings = {}
    for sheet_name, df in sheets.items():
        sheet_findings = []
        # Guard the percentage against division by zero on empty sheets.
        row_total = max(len(df), 1)

        # Null values
        null_counts = df.isnull().sum()
        for col, cnt in null_counts.items():
            if cnt > 0:
                pct = round(cnt / row_total * 100, 1)
                sheet_findings.append({
                    "type": "null_values",
                    "column": col,
                    "count": int(cnt),
                    "pct": pct,
                    "note": f"Column '{col}' has {cnt} null values ({pct}%). "
                            "If this column contains Excel formulas, null values may "
                            "indicate that the formula cache has not been populated "
                            "(file was never opened in Excel after the formulas were written)."
                })

        # Duplicate rows
        dup_count = int(df.duplicated().sum())
        if dup_count > 0:
            sheet_findings.append({
                "type": "duplicate_rows",
                "count": dup_count,
                "note": f"{dup_count} fully duplicate rows found."
            })

        # Mixed-type object columns (numeric data stored as text)
        for col in df.select_dtypes(include="object").columns:
            numeric_converted = pd.to_numeric(df[col], errors="coerce")
            convertible = int(numeric_converted.notna().sum())
            non_null_total = int(df[col].notna().sum())
            # Report only partial convertibility: all-text and all-numeric
            # columns are consistent.
            if 0 < convertible < non_null_total:
                sheet_findings.append({
                    "type": "mixed_type",
                    "column": col,
                    "convertible_to_numeric": convertible,
                    "non_convertible": non_null_total - convertible,
                    "note": f"Column '{col}' appears to contain mixed types: "
                            f"{convertible} values can be parsed as numbers, "
                            f"{non_null_total - convertible} cannot. "
                            "Use pd.to_numeric(df[col], errors='coerce') to unify."
                })

        # Year column formatting (e.g., 2024.0 stored as float)
        for col in df.select_dtypes(include="number").columns:
            col_lower = str(col).lower()
            # "年" is the Chinese character for "year" — detect year columns in CJK spreadsheets
            if not ("year" in col_lower or "yr" in col_lower or "年" in col_lower):
                continue
            years = df[col].dropna()
            # An all-null column would satisfy .all() vacuously and be
            # reported as a year column although it holds no years.
            if years.empty:
                continue
            if years.between(1900, 2200).all() and df[col].dtype == float:
                sheet_findings.append({
                    "type": "year_as_float",
                    "column": col,
                    "note": f"Column '{col}' appears to be a year column stored as float "
                            "(e.g., 2024.0). Convert with df[col].astype(int).astype(str) "
                            "to get clean year strings like '2024'."
                })

        # Outliers via IQR on numeric columns
        for col in df.select_dtypes(include="number").columns:
            series = df[col].dropna()
            # Skip columns with fewer than four values.
            if len(series) < 4:
                continue
            Q1, Q3 = series.quantile(0.25), series.quantile(0.75)
            IQR = Q3 - Q1
            # A zero IQR would flag every value that differs from the quartiles.
            if IQR == 0:
                continue
            lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
            outlier_mask = (df[col] < lower) | (df[col] > upper)
            outlier_count = int(outlier_mask.sum())
            if outlier_count > 0:
                sheet_findings.append({
                    "type": "outliers_iqr",
                    "column": col,
                    "count": outlier_count,
                    "note": f"Column '{col}' has {outlier_count} potential outlier(s) "
                            f"(outside 1.5×IQR bounds: [{lower:.2f}, {upper:.2f}])."
                })

        findings[sheet_name] = sheet_findings

    return findings
212
-
213
-
214
- # ---------------------------------------------------------------------------
215
- # Summary statistics
216
- # ---------------------------------------------------------------------------
217
-
218
def compute_stats(sheets: dict) -> dict:
    """Summarise the numeric columns of every sheet.

    Args:
        sheets: Mapping of sheet name to DataFrame.

    Returns:
        Mapping of sheet name to ``DataFrame.describe()`` output rounded to
        four decimals and converted to nested dicts; an empty dict for a
        sheet whose numeric selection is empty.
    """
    summary = {}
    for name, frame in sheets.items():
        numbers = frame.select_dtypes(include="number")
        summary[name] = (
            {} if numbers.empty else numbers.describe().round(4).to_dict()
        )
    return summary
229
-
230
-
231
- # ---------------------------------------------------------------------------
232
- # Human-readable report rendering
233
- # ---------------------------------------------------------------------------
234
-
235
def render_report(
    file_path: str,
    structure: dict,
    quality: dict,
    stats: dict,
) -> str:
    """Render the analysis results as a plain-text report.

    Args:
        file_path: Path of the analysed file; only its base name is shown.
        structure: Per-sheet layout dicts with the keys ``shape``,
            ``columns``, ``dtypes``, ``null_columns`` and ``preview``.
        quality: Per-sheet lists of finding dicts (``type`` and ``note``).
        stats: Per-sheet mapping of column name to descriptive statistics;
            sheets without an entry get no statistics section.

    Returns:
        The report as a single newline-joined string.
    """
    heavy_rule = "=" * 60
    light_rule = "─" * 50
    out = []
    emit = out.append

    emit(heavy_rule)
    emit(f"ANALYSIS REPORT: {Path(file_path).name}")
    emit(heavy_rule)

    # File overview
    names = list(structure)
    row_total = sum(entry["shape"]["rows"] for entry in structure.values())
    emit(f"\nSheets ({len(names)}): {', '.join(names)}")
    emit(f"Total rows across all sheets: {row_total:,}")

    for name, entry in structure.items():
        shape = entry["shape"]
        emit(f"\n{light_rule}")
        emit(f"Sheet: {name}")
        emit(light_rule)
        emit(f" Size: {shape['rows']:,} rows × {shape['cols']} cols")
        emit(f" Columns: {entry['columns']}")

        # Data types
        emit("\n Column types:")
        out.extend(f" {column}: {kind}" for column, kind in entry["dtypes"].items())

        # Nulls
        null_columns = entry["null_columns"]
        if not null_columns:
            emit("\n Null values: none")
        else:
            emit("\n Null values (columns with nulls only):")
            for column, nulls in null_columns.items():
                emit(f" {column}: {nulls['count']} nulls ({nulls['pct']}%)")

        # Stats
        per_column = stats.get(name, {})
        if per_column:
            emit("\n Numeric column statistics:")
            stat_columns = list(per_column)
            # Only the first six columns are listed to keep the report short.
            for column in stat_columns[:6]:
                values = per_column[column]
                shown = " ".join(
                    f"{key}={values.get(key, 'N/A')}"
                    for key in ("count", "mean", "min", "max")
                )
                emit(f" {column}:")
                emit(f" {shown}")
            hidden = len(stat_columns) - 6
            if hidden > 0:
                emit(f" ... and {hidden} more numeric columns")

        # Quality findings for this sheet
        issues = quality.get(name, [])
        if not issues:
            emit("\n Data quality: no issues found")
        else:
            emit(f"\n Data quality issues ({len(issues)} found):")
            for issue in issues:
                emit(f" [{issue['type'].upper()}] {issue['note']}")

        # Preview
        preview_rows = entry["preview"]
        if preview_rows:
            emit("\n Preview (first 3 rows):")
            # pandas is imported only when there is a preview to tabulate.
            import pandas as pd
            table = pd.DataFrame(preview_rows[:3]).to_string(index=False)
            out.extend(f" {row}" for row in table.splitlines())

    emit("\n" + heavy_rule)
    issue_total = sum(map(len, quality.values()))
    if issue_total:
        emit(f"RESULT: {issue_total} data quality issue(s) found. See details above.")
    else:
        emit("RESULT: No data quality issues detected.")
    emit(heavy_rule)

    return "\n".join(out)
316
-
317
-
318
- # ---------------------------------------------------------------------------
319
- # CLI entry point
320
- # ---------------------------------------------------------------------------
321
-
322
def main() -> None:
    """Parse the command line, analyse the file and print the result.

    Loads the file, always runs structure discovery and the quality audit,
    and computes statistics unless ``--quality`` is given. Prints JSON with
    ``--json``, otherwise the text report. Load failures are reported on
    stderr and end the process with exit code 1.
    """
    cli = argparse.ArgumentParser(
        description="Read and analyze Excel/CSV files without modifying them."
    )
    cli.add_argument("file", help="Path to .xlsx, .xlsm, .csv, or .tsv file")
    cli.add_argument("--sheet", default=None, help="Analyze a specific sheet only")
    cli.add_argument(
        "--json", action="store_true", help="Output machine-readable JSON"
    )
    cli.add_argument(
        "--quality",
        action="store_true",
        help="Run data quality audit only (skip stats)",
    )
    options = cli.parse_args()

    try:
        sheets = detect_and_load(options.file, sheet_name_filter=options.sheet)
    except (FileNotFoundError, ValueError, RuntimeError) as err:
        print(f"ERROR: {err}", file=sys.stderr)
        sys.exit(1)

    structure = explore_structure(sheets)
    quality = audit_quality(sheets)
    if options.quality:
        stats = {}
    else:
        stats = compute_stats(sheets)

    if not options.json:
        print(render_report(options.file, structure, quality, stats))
        return

    payload = {
        "file": options.file,
        "structure": structure,
        "quality": quality,
        "stats": stats,
    }
    # default=str stringifies any value json cannot serialise natively.
    print(json.dumps(payload, indent=2, ensure_ascii=False, default=str))
359
-
360
-
361
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()