@nguyenphp/antigravity-marketing 1.0.18 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/README.md +186 -78
  2. package/package.json +4 -3
  3. package/templates/.agent/skills/marketing-report-expert/SKILL.md +70 -0
  4. package/templates/.agent/skills/minimax-docx/LICENSE +21 -0
  5. package/templates/.agent/skills/minimax-docx/SKILL.md +274 -0
  6. package/templates/.agent/skills/minimax-docx/assets/styles/academic_styles.xml +250 -0
  7. package/templates/.agent/skills/minimax-docx/assets/styles/corporate_styles.xml +284 -0
  8. package/templates/.agent/skills/minimax-docx/assets/styles/default_styles.xml +449 -0
  9. package/templates/.agent/skills/minimax-docx/assets/xsd/aesthetic-rules.xsd +470 -0
  10. package/templates/.agent/skills/minimax-docx/assets/xsd/business-rules.xsd +130 -0
  11. package/templates/.agent/skills/minimax-docx/assets/xsd/common-types.xsd +159 -0
  12. package/templates/.agent/skills/minimax-docx/assets/xsd/wml-subset.xsd +589 -0
  13. package/templates/.agent/skills/minimax-docx/references/cjk_typography.md +357 -0
  14. package/templates/.agent/skills/minimax-docx/references/cjk_university_template_guide.md +184 -0
  15. package/templates/.agent/skills/minimax-docx/references/comments_guide.md +191 -0
  16. package/templates/.agent/skills/minimax-docx/references/design_good_bad_examples.md +829 -0
  17. package/templates/.agent/skills/minimax-docx/references/design_principles.md +819 -0
  18. package/templates/.agent/skills/minimax-docx/references/openxml_element_order.md +308 -0
  19. package/templates/.agent/skills/minimax-docx/references/openxml_encyclopedia_part1.md +4061 -0
  20. package/templates/.agent/skills/minimax-docx/references/openxml_encyclopedia_part2.md +2820 -0
  21. package/templates/.agent/skills/minimax-docx/references/openxml_encyclopedia_part3.md +3381 -0
  22. package/templates/.agent/skills/minimax-docx/references/openxml_namespaces.md +82 -0
  23. package/templates/.agent/skills/minimax-docx/references/openxml_units.md +72 -0
  24. package/templates/.agent/skills/minimax-docx/references/scenario_a_create.md +284 -0
  25. package/templates/.agent/skills/minimax-docx/references/scenario_b_edit_content.md +295 -0
  26. package/templates/.agent/skills/minimax-docx/references/scenario_c_apply_template.md +456 -0
  27. package/templates/.agent/skills/minimax-docx/references/track_changes_guide.md +200 -0
  28. package/templates/.agent/skills/minimax-docx/references/troubleshooting.md +506 -0
  29. package/templates/.agent/skills/minimax-docx/references/typography_guide.md +294 -0
  30. package/templates/.agent/skills/minimax-docx/references/xsd_validation_guide.md +158 -0
  31. package/templates/.agent/skills/minimax-docx/scripts/doc_to_docx.sh +40 -0
  32. package/templates/.agent/skills/minimax-docx/scripts/docx_preview.sh +37 -0
  33. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Cli/MiniMaxAIDocx.Cli.csproj +19 -0
  34. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Cli/Program.cs +18 -0
  35. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/AnalyzeCommand.cs +147 -0
  36. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/ApplyTemplateCommand.cs +322 -0
  37. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/CreateCommand.cs +324 -0
  38. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/DiffCommand.cs +155 -0
  39. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/EditContentCommand.cs +487 -0
  40. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/FixOrderCommand.cs +108 -0
  41. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/MergeRunsCommand.cs +122 -0
  42. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Commands/ValidateCommand.cs +107 -0
  43. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/MiniMaxAIDocx.Core.csproj +15 -0
  44. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/CommentSynchronizer.cs +169 -0
  45. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/ElementOrder.cs +80 -0
  46. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/NamespaceConstants.cs +42 -0
  47. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/RunMerger.cs +81 -0
  48. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/StyleAnalyzer.cs +81 -0
  49. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/TrackChangesHelper.cs +99 -0
  50. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/OpenXml/UnitConverter.cs +23 -0
  51. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/AestheticRecipeSamples.cs +1832 -0
  52. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/AestheticRecipeSamples_Batch1.cs +910 -0
  53. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/AestheticRecipeSamples_Batch2.cs +999 -0
  54. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/AestheticRecipeSamples_Batch3.cs +1048 -0
  55. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/AestheticRecipeSamples_Batch4.cs +1038 -0
  56. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/CharacterFormattingSamples.cs +1020 -0
  57. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/DocumentCreationSamples.cs +1121 -0
  58. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/FieldAndTocSamples.cs +624 -0
  59. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/FootnoteAndCommentSamples.cs +675 -0
  60. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/HeaderFooterSamples.cs +838 -0
  61. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/ImageSamples.cs +917 -0
  62. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/ListAndNumberingSamples.cs +826 -0
  63. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/ParagraphFormattingSamples.cs +1199 -0
  64. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/StyleSystemSamples.cs +1487 -0
  65. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/TableSamples.cs +1163 -0
  66. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Samples/TrackChangesSamples.cs +595 -0
  67. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Typography/CjkHelper.cs +39 -0
  68. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Typography/FontDefaults.cs +24 -0
  69. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Typography/PageSizes.cs +20 -0
  70. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Validation/BusinessRuleValidator.cs +224 -0
  71. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Validation/GateCheckValidator.cs +148 -0
  72. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Validation/ValidationResult.cs +23 -0
  73. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.Core/Validation/XsdValidator.cs +69 -0
  74. package/templates/.agent/skills/minimax-docx/scripts/dotnet/MiniMaxAIDocx.slnx +4 -0
  75. package/templates/.agent/skills/minimax-docx/scripts/env_check.sh +196 -0
  76. package/templates/.agent/skills/minimax-docx/scripts/setup.ps1 +274 -0
  77. package/templates/.agent/skills/minimax-docx/scripts/setup.sh +504 -0
  78. package/templates/.agent/skills/minimax-multimodal-toolkit/SKILL.md +359 -0
  79. package/templates/.agent/skills/minimax-pdf/README.md +222 -0
  80. package/templates/.agent/skills/minimax-pdf/SKILL.md +201 -0
  81. package/templates/.agent/skills/minimax-pdf/design/design.md +381 -0
  82. package/templates/.agent/skills/minimax-pdf/scripts/cover.py +1579 -0
  83. package/templates/.agent/skills/minimax-pdf/scripts/fill_inspect.py +200 -0
  84. package/templates/.agent/skills/minimax-pdf/scripts/fill_write.py +242 -0
  85. package/templates/.agent/skills/minimax-pdf/scripts/make.sh +491 -0
  86. package/templates/.agent/skills/minimax-pdf/scripts/merge.py +112 -0
  87. package/templates/.agent/skills/minimax-pdf/scripts/palette.py +559 -0
  88. package/templates/.agent/skills/minimax-pdf/scripts/reformat_parse.py +374 -0
  89. package/templates/.agent/skills/minimax-pdf/scripts/render_body.py +1055 -0
  90. package/templates/.agent/skills/minimax-pdf/scripts/render_cover.cjs +111 -0
  91. package/templates/.agent/skills/minimax-xlsx/SKILL.md +138 -0
  92. package/templates/.agent/skills/minimax-xlsx/references/create.md +691 -0
  93. package/templates/.agent/skills/minimax-xlsx/references/edit.md +684 -0
  94. package/templates/.agent/skills/minimax-xlsx/references/fix.md +37 -0
  95. package/templates/.agent/skills/minimax-xlsx/references/format.md +768 -0
  96. package/templates/.agent/skills/minimax-xlsx/references/ooxml-cheatsheet.md +231 -0
  97. package/templates/.agent/skills/minimax-xlsx/references/read-analyze.md +97 -0
  98. package/templates/.agent/skills/minimax-xlsx/references/validate.md +772 -0
  99. package/templates/.agent/skills/minimax-xlsx/scripts/formula_check.py +422 -0
  100. package/templates/.agent/skills/minimax-xlsx/scripts/libreoffice_recalc.py +248 -0
  101. package/templates/.agent/skills/minimax-xlsx/scripts/shared_strings_builder.py +163 -0
  102. package/templates/.agent/skills/minimax-xlsx/scripts/style_audit.py +575 -0
  103. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_add_column.py +395 -0
  104. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_insert_row.py +274 -0
  105. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_pack.py +87 -0
  106. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_reader.py +362 -0
  107. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_shift_rows.py +396 -0
  108. package/templates/.agent/skills/minimax-xlsx/scripts/xlsx_unpack.py +130 -0
  109. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/[Content_Types].xml +9 -0
  110. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/_rels/.rels +6 -0
  111. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/xl/_rels/workbook.xml.rels +19 -0
  112. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/xl/sharedStrings.xml +33 -0
  113. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/xl/styles.xml +160 -0
  114. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/xl/workbook.xml +30 -0
  115. package/templates/.agent/skills/minimax-xlsx/templates/minimal_xlsx/xl/worksheets/sheet1.xml +70 -0
  116. package/templates/.agent/skills/pptx-generator/SKILL.md +249 -0
  117. package/templates/.agent/skills/pptx-generator/references/design-system.md +392 -0
  118. package/templates/.agent/skills/pptx-generator/references/editing.md +162 -0
  119. package/templates/.agent/skills/pptx-generator/references/pitfalls.md +112 -0
  120. package/templates/.agent/skills/pptx-generator/references/pptxgenjs.md +420 -0
  121. package/templates/.agent/skills/pptx-generator/references/slide-types.md +413 -0
  122. package/templates/.agent/skills/tutorial-video-expert/SKILL.md +88 -0
  123. package/templates/.agent/skills/ui-ux-pro-max/SKILL.md +170 -585
  124. package/templates/.agent/skills/vision-analysis/SKILL.md +174 -0
  125. package/templates/.agent/workflows/analyze.md +3 -0
  126. package/templates/.agent/workflows/brand-report.md +44 -0
  127. package/templates/.agent/workflows/report.md +49 -0
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ xlsx_pack.py — Pack a working directory back into a valid xlsx file.
5
+
6
+ Usage:
7
+ python3 xlsx_pack.py <source_dir> <output.xlsx>
8
+
9
+ Requirements:
10
+ - source_dir must contain [Content_Types].xml at its root
11
+ - All XML files are re-validated for well-formedness before packing
12
+
13
+ The resulting xlsx is a valid ZIP archive with correct OOXML structure.
14
+ """
15
+
16
+ import sys
17
+ import os
18
+ import zipfile
19
+ import xml.etree.ElementTree as ET
20
+
21
+
22
+ def validate_xml_files(source_dir: str) -> list[str]:
23
+ """Return list of XML files that fail to parse."""
24
+ bad = []
25
+ for dirpath, _, filenames in os.walk(source_dir):
26
+ for fname in filenames:
27
+ if fname.endswith(".xml") or fname.endswith(".rels"):
28
+ fpath = os.path.join(dirpath, fname)
29
+ try:
30
+ ET.parse(fpath)
31
+ except ET.ParseError as e:
32
+ rel = os.path.relpath(fpath, source_dir)
33
+ bad.append(f"{rel}: {e}")
34
+ return bad
35
+
36
+
37
def pack(source_dir: str, xlsx_path: str) -> None:
    """Validate *source_dir* and zip its contents into *xlsx_path*.

    Steps:
      1. Verify the directory exists and contains [Content_Types].xml.
      2. Check every .xml/.rels part for XML well-formedness.
      3. Write all files into a deflate-compressed ZIP (the xlsx container).

    Prints progress to stdout; on any failure prints to stderr and exits
    with status 1 (this is a CLI helper, not a library function).
    """
    if not os.path.isdir(source_dir):
        print(f"ERROR: Directory not found: {source_dir}", file=sys.stderr)
        sys.exit(1)

    content_types = os.path.join(source_dir, "[Content_Types].xml")
    if not os.path.isfile(content_types):
        print(
            f"ERROR: Missing [Content_Types].xml in {source_dir}\n"
            "  This file is required at the root of every valid xlsx package.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Validate XML well-formedness before packing: a single malformed part
    # makes the whole workbook unopenable in Excel/LibreOffice.
    print("Validating XML files...")
    bad_files = validate_xml_files(source_dir)
    if bad_files:
        print("ERROR: The following files have XML parse errors:", file=sys.stderr)
        for b in bad_files:
            print(f"  {b}", file=sys.stderr)
        print(
            "\nFix all XML errors before packing. "
            "A malformed xlsx cannot be opened by Excel or LibreOffice.",
            file=sys.stderr,
        )
        sys.exit(1)

    print("✓ All XML files are well-formed")

    # BUGFIX: if xlsx_path points inside source_dir, os.walk would pick up
    # the half-written archive and pack it into itself, corrupting the
    # output. Skip that entry, and count only the files actually written.
    out_abs = os.path.abspath(xlsx_path)
    file_count = 0
    with zipfile.ZipFile(xlsx_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for dirpath, _, filenames in os.walk(source_dir):
            for fname in filenames:
                fpath = os.path.join(dirpath, fname)
                if os.path.abspath(fpath) == out_abs:
                    continue
                # relpath keeps the OPC-internal layout; ZipFile normalizes
                # OS path separators to "/" in archive names.
                arcname = os.path.relpath(fpath, source_dir)
                z.write(fpath, arcname)
                file_count += 1

    size = os.path.getsize(xlsx_path)
    print(f"Packed {file_count} files → '{xlsx_path}' ({size:,} bytes)")
    print("\nNext step: run formula_check.py to validate formulas:")
    print(f"  python3 formula_check.py {xlsx_path}")
81
+
82
+
83
+ if __name__ == "__main__":
84
+ if len(sys.argv) != 3:
85
+ print("Usage: xlsx_pack.py <source_dir> <output.xlsx>")
86
+ sys.exit(1)
87
+ pack(sys.argv[1], sys.argv[2])
@@ -0,0 +1,362 @@
1
+ #!/usr/bin/env python3
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ xlsx_reader.py — Structure discovery and data analysis tool for Excel/CSV files.
5
+
6
+ Usage:
7
+ python3 xlsx_reader.py <file> # full structure report
8
+ python3 xlsx_reader.py <file> --sheet Sales # analyze one sheet
9
+ python3 xlsx_reader.py <file> --json # machine-readable output
10
+ python3 xlsx_reader.py <file> --quality # data quality audit only
11
+
12
+ Supports: .xlsx, .xlsm, .csv, .tsv
13
+ Does NOT modify the source file in any way.
14
+
15
+ Exit codes:
16
+ 0 — success
17
+ 1 — file not found / unsupported format / encoding failure
18
+ """
19
+
20
+ import sys
21
+ import json
22
+ import argparse
23
+ from pathlib import Path
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Format detection and loading
28
+ # ---------------------------------------------------------------------------
29
+
30
+ def detect_and_load(file_path: str, sheet_name_filter: str | None = None) -> dict:
31
+ """
32
+ Load file into {sheet_name: DataFrame} dict.
33
+ CSV/TSV files are mapped to a single-key dict using the file stem as key.
34
+
35
+ Raises ValueError for unsupported formats or encoding failures.
36
+ """
37
+ try:
38
+ import pandas as pd
39
+ except ImportError:
40
+ raise RuntimeError(
41
+ "pandas is not installed. Run: pip install pandas openpyxl"
42
+ )
43
+
44
+ path = Path(file_path)
45
+ if not path.exists():
46
+ raise FileNotFoundError(f"File not found: {file_path}")
47
+
48
+ suffix = path.suffix.lower()
49
+
50
+ if suffix in (".xlsx", ".xlsm"):
51
+ target = sheet_name_filter if sheet_name_filter else None
52
+ result = pd.read_excel(file_path, sheet_name=target)
53
+ # pd.read_excel with sheet_name=None returns dict; with a name, returns DataFrame
54
+ if isinstance(result, dict):
55
+ return result
56
+ else:
57
+ return {sheet_name_filter: result}
58
+
59
+ elif suffix in (".csv", ".tsv"):
60
+ sep = "\t" if suffix == ".tsv" else ","
61
+ encodings = ["utf-8-sig", "gbk", "utf-8", "latin-1"]
62
+ last_error = None
63
+ for enc in encodings:
64
+ try:
65
+ import pandas as pd
66
+ df = pd.read_csv(file_path, sep=sep, encoding=enc)
67
+ df._reader_encoding = enc # attach metadata (non-standard, for reporting)
68
+ return {path.stem: df}
69
+ except (UnicodeDecodeError, Exception) as e:
70
+ last_error = e
71
+ continue
72
+ raise ValueError(
73
+ f"Cannot decode {file_path}. Tried encodings: {encodings}. "
74
+ f"Last error: {last_error}"
75
+ )
76
+
77
+ elif suffix == ".xls":
78
+ raise ValueError(
79
+ ".xls is a legacy binary format not supported by this tool. "
80
+ "Please open the file in Excel and save as .xlsx, then retry."
81
+ )
82
+
83
+ else:
84
+ raise ValueError(
85
+ f"Unsupported file format: {suffix}. "
86
+ "Supported formats: .xlsx, .xlsm, .csv, .tsv"
87
+ )
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Structure discovery
92
+ # ---------------------------------------------------------------------------
93
+
94
def explore_structure(sheets: dict) -> dict:
    """Describe each sheet's structure.

    Maps sheet name -> {shape, columns, dtypes, null_columns, preview},
    where null_columns lists only columns that contain at least one null.
    """
    report = {}
    for name, frame in sheets.items():
        rows, cols = frame.shape
        denominator = max(len(frame), 1)  # avoid div-by-zero on empty frames
        nulls = {}
        for column, cnt in frame.isnull().sum().items():
            if cnt > 0:
                nulls[column] = {
                    "count": int(cnt),
                    "pct": round(cnt / denominator * 100, 1),
                }
        report[name] = {
            "shape": {"rows": rows, "cols": cols},
            "columns": list(frame.columns),
            "dtypes": {c: str(t) for c, t in frame.dtypes.items()},
            "null_columns": nulls,
            "preview": frame.head(5).to_dict(orient="records"),
        }
    return report
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Data quality audit
119
+ # ---------------------------------------------------------------------------
120
+
121
def _check_nulls(df) -> list:
    """Flag columns containing null values (possibly an unpopulated formula cache)."""
    out = []
    denom = max(len(df), 1)
    for col, cnt in df.isnull().sum().items():
        if cnt > 0:
            pct = round(cnt / denom * 100, 1)
            out.append({
                "type": "null_values",
                "column": col,
                "count": int(cnt),
                "pct": pct,
                "note": f"Column '{col}' has {cnt} null values ({pct}%). "
                        "If this column contains Excel formulas, null values may "
                        "indicate that the formula cache has not been populated "
                        "(file was never opened in Excel after the formulas were written)."
            })
    return out


def _check_duplicates(df) -> list:
    """Flag fully duplicated rows."""
    dup_count = int(df.duplicated().sum())
    if dup_count == 0:
        return []
    return [{
        "type": "duplicate_rows",
        "count": dup_count,
        "note": f"{dup_count} fully duplicate rows found."
    }]


def _check_mixed_types(df, pd) -> list:
    """Flag object columns where only some values parse as numbers."""
    out = []
    for col in df.select_dtypes(include="object").columns:
        numeric_converted = pd.to_numeric(df[col], errors="coerce")
        convertible = int(numeric_converted.notna().sum())
        non_null_total = int(df[col].notna().sum())
        if 0 < convertible < non_null_total:
            out.append({
                "type": "mixed_type",
                "column": col,
                "convertible_to_numeric": convertible,
                "non_convertible": non_null_total - convertible,
                "note": f"Column '{col}' appears to contain mixed types: "
                        f"{convertible} values can be parsed as numbers, "
                        f"{non_null_total - convertible} cannot. "
                        "Use pd.to_numeric(df[col], errors='coerce') to unify."
            })
    return out


def _check_year_columns(df) -> list:
    """Flag year-like numeric columns stored as float (e.g. 2024.0)."""
    out = []
    for col in df.select_dtypes(include="number").columns:
        col_lower = str(col).lower()
        # "年" is the Chinese character for "year" — detect year columns in CJK spreadsheets
        if "year" in col_lower or "yr" in col_lower or "年" in col_lower:
            values = df[col].dropna()
            # BUGFIX: an all-null column previously passed .between(...).all()
            # vacuously (all() on an empty series is True) and was flagged;
            # require at least one real value.
            if not values.empty and values.between(1900, 2200).all():
                if df[col].dtype == float:
                    out.append({
                        "type": "year_as_float",
                        "column": col,
                        "note": f"Column '{col}' appears to be a year column stored as float "
                                "(e.g., 2024.0). Convert with df[col].astype(int).astype(str) "
                                "to get clean year strings like '2024'."
                    })
    return out


def _check_outliers(df) -> list:
    """Flag numeric values outside the 1.5×IQR Tukey fences."""
    out = []
    for col in df.select_dtypes(include="number").columns:
        series = df[col].dropna()
        if len(series) < 4:
            continue  # too few points for meaningful quartiles
        Q1, Q3 = series.quantile(0.25), series.quantile(0.75)
        IQR = Q3 - Q1
        if IQR == 0:
            continue  # constant-ish column: fences would flag everything
        outlier_mask = (df[col] < Q1 - 1.5 * IQR) | (df[col] > Q3 + 1.5 * IQR)
        outlier_count = int(outlier_mask.sum())
        if outlier_count > 0:
            out.append({
                "type": "outliers_iqr",
                "column": col,
                "count": outlier_count,
                "note": f"Column '{col}' has {outlier_count} potential outlier(s) "
                        f"(outside 1.5×IQR bounds: [{Q1 - 1.5*IQR:.2f}, {Q3 + 1.5*IQR:.2f}])."
            })
    return out


def audit_quality(sheets: dict) -> dict:
    """Return data quality findings per sheet.

    Runs five checks per sheet — nulls, duplicate rows, mixed-type columns,
    year-columns stored as float, and IQR outliers — and maps each sheet
    name to its (possibly empty) list of finding dicts, in that order.
    """
    import pandas as pd

    findings = {}
    for sheet_name, df in sheets.items():
        findings[sheet_name] = (
            _check_nulls(df)
            + _check_duplicates(df)
            + _check_mixed_types(df, pd)
            + _check_year_columns(df)
            + _check_outliers(df)
        )
    return findings
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # Summary statistics
216
+ # ---------------------------------------------------------------------------
217
+
218
def compute_stats(sheets: dict) -> dict:
    """Per-sheet descriptive statistics (pandas ``describe``) for numeric columns.

    Sheets with no numeric columns map to an empty dict.
    """
    summary = {}
    for name, frame in sheets.items():
        numerics = frame.select_dtypes(include="number")
        if numerics.empty:
            summary[name] = {}
        else:
            summary[name] = numerics.describe().round(4).to_dict()
    return summary
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # Human-readable report rendering
233
+ # ---------------------------------------------------------------------------
234
+
235
def render_report(
    file_path: str,
    structure: dict,
    quality: dict,
    stats: dict,
) -> str:
    """Render a human-readable analysis report as a single string.

    Args:
        file_path: original input path; only its basename appears in the header.
        structure: output of explore_structure() — per-sheet shape/columns/
            dtypes/null_columns/preview.
        quality: output of audit_quality() — per-sheet list of finding dicts.
        stats: output of compute_stats() — per-sheet describe() dicts
            (may be empty when --quality was requested).

    Returns:
        The full report text, sections separated by rule lines.
    """
    lines = []
    p = lines.append  # shorthand: every p(...) emits one report line

    p("=" * 60)
    p(f"ANALYSIS REPORT: {Path(file_path).name}")
    p("=" * 60)

    # File overview
    sheet_list = list(structure.keys())
    total_rows = sum(s["shape"]["rows"] for s in structure.values())
    p(f"\nSheets ({len(sheet_list)}): {', '.join(sheet_list)}")
    p(f"Total rows across all sheets: {total_rows:,}")

    for sheet_name, info in structure.items():
        p(f"\n{'─' * 50}")
        p(f"Sheet: {sheet_name}")
        p(f"{'─' * 50}")
        p(f"  Size: {info['shape']['rows']:,} rows × {info['shape']['cols']} cols")
        p(f"  Columns: {info['columns']}")

        # Data types
        p("\n  Column types:")
        for col, dtype in info["dtypes"].items():
            p(f"    {col}: {dtype}")

        # Nulls — structure already filtered to columns that have nulls
        if info["null_columns"]:
            p("\n  Null values (columns with nulls only):")
            for col, null_info in info["null_columns"].items():
                p(f"    {col}: {null_info['count']} nulls ({null_info['pct']}%)")
        else:
            p("\n  Null values: none")

        # Stats (absent when the caller skipped compute_stats)
        sheet_stats = stats.get(sheet_name, {})
        if sheet_stats:
            p("\n  Numeric column statistics:")
            numeric_cols = list(sheet_stats.keys())
            # Show only first 6 to keep report readable
            for col in numeric_cols[:6]:
                col_stats = sheet_stats[col]
                p(f"    {col}:")
                p(f"      count={col_stats.get('count', 'N/A')} "
                  f"mean={col_stats.get('mean', 'N/A')} "
                  f"min={col_stats.get('min', 'N/A')} "
                  f"max={col_stats.get('max', 'N/A')}")
            if len(numeric_cols) > 6:
                p(f"    ... and {len(numeric_cols) - 6} more numeric columns")

        # Quality findings for this sheet
        sheet_quality = quality.get(sheet_name, [])
        if sheet_quality:
            p(f"\n  Data quality issues ({len(sheet_quality)} found):")
            for finding in sheet_quality:
                p(f"    [{finding['type'].upper()}] {finding['note']}")
        else:
            p("\n  Data quality: no issues found")

        # Preview — re-built as a DataFrame purely for aligned text rendering
        if info["preview"]:
            p("\n  Preview (first 3 rows):")
            import pandas as pd
            preview_df = pd.DataFrame(info["preview"][:3])
            for line in preview_df.to_string(index=False).splitlines():
                p(f"    {line}")

    p("\n" + "=" * 60)
    quality_issue_count = sum(len(v) for v in quality.values())
    if quality_issue_count == 0:
        p("RESULT: No data quality issues detected.")
    else:
        p(f"RESULT: {quality_issue_count} data quality issue(s) found. See details above.")
    p("=" * 60)

    return "\n".join(lines)
316
+
317
+
318
+ # ---------------------------------------------------------------------------
319
+ # CLI entry point
320
+ # ---------------------------------------------------------------------------
321
+
322
def main() -> None:
    """CLI entry point: parse arguments, load the file, print the analysis."""
    cli = argparse.ArgumentParser(
        description="Read and analyze Excel/CSV files without modifying them."
    )
    cli.add_argument("file", help="Path to .xlsx, .xlsm, .csv, or .tsv file")
    cli.add_argument("--sheet", help="Analyze a specific sheet only", default=None)
    cli.add_argument("--json", action="store_true", help="Output machine-readable JSON")
    cli.add_argument(
        "--quality", action="store_true",
        help="Run data quality audit only (skip stats)"
    )
    opts = cli.parse_args()

    try:
        sheets = detect_and_load(opts.file, sheet_name_filter=opts.sheet)
    except (FileNotFoundError, ValueError, RuntimeError) as err:
        print(f"ERROR: {err}", file=sys.stderr)
        sys.exit(1)

    structure = explore_structure(sheets)
    quality = audit_quality(sheets)
    stats = {} if opts.quality else compute_stats(sheets)

    if opts.json:
        payload = {
            "file": opts.file,
            "structure": structure,
            "quality": quality,
            "stats": stats,
        }
        # default=str handles values json can't serialize natively
        print(json.dumps(payload, indent=2, ensure_ascii=False, default=str))
    else:
        print(render_report(opts.file, structure, quality, stats))
359
+
360
+
361
+ if __name__ == "__main__":
362
+ main()