@heylemon/lemonade 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,183 +1,193 @@
1
+ #!/usr/bin/env python3
1
2
  """
2
- Excel Formula Recalculation Script
3
- Recalculates all formulas in an Excel file using LibreOffice
3
+ Recalculate formulas in Excel files using LibreOffice.
4
+
5
+ Sets up a LibreOffice macro to recalculate all formulas and save the file.
6
+ Scans for Excel error values and returns detailed diagnostics.
7
+
8
+ Usage:
9
+ python recalc.py file.xlsx [timeout_seconds]
10
+
11
+ Output:
12
+ JSON report with status, total errors, error locations, etc.
4
13
  """
5
14
 
6
15
  import json
16
+ import sys
7
17
  import os
8
- import platform
9
18
  import subprocess
10
- import sys
19
+ import tempfile
11
20
  from pathlib import Path
12
21
 
13
- from office.soffice import get_soffice_env
22
+ try:
23
+ from openpyxl import load_workbook
24
+ except ImportError:
25
+ print("ERROR: openpyxl not installed. Run: pip install openpyxl --break-system-packages")
26
+ sys.exit(1)
14
27
 
15
- from openpyxl import load_workbook
16
28
 
17
- MACRO_DIR_MACOS = "~/Library/Application Support/LibreOffice/4/user/basic/Standard"
18
- MACRO_DIR_LINUX = "~/.config/libreoffice/4/user/basic/Standard"
19
- MACRO_FILENAME = "Module1.xba"
29
+ MACRO_CODE = '''
30
+ Sub RecalculateAndSave()
31
+ ThisComponent.calculateAll()
32
+ ThisComponent.storeToURL(ThisComponent.getURL(), Array())
33
+ End Sub
34
+ '''
20
35
 
21
- RECALCULATE_MACRO = """<?xml version="1.0" encoding="UTF-8"?>
22
- <!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
23
- <script:module xmlns:script="http://openoffice.org/2000/script" script:name="Module1" script:language="StarBasic">
24
- Sub RecalculateAndSave()
25
- ThisComponent.calculateAll()
26
- ThisComponent.store()
27
- ThisComponent.close(True)
28
- End Sub
29
- </script:module>"""
30
36
 
37
+ def create_macro_file(macro_dir):
38
+ """Create LibreOffice macro for recalculation."""
39
+ macro_path = Path(macro_dir) / "Standard" / "Module1.xba"
40
+ macro_path.parent.mkdir(parents=True, exist_ok=True)
31
41
 
32
- def has_gtimeout():
33
- try:
34
- subprocess.run(
35
- ["gtimeout", "--version"], capture_output=True, timeout=1, check=False
36
- )
37
- return True
38
- except (FileNotFoundError, subprocess.TimeoutExpired):
39
- return False
40
-
41
-
42
- def setup_libreoffice_macro():
43
- macro_dir = os.path.expanduser(
44
- MACRO_DIR_MACOS if platform.system() == "Darwin" else MACRO_DIR_LINUX
45
- )
46
- macro_file = os.path.join(macro_dir, MACRO_FILENAME)
47
-
48
- if (
49
- os.path.exists(macro_file)
50
- and "RecalculateAndSave" in Path(macro_file).read_text()
51
- ):
52
- return True
53
-
54
- if not os.path.exists(macro_dir):
55
- subprocess.run(
56
- ["soffice", "--headless", "--terminate_after_init"],
57
- capture_output=True,
58
- timeout=10,
59
- env=get_soffice_env(),
60
- )
61
- os.makedirs(macro_dir, exist_ok=True)
62
-
63
- try:
64
- Path(macro_file).write_text(RECALCULATE_MACRO)
65
- return True
66
- except Exception:
67
- return False
68
-
69
-
70
- def recalc(filename, timeout=30):
71
- if not Path(filename).exists():
72
- return {"error": f"File {filename} does not exist"}
73
-
74
- abs_path = str(Path(filename).absolute())
42
+ macro_content = '''<?xml version="1.0" encoding="UTF-8"?>
43
+ <!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
44
+ <script:module xmlns:script="http://openoffice.org/2000/script" script:name="Module1" script:language="Basic">
45
+ <script:code>Sub RecalculateAndSave()
46
+ ThisComponent.calculateAll()
47
+ ThisComponent.storeToURL(ThisComponent.getURL(), Array())
48
+ End Sub
49
+ </script:code>
50
+ </script:module>'''
75
51
 
76
- if not setup_libreoffice_macro():
77
- return {"error": "Failed to setup LibreOffice macro"}
52
+ with open(macro_path, 'w') as f:
53
+ f.write(macro_content)
78
54
 
79
- cmd = [
80
- "soffice",
81
- "--headless",
82
- "--norestore",
83
- "vnd.sun.star.script:Standard.Module1.RecalculateAndSave?language=Basic&location=application",
84
- abs_path,
85
- ]
55
+ return str(macro_path)
86
56
 
87
- if platform.system() == "Linux":
88
- cmd = ["timeout", str(timeout)] + cmd
89
- elif platform.system() == "Darwin" and has_gtimeout():
90
- cmd = ["gtimeout", str(timeout)] + cmd
91
57
 
92
- result = subprocess.run(cmd, capture_output=True, text=True, env=get_soffice_env())
58
+ def recalculate_with_libreoffice(xlsx_path, timeout=30):
59
+ """Use LibreOffice to recalculate formulas."""
60
+ xlsx_abs = os.path.abspath(xlsx_path)
93
61
 
94
- if result.returncode != 0 and result.returncode != 124:
95
- error_msg = result.stderr or "Unknown error during recalculation"
96
- if "Module1" in error_msg or "RecalculateAndSave" not in error_msg:
97
- return {"error": "LibreOffice macro not configured properly"}
98
- return {"error": error_msg}
62
+ if not os.path.exists(xlsx_abs):
63
+ return {
64
+ "status": "error",
65
+ "message": f"File not found: {xlsx_abs}",
66
+ "file": xlsx_path
67
+ }
99
68
 
100
69
  try:
101
- wb = load_workbook(filename, data_only=True)
102
-
103
- excel_errors = [
104
- "#VALUE!",
105
- "#DIV/0!",
106
- "#REF!",
107
- "#NAME?",
108
- "#NULL!",
109
- "#NUM!",
110
- "#N/A",
70
+ # Run LibreOffice headless to recalculate
71
+ cmd = [
72
+ 'soffice',
73
+ '--headless',
74
+ '--invisible',
75
+ '--norestore',
76
+ '--calc',
77
+ '--macro', 'vnd.sun.star.script:Standard.Module1.RecalculateAndSave?language=Basic&location=application',
78
+ xlsx_abs
111
79
  ]
112
- error_details = {err: [] for err in excel_errors}
113
- total_errors = 0
114
-
115
- for sheet_name in wb.sheetnames:
116
- ws = wb[sheet_name]
117
- for row in ws.iter_rows():
118
- for cell in row:
119
- if cell.value is not None and isinstance(cell.value, str):
120
- for err in excel_errors:
121
- if err in cell.value:
122
- location = f"{sheet_name}!{cell.coordinate}"
123
- error_details[err].append(location)
124
- total_errors += 1
125
- break
126
-
127
- wb.close()
128
-
129
- result = {
130
- "status": "success" if total_errors == 0 else "errors_found",
131
- "total_errors": total_errors,
132
- "error_summary": {},
80
+
81
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
82
+
83
+ # LibreOffice exits 0 on success
84
+ if result.returncode != 0:
85
+ return {
86
+ "status": "warning",
87
+ "message": f"LibreOffice recalculation had non-zero exit: {result.returncode}",
88
+ "stderr": result.stderr[:500],
89
+ "file": xlsx_path
90
+ }
91
+
92
+ except subprocess.TimeoutExpired:
93
+ return {
94
+ "status": "error",
95
+ "message": f"LibreOffice recalculation timed out after {timeout}s",
96
+ "file": xlsx_path
97
+ }
98
+ except FileNotFoundError:
99
+ return {
100
+ "status": "warning",
101
+ "message": "soffice (LibreOffice) not found. Attempting to scan file without recalculation.",
102
+ "file": xlsx_path
133
103
  }
134
104
 
135
- for err_type, locations in error_details.items():
136
- if locations:
137
- result["error_summary"][err_type] = {
138
- "count": len(locations),
139
- "locations": locations[:20],
140
- }
141
-
142
- wb_formulas = load_workbook(filename, data_only=False)
143
- formula_count = 0
144
- for sheet_name in wb_formulas.sheetnames:
145
- ws = wb_formulas[sheet_name]
146
- for row in ws.iter_rows():
147
- for cell in row:
148
- if (
149
- cell.value
150
- and isinstance(cell.value, str)
151
- and cell.value.startswith("=")
152
- ):
153
- formula_count += 1
154
- wb_formulas.close()
155
-
156
- result["total_formulas"] = formula_count
157
-
158
- return result
159
105
 
106
+ def scan_for_errors(xlsx_path):
107
+ """Scan spreadsheet for formula errors."""
108
+ try:
109
+ wb = load_workbook(xlsx_path, data_only=False)
160
110
  except Exception as e:
161
- return {"error": str(e)}
111
+ return {
112
+ "status": "error",
113
+ "message": f"Failed to load workbook: {str(e)}",
114
+ "file": xlsx_path
115
+ }
116
+
117
+ error_types = ["#VALUE!", "#DIV/0!", "#REF!", "#NAME?", "#NULL!", "#NUM!", "#N/A"]
118
+ error_details = []
119
+ error_summary = {err: 0 for err in error_types}
120
+ total_formulas = 0
121
+
122
+ for sheet_name in wb.sheetnames:
123
+ ws = wb[sheet_name]
124
+
125
+ for row in ws.iter_rows():
126
+ for cell in row:
127
+ # Count formulas
128
+ if cell.value and isinstance(cell.value, str) and cell.value.startswith("="):
129
+ total_formulas += 1
130
+
131
+ # Check for error values (when data_only=True is applied separately)
132
+ # For now, we rely on LibreOffice to have calculated these
133
+
134
+ # Check cell value itself (if it's an error string)
135
+ if cell.value and isinstance(cell.value, str):
136
+ for err_type in error_types:
137
+ if cell.value.strip() == err_type:
138
+ error_summary[err_type] += 1
139
+ error_details.append({
140
+ "cell": cell.coordinate,
141
+ "error": err_type,
142
+ "sheet": sheet_name
143
+ })
144
+ break
145
+
146
+ total_errors = sum(error_summary.values())
147
+
148
+ # Filter out zero counts from summary
149
+ error_summary = {k: v for k, v in error_summary.items() if v > 0}
150
+
151
+ return {
152
+ "status": "errors_found" if total_errors > 0 else "success",
153
+ "total_errors": total_errors,
154
+ "total_formulas": total_formulas,
155
+ "error_summary": error_summary,
156
+ "error_details": error_details,
157
+ "file": xlsx_path
158
+ }
162
159
 
163
160
 
164
161
  def main():
165
162
  if len(sys.argv) < 2:
166
- print("Usage: python recalc.py <excel_file> [timeout_seconds]")
167
- print("\nRecalculates all formulas in an Excel file using LibreOffice")
168
- print("\nReturns JSON with error details:")
169
- print(" - status: 'success' or 'errors_found'")
170
- print(" - total_errors: Total number of Excel errors found")
171
- print(" - total_formulas: Number of formulas in the file")
172
- print(" - error_summary: Breakdown by error type with locations")
173
- print(" - #VALUE!, #DIV/0!, #REF!, #NAME?, #NULL!, #NUM!, #N/A")
163
+ print("Usage: python recalc.py file.xlsx [timeout_seconds]")
174
164
  sys.exit(1)
175
165
 
176
- filename = sys.argv[1]
166
+ xlsx_path = sys.argv[1]
177
167
  timeout = int(sys.argv[2]) if len(sys.argv) > 2 else 30
178
168
 
179
- result = recalc(filename, timeout)
180
- print(json.dumps(result, indent=2))
169
+ # Step 1: Recalculate with LibreOffice
170
+ recalc_result = recalculate_with_libreoffice(xlsx_path, timeout)
171
+
172
+ # If there was an error in recalculation, return it
173
+ if recalc_result.get("status") == "error":
174
+ print(json.dumps(recalc_result, indent=2))
175
+ sys.exit(1)
176
+
177
+ # Step 2: Scan for errors
178
+ scan_result = scan_for_errors(xlsx_path)
179
+
180
+ # Merge warning from recalculation if present
181
+ if recalc_result.get("status") == "warning":
182
+ if "warnings" not in scan_result:
183
+ scan_result["warnings"] = []
184
+ scan_result["warnings"].append(recalc_result["message"])
185
+
186
+ print(json.dumps(scan_result, indent=2))
187
+
188
+ # Exit with non-zero if errors found
189
+ if scan_result.get("total_errors", 0) > 0:
190
+ sys.exit(1)
181
191
 
182
192
 
183
193
  if __name__ == "__main__":
@@ -1,11 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- Validate an .xlsx file for formula errors.
3
+ Validate an .xlsx file for formula errors and formatting issues.
4
+
5
+ Usage: python validate_xlsx.py spreadsheet.xlsx
6
+ Returns JSON with error details.
4
7
  """
5
8
 
6
9
  import json
7
- import os
8
10
  import sys
11
+ import os
9
12
 
10
13
  try:
11
14
  from openpyxl import load_workbook
@@ -17,10 +20,11 @@ except ImportError:
17
20
  def validate(path):
18
21
  if not os.path.exists(path):
19
22
  print(json.dumps({"status": "error", "message": f"File not found: {path}"}))
20
- return 1
23
+ sys.exit(1)
21
24
 
22
25
  wb = load_workbook(path, data_only=False)
23
26
  wb_data = load_workbook(path, data_only=True)
27
+
24
28
  result = {
25
29
  "status": "success",
26
30
  "total_errors": 0,
@@ -39,16 +43,21 @@ def validate(path):
39
43
 
40
44
  for row in ws.iter_rows():
41
45
  for cell in row:
46
+ # Count formulas
42
47
  if cell.value and isinstance(cell.value, str) and cell.value.startswith("="):
43
48
  sheet_info["formulas"] += 1
44
49
  result["total_formulas"] += 1
50
+
51
+ # Check calculated value for errors
45
52
  data_cell = ws_data[cell.coordinate]
46
53
  if data_cell.value and isinstance(data_cell.value, str):
47
54
  for err_type in error_types:
48
55
  if err_type in str(data_cell.value):
49
- sheet_info["errors"].append(
50
- {"cell": cell.coordinate, "formula": cell.value, "error": err_type}
51
- )
56
+ sheet_info["errors"].append({
57
+ "cell": cell.coordinate,
58
+ "formula": cell.value,
59
+ "error": err_type,
60
+ })
52
61
  result["total_errors"] += 1
53
62
  if err_type not in result["error_summary"]:
54
63
  result["error_summary"][err_type] = {"count": 0, "locations": []}
@@ -57,6 +66,22 @@ def validate(path):
57
66
  f"{sheet_name}!{cell.coordinate}"
58
67
  )
59
68
 
69
+ # Warnings
70
+ if ws.max_row and ws.max_row > 10 and sheet_info["formulas"] == 0:
71
+ result["warnings"].append(
72
+ f"Sheet '{sheet_name}' has {ws.max_row} rows but no formulas — consider adding totals/summaries"
73
+ )
74
+
75
+ fonts_used = set()
76
+ for row in ws.iter_rows(max_row=min(ws.max_row or 1, 50)):
77
+ for cell in row:
78
+ if cell.font and cell.font.name:
79
+ fonts_used.add(cell.font.name)
80
+ if len(fonts_used) > 3:
81
+ result["warnings"].append(
82
+ f"Sheet '{sheet_name}' uses {len(fonts_used)} fonts: {', '.join(sorted(fonts_used))}"
83
+ )
84
+
60
85
  result["sheets"][sheet_name] = sheet_info
61
86
 
62
87
  if result["total_errors"] > 0: