@heylemon/lemonade 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/dist/build-info.json +3 -3
  2. package/dist/canvas-host/a2ui/.bundle.hash +1 -1
  3. package/dist/gateway/skills-http.js +74 -19
  4. package/package.json +1 -1
  5. package/skills/docx/SKILL.md +25 -30
  6. package/skills/docx/scripts/accept_changes.py +0 -17
  7. package/skills/docx/scripts/comment.py +10 -39
  8. package/skills/docx/scripts/office/helpers/merge_runs.py +1 -33
  9. package/skills/docx/scripts/office/helpers/simplify_redlines.py +0 -43
  10. package/skills/docx/scripts/office/pack.py +0 -30
  11. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  12. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  13. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  14. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  21. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  22. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  23. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  24. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  25. package/skills/docx/scripts/office/soffice.py +0 -55
  26. package/skills/docx/scripts/office/unpack.py +5 -27
  27. package/skills/docx/scripts/office/validate.py +19 -14
  28. package/skills/docx/scripts/office/validators/base.py +48 -224
  29. package/skills/docx/scripts/office/validators/docx.py +44 -117
  30. package/skills/docx/scripts/office/validators/pptx.py +2 -42
  31. package/skills/docx/scripts/office/validators/redlining.py +3 -40
  32. package/skills/pdf/SKILL.md +22 -15
  33. package/skills/pdf/{FORMS.md → forms.md} +0 -14
  34. package/skills/pdf/scripts/check_bounding_boxes.py +0 -5
  35. package/skills/pdf/scripts/check_fillable_fields.py +0 -1
  36. package/skills/pdf/scripts/convert_pdf_to_images.py +0 -2
  37. package/skills/pdf/scripts/create_validation_image.py +0 -4
  38. package/skills/pdf/scripts/extract_form_field_info.py +1 -31
  39. package/skills/pdf/scripts/extract_form_structure.py +0 -9
  40. package/skills/pdf/scripts/fill_fillable_fields.py +0 -23
  41. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +3 -38
  42. package/skills/pptx/SKILL.md +2 -29
  43. package/skills/pptx/editing.md +2 -2
  44. package/skills/pptx/pptxgenjs.md +53 -8
  45. package/skills/pptx/scripts/add_slide.py +0 -30
  46. package/skills/pptx/scripts/clean.py +0 -23
  47. package/skills/pptx/scripts/office/helpers/merge_runs.py +1 -33
  48. package/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -43
  49. package/skills/pptx/scripts/office/pack.py +0 -30
  50. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  51. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  52. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  53. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  54. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  55. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  56. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  57. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  58. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  59. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  60. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  61. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  62. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  63. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  64. package/skills/pptx/scripts/office/soffice.py +0 -55
  65. package/skills/pptx/scripts/office/unpack.py +5 -27
  66. package/skills/pptx/scripts/office/validate.py +19 -14
  67. package/skills/pptx/scripts/office/validators/base.py +48 -224
  68. package/skills/pptx/scripts/office/validators/docx.py +44 -117
  69. package/skills/pptx/scripts/office/validators/pptx.py +2 -42
  70. package/skills/pptx/scripts/office/validators/redlining.py +3 -40
  71. package/skills/pptx/scripts/thumbnail.py +0 -31
  72. package/skills/xlsx/SKILL.md +3 -26
  73. package/skills/xlsx/scripts/office/helpers/merge_runs.py +1 -33
  74. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -43
  75. package/skills/xlsx/scripts/office/pack.py +0 -30
  76. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  77. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  78. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  79. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  80. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  81. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  82. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  83. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  84. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  85. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  86. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  87. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  88. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  89. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  90. package/skills/xlsx/scripts/office/soffice.py +0 -55
  91. package/skills/xlsx/scripts/office/unpack.py +5 -27
  92. package/skills/xlsx/scripts/office/validate.py +19 -14
  93. package/skills/xlsx/scripts/office/validators/base.py +48 -224
  94. package/skills/xlsx/scripts/office/validators/docx.py +44 -117
  95. package/skills/xlsx/scripts/office/validators/pptx.py +2 -42
  96. package/skills/xlsx/scripts/office/validators/redlining.py +3 -40
  97. package/skills/xlsx/scripts/recalc.py +2 -26
  98. package/skills/docx/scripts/__init__.py +0 -1
  99. package/skills/docx/scripts/office/helpers/__init__.py +0 -0
  100. package/skills/docx/scripts/office/validators/__init__.py +0 -15
  101. package/skills/pptx/scripts/__init__.py +0 -0
  102. package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  103. package/skills/pptx/scripts/office/validators/__init__.py +0 -15
  104. package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  105. package/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  106. /package/skills/pdf/{REFERENCE.md → reference.md} +0 -0
@@ -20,17 +20,8 @@ import subprocess
20
20
  import tempfile
21
21
  from pathlib import Path
22
22
 
23
- # ---------------------------------------------------------------------------
24
- # Public API
25
- # ---------------------------------------------------------------------------
26
23
 
27
24
  def get_soffice_env() -> dict:
28
- """Return an env dict suitable for running soffice headlessly.
29
-
30
- Always sets SAL_USE_VCLPLUGIN=svp for headless rendering (no X11).
31
- In sandboxed environments where AF_UNIX sockets are blocked, also adds
32
- LD_PRELOAD (socket shim).
33
- """
34
25
  env = os.environ.copy()
35
26
  env["SAL_USE_VCLPLUGIN"] = "svp"
36
27
 
@@ -42,27 +33,15 @@ def get_soffice_env() -> dict:
42
33
 
43
34
 
44
35
  def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:
45
- """Run soffice with the given arguments, applying the socket shim
46
- if needed. Accepts the same keyword arguments as subprocess.run.
47
-
48
- In sandboxed environments the shim handles clean shutdown by calling
49
- _exit(0) when soffice.bin's listener socket closes (after conversion
50
- is complete). This avoids the hang that otherwise occurs because
51
- oosplash gets stuck and never tells soffice.bin to quit.
52
- """
53
36
  env = get_soffice_env()
54
37
  return subprocess.run(["soffice"] + args, env=env, **kwargs)
55
38
 
56
39
 
57
- # ---------------------------------------------------------------------------
58
- # Internals
59
- # ---------------------------------------------------------------------------
60
40
 
61
41
  _SHIM_SO = Path(tempfile.gettempdir()) / "lo_socket_shim.so"
62
42
 
63
43
 
64
44
  def _needs_shim() -> bool:
65
- """Check whether AF_UNIX sockets are blocked."""
66
45
  try:
67
46
  s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
68
47
  s.close()
@@ -72,7 +51,6 @@ def _needs_shim() -> bool:
72
51
 
73
52
 
74
53
  def _ensure_shim() -> Path:
75
- """Compile the shim .so if not already cached."""
76
54
  if _SHIM_SO.exists():
77
55
  return _SHIM_SO
78
56
 
@@ -87,36 +65,6 @@ def _ensure_shim() -> Path:
87
65
  return _SHIM_SO
88
66
 
89
67
 
90
- # ---------------------------------------------------------------------------
91
- # LD_PRELOAD shim – C source
92
- #
93
- # Problem
94
- # -------
95
- # LibreOffice uses AF_UNIX sockets for single-instance management
96
- # (OSL_PIPE). In sandboxed environments the seccomp filter blocks
97
- # socket(AF_UNIX) while allowing socketpair(AF_UNIX). Without this
98
- # shim, soffice either crashes or hangs after conversion:
99
- #
100
- # 1. oosplash (the LO launcher) creates a splash pipe, passes the
101
- # write end to soffice.bin via --splash-pipe=<fd>, and blocks
102
- # reading the read end.
103
- # 2. soffice.bin tries socket(AF_UNIX) → EPERM. It crashes.
104
- # 3. With a minimal shim that fakes socket/bind/listen, soffice.bin
105
- # starts and converts successfully – but then its main event loop
106
- # idles forever waiting for oosplash (which is stuck) to signal
107
- # quit.
108
- #
109
- # Solution
110
- # --------
111
- # Intercept the relevant calls and provide working substitutes:
112
- #
113
- # socket(AF_UNIX) → socketpair() fallback (real AF_UNIX FDs)
114
- # listen() → no-op on shimmed FDs
115
- # accept() → blocks on a wake-pipe until close() fires
116
- # close() → wakes accept(), cleans up, and – when the
117
- # listener socket is closed – calls _exit(0)
118
- # to terminate cleanly after conversion.
119
- # ---------------------------------------------------------------------------
120
68
 
121
69
  _SHIM_SOURCE = r"""
122
70
  #define _GNU_SOURCE
@@ -228,9 +176,6 @@ int close(int fd) {
228
176
  """
229
177
 
230
178
 
231
- # ---------------------------------------------------------------------------
232
- # CLI entry point
233
- # ---------------------------------------------------------------------------
234
179
 
235
180
  if __name__ == "__main__":
236
181
  import sys
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env python3
2
1
  """Unpack Office files (DOCX, PPTX, XLSX) for editing.
3
2
 
4
3
  Extracts the ZIP archive, pretty-prints XML files, and optionally:
@@ -24,12 +23,11 @@ import defusedxml.minidom
24
23
  from helpers.merge_runs import merge_runs as do_merge_runs
25
24
  from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines
26
25
 
27
- # Smart quotes that get mangled by the tokenizer - convert to XML entities
28
26
  SMART_QUOTE_REPLACEMENTS = {
29
- "\u201c": "&#x201C;", # Left double quote "
30
- "\u201d": "&#x201D;", # Right double quote "
31
- "\u2018": "&#x2018;", # Left single quote '
32
- "\u2019": "&#x2019;", # Right single quote '
27
+ "\u201c": "&#x201C;",
28
+ "\u201d": "&#x201D;",
29
+ "\u2018": "&#x2018;",
30
+ "\u2019": "&#x2019;",
33
31
  }
34
32
 
35
33
 
@@ -39,17 +37,6 @@ def unpack(
39
37
  merge_runs: bool = True,
40
38
  simplify_redlines: bool = True,
41
39
  ) -> tuple[None, str]:
42
- """Unpack an Office file and prepare for editing.
43
-
44
- Args:
45
- input_file: Path to Office file (.docx, .pptx, .xlsx)
46
- output_directory: Path to output directory
47
- merge_runs: If True, merge adjacent runs with identical formatting (DOCX only)
48
- simplify_redlines: If True, merge adjacent tracked changes from same author (DOCX only)
49
-
50
- Returns:
51
- (None, message) - message indicates success or failure
52
- """
53
40
  input_path = Path(input_file)
54
41
  output_path = Path(output_directory)
55
42
  suffix = input_path.suffix.lower()
@@ -61,33 +48,26 @@ def unpack(
61
48
  return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"
62
49
 
63
50
  try:
64
- # Create output directory
65
51
  output_path.mkdir(parents=True, exist_ok=True)
66
52
 
67
- # Extract ZIP contents
68
53
  with zipfile.ZipFile(input_path, "r") as zf:
69
54
  zf.extractall(output_path)
70
55
 
71
- # Pretty print all XML files
72
56
  xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
73
57
  for xml_file in xml_files:
74
58
  _pretty_print_xml(xml_file)
75
59
 
76
60
  message = f"Unpacked {input_file} ({len(xml_files)} XML files)"
77
61
 
78
- # DOCX-specific processing
79
62
  if suffix == ".docx":
80
- # Simplify tracked changes (before merge_runs so more runs can merge)
81
63
  if simplify_redlines:
82
64
  simplify_count, _ = do_simplify_redlines(str(output_path))
83
65
  message += f", simplified {simplify_count} tracked changes"
84
66
 
85
- # Merge runs
86
67
  if merge_runs:
87
68
  merge_count, _ = do_merge_runs(str(output_path))
88
69
  message += f", merged {merge_count} runs"
89
70
 
90
- # Escape smart quotes AFTER transformations (which rewrite files)
91
71
  for xml_file in xml_files:
92
72
  _escape_smart_quotes(xml_file)
93
73
 
@@ -100,17 +80,15 @@ def unpack(
100
80
 
101
81
 
102
82
  def _pretty_print_xml(xml_file: Path) -> None:
103
- """Pretty print an XML file with indentation."""
104
83
  try:
105
84
  content = xml_file.read_text(encoding="utf-8")
106
85
  dom = defusedxml.minidom.parseString(content)
107
86
  xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="utf-8"))
108
87
  except Exception:
109
- pass # Skip files that can't be parsed
88
+ pass
110
89
 
111
90
 
112
91
  def _escape_smart_quotes(xml_file: Path) -> None:
113
- """Replace smart quotes with XML entities so they survive tokenization."""
114
92
  try:
115
93
  content = xml_file.read_text(encoding="utf-8")
116
94
  for char, entity in SMART_QUOTE_REPLACEMENTS.items():
@@ -1,9 +1,8 @@
1
- #!/usr/bin/env python3
2
1
  """
3
2
  Command line tool to validate Office document XML files against XSD schemas and tracked changes.
4
3
 
5
4
  Usage:
6
- python validate.py <path> --original <original_file> [--auto-repair] [--author NAME]
5
+ python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]
7
6
 
8
7
  The first argument can be either:
9
8
  - An unpacked directory containing the Office document XML files
@@ -31,8 +30,9 @@ def main():
31
30
  )
32
31
  parser.add_argument(
33
32
  "--original",
34
- required=True,
35
- help="Path to original file (.docx/.pptx/.xlsx)",
33
+ required=False,
34
+ default=None,
35
+ help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.",
36
36
  )
37
37
  parser.add_argument(
38
38
  "-v",
@@ -52,17 +52,22 @@ def main():
52
52
  )
53
53
  args = parser.parse_args()
54
54
 
55
- # Validate paths
56
55
  path = Path(args.path)
57
- original_file = Path(args.original)
58
- file_extension = original_file.suffix.lower()
59
56
  assert path.exists(), f"Error: {path} does not exist"
60
- assert original_file.is_file(), f"Error: {original_file} is not a file"
57
+
58
+ original_file = None
59
+ if args.original:
60
+ original_file = Path(args.original)
61
+ assert original_file.is_file(), f"Error: {original_file} is not a file"
62
+ assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], (
63
+ f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
64
+ )
65
+
66
+ file_extension = (original_file or path).suffix.lower()
61
67
  assert file_extension in [".docx", ".pptx", ".xlsx"], (
62
- f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
68
+ f"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file."
63
69
  )
64
70
 
65
- # If path is a packed file, unpack to temp directory
66
71
  if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:
67
72
  temp_dir = tempfile.mkdtemp()
68
73
  with zipfile.ZipFile(path, "r") as zf:
@@ -72,13 +77,15 @@ def main():
72
77
  assert path.is_dir(), f"Error: {path} is not a directory or Office file"
73
78
  unpacked_dir = path
74
79
 
75
- # Create validators based on file type
76
80
  match file_extension:
77
81
  case ".docx":
78
82
  validators = [
79
83
  DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
80
- RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author),
81
84
  ]
85
+ if original_file:
86
+ validators.append(
87
+ RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author)
88
+ )
82
89
  case ".pptx":
83
90
  validators = [
84
91
  PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),
@@ -87,13 +94,11 @@ def main():
87
94
  print(f"Error: Validation not supported for file type {file_extension}")
88
95
  sys.exit(1)
89
96
 
90
- # Auto-repair if requested
91
97
  if args.auto_repair:
92
98
  total_repairs = sum(v.repair() for v in validators)
93
99
  if total_repairs:
94
100
  print(f"Auto-repaired {total_repairs} issue(s)")
95
101
 
96
- # Run validators
97
102
  success = all(v.validate() for v in validators)
98
103
 
99
104
  if success: