cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. cdxml_toolkit/__init__.py +18 -0
  2. cdxml_toolkit/_jre/__init__.py +2 -0
  3. cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
  4. cdxml_toolkit/analysis/__init__.py +35 -0
  5. cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
  6. cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
  7. cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
  8. cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
  9. cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
  10. cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
  11. cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
  12. cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
  13. cdxml_toolkit/analysis/extract_nmr.py +47 -0
  14. cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
  15. cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
  16. cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
  17. cdxml_toolkit/cdxml_builder.py +920 -0
  18. cdxml_toolkit/cdxml_utils.py +342 -0
  19. cdxml_toolkit/chemdraw/__init__.py +5 -0
  20. cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
  21. cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
  22. cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
  23. cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
  24. cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
  25. cdxml_toolkit/constants.py +304 -0
  26. cdxml_toolkit/coord_normalizer.py +438 -0
  27. cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
  28. cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
  29. cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
  30. cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
  31. cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
  32. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
  33. cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
  34. cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
  35. cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
  36. cdxml_toolkit/image/__init__.py +15 -0
  37. cdxml_toolkit/image/reaction_from_image.py +2103 -0
  38. cdxml_toolkit/image/structure_from_image.py +1711 -0
  39. cdxml_toolkit/layout/__init__.py +5 -0
  40. cdxml_toolkit/layout/alignment.py +1642 -0
  41. cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
  42. cdxml_toolkit/layout/scheme_merger.py +2260 -0
  43. cdxml_toolkit/mcp_server/__init__.py +0 -0
  44. cdxml_toolkit/mcp_server/__main__.py +5 -0
  45. cdxml_toolkit/mcp_server/server.py +1567 -0
  46. cdxml_toolkit/naming/__init__.py +6 -0
  47. cdxml_toolkit/naming/aligned_namer.py +2342 -0
  48. cdxml_toolkit/naming/mol_builder.py +3722 -0
  49. cdxml_toolkit/naming/name_decomposer.py +2843 -0
  50. cdxml_toolkit/naming/reactions_datamol.json +2414 -0
  51. cdxml_toolkit/office/__init__.py +5 -0
  52. cdxml_toolkit/office/doc_from_template.py +722 -0
  53. cdxml_toolkit/office/ole_embedder.py +808 -0
  54. cdxml_toolkit/office/ole_extractor.py +272 -0
  55. cdxml_toolkit/perception/__init__.py +10 -0
  56. cdxml_toolkit/perception/compound_search.py +229 -0
  57. cdxml_toolkit/perception/eln_csv_parser.py +240 -0
  58. cdxml_toolkit/perception/rdf_parser.py +664 -0
  59. cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
  60. cdxml_toolkit/perception/reaction_parser.py +2150 -0
  61. cdxml_toolkit/perception/scheme_reader.py +2948 -0
  62. cdxml_toolkit/perception/scheme_refine.py +1404 -0
  63. cdxml_toolkit/perception/scheme_segmenter.py +619 -0
  64. cdxml_toolkit/perception/spatial_assignment.py +1013 -0
  65. cdxml_toolkit/rdkit_utils.py +605 -0
  66. cdxml_toolkit/render/__init__.py +17 -0
  67. cdxml_toolkit/render/auto_layout.py +229 -0
  68. cdxml_toolkit/render/compact_parser.py +632 -0
  69. cdxml_toolkit/render/parser.py +706 -0
  70. cdxml_toolkit/render/render_scheme.py +267 -0
  71. cdxml_toolkit/render/renderer.py +2387 -0
  72. cdxml_toolkit/render/schema.py +90 -0
  73. cdxml_toolkit/render/scheme_maker.py +1043 -0
  74. cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
  75. cdxml_toolkit/resolve/__init__.py +13 -0
  76. cdxml_toolkit/resolve/cas_resolver.py +430 -0
  77. cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
  78. cdxml_toolkit/resolve/condensed_formula.py +493 -0
  79. cdxml_toolkit/resolve/jre_manager.py +195 -0
  80. cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
  81. cdxml_toolkit/resolve/reagent_db.py +285 -0
  82. cdxml_toolkit/resolve/superatom_data.json +2856 -0
  83. cdxml_toolkit/resolve/superatom_table.py +146 -0
  84. cdxml_toolkit/text_formatting.py +298 -0
  85. cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
  86. cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
  87. cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
  88. cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
  89. cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
  90. cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
  91. cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,479 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Format Procedure Entry — Agent-driven lab book entry formatter.
4
+
5
+ Takes a JSON list of entries from an LLM agent and produces a complete lab
6
+ book entry. All numbers (RT, m/z, area%, purity, conversion%) come from
7
+ re-parsing the LCMS PDFs via parse_report(). The LLM only provides:
8
+ - Peak identification: (name, approximate RT, ion) as search keys
9
+ - Labels, detector choice, free-form text
10
+ - Peak assignments (compound_related flags for conversion)
11
+
12
+ Entry types:
13
+ text — passthrough (procedure, section headers, notes)
14
+ lcms-species — RT + ion + UV for assigned peaks, optionally purity
15
+ lcms-areas — area% for assigned peaks from one file
16
+ lcms-manual — area% from manually integrated LC PDF, optionally with
17
+ MS data from a Waters report
18
+ nmr — NMR data strings, rendered verbatim
19
+
20
+ Usage:
21
+ python format_procedure_entry.py --input assignments.json
22
+ python format_procedure_entry.py --input assignments.json --output entry.txt
23
+ echo '{ ... }' | python format_procedure_entry.py
24
+ """
25
+
26
+ import argparse
27
+ import json
28
+ import math
29
+ import os
30
+ import sys
31
+ from typing import List, Optional, Dict, Tuple
32
+
33
+ from .lcms_analyzer import (
34
+ parse_report, LCMSReport, ChromPeak,
35
+ parse_manual_report, ManualLCMSReport, ManualLCMSSample, ManualPeak,
36
+ )
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Constants
40
+ # ---------------------------------------------------------------------------
41
+
42
# Default retention-time window (in minutes) used by the peak-matching
# helpers: a report peak is a candidate when |peak RT - requested RT| is
# within this value.
RT_TOLERANCE = 0.05
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Peak matching
46
+ # ---------------------------------------------------------------------------
47
+
48
+
49
def _match_peak(report: LCMSReport, rt: float,
                ion: Optional[dict] = None,
                tol: float = RT_TOLERANCE) -> Optional[ChromPeak]:
    """Return the peak of *report* that best matches *rt*, or None.

    Only peaks whose RT lies within ±*tol* of *rt* are considered.  With a
    single candidate that peak is returned as-is.  With several candidates
    and an *ion* hint such as {"mode": "ES+", "mz": 346.1}, the candidate
    whose spectrum in that mode has an ion nearest to the requested m/z is
    chosen, provided it is less than 2.0 m/z units away.  In every other
    case the candidate nearest in RT is returned.
    """
    def _rt_gap(peak: ChromPeak) -> float:
        return abs(peak.rt - rt)

    in_window = [peak for peak in report.peaks if _rt_gap(peak) <= tol]
    if len(in_window) <= 1:
        return in_window[0] if in_window else None

    # More than one peak in the window: try the ion hint first.
    wanted_mz = ion.get("mz") if ion else None
    if wanted_mz is not None:
        wanted_mode = ion.get("mode", "ES+")

        def _mz_gap(peak: ChromPeak) -> float:
            # Only the first non-empty spectrum in the wanted mode is used;
            # peaks without one get a large sentinel distance.
            for spectrum in peak.ms_spectra:
                if spectrum.mode == wanted_mode and spectrum.top_ions:
                    return min(abs(mz - wanted_mz)
                               for mz in spectrum.top_ions)
            return 9999.0

        in_window.sort(key=_mz_gap)
        if _mz_gap(in_window[0]) < 2.0:  # reasonable m/z match
            return in_window[0]

    # No usable ion match: nearest RT wins (first one on ties).
    return min(in_window, key=_rt_gap)
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Formatting helpers
86
+ # ---------------------------------------------------------------------------
87
+
88
+
89
def _format_lambda_max(wavelengths: List[float]) -> str:
    """Format UV lambda max values, e.g. 'λmax 218, 254 nm'.

    Each wavelength is rounded half-up to a whole number.  Values that
    round to the same integer are listed once, so two maxima at 217.6 and
    218.0 nm render as a single "218".  The result is in ascending order.

    Returns an empty string when *wavelengths* is empty.
    """
    if not wavelengths:
        return ""
    # A set removes duplicates created by rounding; rounding is monotonic,
    # so sorting the rounded values keeps the original ascending order.
    rounded = sorted({math.floor(wl + 0.5) for wl in wavelengths})
    return f"λmax {', '.join(str(value) for value in rounded)} nm"
95
+
96
+
97
def _select_ion(peak: ChromPeak, override: Optional[dict] = None) -> str:
    """Select the representative ion string for a peak.

    Returns e.g. "ESI+ 346.1", "ESI− 344.1" (the negative label uses the
    U+2212 minus sign) or "UV only".

    If *override* is given, e.g. {"mode": "ES-", "rank": 1}, the ion at
    that 0-indexed rank in the first spectrum of that mode holding enough
    ions is used.  A rank that is negative or not an integer is ignored,
    so it can never silently pick an ion counted from the end of the list.
    When the override cannot be satisfied the default choice applies: top
    ES+ ion, then top ES- ion, then "UV only".
    """
    def _label(mode: str) -> str:
        # Anything other than "ES+" is labelled as negative mode.
        return "ESI+" if mode == "ES+" else "ESI−"

    if override:
        mode = override.get("mode", "ES+")
        rank = override.get("rank", 0)  # 0-indexed
        if isinstance(rank, int) and rank >= 0:
            for spec in peak.ms_spectra:
                if spec.mode == mode and len(spec.top_ions) > rank:
                    return f"{_label(mode)} {spec.top_ions[rank]:.1f}"

    # Default: top ESI+ ion, then ESI-, then UV only
    for mode_pref in ("ES+", "ES-"):
        for spec in peak.ms_spectra:
            if spec.mode == mode_pref and spec.top_ions:
                return f"{_label(mode_pref)} {spec.top_ions[0]:.1f}"

    return "UV only"
119
+
120
+
121
def _select_area(peak: ChromPeak, detector: str) -> Optional[float]:
    """Read area% of *peak* for the given detector.

    Recognised detectors are "TAC", "220nm" and "254nm".  Matching ignores
    letter case and whitespace for all three (e.g. "tac", "220NM",
    "254 nm"), so a differently spelled detector is not silently reported
    as missing.  Returns None for any other detector name; the returned
    attribute itself may also be None.
    """
    key = "".join(detector.split()).lower()
    if key == "tac":
        return peak.area_pct
    if key == "220nm":
        return peak.area_pct_220nm
    if key == "254nm":
        return peak.area_pct_254nm
    return None
130
+
131
+
132
def _method_header(report: LCMSReport) -> str:
    """Build the parenthesised "(instrument, method)" tag for *report*.

    Uses the report's ``instrument`` and ``method_short`` attributes.
    """
    instrument = report.instrument
    method = report.method_short
    return "({}, {})".format(instrument, method)
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Report cache — avoid re-parsing the same PDF multiple times
139
+ # ---------------------------------------------------------------------------
140
+
141
# Parsed reports, keyed by absolute file path, so a PDF referenced by
# several entries is only parsed once.  One cache per report flavour.
_report_cache: Dict[str, LCMSReport] = dict()
_manual_cache: Dict[str, ManualLCMSReport] = dict()
143
+
144
+
145
def _get_report(path: str) -> LCMSReport:
    """Return the parsed standard Waters LCMS report for *path*.

    Results are memoised in ``_report_cache`` under the absolute form of
    *path*; ``parse_report`` is only called on a cache miss.
    """
    key = os.path.abspath(path)
    if key in _report_cache:
        return _report_cache[key]

    parsed = parse_report(path)
    _report_cache[key] = parsed
    return parsed
151
+
152
+
153
def _get_manual_report(path: str) -> ManualLCMSReport:
    """Return the parsed manual-integration report for *path*.

    Results are memoised in ``_manual_cache`` under the absolute form of
    *path*; ``parse_manual_report`` is only called on a cache miss.
    """
    key = os.path.abspath(path)
    if key in _manual_cache:
        return _manual_cache[key]

    parsed = parse_manual_report(path)
    _manual_cache[key] = parsed
    return parsed
159
+
160
+
161
def _find_manual_sample(report: ManualLCMSReport,
                        sample_name: Optional[str]) -> ManualLCMSSample:
    """Find a sample by name in a manual integration report.

    If *sample_name* is None and there is only one sample, return it.
    Otherwise match case-insensitively: a sample whose name equals
    *sample_name* wins; failing that, the first sample whose name contains
    it as a substring is returned.  Checking for an exact match first
    keeps a request for "X" from resolving to an earlier "X-rerun".

    Raises:
        ValueError: if the report has no samples, if *sample_name* is None
            but the report holds several samples, or if nothing matches.
    """
    samples = report.samples
    if not samples:
        raise ValueError(f"No samples in {report.filename}")

    def _available() -> str:
        # Built lazily: only the error paths need the listing.
        return ", ".join(s.sample_name for s in samples)

    if sample_name is None:
        if len(samples) == 1:
            return samples[0]
        raise ValueError(
            f"{report.filename} has {len(samples)} samples; "
            f"specify 'sample' in the entry. Available: "
            + _available())

    target = sample_name.lower()
    # Pass 1: exact name (case-insensitive).
    for s in samples:
        if s.sample_name.lower() == target:
            return s
    # Pass 2: substring match (case-insensitive), first hit wins.
    for s in samples:
        if target in s.sample_name.lower():
            return s
    raise ValueError(
        f"Sample '{sample_name}' not found in {report.filename}. "
        f"Available: " + _available())
185
+
186
+
187
def _match_manual_peak(sample: ManualLCMSSample, rt: float,
                       tol: float = RT_TOLERANCE) -> Optional[ManualPeak]:
    """Return the peak of *sample* nearest to *rt*, or None.

    Only peaks within ±*tol* of *rt* qualify; among equally near peaks the
    one listed first in ``sample.peaks`` is returned.
    """
    nearest = None
    nearest_gap = None
    for peak in sample.peaks:
        gap = abs(peak.rt - rt)
        if not gap <= tol:
            continue
        # Strict "<" keeps the earliest peak when gaps tie.
        if nearest_gap is None or gap < nearest_gap:
            nearest, nearest_gap = peak, gap
    return nearest
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Entry processors
199
+ # ---------------------------------------------------------------------------
200
+
201
+
202
def _process_text(entry: dict) -> str:
    """Passthrough text entry.

    Returns the entry's "content" value.  A missing or JSON-null content
    yields an empty string, and a non-string value (e.g. a number) is
    converted with ``str`` so the result can always be joined with the
    other rendered lines.
    """
    content = entry.get("content", "")
    if content is None:
        return ""
    return content if isinstance(content, str) else str(content)
205
+
206
+
207
def _process_nmr(entry: dict) -> str:
    """NMR data entry — render verbatim, one per line.

    "data" may be a single string or a list of strings.  A missing or
    JSON-null "data" yields an empty string; null items inside the list
    are skipped and non-string items are converted with ``str`` so that
    joining cannot fail.
    """
    data = entry.get("data", [])
    if data is None:
        return ""
    if isinstance(data, str):
        data = [data]
    return "\n".join(str(line) for line in data if line is not None)
213
+
214
+
215
def _process_lcms_species(entry: dict) -> str:
    """Render RT, ion and UV maxima for each assigned peak of one report.

    Output: "{label} ({instrument}, {method}): Name1 RT = ...; Name2 RT = ..."

    A peak that cannot be matched in the report is rendered as
    "<name> n.d.".  When a peak config sets "purity", the available
    TAC / 220nm / 254nm area percentages are appended.
    """
    report = _get_report(entry["file"])

    def _describe(cfg: dict) -> str:
        """Render one peak config against the enclosing report."""
        hit = _match_peak(report, cfg["rt"], cfg.get("ion"))
        if hit is None:
            return f"{cfg['name']} n.d."

        ion_text = _select_ion(hit, cfg.get("ion_override"))
        uv_text = _format_lambda_max(hit.uv_lambda_max)

        fields = [f"{cfg['name']} RT = {hit.rt:.2f} min", ion_text]
        if uv_text:
            fields.append(uv_text)

        if cfg.get("purity"):
            readings = (
                ("TAC", hit.area_pct),
                ("220nm", hit.area_pct_220nm),
                ("254nm", hit.area_pct_254nm),
            )
            shown = [f"{tag} {value:.1f}%"
                     for tag, value in readings if value is not None]
            if shown:
                fields.append("purity " + ", ".join(shown))

        return ", ".join(fields)

    described = [_describe(cfg) for cfg in entry.get("peaks", [])]
    label = entry.get("label", "LCMS")
    return f"{label} {_method_header(report)}: {'; '.join(described)}"
254
+
255
+
256
def _process_lcms_areas(entry: dict) -> str:
    """Format area% for assigned peaks from one file.

    Output: "{label} ({instrument}, {method}), {detector}: Name1 x%, Name2 y%"

    Peaks that cannot be matched, or that have no area for the chosen
    detector, are rendered as "<name> n.d.".

    With "show_conversion" set, the peak named "SM" (case-insensitive) is
    annotated with the conversion, computed as
    ``(1 - SM area / sum of compound_related areas) * 100``.  If no SM
    area was found but compound-related peaks were, the "SM n.d." item is
    annotated with "(complete conversion)".  That annotation used to be
    computed and then dropped, because it was only ever attached to SM
    items that had an area.
    """
    report = _get_report(entry["file"])
    detector = entry.get("detector", "TAC")
    peaks_cfg = entry.get("peaks", [])
    show_conversion = entry.get("show_conversion", False)

    # Match all peaks
    matched = []
    for pcfg in peaks_cfg:
        peak = _match_peak(report, pcfg["rt"], pcfg.get("ion"))
        matched.append({
            "name": pcfg["name"],
            "area": _select_area(peak, detector) if peak else None,
            "compound_related": pcfg.get("compound_related", False),
        })

    # Compute conversion if requested
    conversion_str = None
    if show_conversion:
        sm_area = None
        compound_total = 0.0
        has_compound = False
        for m in matched:
            if m["name"].upper() == "SM" and m["area"] is not None:
                sm_area = m["area"]
            if m["compound_related"] and m["area"] is not None:
                compound_total += m["area"]
                has_compound = True

        if sm_area is None and has_compound:
            conversion_str = "complete"
        elif sm_area is not None and compound_total > 0:
            # NOTE(review): this presumes SM itself is flagged
            # compound_related; otherwise the ratio can exceed 1 and the
            # result goes negative — confirm against the input contract.
            conv = (1.0 - sm_area / compound_total) * 100.0
            conversion_str = f"{conv:.0f}%"

    # Format parts
    parts = []
    for m in matched:
        if m["area"] is None:
            item = f"{m['name']} n.d."
            # An undetected SM can only carry the "complete" verdict.
            annotate = conversion_str == "complete"
        else:
            item = f"{m['name']} {m['area']:.1f}%"
            annotate = conversion_str is not None
        if annotate and m["name"].upper() == "SM":
            item += f" ({conversion_str} conversion)"
        parts.append(item)

    header = f"{entry.get('label', 'LCMS')} {_method_header(report)}, {detector}"
    return f"{header}: {', '.join(parts)}"
313
+
314
+
315
def _process_lcms_manual(entry: dict) -> str:
    """Format area% from a manually integrated LCMS PDF.

    Handles two cases:
    1. Tracking/composition: area% from manual file, no MS data.
    2. Purity: area% from manual file, MS data from a Waters report
       (specified via optional 'ms_file').

    Output: "{label} ({instrument}): item1, item2, ..."

    With "show_conversion" set, the peak named "SM" (case-insensitive) is
    annotated with the conversion, computed as
    ``(1 - SM area / sum of compound_related areas) * 100``.  If no SM
    area was found but compound-related peaks were, the "SM n.d." item is
    annotated with "(complete conversion)"; that annotation used to be
    computed and then dropped.  A purity peak whose manual area is missing
    is rendered as "n.d." instead of failing while formatting the area.

    JSON examples:

    Tracking:
        {"type": "lcms-manual", "label": "Manual LCAP",
         "file": "manint.pdf", "sample": "KL-7001-023-50min",
         "peaks": [{"name": "DP", "rt": 0.57}, {"name": "SM", "rt": 1.01}],
         "show_conversion": true}

    Purity:
        {"type": "lcms-manual", "label": "Purified product LC",
         "file": "LC.pdf",
         "ms_file": "driedOWE-dil.pdf",
         "peaks": [{"name": "DP", "rt": 1.12, "purity": true,
                    "ion": {"mode": "ES+", "mz": 346.0}}]}

    Raises:
        ValueError: propagated from the sample lookup when the requested
            sample cannot be resolved.
    """
    manual = _get_manual_report(entry["file"])
    sample = _find_manual_sample(manual, entry.get("sample"))
    peaks_cfg = entry.get("peaks", [])
    show_conversion = entry.get("show_conversion", False)

    # Optionally load a Waters report for MS data
    ms_report = _get_report(entry["ms_file"]) if entry.get("ms_file") else None

    sm_area = None
    compound_total = 0.0
    has_compound = False
    matched_list = []

    for pcfg in peaks_cfg:
        mpeak = _match_manual_peak(sample, pcfg["rt"])
        area = mpeak.area_pct if mpeak else None

        if area is not None:
            if pcfg["name"].upper() == "SM":
                sm_area = area
            if pcfg.get("compound_related", False):
                compound_total += area
                has_compound = True

        matched_list.append({
            "name": pcfg["name"],
            "area": area,
            "mpeak": mpeak,
            "cfg": pcfg,
        })

    # Conversion
    conversion_str = None
    if show_conversion:
        if sm_area is None and has_compound:
            conversion_str = "complete"
        elif sm_area is not None and compound_total > 0:
            # NOTE(review): this presumes SM itself is flagged
            # compound_related; otherwise the result can go negative —
            # confirm against the input contract.
            conv = (1.0 - sm_area / compound_total) * 100.0
            conversion_str = f"{conv:.0f}%"

    parts = []
    for m in matched_list:
        pcfg = m["cfg"]

        if pcfg.get("purity") and m["area"] is not None:
            # Purity mode: RT + ion from Waters report, purity from manual.
            # A non-None area implies the manual peak was matched.
            segments = [f"{m['name']} RT = {m['mpeak'].rt:.2f} min"]
            if ms_report:
                ws_peak = _match_peak(ms_report, pcfg["rt"], pcfg.get("ion"))
                if ws_peak:
                    segments.append(
                        _select_ion(ws_peak, pcfg.get("ion_override")))
                    lmax = _format_lambda_max(ws_peak.uv_lambda_max)
                    if lmax:
                        segments.append(lmax)
            segments.append(f"purity {m['area']:.1f}% (manual integration)")
            parts.append(", ".join(segments))
        elif m["area"] is None:
            item = f"{m['name']} n.d."
            # An undetected SM can only carry the "complete" verdict.
            if conversion_str == "complete" and m["name"].upper() == "SM":
                item += f" ({conversion_str} conversion)"
            parts.append(item)
        else:
            item = f"{m['name']} {m['area']:.1f}%"
            if (show_conversion and m["name"].upper() == "SM"
                    and conversion_str is not None):
                item += f" ({conversion_str} conversion)"
            parts.append(item)

    header = f"{entry.get('label', 'Manual LC')} ({manual.instrument})"
    return f"{header}: {', '.join(parts)}"
406
+
407
+
408
+ # ---------------------------------------------------------------------------
409
+ # Entry dispatch
410
+ # ---------------------------------------------------------------------------
411
+
412
# Dispatch table: entry "type" string -> function that renders that entry.
_PROCESSORS = dict((
    ("text", _process_text),
    ("nmr", _process_nmr),
    ("lcms-species", _process_lcms_species),
    ("lcms-areas", _process_lcms_areas),
    ("lcms-manual", _process_lcms_manual),
))
419
+
420
+
421
def process_entries(entries: List[dict]) -> str:
    """Render every entry in order and join the results with newlines.

    Each entry is dispatched on its "type" key (default "text").  Entries
    of an unknown type are skipped after a warning on stderr.
    """
    rendered = []
    for item in entries:
        kind = item.get("type", "text")
        handler = _PROCESSORS.get(kind)
        if handler is not None:
            rendered.append(handler(item))
            continue
        print(f"Warning: unknown entry type '{kind}', skipping",
              file=sys.stderr)
    return "\n".join(rendered)
433
+
434
+
435
+ # ---------------------------------------------------------------------------
436
+ # CLI
437
+ # ---------------------------------------------------------------------------
438
+
439
+
440
def main(argv=None) -> int:
    """Command-line entry point: read assignments JSON, write the entry.

    The input is read from ``--input`` or, when omitted, from stdin.  It
    may be an object with an "entries" list or a bare list of entries; a
    bare list used to fail on ``data.get``.  The formatted text goes to
    ``--output`` or, when omitted, to stdout as UTF-8 bytes.

    Args:
        argv: argument list to parse; None makes argparse use sys.argv.

    Returns:
        0 on success, 1 when the input holds no entries.
    """
    parser = argparse.ArgumentParser(
        description="Format a lab book entry from LLM agent assignments")
    parser.add_argument('--input', '-i', type=str, default=None,
                        help='JSON input file (default: stdin)')
    parser.add_argument('--output', '-o', type=str, default=None,
                        help='Output file (default: stdout)')
    args = parser.parse_args(argv)

    # Read JSON
    if args.input:
        with open(args.input, 'r', encoding='utf-8') as f:
            data = json.load(f)
    else:
        data = json.load(sys.stdin)

    # Accept both {"entries": [...]} and a bare [...] list.
    if isinstance(data, dict):
        entries = data.get("entries", [])
    elif isinstance(data, list):
        entries = data
    else:
        entries = None
    if not entries:
        print("Error: no entries in input JSON", file=sys.stderr)
        return 1

    # Clear cache before run
    _report_cache.clear()
    _manual_cache.clear()

    result = process_entries(entries)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result + "\n")
        print(f"Output written to {args.output}", file=sys.stderr)
    else:
        # Write bytes so the output is UTF-8 whatever the console encoding.
        sys.stdout.buffer.write(result.encode('utf-8'))
        sys.stdout.buffer.write(b'\n')

    return 0
476
+
477
+
478
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())