@yeyuan98/opencode-bioresearcher-plugin 1.6.4 → 1.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/shared/skill-sync.js +6 -3
- package/dist/skills/pzfx-io/SKILL.md +111 -0
- package/dist/skills/pzfx-io/guides/convert.md +102 -0
- package/dist/skills/pzfx-io/guides/read-parse.md +116 -0
- package/dist/skills/pzfx-io/guides/schema-reference.md +167 -0
- package/dist/skills/pzfx-io/guides/write-edit.md +150 -0
- package/dist/skills/pzfx-io/pyproject.toml +6 -0
- package/dist/skills/pzfx-io/pzfx.py +1156 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,1156 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""GraphPad Prism PZFX file reader, writer, and converter.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
uv run python pzfx.py inspect <file>
|
|
6
|
+
uv run python pzfx.py read <file> [--table INDEX] [--format json|csv|tsv] ...
|
|
7
|
+
uv run python pzfx.py write <output_file> --data-file <path> --data-format <fmt> ...
|
|
8
|
+
uv run python pzfx.py edit <file> --column-title <name> --subcolumn INDEX --values '<json>' ...
|
|
9
|
+
uv run python pzfx.py add-column <file> --title "Col" --subcolumns N --values '<json>' ...
|
|
10
|
+
uv run python pzfx.py convert <file> [--format csv|tsv|json] ...
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import csv
|
|
15
|
+
import io
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
import sys
|
|
20
|
+
import xml.etree.ElementTree as ET
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Constants
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
# Table types supported by the Prism .pzfx schema.
VALID_TABLE_TYPES = {"OneWay", "TwoWay", "XY", "Survival", "Contingency", "PartsOfWhole"}
# Allowed values for a table's XFormat attribute.
VALID_X_FORMATS = {"none", "numbers", "date", "error"}
# Allowed values for a table's YFormat attribute ("replicates" plus error-bar encodings).
VALID_Y_FORMATS = {"replicates", "SD", "SE", "CV", "SDN", "SEN", "CVN", "low-high", "upper-lower-limits"}
# Default / expected XFormat per table type (XY and Survival carry numeric X).
TABLE_TYPE_X_FORMAT = {
    "OneWay": "none", "TwoWay": "none", "Contingency": "none",
    "PartsOfWhole": "none", "XY": "numbers", "Survival": "numbers",
}
# Number of subcolumns each error-style YFormat requires per Y column.
YFORMAT_SUBCOL_COUNT = {
    "SD": 2, "SE": 2, "CV": 2,
    "SDN": 3, "SEN": 3, "CVN": 3, "low-high": 3, "upper-lower-limits": 3,
}
# Header-name suffixes used when flattening subcolumns into tabular headers.
YFORMAT_SUFFIXES: dict[str, list[str]] = {
    "replicates": [],
    "SD": ["_MEAN", "_SD"],
    "SE": ["_MEAN", "_SE"],
    "CV": ["_MEAN", "_CV"],
    "SDN": ["_MEAN", "_SD", "_N"],
    "SEN": ["_MEAN", "_SEM", "_N"],
    "CVN": ["_MEAN", "_CV", "_N"],
    "low-high": ["_MEAN", "_PLUSERROR", "_MINUSERROR"],
    "upper-lower-limits": ["_MEAN", "_UPPERLIMIT", "_LOWERLIMIT"],
}
# Compressed-binary <Template> section used when creating files from scratch.
# NOTE(review): presumably the smallest payload Prism accepts — confirm against Prism.
MINIMAL_TEMPLATE = (
    "<!--Analyses, graphs and layouts as compressed binary."
    " Don't edit this part of the file.-->\n\n"
    '<Template dt:dt="bin.base64" xmlns:dt="urn:schemas-microsoft-com:datatypes">'
    "eNqL ricCAAAPAw==</Template>"
)
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Data classes
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class Cell:
    """A single table cell: the raw string value and Prism's Excluded flag."""
    value: Optional[str] = None  # raw text of the <d> element; None means empty cell
    excluded: bool = False       # True when the <d> element carries Excluded="1"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class ColumnData:
    """One column (X, X-advanced, or Y) composed of one or more subcolumns."""
    title: str
    decimals: int = 0  # Decimals attribute (display precision)
    width: int = 81    # Width attribute (display width)
    subcolumns: list[list[Cell]] = field(default_factory=list)  # cells, one list per subcolumn
    is_x_advanced: bool = False  # True when parsed from <XAdvancedColumn>
    x_advanced_version: Optional[str] = None  # Version attribute, X-advanced columns only
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class PzfxTable:
    """One data table (<Table> or <HugeTable>) inside a .pzfx file."""
    table_id: str = "Table0"
    table_type: str = "OneWay"        # one of VALID_TABLE_TYPES
    x_format: str = "none"            # XFormat attribute
    y_format: Optional[str] = None    # YFormat attribute; None when absent
    replicates: Optional[int] = None  # Replicates attribute; None when absent
    title: str = "Data 1"
    is_huge: bool = False             # True when parsed from <HugeTable>
    floating_notes: list[str] = field(default_factory=list)  # raw <FloatingNote> XML strings
    row_titles: Optional[list[str]] = None  # None = no RowTitlesColumn present
    x_column: Optional[ColumnData] = None
    x_advanced_column: Optional[ColumnData] = None
    y_columns: list[ColumnData] = field(default_factory=list)

    @property
    def row_count(self) -> int:
        """Number of data rows, read from the first available source:
        first Y subcolumn, then first X subcolumn, then the row titles."""
        if self.y_columns and self.y_columns[0].subcolumns:
            return len(self.y_columns[0].subcolumns[0])
        if self.x_column and self.x_column.subcolumns:
            return len(self.x_column.subcolumns[0])
        if self.row_titles is not None:
            return len(self.row_titles)
        return 0
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class PzfxFile:
    """In-memory representation of a .pzfx file: parsed data tables plus
    the opaque XML sections that are carried through unmodified."""
    tables: list[PzfxTable] = field(default_factory=list)
    # Raw text from the template comment marker to end-of-file (the
    # compressed-binary <Template> section); re-emitted verbatim on write.
    template_raw: str = ""
    created_xml: str = ""         # serialized <Created> element, if present
    info_xml: str = ""            # serialized <Info> element, if present
    table_sequence_xml: str = ""  # serialized <TableSequence> element, if present
    info_data: Optional[dict] = None  # parsed title/constants from <Info>
    root_attrib: dict[str, str] = field(default_factory=dict)  # root element attributes
    _raw_pre_template: str = ""   # XML text preceding the template (debug/reference)

    # ------------------------------------------------------------------
    # Factory: parse existing file
    # ------------------------------------------------------------------

    @staticmethod
    def from_file(path: str) -> "PzfxFile":
        """Parse *path* into a PzfxFile.

        The binary <Template> tail is split off as raw text; the remaining
        XML is re-closed (the root close tag lives after the template in
        the on-disk layout this reader assumes) and parsed with ElementTree.
        Calls _fail (which exits) on a missing file or a parse error.
        """
        if not os.path.exists(path):
            _fail("FILE_NOT_FOUND", f"File not found: {path}", "Check the file path exists")

        with open(path, "r", encoding="utf-8") as f:
            text = f.read()

        template_raw = ""
        template_start = -1

        # Prefer the well-known comment that precedes the template section.
        comment_markers = ["<!--Analyses", "<!--Analyses,"]
        for marker in comment_markers:
            idx = text.find(marker)
            if idx >= 0:
                template_start = idx
                break

        if template_start < 0:
            # Fall back to the <Template> tag itself; include a preceding
            # comment on the same line-run when one exists.
            t_idx = text.find("<Template")
            if t_idx >= 0:
                nl = text.rfind("\n", 0, t_idx)
                if nl >= 0 and text[nl:t_idx].strip().startswith("<!--"):
                    template_start = nl
                else:
                    template_start = t_idx

        if template_start >= 0:
            template_raw = text[template_start:]
            xml_text = text[:template_start].rstrip()
            # Re-close the root element so the pre-template text parses.
            root_tag = _detect_root_tag(xml_text)
            if root_tag:
                xml_text += f"\n</{root_tag}>"
        else:
            xml_text = text

        try:
            root = ET.fromstring(xml_text)
        except ET.ParseError as e:
            _fail("PARSE_ERROR", f"XML parse error: {e}", "The file may be corrupted")

        _strip_namespaces(root)

        pf = PzfxFile()
        pf.template_raw = template_raw
        pf._raw_pre_template = xml_text
        pf.root_attrib = dict(root.attrib)

        created_el = root.find("Created")
        if created_el is not None:
            pf.created_xml = _element_to_string(created_el)

        info_el = root.find("Info")
        if info_el is not None:
            pf.info_xml = _element_to_string(info_el)
            pf.info_data = _parse_info(info_el)

        ts_el = root.find("TableSequence")
        if ts_el is not None:
            pf.table_sequence_xml = _element_to_string(ts_el)

        # iter() walks the whole tree, so tables are found at any depth.
        for tbl_el in root.iter():
            tag = tbl_el.tag
            if tag in ("Table", "HugeTable"):
                tbl = _parse_table(tbl_el, tag == "HugeTable")
                pf.tables.append(tbl)

        return pf

    # ------------------------------------------------------------------
    # Factory: create from scratch
    # ------------------------------------------------------------------

    @staticmethod
    def from_scratch(
        table_type: str,
        x_format: Optional[str] = None,
        y_format: Optional[str] = None,
        replicates: int = 1,
        title: str = "Data 1",
    ) -> "PzfxFile":
        """Build a new single-table PzfxFile with minimal boilerplate.

        x_format defaults to the table type's conventional value; y_format
        defaults to "replicates". Replicates is only recorded for
        replicate-style tables.
        """
        if x_format is None:
            x_format = TABLE_TYPE_X_FORMAT.get(table_type, "none")
        if y_format is None:
            y_format = "replicates"

        tbl = PzfxTable(
            table_id="Table0",
            table_type=table_type,
            x_format=x_format,
            y_format=y_format,
            replicates=replicates if y_format == "replicates" else None,
            title=title,
        )

        pf = PzfxFile()
        pf.tables = [tbl]
        pf.template_raw = MINIMAL_TEMPLATE
        pf.created_xml = '<Created>\n<OriginalVersion CreatedByProgram="pzfx-io" CreatedByVersion="0.1.0" Login="" DateTime=""></OriginalVersion>\n</Created>'
        pf.table_sequence_xml = '<TableSequence>\n\n<Ref ID="Table0" Selected="1"></Ref>\n</TableSequence>'
        return pf

    # ------------------------------------------------------------------
    # Write
    # ------------------------------------------------------------------

    def to_file(self, path: str) -> dict[str, Any]:
        """Serialize this PzfxFile to *path*, creating parent directories.

        When a template section exists, the root close tag is stripped from
        the serialized XML and the template is appended after it — the same
        layout from_file expects on read. Calls _fail (exits) on OS errors.
        Returns {"success": True, "file": path}.
        """
        xml_bytes = self._serialize()
        try:
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(path, "wb") as f:
                f.write(b'<?xml version="1.0" encoding="UTF-8"?>')
                if self.template_raw:
                    # Drop the close tag so the template sits inside/after
                    # the still-open root, matching the on-disk layout.
                    close_tag = b"</GraphPadPrismFile>"
                    if xml_bytes.endswith(close_tag):
                        xml_bytes = xml_bytes[:-len(close_tag)]
                    f.write(xml_bytes)
                    f.write(b"\n\n")
                    f.write(self.template_raw.encode("utf-8"))
                else:
                    f.write(xml_bytes)
        except OSError as e:
            _fail("WRITE_ERROR", f"Failed to write file: {e}", "Check output path and permissions")
        return {"success": True, "file": path}

    def _serialize(self) -> bytes:
        """Build the XML tree for this file and return it as UTF-8 bytes.

        Missing <Info>/<TableSequence> sections are synthesized with the
        defaults Prism files normally carry.
        """
        root = ET.Element("GraphPadPrismFile", self.root_attrib if self.root_attrib else {"PrismXMLVersion": "5.00"})

        if self.created_xml:
            _append_xml_string(root, self.created_xml)

        info_seq = ET.SubElement(root, "InfoSequence")
        ET.SubElement(info_seq, "Ref", {"ID": "Info0", "Selected": "1"})

        if self.info_xml:
            _append_xml_string(root, self.info_xml)
        else:
            # Synthesize a default Info block with empty constants.
            info = ET.SubElement(root, "Info", {"ID": "Info0"})
            ET.SubElement(info, "Title").text = "Project info 1"
            ET.SubElement(info, "Notes")
            for name in ("Experiment Date", "Experiment ID", "Notebook ID", "Project", "Experimenter", "Protocol"):
                c = ET.SubElement(info, "Constant")
                ET.SubElement(c, "Name").text = name
                ET.SubElement(c, "Value")

        if self.table_sequence_xml:
            _append_xml_string(root, self.table_sequence_xml)
        else:
            ts = ET.SubElement(root, "TableSequence")
            for i, tbl in enumerate(self.tables):  # NOTE: i is unused
                ET.SubElement(ts, "Ref", {"ID": tbl.table_id, "Selected": "1"})

        for tbl in self.tables:
            tbl_el = _build_table_element(tbl)
            root.append(tbl_el)

        # Indent with zero-width whitespace: newlines between elements only.
        ET.indent(root, space="")
        return ET.tostring(root, encoding="unicode", xml_declaration=False).encode("utf-8")
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# ---------------------------------------------------------------------------
|
|
282
|
+
# Parsing helpers
|
|
283
|
+
# ---------------------------------------------------------------------------
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _extract_title(el: ET.Element) -> str:
|
|
287
|
+
title_el = el.find("Title")
|
|
288
|
+
if title_el is None:
|
|
289
|
+
return ""
|
|
290
|
+
if title_el.text and title_el.text.strip():
|
|
291
|
+
return title_el.text.strip()
|
|
292
|
+
ta = title_el.find("TextAlign")
|
|
293
|
+
if ta is not None and ta.text:
|
|
294
|
+
return ta.text.strip()
|
|
295
|
+
return ""
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _parse_subcolumn(sub_el: ET.Element) -> list[Cell]:
    """Convert every <d> descendant of a <Subcolumn> element into a Cell,
    carrying the raw text and the Excluded="1" flag."""
    return [
        Cell(value=d.text, excluded=(d.get("Excluded") == "1"))
        for d in sub_el.iter("d")
    ]
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _parse_column_data(el: ET.Element, is_x_advanced: bool = False) -> ColumnData:
    """Parse an XColumn/YColumn/XAdvancedColumn element into ColumnData.

    Missing Decimals/Width attributes fall back to 0 and 81; the Version
    attribute is only recorded for X-advanced columns.
    """
    col = ColumnData(
        title=_extract_title(el),
        decimals=int(el.get("Decimals", "0")),
        width=int(el.get("Width", "81")),
        subcolumns=[_parse_subcolumn(sc) for sc in el.findall("Subcolumn")],
        is_x_advanced=is_x_advanced,
    )
    if is_x_advanced:
        col.x_advanced_version = el.get("Version")
    return col
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _parse_table(tbl_el: ET.Element, is_huge: bool) -> PzfxTable:
    """Build a PzfxTable from a <Table>/<HugeTable> element.

    Attributes fall back to the dataclass defaults when absent; floating
    notes are preserved as raw XML strings.
    """
    rep_attr = tbl_el.get("Replicates")
    tbl = PzfxTable(
        table_id=tbl_el.get("ID", "Table0"),
        table_type=tbl_el.get("TableType", "OneWay"),
        x_format=tbl_el.get("XFormat", "none"),
        y_format=tbl_el.get("YFormat"),
        replicates=int(rep_attr) if rep_attr else None,
        title=_extract_title(tbl_el),
        is_huge=is_huge,
    )

    # Floating notes: direct children only, kept verbatim.
    for child in tbl_el:
        if child.tag == "FloatingNote":
            tbl.floating_notes.append(_element_to_string(child))

    rt_el = tbl_el.find("RowTitlesColumn")
    if rt_el is not None:
        sub = rt_el.find("Subcolumn")
        if sub is not None:
            tbl.row_titles = [_get_cell_text(d) for d in sub.iter("d")]
        else:
            # Present-but-empty RowTitlesColumn still marks titles as existing.
            tbl.row_titles = []

    xc_el = tbl_el.find("XColumn")
    if xc_el is not None:
        tbl.x_column = _parse_column_data(xc_el)

    xadv_el = tbl_el.find("XAdvancedColumn")
    if xadv_el is not None:
        tbl.x_advanced_column = _parse_column_data(xadv_el, is_x_advanced=True)

    tbl.y_columns.extend(_parse_column_data(yc) for yc in tbl_el.iter("YColumn"))

    return tbl
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _get_cell_text(d_el: ET.Element) -> str:
|
|
364
|
+
if d_el.text:
|
|
365
|
+
return d_el.text.strip()
|
|
366
|
+
ta = d_el.find("TextAlign")
|
|
367
|
+
if ta is not None and ta.text:
|
|
368
|
+
return ta.text.strip()
|
|
369
|
+
return ""
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _parse_info(info_el: ET.Element) -> dict[str, Any]:
|
|
373
|
+
result: dict[str, Any] = {}
|
|
374
|
+
title_el = info_el.find("Title")
|
|
375
|
+
if title_el is not None and title_el.text:
|
|
376
|
+
result["title"] = title_el.text
|
|
377
|
+
constants: dict[str, str] = {}
|
|
378
|
+
for c_el in info_el.iter("Constant"):
|
|
379
|
+
name_el = c_el.find("Name")
|
|
380
|
+
val_el = c_el.find("Value")
|
|
381
|
+
if name_el is not None:
|
|
382
|
+
constants[name_el.text or ""] = val_el.text if val_el is not None else ""
|
|
383
|
+
if constants:
|
|
384
|
+
result["constants"] = constants
|
|
385
|
+
return result
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _strip_namespaces(root: ET.Element) -> None:
|
|
389
|
+
for el in root.iter():
|
|
390
|
+
if "}" in el.tag:
|
|
391
|
+
el.tag = el.tag.split("}", 1)[1]
|
|
392
|
+
attribs = {}
|
|
393
|
+
for key, val in el.attrib.items():
|
|
394
|
+
if "}" in key:
|
|
395
|
+
key = key.split("}", 1)[1]
|
|
396
|
+
attribs[key] = val
|
|
397
|
+
el.attrib = attribs
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _detect_root_tag(xml_text: str) -> str:
|
|
401
|
+
m = re.search(r"<(\w+[\w:-]*)(?:\s[^>]*)?>", xml_text.lstrip())
|
|
402
|
+
if m:
|
|
403
|
+
return m.group(1)
|
|
404
|
+
return "GraphPadPrismFile"
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _element_to_string(el: ET.Element) -> str:
|
|
408
|
+
ET.indent(el, space="")
|
|
409
|
+
return ET.tostring(el, encoding="unicode", xml_declaration=False)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _append_xml_string(parent: ET.Element, xml_str: str) -> None:
|
|
413
|
+
try:
|
|
414
|
+
el = ET.fromstring(xml_str)
|
|
415
|
+
parent.append(el)
|
|
416
|
+
except ET.ParseError:
|
|
417
|
+
pass
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# ---------------------------------------------------------------------------
|
|
421
|
+
# Serialization helpers
|
|
422
|
+
# ---------------------------------------------------------------------------
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _build_title_element(title: str) -> ET.Element:
|
|
426
|
+
ta_match = re.match(r"^\s*$", title)
|
|
427
|
+
if ta_match:
|
|
428
|
+
el = ET.Element("Title")
|
|
429
|
+
el.text = title
|
|
430
|
+
return el
|
|
431
|
+
return _make_title_el(title)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _make_title_el(title: str) -> ET.Element:
|
|
435
|
+
el = ET.Element("Title")
|
|
436
|
+
el.text = title
|
|
437
|
+
return el
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def _build_subcolumn_element(cells: list[Cell]) -> ET.Element:
    """Serialize a list of cells as a <Subcolumn> of <d> children.

    Empty cells get no text; excluded cells carry Excluded="1".
    """
    sub = ET.Element("Subcolumn")
    for cell in cells:
        d = ET.SubElement(sub, "d")
        if cell.value is not None:
            d.text = cell.value
        if cell.excluded:
            d.set("Excluded", "1")
    return sub
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _build_column_element(col: ColumnData, tag: str = "YColumn") -> ET.Element:
    """Serialize a column as *tag* (default YColumn) with Width/Decimals/
    Subcolumns attributes, a <Title> child, then its subcolumns."""
    node = ET.Element(tag)
    node.set("Width", str(col.width))
    node.set("Decimals", str(col.decimals))
    node.set("Subcolumns", str(len(col.subcolumns)))
    node.append(_make_title_el(col.title))
    node.extend(_build_subcolumn_element(sc) for sc in col.subcolumns)
    return node
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _build_x_column_element(col: ColumnData) -> ET.Element:
    """Serialize an X column as <XColumn>.

    Attribute order (Width, Subcolumns, Decimals) is kept exactly as the
    original serializer emitted it.
    """
    node = ET.Element("XColumn")
    node.set("Width", str(col.width))
    node.set("Subcolumns", str(len(col.subcolumns)))
    node.set("Decimals", str(col.decimals))
    node.append(_make_title_el(col.title))
    node.extend(_build_subcolumn_element(sc) for sc in col.subcolumns)
    return node
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _build_x_advanced_element(col: ColumnData) -> ET.Element:
    """Serialize an advanced X column as <XAdvancedColumn>, restoring the
    Version attribute when one was recorded on parse."""
    node = ET.Element("XAdvancedColumn")
    if col.x_advanced_version:
        node.set("Version", col.x_advanced_version)
    node.set("Width", str(col.width))
    node.set("Decimals", str(col.decimals))
    node.set("Subcolumns", str(len(col.subcolumns)))
    node.append(_make_title_el(col.title))
    node.extend(_build_subcolumn_element(sc) for sc in col.subcolumns)
    return node
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _build_table_element(tbl: PzfxTable) -> ET.Element:
    """Serialize one table to a <Table> (or <HugeTable>) element:
    attributes, title, floating notes, row titles, X/X-advanced columns,
    then Y columns, in that order."""
    node = ET.Element("HugeTable" if tbl.is_huge else "Table")
    node.set("ID", tbl.table_id)
    node.set("XFormat", tbl.x_format)
    node.set("TableType", tbl.table_type)
    node.set("EVFormat", "AsteriskAfterNumber")
    if tbl.y_format:
        node.set("YFormat", tbl.y_format)
    if tbl.replicates is not None:
        node.set("Replicates", str(tbl.replicates))

    node.append(_make_title_el(tbl.title))

    # Re-attach floating notes preserved as raw XML.
    for note_xml in tbl.floating_notes:
        _append_xml_string(node, note_xml)

    if tbl.row_titles is not None:
        row_col = ET.SubElement(node, "RowTitlesColumn", {"Width": "81"})
        sub = ET.SubElement(row_col, "Subcolumn")
        for text in tbl.row_titles:
            ET.SubElement(sub, "d").text = text

    if tbl.x_column is not None:
        node.append(_build_x_column_element(tbl.x_column))
    if tbl.x_advanced_column is not None:
        node.append(_build_x_advanced_element(tbl.x_advanced_column))
    node.extend(_build_column_element(col) for col in tbl.y_columns)

    return node
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
# ---------------------------------------------------------------------------
|
|
524
|
+
# Value normalization
|
|
525
|
+
# ---------------------------------------------------------------------------
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _normalize_value(val: Optional[str]) -> Optional[Any]:
|
|
529
|
+
if val is None or val.strip() == "":
|
|
530
|
+
return None
|
|
531
|
+
val = val.strip()
|
|
532
|
+
val_comma = val.replace(",", ".")
|
|
533
|
+
try:
|
|
534
|
+
if "." in val_comma:
|
|
535
|
+
return float(val_comma)
|
|
536
|
+
else:
|
|
537
|
+
iv = int(val_comma)
|
|
538
|
+
try:
|
|
539
|
+
fv = float(val_comma)
|
|
540
|
+
if fv == iv and "." in val:
|
|
541
|
+
return val
|
|
542
|
+
return iv
|
|
543
|
+
except ValueError:
|
|
544
|
+
return val
|
|
545
|
+
except ValueError:
|
|
546
|
+
return val
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _cell_to_output(cell: Cell, mode: str) -> Optional[Any]:
    """Render a cell's value under an exclusion mode.

    mode == "exclude": excluded cells become None.
    mode == "star":    excluded cells get a "*" suffix ("*" alone if empty).
    any other mode:    the exclusion flag is ignored.
    """
    if cell.excluded:
        if mode == "exclude":
            return None
        if mode == "star":
            normalized = _normalize_value(cell.value)
            return f"{normalized}*" if normalized is not None else "*"
    return _normalize_value(cell.value)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
# ---------------------------------------------------------------------------
|
|
561
|
+
# Subcolumn naming
|
|
562
|
+
# ---------------------------------------------------------------------------
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def _yformat_subcol_names(yformat: Optional[str], n_subcols: int, col_title: str) -> list[str]:
    """Header names for a Y column's subcolumns.

    Replicate-style (or unknown) formats number subcolumns Title_1..Title_N
    (just Title when there is at most one); error-bar formats append the
    per-format suffixes (e.g. _MEAN/_SD), truncated to n_subcols.
    """
    suffixes = [] if yformat is None else YFORMAT_SUFFIXES.get(yformat, [])
    if suffixes:
        return [f"{col_title}{suffix}" for suffix in suffixes[:n_subcols]]
    if n_subcols <= 1:
        return [col_title]
    return [f"{col_title}_{idx}" for idx in range(1, n_subcols + 1)]
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
# ---------------------------------------------------------------------------
|
|
577
|
+
# Read / output
|
|
578
|
+
# ---------------------------------------------------------------------------
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def _table_to_json(tbl: PzfxTable, exclude_mode: str, include_row_titles: bool, include_x: bool) -> dict[str, Any]:
    """Flatten a table into a JSON-serializable dict.

    X cells are always rendered in "keep" mode; Y cells honor
    *exclude_mode*. Row titles are attached when requested and present.

    NOTE(review): X subcolumn names are numbered from 0 while Y subcolumn
    names (via _yformat_subcol_names) are numbered from 1 — preserved
    as-is for output compatibility.
    """
    out: dict[str, Any] = {
        "table_id": tbl.table_id,
        "table_type": tbl.table_type,
        "format": "json",
        "exclude_mode": exclude_mode,
    }

    cols: list[dict[str, Any]] = []

    if include_x and tbl.x_column is not None:
        xcol = tbl.x_column
        n_x = len(xcol.subcolumns)
        if n_x <= 1:
            x_names = [xcol.title] * n_x
        else:
            x_names = [f"{xcol.title}_{i}" for i in range(n_x)]
        x_data = [[_cell_to_output(c, "keep") for c in sub] for sub in xcol.subcolumns]
        cols.append({"title": xcol.title, "subcolumn_names": x_names, "data": x_data})

    for ycol in tbl.y_columns:
        y_names = _yformat_subcol_names(tbl.y_format, len(ycol.subcolumns), ycol.title)
        y_data = [[_cell_to_output(c, exclude_mode) for c in sub] for sub in ycol.subcolumns]
        cols.append({"title": ycol.title, "subcolumn_names": y_names, "data": y_data})

    out["columns"] = cols

    if include_row_titles and tbl.row_titles is not None:
        out["row_titles"] = tbl.row_titles

    return out
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _table_to_tabular(tbl: PzfxTable, exclude_mode: str, include_row_titles: bool, include_x: bool, delimiter: str = "\t") -> str:
    """Render a table as delimited text (TSV by default) with one header row.

    Column order: ROWTITLE (optional), X subcolumns (optional, always
    rendered in "keep" mode), then Y subcolumns (honoring *exclude_mode*).
    Shorter columns are padded with "" so every row has the same width.
    NOTE(review): X subcolumn names are numbered from 0 while Y names from
    _yformat_subcol_names are numbered from 1.
    """
    headers: list[str] = []
    all_data: list[list[Optional[Any]]] = []

    if include_row_titles and tbl.row_titles is not None:
        headers.append("ROWTITLE")
        all_data.append(list(tbl.row_titles))

    if include_x and tbl.x_column is not None:
        xcol = tbl.x_column
        x_names = [xcol.title] * len(xcol.subcolumns) if len(xcol.subcolumns) <= 1 else [f"{xcol.title}_{i}" for i in range(len(xcol.subcolumns))]
        for i, name in enumerate(x_names):
            headers.append(name)
            # Guard keeps header/data lists aligned if names outnumber subcolumns.
            if i < len(xcol.subcolumns):
                all_data.append([_cell_to_output(c, "keep") for c in xcol.subcolumns[i]])

    for ycol in tbl.y_columns:
        n_sc = len(ycol.subcolumns)
        names = _yformat_subcol_names(tbl.y_format, n_sc, ycol.title)
        for i, name in enumerate(names):
            headers.append(name)
            if i < len(ycol.subcolumns):
                all_data.append([_cell_to_output(c, exclude_mode) for c in ycol.subcolumns[i]])

    # Longest column determines the number of output rows.
    max_rows = max((len(col) for col in all_data), default=0)

    output = io.StringIO()
    writer = csv.writer(output, delimiter=delimiter, lineterminator="\n")
    writer.writerow(headers)
    for row_idx in range(max_rows):
        row = []
        for col in all_data:
            if row_idx < len(col):
                val = col[row_idx]
                # None (empty/excluded cells) serializes as an empty field.
                row.append("" if val is None else str(val))
            else:
                row.append("")
        writer.writerow(row)

    return output.getvalue()
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
# ---------------------------------------------------------------------------
|
|
658
|
+
# Data input parsing (for write command)
|
|
659
|
+
# ---------------------------------------------------------------------------
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def _parse_input_data(data_file: str, data_format: str) -> dict[str, Any]:
|
|
663
|
+
if not os.path.exists(data_file):
|
|
664
|
+
_fail("FILE_NOT_FOUND", f"Data file not found: {data_file}", "Check the data file path")
|
|
665
|
+
|
|
666
|
+
with open(data_file, "r", encoding="utf-8") as f:
|
|
667
|
+
raw = f.read()
|
|
668
|
+
|
|
669
|
+
if data_format == "json":
|
|
670
|
+
try:
|
|
671
|
+
return json.loads(raw)
|
|
672
|
+
except json.JSONDecodeError as e:
|
|
673
|
+
_fail("PARSE_ERROR", f"JSON parse error: {e}", "Check JSON syntax")
|
|
674
|
+
|
|
675
|
+
sep = "\t" if data_format == "tsv" else ","
|
|
676
|
+
reader = csv.reader(io.StringIO(raw), delimiter=sep)
|
|
677
|
+
rows = list(reader)
|
|
678
|
+
if not rows:
|
|
679
|
+
return {"headers": [], "rows": []}
|
|
680
|
+
|
|
681
|
+
headers = rows[0]
|
|
682
|
+
data_rows = rows[1:]
|
|
683
|
+
columns: dict[str, list[str]] = {}
|
|
684
|
+
for h in headers:
|
|
685
|
+
columns[h] = []
|
|
686
|
+
for row in data_rows:
|
|
687
|
+
for i, h in enumerate(headers):
|
|
688
|
+
if i < len(row):
|
|
689
|
+
columns[h].append(row[i])
|
|
690
|
+
else:
|
|
691
|
+
columns[h].append("")
|
|
692
|
+
|
|
693
|
+
return {"headers": headers, "columns": columns, "rows": data_rows}
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _data_to_table(data: dict[str, Any], table_type: str, x_format: str,
                   y_format: str, replicates: int, title: str) -> PzfxTable:
    """Convert parsed input data into a PzfxTable.

    Accepts either the columnar form {"headers", "columns"} produced by
    _parse_input_data for CSV/TSV, or a row-oriented {"headers", "rows"}
    form (e.g. JSON input without a columns mapping).

    Column roles: a header named ROWTITLE/ROW_TITLE/ROWTITLES
    (case-insensitive) supplies row titles; when x_format != "none" the
    first remaining column becomes the X column; everything else becomes a
    Y column. Replicate tables split each Y column's flat value list
    evenly into `replicates` subcolumns; error formats use
    YFORMAT_SUBCOL_COUNT.

    Fix: the row-oriented branch previously copied a value only when the
    header string happened to appear among the row's *values*
    (`h in row`), silently blanking most cells; it now indexes by column
    position with a bounds check, which also avoids repeated O(n)
    `headers.index(h)` calls and handles duplicate headers positionally.
    """
    tbl = PzfxTable(
        table_id="Table0",
        table_type=table_type,
        x_format=x_format,
        y_format=y_format,
        replicates=replicates if y_format == "replicates" else None,
        title=title,
    )

    if "headers" in data and "columns" in data:
        headers = data["headers"]
        cols = data["columns"]

        # Locate an optional row-title column by conventional names.
        row_title_key = None
        for h in headers:
            if h.upper().strip() in ("ROWTITLE", "ROW_TITLE", "ROWTITLES"):
                row_title_key = h
                break

        x_keys: list[str] = []
        y_keys: list[str] = []

        if x_format != "none":
            # First non-row-title header is X, the rest are Y.
            first_non_rt = None
            for h in headers:
                if h == row_title_key:
                    continue
                if first_non_rt is None:
                    first_non_rt = h
                    x_keys.append(h)
                else:
                    y_keys.append(h)
        else:
            for h in headers:
                if h == row_title_key:
                    continue
                y_keys.append(h)

        if row_title_key and row_title_key in cols:
            tbl.row_titles = cols[row_title_key]

        if x_keys:
            x_title = x_keys[0]
            x_vals = cols.get(x_title, [])
            tbl.x_column = ColumnData(
                title=x_title, decimals=8,
                subcolumns=[[Cell(v) for v in x_vals]],
            )

        for yk in y_keys:
            y_vals = cols.get(yk, [])
            subcols_count = replicates if y_format == "replicates" else YFORMAT_SUBCOL_COUNT.get(y_format, 1)
            # Split the flat value list evenly across the subcolumns;
            # trailing remainder values (len not divisible) are dropped.
            per_sc = len(y_vals) // subcols_count if subcols_count > 0 else len(y_vals)
            scs = []
            for si in range(subcols_count):
                chunk = y_vals[si * per_sc:(si + 1) * per_sc]
                scs.append([Cell(v) for v in chunk])
            if scs:
                tbl.y_columns.append(ColumnData(
                    title=yk, decimals=2,
                    width=162, subcolumns=scs,
                ))

    elif "headers" in data and "rows" in data:
        headers = data["headers"]
        rows = data["rows"]
        for idx, h in enumerate(headers):
            # Positional lookup; short rows are padded with "".
            vals = [row[idx] if idx < len(row) else "" for row in rows]
            tbl.y_columns.append(ColumnData(
                title=h, decimals=2,
                subcolumns=[[Cell(v) for v in vals]],
            ))

    return tbl
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
# ---------------------------------------------------------------------------
|
|
778
|
+
# Validation
|
|
779
|
+
# ---------------------------------------------------------------------------
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def _validate_table(tbl: PzfxTable) -> list[str]:
    """Check a table against PZFX schema constraints.

    Returns a list of human-readable error strings; an empty list means the
    table is structurally valid.
    """
    problems: list[str] = []
    add = problems.append

    # Enumerated-value checks against the module-level whitelists.
    if tbl.table_type not in VALID_TABLE_TYPES:
        add(f"Invalid TableType: {tbl.table_type}")
    if tbl.x_format not in VALID_X_FORMATS:
        add(f"Invalid XFormat: {tbl.x_format}")
    if tbl.y_format is not None and tbl.y_format not in VALID_Y_FORMATS:
        add(f"Invalid YFormat: {tbl.y_format}")

    # Cross-check the x-format against what the table type demands.
    expected_xf = TABLE_TYPE_X_FORMAT.get(tbl.table_type)
    if expected_xf and tbl.x_format != expected_xf:
        if tbl.table_type == "XY":
            # XY tables accept a small set of x-formats beyond the canonical one.
            if tbl.x_format not in ("numbers", "date", "error"):
                add(f"TableType {tbl.table_type} requires XFormat in (numbers, date, error), got {tbl.x_format}")
        else:
            add(f"TableType {tbl.table_type} requires XFormat={expected_xf}, got {tbl.x_format}")

    # Fixed y-formats (everything except "replicates") pin the subcolumn count.
    if tbl.y_format and tbl.y_format != "replicates":
        required = YFORMAT_SUBCOL_COUNT.get(tbl.y_format)
        if required:
            for yc in tbl.y_columns:
                if len(yc.subcolumns) != required:
                    add(f"YFormat {tbl.y_format} requires {required} subcolumns, column '{yc.title}' has {len(yc.subcolumns)}")

    # Table-type-specific structural rules.
    if tbl.table_type == "PartsOfWhole" and len(tbl.y_columns) != 1:
        add(f"PartsOfWhole requires exactly 1 YColumn, got {len(tbl.y_columns)}")
    if tbl.table_type == "Contingency" and tbl.row_titles is None:
        add("Contingency requires RowTitlesColumn")

    # Duplicate Y-column titles would make column lookups ambiguous.
    seen_titles = [yc.title for yc in tbl.y_columns]
    if len(seen_titles) != len(set(seen_titles)):
        add("YColumn titles must be unique")

    return problems
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
# ---------------------------------------------------------------------------
|
|
819
|
+
# CLI Commands
|
|
820
|
+
# ---------------------------------------------------------------------------
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def cmd_inspect(args: argparse.Namespace) -> None:
    """Print a JSON summary of a .pzfx file: its tables, formats, and columns."""
    pf = PzfxFile.from_file(args.file)

    def _col_entry(col, kind: str) -> dict[str, Any]:
        # One summary record per column: title, role, and subcolumn count.
        return {"title": col.title, "type": kind, "subcolumns": len(col.subcolumns)}

    summaries = []
    for table in pf.tables:
        columns = []
        if table.x_column:
            columns.append(_col_entry(table.x_column, "XColumn"))
        if table.x_advanced_column:
            columns.append(_col_entry(table.x_advanced_column, "XAdvancedColumn"))
        columns.extend(_col_entry(yc, "YColumn") for yc in table.y_columns)
        summaries.append({
            "table_id": table.table_id,
            "table_type": table.table_type,
            "x_format": table.x_format,
            "y_format": table.y_format,
            "replicates": table.replicates,
            "title": table.title,
            "is_huge": table.is_huge,
            "row_count": table.row_count,
            "columns": columns,
        })

    report: dict[str, Any] = {"success": True, "file": os.path.abspath(args.file)}
    report["tables"] = summaries
    if pf.info_data:
        report["info"] = pf.info_data
    print(json.dumps(report, indent=2))
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def cmd_read(args: argparse.Namespace) -> None:
    """Extract one table from a .pzfx file and print it as JSON, CSV, or TSV."""
    pf = PzfxFile.from_file(args.file)
    table_idx = args.table
    if not (0 <= table_idx < len(pf.tables)):
        _fail("VALIDATION_ERROR", f"Table index {table_idx} out of range (0-{len(pf.tables) - 1})", "Use inspect to see available tables")

    table = pf.tables[table_idx]
    mode = args.exclude_mode

    if args.format in ("csv", "tsv"):
        # Tabular output goes to stdout verbatim (no trailing newline added).
        sep = "\t" if args.format == "tsv" else ","
        rendered = _table_to_tabular(table, mode, args.include_row_titles, args.include_x, sep)
        print(rendered, end="")
    else:
        payload = _table_to_json(table, mode, args.include_row_titles, args.include_x)
        payload["success"] = True
        print(json.dumps(payload, indent=2))
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def cmd_write(args: argparse.Namespace) -> None:
    """Create a new .pzfx file from an external data file (csv/tsv/json).

    Format parameters fall back to sensible defaults when omitted; the built
    table is validated before anything is written. With --dry-run, prints a
    preview of what would be written and exits without touching disk.
    """
    table_type = args.table_type
    # Derive the x-format from the table type when not given explicitly.
    x_format = args.x_format or TABLE_TYPE_X_FORMAT.get(table_type, "none")
    y_format = args.y_format or "replicates"
    replicates = args.replicates or 1
    title = args.title or "Data 1"

    parsed = _parse_input_data(args.data_file, args.data_format)
    table = _data_to_table(parsed, table_type, x_format, y_format, replicates, title)

    problems = _validate_table(table)
    if problems:
        _fail("VALIDATION_ERROR", "; ".join(problems), "Fix data to match table type constraints")

    pf = PzfxFile.from_scratch(table_type, x_format, y_format, replicates, title)
    pf.tables = [table]

    if args.dry_run:
        print(json.dumps({
            "success": True, "dry_run": True,
            "output_file": os.path.abspath(args.output_file),
            "table_type": table_type, "x_format": x_format,
            "y_format": y_format, "replicates": replicates,
            "title": title,
            "y_columns": [yc.title for yc in table.y_columns],
            "row_count": table.row_count,
        }, indent=2))
        return

    pf.to_file(args.output_file)
    print(json.dumps({"success": True, "file": os.path.abspath(args.output_file)}, indent=2))
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def cmd_edit(args: argparse.Namespace) -> None:
    """Replace one subcolumn's values in an existing .pzfx table.

    Looks up the target column by title among the Y columns, falling back to
    the X column; parses --values (and optional --exclude-values) as JSON
    arrays; writes the file back in place unless --output is given. With
    --dry-run, prints the old and new values and exits without writing.
    """
    pf = PzfxFile.from_file(args.file)
    idx = args.table
    if idx < 0 or idx >= len(pf.tables):
        _fail("VALIDATION_ERROR", f"Table index {idx} out of range", "Use inspect to see available tables")

    tbl = pf.tables[idx]
    col_title = args.column_title
    sc_idx = args.subcolumn

    # Prefer a Y column with a matching title; the X column is the fallback.
    target_col = None
    for yc in tbl.y_columns:
        if yc.title == col_title:
            target_col = yc
            break

    if target_col is None:
        if tbl.x_column and tbl.x_column.title == col_title:
            target_col = tbl.x_column
        else:
            _fail("VALIDATION_ERROR", f"Column '{col_title}' not found", "Use inspect to see column names")

    if sc_idx < 0 or sc_idx >= len(target_col.subcolumns):
        _fail("VALIDATION_ERROR", f"Subcolumn index {sc_idx} out of range (0-{len(target_col.subcolumns) - 1})", "Check subcolumn count")

    try:
        values = json.loads(args.values)
    except json.JSONDecodeError as e:
        _fail("PARSE_ERROR", f"Invalid JSON in --values: {e}", "Provide a JSON array")
    # FIX: valid JSON that is not an array (e.g. an object or a number) used
    # to slip through and crash later with a raw TypeError; reject it with
    # the tool's JSON error envelope, matching add-column's validation.
    if not isinstance(values, list):
        _fail("VALIDATION_ERROR", "--values must be a JSON array", "Example: [1,2,3]")

    exclude_values = None
    if args.exclude_values:
        try:
            exclude_values = json.loads(args.exclude_values)
        except json.JSONDecodeError as e:
            _fail("PARSE_ERROR", f"Invalid JSON in --exclude-values: {e}", "Provide a JSON array of booleans")
        # Same guard for the exclusion flags.
        if not isinstance(exclude_values, list):
            _fail("VALIDATION_ERROR", "--exclude-values must be a JSON array of booleans", "Example: [true,false]")

    if args.dry_run:
        old_vals = [c.value for c in target_col.subcolumns[sc_idx]]
        preview = {
            "success": True, "dry_run": True,
            "column": col_title, "subcolumn": sc_idx,
            "old_values": old_vals, "new_values": values,
            "file": os.path.abspath(args.file),
        }
        print(json.dumps(preview, indent=2))
        return

    # Build replacement cells; JSON null maps to an empty (blank) cell.
    new_cells = []
    for i, v in enumerate(values):
        cell = Cell()
        cell.value = "" if v is None else str(v)
        if exclude_values and i < len(exclude_values):
            cell.excluded = bool(exclude_values[i])
        new_cells.append(cell)

    target_col.subcolumns[sc_idx] = new_cells

    output_path = args.output or args.file
    pf.to_file(output_path)
    print(json.dumps({"success": True, "file": os.path.abspath(output_path)}, indent=2))
|
|
969
|
+
|
|
970
|
+
|
|
971
|
+
def cmd_add_column(args: argparse.Namespace) -> None:
    """Append a new Y column (with N subcolumns) to an existing table.

    --values is a JSON array of arrays, one inner array per subcolumn; missing
    trailing subcolumns are created empty. The table is re-validated after the
    append. With --dry-run, prints a preview and exits without writing.
    """
    pf = PzfxFile.from_file(args.file)
    idx = args.table
    if idx < 0 or idx >= len(pf.tables):
        _fail("VALIDATION_ERROR", f"Table index {idx} out of range", "Use inspect to see available tables")

    tbl = pf.tables[idx]

    try:
        values = json.loads(args.values)
    except json.JSONDecodeError as e:
        _fail("PARSE_ERROR", f"Invalid JSON in --values: {e}", "Provide a JSON array of arrays")

    if not isinstance(values, list):
        _fail("VALIDATION_ERROR", "--values must be a JSON array of arrays", "Example: [[1,2,3],[4,5,6]]")

    n_sc = args.subcolumns
    # One cell list per subcolumn; JSON null becomes a blank cell, and
    # subcolumns beyond the supplied data start out empty.
    subcols = []
    for i in range(n_sc):
        sc_data = values[i] if i < len(values) else []
        subcols.append([Cell(str(v) if v is not None else "") for v in sc_data])

    tbl.y_columns.append(ColumnData(
        title=args.title, decimals=2,
        width=162, subcolumns=subcols,
    ))

    errors = _validate_table(tbl)
    if errors:
        _fail("VALIDATION_ERROR", "; ".join(errors), "Fix column data")

    if args.dry_run:
        print(json.dumps({
            "success": True, "dry_run": True,
            "added_column": args.title,
            "subcolumns": n_sc,
            "row_count": max((len(sc) for sc in subcols), default=0),
            "total_y_columns": len(tbl.y_columns),
        }, indent=2))
        return

    output_path = args.output or args.file
    pf.to_file(output_path)
    print(json.dumps({"success": True, "file": os.path.abspath(output_path)}, indent=2))
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def cmd_convert(args: argparse.Namespace) -> None:
    """Convert one or all tables of a .pzfx file to csv, tsv, or json files.

    With --table, converts a single table; otherwise all tables. Default
    output paths derive from the input filename; when several tables are
    converted, each gets a `_Table{i}` suffix so files don't collide.
    """
    pf = PzfxFile.from_file(args.file)
    fmt = args.format or "csv"
    exclude_mode = args.exclude_mode

    idx = args.table
    if idx is not None:
        if idx < 0 or idx >= len(pf.tables):
            _fail("VALIDATION_ERROR", f"Table index {idx} out of range", "Use inspect to see available tables")
        tables = [pf.tables[idx]]
    else:
        tables = pf.tables

    base = os.path.splitext(args.file)[0]
    multi = len(tables) > 1
    # NOTE(review): an explicit --output combined with multiple tables still
    # writes every table to the same path (pre-existing behavior, kept).

    for i, tbl in enumerate(tables):
        if fmt == "json":
            result = _table_to_json(tbl, exclude_mode, True, True)
            result["success"] = True
            result["source_file"] = os.path.abspath(args.file)
            # BUG FIX: previously every table defaulted to the same path
            # f"{base}.json", so with multiple tables each iteration
            # overwrote the last and only the final table survived. Use
            # per-table default filenames, mirroring the csv/tsv branch.
            if multi:
                out_path = args.output or f"{base}_Table{i}.json"
            else:
                out_path = args.output or f"{base}.json"
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(result, f, indent=2)
        else:
            delim = "\t" if fmt == "tsv" else ","
            ext = ".tsv" if fmt == "tsv" else ".csv"
            if multi:
                out_path = args.output or f"{base}_Table{i}{ext}"
            else:
                out_path = args.output or f"{base}{ext}"
            output = _table_to_tabular(tbl, exclude_mode, True, True, delim)
            with open(out_path, "w", encoding="utf-8", newline="") as f:
                f.write(output)

    print(json.dumps({"success": True, "format": fmt, "tables_converted": len(tables)}, indent=2))
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
# ---------------------------------------------------------------------------
|
|
1061
|
+
# Error handling
|
|
1062
|
+
# ---------------------------------------------------------------------------
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
def _fail(category: str, message: str, hint: str = "") -> None:
    """Print a JSON error envelope to stdout and exit with status 1.

    The envelope is {"success": false, "error": "CATEGORY: message"} plus an
    optional "hint" key when a remediation hint is supplied. Never returns.
    """
    payload = {"success": False, "error": f"{category}: {message}"}
    if hint:
        payload["hint"] = hint
    print(json.dumps(payload, indent=2))
    sys.exit(1)
|
|
1071
|
+
|
|
1072
|
+
|
|
1073
|
+
# ---------------------------------------------------------------------------
|
|
1074
|
+
# Main
|
|
1075
|
+
# ---------------------------------------------------------------------------
|
|
1076
|
+
|
|
1077
|
+
|
|
1078
|
+
def main() -> None:
    """CLI entry point: build the argument parser and dispatch to a handler."""
    parser = argparse.ArgumentParser(description="GraphPad Prism PZFX file reader/writer")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # inspect: metadata only, no data extraction.
    sp = subparsers.add_parser("inspect", help="Show file metadata")
    sp.add_argument("file", help="Path to .pzfx file")

    # read: dump one table to stdout.
    sp = subparsers.add_parser("read", help="Extract table data")
    sp.add_argument("file", help="Path to .pzfx file")
    sp.add_argument("--table", type=int, default=0, help="Table index (default: 0)")
    sp.add_argument("--format", choices=["json", "csv", "tsv"], default="json", help="Output format")
    sp.add_argument("--exclude-mode", choices=["keep", "exclude", "star"], default="exclude", help="Excluded data handling")
    sp.add_argument("--include-row-titles", action="store_true", help="Include row titles")
    sp.add_argument("--include-x", action="store_true", help="Include X column data")

    # write: build a brand-new file from external data.
    sp = subparsers.add_parser("write", help="Create new PZFX file")
    sp.add_argument("output_file", help="Output .pzfx file path")
    sp.add_argument("--data-file", required=True, help="Input data file")
    sp.add_argument("--data-format", choices=["csv", "tsv", "json"], required=True, help="Input format")
    sp.add_argument("--table-type", required=True, choices=sorted(VALID_TABLE_TYPES), help="Table type")
    sp.add_argument("--x-format", choices=sorted(VALID_X_FORMATS), help="X format (auto-detected if omitted)")
    sp.add_argument("--y-format", choices=sorted(VALID_Y_FORMATS), help="Y format (default: replicates)")
    sp.add_argument("--replicates", type=int, help="Number of replicates")
    sp.add_argument("--title", help="Table title")
    sp.add_argument("--dry-run", action="store_true", help="Validate without writing")

    # edit: overwrite one subcolumn in place.
    sp = subparsers.add_parser("edit", help="Modify existing PZFX file")
    sp.add_argument("file", help="Path to .pzfx file")
    sp.add_argument("--table", type=int, default=0, help="Table index")
    sp.add_argument("--column-title", required=True, help="Column title to edit")
    sp.add_argument("--subcolumn", type=int, default=0, help="Subcolumn index (0-based)")
    sp.add_argument("--values", required=True, help="JSON array of values")
    sp.add_argument("--exclude-values", help="JSON array of booleans for exclusion")
    sp.add_argument("--dry-run", action="store_true", help="Preview changes")
    sp.add_argument("--output", help="Output file path (default: in-place)")

    # add-column: append a Y column to an existing table.
    sp = subparsers.add_parser("add-column", help="Add column to existing table")
    sp.add_argument("file", help="Path to .pzfx file")
    sp.add_argument("--table", type=int, default=0, help="Table index")
    sp.add_argument("--title", required=True, help="New column title")
    sp.add_argument("--subcolumns", type=int, required=True, help="Number of subcolumns")
    sp.add_argument("--values", required=True, help="JSON array of arrays")
    sp.add_argument("--dry-run", action="store_true", help="Preview changes")
    sp.add_argument("--output", help="Output file path (default: in-place)")

    # convert: export table(s) to csv/tsv/json files.
    sp = subparsers.add_parser("convert", help="Convert PZFX to other formats")
    sp.add_argument("file", help="Path to .pzfx file")
    sp.add_argument("--format", choices=["csv", "tsv", "json"], default="csv", help="Output format")
    sp.add_argument("--output", help="Output file path")
    sp.add_argument("--table", type=int, default=None, help="Table index (default: all)")
    sp.add_argument("--exclude-mode", choices=["keep", "exclude", "star"], default="exclude", help="Excluded data handling")

    args = parser.parse_args()

    # Dispatch table replaces the if/elif chain; unknown/missing command
    # prints usage and exits non-zero, matching the original behavior.
    handlers = {
        "inspect": cmd_inspect,
        "read": cmd_read,
        "write": cmd_write,
        "edit": cmd_edit,
        "add-column": cmd_add_column,
        "convert": cmd_convert,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        sys.exit(1)
    handler(args)
|
|
1153
|
+
|
|
1154
|
+
|
|
1155
|
+
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|