falsiflow 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- falsiflow/__init__.py +5 -0
- falsiflow/adapters.py +664 -0
- falsiflow/adoption.py +775 -0
- falsiflow/browser_demo.py +1228 -0
- falsiflow/bundle.py +806 -0
- falsiflow/casebook_check.py +426 -0
- falsiflow/claim_check.py +344 -0
- falsiflow/cli.py +2544 -0
- falsiflow/core.py +4072 -0
- falsiflow/demo.py +104 -0
- falsiflow/discovery.py +331 -0
- falsiflow/doctor.py +392 -0
- falsiflow/local_server.py +261 -0
- falsiflow/public_release.py +1805 -0
- falsiflow/quickstart.py +147 -0
- falsiflow/release.py +1783 -0
- falsiflow/scaffold.py +574 -0
- falsiflow/template_check.py +230 -0
- falsiflow/template_discovery.py +102 -0
- falsiflow/template_gallery.py +289 -0
- falsiflow/template_install.py +472 -0
- falsiflow/template_pack.py +544 -0
- falsiflow/template_provenance.py +343 -0
- falsiflow/template_registry.py +373 -0
- falsiflow/template_release.py +521 -0
- falsiflow/templates/ai_claim_evaluation/evidence_pass_demo.csv +19 -0
- falsiflow/templates/ai_claim_evaluation/evidence_placeholder_demo.csv +2 -0
- falsiflow/templates/ai_claim_evaluation/project.json +222 -0
- falsiflow/templates/ai_claim_evaluation/source_files/ai_eval_raw_export.csv +14 -0
- falsiflow/templates/ai_claim_evaluation/template.json +9 -0
- falsiflow/templates/biointerface_coatings/evidence_pass_demo.csv +15 -0
- falsiflow/templates/biointerface_coatings/evidence_placeholder_demo.csv +2 -0
- falsiflow/templates/biointerface_coatings/project.json +186 -0
- falsiflow/templates/biointerface_coatings/source_files/coating_raw_export.csv +15 -0
- falsiflow/templates/biointerface_coatings/template.json +9 -0
- falsiflow/templates/neural_materials/evidence_pass_demo.csv +20 -0
- falsiflow/templates/neural_materials/evidence_placeholder_demo.csv +3 -0
- falsiflow/templates/neural_materials/project.json +222 -0
- falsiflow/templates/neural_materials/source_files/demo_raw_export.csv +20 -0
- falsiflow/templates/neural_materials/template.json +9 -0
- falsiflow/templates/product_metric_launch/evidence_pass_demo.csv +15 -0
- falsiflow/templates/product_metric_launch/evidence_placeholder_demo.csv +2 -0
- falsiflow/templates/product_metric_launch/project.json +211 -0
- falsiflow/templates/product_metric_launch/source_files/product_metric_raw_export.csv +15 -0
- falsiflow/templates/product_metric_launch/template.json +9 -0
- falsiflow/templates/rfq_vendor_evidence/evidence_pass_demo.csv +10 -0
- falsiflow/templates/rfq_vendor_evidence/evidence_placeholder_demo.csv +2 -0
- falsiflow/templates/rfq_vendor_evidence/project.json +142 -0
- falsiflow/templates/rfq_vendor_evidence/source_files/vendor_reply_record.csv +10 -0
- falsiflow/templates/rfq_vendor_evidence/template.json +9 -0
- falsiflow/templates/wetware_support_hardware/evidence_pass_demo.csv +14 -0
- falsiflow/templates/wetware_support_hardware/evidence_placeholder_demo.csv +2 -0
- falsiflow/templates/wetware_support_hardware/project.json +171 -0
- falsiflow/templates/wetware_support_hardware/source_files/hardware_raw_export.csv +14 -0
- falsiflow/templates/wetware_support_hardware/template.json +9 -0
- falsiflow-0.1.2.dist-info/METADATA +1236 -0
- falsiflow-0.1.2.dist-info/RECORD +61 -0
- falsiflow-0.1.2.dist-info/WHEEL +5 -0
- falsiflow-0.1.2.dist-info/entry_points.txt +2 -0
- falsiflow-0.1.2.dist-info/licenses/LICENSE +21 -0
- falsiflow-0.1.2.dist-info/top_level.txt +1 -0
falsiflow/__init__.py
ADDED
falsiflow/adapters.py
ADDED
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
"""Adapters that translate existing project sidecars into Falsiflow inputs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .core import write_csv
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Field list handed to write_csv for every evidence file this module writes;
# it also matches the keys of each evidence row dict built below.
EVIDENCE_FIELDS: list[str] = [
    # Which gate / candidate / sample the value belongs to, and the value itself.
    "gate_id", "candidate_id", "sample_id", "field", "value",
    # Metadata carried alongside each value.
    "source_file", "measured_at", "operator_or_agent", "instrument_id", "notes",
]
|
|
27
|
+
|
|
28
|
+
# Phase label -> gate id. The labels are matched against a row's `phase`
# column or a "-<label>-" segment of its run_id; the insertion order here is
# also the order in which these gates are listed in a generated project.
PHASE_TO_GATE: dict[str, str] = {
    "H-A": "h_a_medium_stability",
    "H-B": "h_b_electrical_interface",
    "H-C": "h_c_network_response",
}
|
|
33
|
+
|
|
34
|
+
# Human-readable title per known gate id. Gates without an entry here get a
# title derived from their id when a project is generated.
GATE_TITLES: dict[str, str] = {
    "h_a_medium_stability": "H-A medium stability and physical integrity",
    "h_b_electrical_interface": "H-B electrical and physical interface benefit",
    "h_c_network_response": "H-C cell and network response",
}
|
|
39
|
+
|
|
40
|
+
# Marker strings written into a generated project's evidence policy as
# `placeholder_markers` (alongside `reject_placeholder_values`).
DEFAULT_PLACEHOLDERS: list[str] = [
    "pending_real_measurement",
    "record_actual", "record_exact", "record_lot",
    "source_file_missing",
    "to_be_recorded",
    "not_measured",
]
|
|
49
|
+
|
|
50
|
+
def _wide_profile(
    description: str,
    sample_id: str,
    *,
    candidate_id: str = "",
    source_file: str = "",
    measured_at: str = "",
    operator_or_agent: str = "",
    instrument_id: str = "",
    notes: str = "",
    exclude: tuple[str, ...] = (),
) -> dict[str, object]:
    """Build one adapter profile entry with the standard key set and order.

    An empty string means the profile names no column for that role. No
    built-in profile names a gate id column, so that key is always empty.
    """
    return {
        "description": description,
        "sample_id_column": sample_id,
        "gate_id_column": "",
        "candidate_id_column": candidate_id,
        "source_file_column": source_file,
        "measured_at_column": measured_at,
        "operator_or_agent_column": operator_or_agent,
        "instrument_id_column": instrument_id,
        "notes_column": notes,
        "exclude_columns": list(exclude),
    }


# Built-in column mappings for wide CSV inputs, keyed by profile name.
ADAPTER_PROFILES: dict[str, dict[str, object]] = {
    "generic-wide": _wide_profile(
        "Generic wide CSV with sample_id plus measured value columns.",
        "sample_id",
    ),
    "vendor-measurement": _wide_profile(
        "Vendor or external-lab measurement return with sample, source file, contact, instrument, and notes columns.",
        "sample",
        candidate_id="article",
        source_file="source_file",
        measured_at="measured_at",
        operator_or_agent="vendor_contact",
        instrument_id="instrument_id",
        notes="notes",
        exclude=("vendor", "quote_id", "work_order"),
    ),
    "instrument-export": _wide_profile(
        "Instrument export with sample_id, timestamp, operator, instrument_id, raw_file, and measured columns.",
        "sample_id",
        candidate_id="candidate_id",
        source_file="raw_file",
        measured_at="timestamp",
        operator_or_agent="operator",
        instrument_id="instrument_id",
        notes="notes",
        exclude=("run_id", "method", "batch_id"),
    ),
    "plate-reader": _wide_profile(
        "Plate-reader style export with well_id, read_at, operator, plate_reader_id, raw_file, and assay columns.",
        "well_id",
        candidate_id="sample_name",
        source_file="raw_file",
        measured_at="read_at",
        operator_or_agent="operator",
        instrument_id="plate_reader_id",
        notes="notes",
        exclude=("plate_id", "assay_id", "well_id"),
    ),
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def adapter_profile_names() -> list[str]:
    """Return the names of all registered adapter profiles, sorted ascending."""
    names = list(ADAPTER_PROFILES)
    names.sort()
    return names
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def adapter_profile_summary() -> list[dict[str, object]]:
    """Return one flat dict per adapter profile, ordered by profile name.

    Each entry starts with a ``"profile"`` key holding the profile name,
    followed by a copy of that profile's own settings.
    """
    summaries: list[dict[str, object]] = []
    for name in sorted(ADAPTER_PROFILES):
        entry: dict[str, object] = {"profile": name}
        entry.update(ADAPTER_PROFILES[name])
        summaries.append(entry)
    return summaries
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def resolve_wide_adapter_settings(
    profile: str,
    sample_id_column: str,
    exclude_columns: list[str],
    gate_id_column: str,
    candidate_id_column: str,
    source_file_column: str,
    measured_at_column: str,
    operator_or_agent_column: str,
    instrument_id_column: str,
    notes_column: str,
) -> dict[str, object]:
    """Merge explicit column overrides with the defaults of an adapter profile.

    Args:
        profile: Name of an entry in ``ADAPTER_PROFILES``.
        sample_id_column: Override for the sample id column; blank means
            "use the profile's value". The other ``*_column`` arguments
            work the same way for their respective roles.
        exclude_columns: Extra columns to exclude, appended after the
            profile's own exclusions with duplicates removed.

    Returns:
        A settings dict holding the profile name, its description, the
        resolved column name for every role, and the merged exclusion list.

    Raises:
        ValueError: If ``profile`` is unknown, or if neither the override
            nor the profile yields a sample id column.
    """
    profile_data = ADAPTER_PROFILES.get(profile)
    if profile_data is None:
        raise ValueError(f"Unknown adapter profile `{profile}`. Expected one of: {', '.join(adapter_profile_names())}.")

    # Profile exclusions come first; dict.fromkeys de-duplicates while
    # keeping first-seen order.
    profile_excludes = [str(item) for item in profile_data.get("exclude_columns", []) if str(item)]
    merged_excludes = list(dict.fromkeys(profile_excludes + list(exclude_columns)))

    overrides = {
        "sample_id_column": sample_id_column,
        "gate_id_column": gate_id_column,
        "candidate_id_column": candidate_id_column,
        "source_file_column": source_file_column,
        "measured_at_column": measured_at_column,
        "operator_or_agent_column": operator_or_agent_column,
        "instrument_id_column": instrument_id_column,
        "notes_column": notes_column,
    }

    settings: dict[str, object] = {
        "profile": profile,
        "profile_description": str(profile_data.get("description", "")),
    }
    for key, override in overrides.items():
        # A non-blank override wins; otherwise fall back to the profile.
        settings[key] = clean(override) or clean(profile_data.get(key, ""))
    settings["exclude_columns"] = merged_excludes

    if not settings["sample_id_column"]:
        raise ValueError(f"Adapter profile `{profile}` does not define a sample_id_column and no --sample-id-column override was provided.")
    return settings
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def rule(field: str, operator: str, value: Any, reason: str, **filters: Any) -> dict[str, Any]:
    """Build an acceptance-rule dict.

    Args:
        field: Name of the field the rule applies to.
        operator: Comparison operator, stored as given.
        value: Value stored with the rule as its threshold.
        reason: Explanation stored with the rule.
        **filters: Optional extra keys; only truthy values are kept.

    Returns:
        A dict with ``field``, ``operator``, ``value`` and ``reason``,
        followed by every truthy filter in the order it was passed.
    """
    payload: dict[str, Any] = dict(field=field, operator=operator, value=value, reason=reason)
    for key, val in filters.items():
        if val:
            payload[key] = val
    return payload
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def derived(field: str, operation: str, **refs: str) -> dict[str, str]:
    """Build a derived-field dict.

    Returns a dict with ``field`` and ``operation`` followed by every
    keyword in ``refs`` (for example ``before=...``/``after=...``), in the
    order the keywords were passed.
    """
    return {"field": field, "operation": operation, **refs}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def add_if_fields_present(
|
|
173
|
+
gate: dict[str, Any],
|
|
174
|
+
available_fields: set[str],
|
|
175
|
+
required_fields: set[str],
|
|
176
|
+
derived_field: dict[str, Any] | None = None,
|
|
177
|
+
acceptance_rule: dict[str, Any] | None = None,
|
|
178
|
+
) -> None:
|
|
179
|
+
if not required_fields <= available_fields:
|
|
180
|
+
return
|
|
181
|
+
if derived_field is not None:
|
|
182
|
+
gate.setdefault("derived_fields", []).append(derived_field)
|
|
183
|
+
if acceptance_rule is not None:
|
|
184
|
+
gate.setdefault("acceptance_rules", []).append(acceptance_rule)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def apply_limina_scientific_overlay(gate: dict[str, Any], available_fields: set[str]) -> None:
    """Attach the built-in derived fields and acceptance rules to a known gate.

    The gate is selected by ``gate["id"]``; ids other than the three H-A,
    H-B and H-C gate ids are left untouched. Each check is added only when
    all fields it needs are in ``available_fields``. ``gate`` is modified
    in place.
    """
    gate_id = str(gate.get("id", ""))
    lead_candidate = "lead_nhi_pedot_low_loading"

    # Each entry: (fields the check needs, derived field or None, rule or None).
    checks: list[tuple[set[str], dict[str, Any] | None, dict[str, Any] | None]] = []

    if gate_id == "h_a_medium_stability":
        checks = [
            (
                {"initial.pH", "final.pH"},
                derived("ph_drift_abs", "abs_delta", left="final.pH", right="initial.pH"),
                rule("ph_drift_abs", "<=", 0.10, "H-A pH drift must remain within 0.10 pH units."),
            ),
            (
                {"initial.osmolality", "final.osmolality"},
                derived(
                    "osmolality_drift_pct",
                    "abs_pct_change",
                    before="initial.osmolality",
                    after="final.osmolality",
                ),
                rule("osmolality_drift_pct", "<=", 5, "H-A osmolality drift must remain within 5 percent."),
            ),
            (
                {"initial.conductivity", "final.conductivity"},
                derived(
                    "conductivity_drift_pct",
                    "abs_pct_change",
                    before="initial.conductivity",
                    after="final.conductivity",
                ),
                rule("conductivity_drift_pct", "<=", 5, "H-A conductivity drift must remain within 5 percent."),
            ),
            (
                {"physical_inspection.visible_precipitate"},
                None,
                rule(
                    "physical_inspection.visible_precipitate",
                    "==",
                    False,
                    "H-A must show no visible precipitate.",
                ),
            ),
            (
                {"physical_inspection.visible_shedding"},
                None,
                rule(
                    "physical_inspection.visible_shedding",
                    "==",
                    False,
                    "H-A must show no visible shedding.",
                ),
            ),
            (
                {"physical_inspection.swelling_fraction"},
                None,
                rule(
                    "physical_inspection.swelling_fraction",
                    "<=",
                    0.20,
                    "H-A swelling must remain below 20 percent.",
                ),
            ),
        ]
    elif gate_id == "h_b_electrical_interface":
        checks = [
            (
                {"eis_1khz_initial_ohm", "eis_1khz_final_ohm"},
                derived(
                    "eis_1khz_reduction_pct",
                    "reduction_pct",
                    before="eis_1khz_initial_ohm",
                    after="eis_1khz_final_ohm",
                ),
                rule(
                    "eis_1khz_reduction_pct",
                    ">=",
                    25,
                    "Lead H-B electrical readout must improve impedance by at least 25 percent.",
                    candidate_id_contains=lead_candidate,
                ),
            ),
            (
                {"charge_storage_capacity_initial", "charge_storage_capacity_final"},
                derived(
                    "charge_storage_gain_pct",
                    "gain_pct",
                    before="charge_storage_capacity_initial",
                    after="charge_storage_capacity_final",
                ),
                rule(
                    "charge_storage_gain_pct",
                    ">=",
                    15,
                    "Lead H-B electrical readout must improve charge storage by at least 15 percent.",
                    candidate_id_contains=lead_candidate,
                ),
            ),
        ]
        # Single-field thresholds that apply to every candidate.
        checks += [
            ({field}, None, rule(field, op, value, reason))
            for field, op, value, reason in (
                ("swelling_fraction", "<=", 0.20, "H-B swelling must remain below 20 percent."),
                ("delamination_score", "<=", 0, "H-B must show no delamination."),
                ("optical_transparency_fraction", ">=", 0.80, "H-B transparency must remain inspectable."),
            )
        ]
    elif gate_id == "h_c_network_response":
        # Single-field thresholds restricted to the lead candidate.
        checks = [
            ({field}, None, rule(field, op, value, reason, candidate_id_contains=lead_candidate))
            for field, op, value, reason in (
                ("viability_fraction", ">=", 0.85, "H-C lead viability must remain non-inferior enough to continue."),
                ("ldh_fold_control", "<=", 1.20, "H-C lead cytotoxicity must not materially exceed control."),
                ("electrode_yield_fraction", ">=", 0.80, "H-C lead electrode yield must remain acceptable."),
            )
        ]

    for required, derived_field, acceptance_rule in checks:
        add_if_fields_present(gate, available_fields, required, derived_field, acceptance_rule)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def clean(value: Any) -> str:
    """Return ``value`` as a string without surrounding whitespace.

    ``None`` becomes the empty string; anything else goes through ``str()``.
    """
    if value is None:
        return ""
    return str(value).strip()
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def read_csv(path: Path) -> list[dict[str, str]]:
    """Read a CSV file with a header row into a list of row dicts.

    Args:
        path: Location of the CSV file.

    Returns:
        One dict per data row, keyed by the header names.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    is dropped instead of becoming part of the first header name. With
    plain ``utf-8`` a BOM-prefixed file would never match its first column
    by name. Files without a BOM decode exactly as before.
    """
    # newline="" lets the csv module handle line endings itself.
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        return [dict(row) for row in reader]
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def infer_phase(row: dict[str, str]) -> str:
    """Return the phase label for a row, or ``""`` when none can be found.

    A non-blank ``phase`` value is returned as-is (after trimming).
    Otherwise the first ``PHASE_TO_GATE`` key that appears in ``run_id``
    as a ``-<key>-`` segment is returned.
    """
    explicit = clean(row.get("phase"))
    if explicit:
        return explicit
    run_id = clean(row.get("run_id"))
    return next((label for label in PHASE_TO_GATE if f"-{label}-" in run_id), "")
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def infer_gate_id(row: dict[str, str], default_gate: str) -> str:
    """Return the gate id for the row's inferred phase.

    Falls back to ``default_gate`` when the phase is not a
    ``PHASE_TO_GATE`` key.
    """
    phase = infer_phase(row)
    if phase in PHASE_TO_GATE:
        return PHASE_TO_GATE[phase]
    return default_gate
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def infer_candidate_id(row: dict[str, str], default_candidate: str) -> str:
    """Return the candidate id for a row.

    The first non-blank value among ``variant_id``, ``article_id`` and
    ``candidate_id`` wins. Otherwise the id is taken from ``run_id`` when
    it matches one of the two ``NHIPEDOT-...-R<n>`` layouts below. When
    neither applies, ``default_candidate`` is returned.
    """
    for key in ("variant_id", "article_id", "candidate_id"):
        explicit = clean(row.get(key))
        if explicit:
            return explicit

    run_id = clean(row.get("run_id"))
    # Tried in order; the captured group is the candidate part of the id.
    run_id_patterns = (
        r"^NHIPEDOT-H-[ABC]-(.+?)-R\d+",
        r"^NHIPEDOT-LONG-[^-]+-(.+?)-R\d+",
    )
    for pattern in run_id_patterns:
        found = re.match(pattern, run_id)
        if found:
            return found.group(1)
    return default_candidate
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def infer_field_id(row: dict[str, str]) -> str:
    """Return the evidence field id for a row, or ``""`` if it names none.

    The field name is the trimmed ``target_field`` value, falling back to
    the trimmed ``field`` value when that is blank. When the row also has
    a non-blank ``sample_event``, the result is ``"<sample_event>.<name>"``.

    Two cases differ from a plain concatenation:

    * A whitespace-only ``target_field`` no longer hides a usable
      ``field`` value, because each candidate is trimmed before the
      fallback is decided.
    * A row with a ``sample_event`` but no field name returns ``""``
      rather than ``"<sample_event>."``, so callers that skip rows with
      an empty field id also skip that row.
    """
    target = clean(row.get("target_field")) or clean(row.get("field"))
    if not target:
        return ""
    sample_event = clean(row.get("sample_event"))
    return f"{sample_event}.{target}" if sample_event else target
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def limina_source_value_to_evidence(
    row: dict[str, str],
    default_candidate: str,
    default_gate: str,
) -> dict[str, str] | None:
    """Convert one source-value row into an evidence row.

    Args:
        row: A row as read from a source-values CSV.
        default_candidate: Candidate id used when the row yields none.
        default_gate: Gate id used when the row's phase maps to no gate.

    Returns:
        A dict with the ``EVIDENCE_FIELDS`` keys, or ``None`` when the row
        has no sample id (``run_id``, else ``sample_id``), no field id, or
        no gate id.
    """
    sample_id = clean(row.get("run_id") or row.get("sample_id"))
    field_id = infer_field_id(row)
    gate_id = infer_gate_id(row, default_gate)
    if not (sample_id and field_id and gate_id):
        return None

    evidence = {
        "gate_id": gate_id,
        "candidate_id": infer_candidate_id(row, default_candidate),
        "sample_id": sample_id,
        "field": field_id,
    }
    # The remaining columns are copied through under the same name, trimmed.
    for key in ("value", "source_file", "measured_at", "operator_or_agent", "instrument_id", "notes"):
        evidence[key] = clean(row.get(key))
    return evidence
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def convert_limina_source_values(
    inputs: list[Path],
    default_candidate: str,
    default_gate: str,
) -> tuple[list[dict[str, str]], dict[str, Any]]:
    """Convert source-value CSV files into evidence rows plus a summary.

    Args:
        inputs: CSV files to read, processed in the given order.
        default_candidate: Fallback candidate id for rows that yield none.
        default_gate: Fallback gate id for rows whose phase maps to no gate.

    Returns:
        ``(evidence_rows, summary)``. The summary holds per-input row
        counts, the total number of evidence rows, the number of rows that
        could not be converted, and the sorted set of gate ids seen.
    """
    evidence_rows: list[dict[str, str]] = []
    skipped_rows = 0
    input_summaries = []

    for path in inputs:
        rows = read_csv(path)
        converted = [
            limina_source_value_to_evidence(row, default_candidate, default_gate)
            for row in rows
        ]
        kept = [evidence for evidence in converted if evidence is not None]
        skipped_rows += len(converted) - len(kept)
        evidence_rows.extend(kept)
        input_summaries.append({
            "path": str(path),
            "input_rows": len(rows),
            "evidence_rows": len(kept),
        })

    gates_seen = {row["gate_id"] for row in evidence_rows}
    summary: dict[str, Any] = {
        "status": "converted",
        "inputs": input_summaries,
        "evidence_rows": len(evidence_rows),
        "skipped_rows": skipped_rows,
        "gates": sorted(gates_seen),
    }
    return evidence_rows, summary
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def convert_wide_lab_csv(
    inputs: list[Path],
    gate_id: str,
    candidate_id: str,
    sample_id_column: str,
    field_columns: list[str],
    exclude_columns: list[str],
    gate_id_column: str,
    candidate_id_column: str,
    source_file: str,
    source_file_column: str,
    measured_at: str,
    measured_at_column: str,
    operator_or_agent: str,
    operator_or_agent_column: str,
    instrument_id: str,
    instrument_id_column: str,
    notes: str,
    notes_column: str,
) -> tuple[list[dict[str, str]], dict[str, Any]]:
    """Turn wide CSV files (one row per sample) into evidence rows.

    Every non-blank cell in a value column becomes one evidence row.

    Args:
        inputs: CSV files to read, processed in the given order.
        gate_id, candidate_id, source_file, measured_at, operator_or_agent,
        instrument_id, notes: Fallback values used when the matching
            ``*_column`` is not configured or the row's cell is blank.
            ``source_file`` additionally falls back to the input path.
        sample_id_column: Column holding the sample id; rows where it is
            blank are skipped.
        field_columns: Value columns to convert. When empty, the value
            columns are taken from the first row's keys, minus the
            configured ``*_column`` names and ``exclude_columns``.
        exclude_columns: Extra columns never treated as value columns.

    Returns:
        ``(evidence_rows, summary)``. The summary holds per-input details,
        the total evidence rows, skipped rows, skipped blank values, and
        the sorted set of gate ids seen.
    """

    def cell(row: dict[str, str], column: str) -> str:
        # A blank column name means "not configured", which yields "".
        return clean(row.get(column)) if column else ""

    evidence_rows: list[dict[str, str]] = []
    skipped_rows = 0
    skipped_values = 0
    input_summaries = []

    for path in inputs:
        rows = read_csv(path)
        rows_before = len(evidence_rows)

        if field_columns:
            value_columns = field_columns
        elif rows:
            reserved = {
                sample_id_column,
                gate_id_column,
                candidate_id_column,
                source_file_column,
                measured_at_column,
                operator_or_agent_column,
                instrument_id_column,
                notes_column,
                *exclude_columns,
            }
            # Falsy keys (blank or None headers) are never value columns.
            value_columns = [name for name in rows[0] if name and name not in reserved]
        else:
            value_columns = []

        for row in rows:
            sample_id = clean(row.get(sample_id_column))
            if not sample_id:
                skipped_rows += 1
                continue

            # Per-row cell wins over the call-level fallback.
            identity = {
                "gate_id": cell(row, gate_id_column) or gate_id,
                "candidate_id": cell(row, candidate_id_column) or candidate_id,
                "sample_id": sample_id,
            }
            provenance = {
                "source_file": cell(row, source_file_column) or source_file or str(path),
                "measured_at": cell(row, measured_at_column) or measured_at,
                "operator_or_agent": cell(row, operator_or_agent_column) or operator_or_agent,
                "instrument_id": cell(row, instrument_id_column) or instrument_id,
                "notes": cell(row, notes_column) or notes,
            }

            for field in value_columns:
                value = clean(row.get(field))
                if not value:
                    skipped_values += 1
                    continue
                evidence_rows.append({**identity, "field": field, "value": value, **provenance})

        input_summaries.append({
            "path": str(path),
            "input_rows": len(rows),
            "value_columns": value_columns,
            "evidence_rows": len(evidence_rows) - rows_before,
        })

    gates_seen = {row["gate_id"] for row in evidence_rows}
    summary: dict[str, Any] = {
        "status": "converted",
        "inputs": input_summaries,
        "evidence_rows": len(evidence_rows),
        "skipped_rows": skipped_rows,
        "skipped_values": skipped_values,
        "gates": sorted(gates_seen),
    }
    return evidence_rows, summary
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def write_wide_lab_conversion(
    inputs: list[Path],
    evidence_out: Path,
    summary_out: Path | None,
    profile: str,
    gate_id: str,
    candidate_id: str,
    sample_id_column: str,
    field_columns: list[str],
    exclude_columns: list[str],
    gate_id_column: str,
    candidate_id_column: str,
    source_file: str,
    source_file_column: str,
    measured_at: str,
    measured_at_column: str,
    operator_or_agent: str,
    operator_or_agent_column: str,
    instrument_id: str,
    instrument_id_column: str,
    notes: str,
    notes_column: str,
) -> dict[str, Any]:
    """Convert wide CSV inputs with an adapter profile and write the results.

    The column arguments are first resolved against ``profile``, then the
    inputs are converted and the evidence rows are written to
    ``evidence_out`` via ``write_csv``. When ``summary_out`` is given, the
    summary is also written there as JSON, creating parent directories as
    needed.

    Returns:
        The conversion summary, extended with the evidence path, the
        profile name and description, and the resolved adapter settings.

    Raises:
        ValueError: Propagated from the settings resolution when the
            profile is unknown or no sample id column can be determined.
    """
    column_overrides = {
        "sample_id_column": sample_id_column,
        "gate_id_column": gate_id_column,
        "candidate_id_column": candidate_id_column,
        "source_file_column": source_file_column,
        "measured_at_column": measured_at_column,
        "operator_or_agent_column": operator_or_agent_column,
        "instrument_id_column": instrument_id_column,
        "notes_column": notes_column,
    }
    settings = resolve_wide_adapter_settings(
        profile=profile,
        exclude_columns=exclude_columns,
        **column_overrides,
    )

    # Hand the converter the resolved column names, not the raw overrides.
    resolved_columns = {key: str(settings[key]) for key in column_overrides}
    evidence_rows, summary = convert_wide_lab_csv(
        inputs=inputs,
        gate_id=gate_id,
        candidate_id=candidate_id,
        field_columns=field_columns,
        exclude_columns=[str(item) for item in settings["exclude_columns"]],
        source_file=source_file,
        measured_at=measured_at,
        operator_or_agent=operator_or_agent,
        instrument_id=instrument_id,
        notes=notes,
        **resolved_columns,
    )

    write_csv(evidence_out, evidence_rows, EVIDENCE_FIELDS)
    summary.update({
        "evidence_out": str(evidence_out),
        "adapter_profile": profile,
        "adapter_profile_description": settings["profile_description"],
        "adapter_settings": settings,
    })

    if summary_out is not None:
        summary_out.parent.mkdir(parents=True, exist_ok=True)
        summary_text = json.dumps(summary, indent=2, sort_keys=True)
        summary_out.write_text(summary_text, encoding="utf-8")
    return summary
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def build_project_from_evidence(
    evidence_rows: list[dict[str, str]],
    project_id: str,
    claim_id: str,
    claim_statement: str,
    source_file_base_dir: str,
    allowed_source_roots: list[str],
) -> dict[str, Any]:
    """Build a project definition dict from converted evidence rows.

    One gate is created per distinct ``gate_id`` in ``evidence_rows``.
    Gates named in ``PHASE_TO_GATE`` come first, in that mapping's order;
    any other gate ids follow in sorted order. Each gate lists its
    (candidate, sample) pairs and the fields seen for it, then receives
    the built-in overlay of derived fields and acceptance rules.

    Args:
        evidence_rows: Rows with at least ``gate_id``, ``field``,
            ``candidate_id`` and ``sample_id`` keys.
        project_id: Project id; also the source of the display name.
        claim_id: Id stored on the claim.
        claim_statement: Statement stored on the claim.
        source_file_base_dir: Stored in the evidence policy as given.
        allowed_source_roots: Stored in the evidence policy as given.

    Returns:
        A dict with ``project``, ``claim``, ``evidence_policy`` and
        ``gates`` sections.
    """
    fields_by_gate: dict[str, set[str]] = defaultdict(set)
    samples_by_gate: dict[str, set[tuple[str, str]]] = defaultdict(set)
    for row in evidence_rows:
        gate_id = row["gate_id"]
        fields_by_gate[gate_id].add(row["field"])
        samples_by_gate[gate_id].add((row["candidate_id"], row["sample_id"]))

    gate_order = [gate for gate in PHASE_TO_GATE.values() if gate in fields_by_gate]
    gate_order.extend(sorted(set(fields_by_gate) - set(gate_order)))

    gates = []
    for gate_id in gate_order:
        gate = {
            "id": gate_id,
            "title": GATE_TITLES.get(gate_id, gate_id.replace("_", " ").title()),
            "samples": [
                {"candidate_id": candidate_id, "sample_id": sample_id}
                for candidate_id, sample_id in sorted(samples_by_gate[gate_id])
            ],
            "required_fields": sorted(fields_by_gate[gate_id]),
            "acceptance_rules": [],
        }
        apply_limina_scientific_overlay(gate, fields_by_gate[gate_id])
        gates.append(gate)

    return {
        "project": {
            "id": project_id,
            "name": project_id.replace("_", " ").title(),
            "domain": "limina-neural-materials",
            "version": "0.1.0",
        },
        "claim": {
            "id": claim_id,
            "statement": claim_statement,
            "requires_gates": gate_order,
        },
        "evidence_policy": {
            "source_file_base_dir": source_file_base_dir,
            "require_source_files": True,
            "reject_placeholder_values": True,
            "allowed_source_roots": allowed_source_roots,
            "required_metadata_fields": [
                "source_file",
                "measured_at",
                "operator_or_agent",
            ],
            # Copy so that editing one project's markers cannot change the
            # module-level default shared by every later project.
            "placeholder_markers": list(DEFAULT_PLACEHOLDERS),
        },
        "gates": gates,
    }
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def write_limina_conversion(
    inputs: list[Path],
    evidence_out: Path,
    summary_out: Path | None,
    project_out: Path | None,
    default_candidate: str,
    default_gate: str,
    project_id: str,
    claim_id: str,
    claim_statement: str,
    source_file_base_dir: str,
    allowed_source_roots: list[str],
) -> dict[str, Any]:
    """Convert source-value CSV inputs and write the resulting files.

    The evidence rows are always written to ``evidence_out`` via
    ``write_csv``. When ``project_out`` is given, a project definition is
    built from the evidence and written there as JSON. When
    ``summary_out`` is given, the summary is written there as JSON.
    Parent directories of both JSON files are created as needed.

    Returns:
        The conversion summary, extended with the evidence path and, if a
        project was written, its path and gate count.
    """

    def dump_json(target: Path, payload: dict[str, Any]) -> None:
        # Both JSON outputs use the same layout: indented, keys sorted.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")

    evidence_rows, summary = convert_limina_source_values(inputs, default_candidate, default_gate)
    write_csv(evidence_out, evidence_rows, EVIDENCE_FIELDS)
    summary["evidence_out"] = str(evidence_out)

    if project_out is not None:
        project = build_project_from_evidence(
            evidence_rows,
            project_id=project_id,
            claim_id=claim_id,
            claim_statement=claim_statement,
            source_file_base_dir=source_file_base_dir,
            allowed_source_roots=allowed_source_roots,
        )
        dump_json(project_out, project)
        summary.update(
            project_out=str(project_out),
            project_gate_count=len(project["gates"]),
        )

    if summary_out is not None:
        dump_json(summary_out, summary)
    return summary
|