PyPI - cdxml-toolkit - Versions diffs - 0.5.0__py3-none-any.whl - Mend

cdxml-toolkit 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

cdxml_toolkit/__init__.py +18 -0
cdxml_toolkit/_jre/__init__.py +2 -0
cdxml_toolkit/_jre/temurin-21-jre-win-x64.zip +0 -0
cdxml_toolkit/analysis/__init__.py +35 -0
cdxml_toolkit/analysis/deterministic/__init__.py +12 -0
cdxml_toolkit/analysis/deterministic/discover_experiment_files.py +413 -0
cdxml_toolkit/analysis/deterministic/lab_book_formatter.py +701 -0
cdxml_toolkit/analysis/deterministic/lcms_file_categorizer.py +928 -0
cdxml_toolkit/analysis/deterministic/lcms_identifier.py +598 -0
cdxml_toolkit/analysis/deterministic/mass_resolver.py +654 -0
cdxml_toolkit/analysis/deterministic/multi_lcms_analyzer.py +1412 -0
cdxml_toolkit/analysis/deterministic/procedure_writer.py +446 -0
cdxml_toolkit/analysis/extract_nmr.py +47 -0
cdxml_toolkit/analysis/format_procedure_entry.py +479 -0
cdxml_toolkit/analysis/lcms_analyzer.py +1299 -0
cdxml_toolkit/analysis/parse_analysis_file.py +134 -0
cdxml_toolkit/cdxml_builder.py +920 -0
cdxml_toolkit/cdxml_utils.py +342 -0
cdxml_toolkit/chemdraw/__init__.py +5 -0
cdxml_toolkit/chemdraw/_chemscript_server.py +562 -0
cdxml_toolkit/chemdraw/cdx_converter.py +527 -0
cdxml_toolkit/chemdraw/cdxml_to_image.py +262 -0
cdxml_toolkit/chemdraw/cdxml_to_image_rdkit.py +296 -0
cdxml_toolkit/chemdraw/chemscript_bridge.py +901 -0
cdxml_toolkit/constants.py +304 -0
cdxml_toolkit/coord_normalizer.py +438 -0
cdxml_toolkit/deterministic_pipeline/__init__.py +6 -0
cdxml_toolkit/deterministic_pipeline/legacy/__init__.py +5 -0
cdxml_toolkit/deterministic_pipeline/legacy/eln_cdx_cleanup.py +509 -0
cdxml_toolkit/deterministic_pipeline/legacy/eln_enrichment.py +1394 -0
cdxml_toolkit/deterministic_pipeline/legacy/scheme_aligner.py +428 -0
cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher.py +1337 -0
cdxml_toolkit/deterministic_pipeline/legacy/scheme_polisher_v2.py +1340 -0
cdxml_toolkit/deterministic_pipeline/scheme_reader_audit.py +931 -0
cdxml_toolkit/deterministic_pipeline/scheme_reader_verify.py +1160 -0
cdxml_toolkit/image/__init__.py +15 -0
cdxml_toolkit/image/reaction_from_image.py +2103 -0
cdxml_toolkit/image/structure_from_image.py +1711 -0
cdxml_toolkit/layout/__init__.py +5 -0
cdxml_toolkit/layout/alignment.py +1642 -0
cdxml_toolkit/layout/reaction_cleanup.py +1002 -0
cdxml_toolkit/layout/scheme_merger.py +2260 -0
cdxml_toolkit/mcp_server/__init__.py +0 -0
cdxml_toolkit/mcp_server/__main__.py +5 -0
cdxml_toolkit/mcp_server/server.py +1567 -0
cdxml_toolkit/naming/__init__.py +6 -0
cdxml_toolkit/naming/aligned_namer.py +2342 -0
cdxml_toolkit/naming/mol_builder.py +3722 -0
cdxml_toolkit/naming/name_decomposer.py +2843 -0
cdxml_toolkit/naming/reactions_datamol.json +2414 -0
cdxml_toolkit/office/__init__.py +5 -0
cdxml_toolkit/office/doc_from_template.py +722 -0
cdxml_toolkit/office/ole_embedder.py +808 -0
cdxml_toolkit/office/ole_extractor.py +272 -0
cdxml_toolkit/perception/__init__.py +10 -0
cdxml_toolkit/perception/compound_search.py +229 -0
cdxml_toolkit/perception/eln_csv_parser.py +240 -0
cdxml_toolkit/perception/rdf_parser.py +664 -0
cdxml_toolkit/perception/reactant_heuristic.py +1045 -0
cdxml_toolkit/perception/reaction_parser.py +2150 -0
cdxml_toolkit/perception/scheme_reader.py +2948 -0
cdxml_toolkit/perception/scheme_refine.py +1404 -0
cdxml_toolkit/perception/scheme_segmenter.py +619 -0
cdxml_toolkit/perception/spatial_assignment.py +1013 -0
cdxml_toolkit/rdkit_utils.py +605 -0
cdxml_toolkit/render/__init__.py +17 -0
cdxml_toolkit/render/auto_layout.py +229 -0
cdxml_toolkit/render/compact_parser.py +632 -0
cdxml_toolkit/render/parser.py +706 -0
cdxml_toolkit/render/render_scheme.py +267 -0
cdxml_toolkit/render/renderer.py +2387 -0
cdxml_toolkit/render/schema.py +90 -0
cdxml_toolkit/render/scheme_maker.py +1043 -0
cdxml_toolkit/render/scheme_yaml_writer.py +1487 -0
cdxml_toolkit/resolve/__init__.py +13 -0
cdxml_toolkit/resolve/cas_resolver.py +430 -0
cdxml_toolkit/resolve/chemscanner_abbreviations.json +28813 -0
cdxml_toolkit/resolve/condensed_formula.py +493 -0
cdxml_toolkit/resolve/jre_manager.py +195 -0
cdxml_toolkit/resolve/reagent_abbreviations.json +1046 -0
cdxml_toolkit/resolve/reagent_db.py +285 -0
cdxml_toolkit/resolve/superatom_data.json +2856 -0
cdxml_toolkit/resolve/superatom_table.py +146 -0
cdxml_toolkit/text_formatting.py +298 -0
cdxml_toolkit-0.5.0.dist-info/METADATA +318 -0
cdxml_toolkit-0.5.0.dist-info/RECORD +91 -0
cdxml_toolkit-0.5.0.dist-info/WHEEL +5 -0
cdxml_toolkit-0.5.0.dist-info/entry_points.txt +17 -0
cdxml_toolkit-0.5.0.dist-info/licenses/LICENSE +21 -0
cdxml_toolkit-0.5.0.dist-info/licenses/NOTICE.md +37 -0
cdxml_toolkit-0.5.0.dist-info/top_level.txt +1 -0

cdxml_toolkit/analysis/deterministic/lcms_identifier.py ADDED Viewed

@@ -0,0 +1,598 @@
+#!/usr/bin/env python3
+"""
+LCMS Identifier — Species Identification from Ion m/z Values
+Matches observed LCMS ions against expected species adducts to identify
+compounds in tracking and purified-product chromatograms.  Handles both
+multi-file tracking analysis (via multi_lcms_analyzer) and single-file
+purified product analysis.
+Key types:
+  - IdentifiedCompound: a multi-LCMS compound matched to an expected species
+  - IdentifiedPeak: a single-report peak matched to an expected species
+  - TrackingAnalysis: wrapper for multi-LCMS tracking results
+  - PurifiedAnalysis: wrapper for purified product LCMS results
+Usage:
+    from cdxml_toolkit.analysis.deterministic.lcms_identifier import (
+        match_ions_to_species, run_tracking_analysis, run_purified_analysis,
+    )
+"""
+import os
+import sys
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple
+from ..lcms_analyzer import parse_report, LCMSReport, ChromPeak
+from .lcms_file_categorizer import (
+    categorize_lcms_files_batch,
+    calibrate_sort_keys_hybrid,
+)
+from .multi_lcms_analyzer import (
+    FileEntry as MultiFileEntry,
+    analyze as multi_analyze,
+    AnalysisResult,
+    Compound as MultiCompound,
+    IonCluster,
+    extract_run_datetime,
+    load_analysis_from_json,
+)
+from cdxml_toolkit.constants import MASS_TOLERANCE
+from .mass_resolver import (
+    ExpectedSpecies,
+    ADDUCTS, ADDUCT_PRIORITY, MODE_PREFERENCE,
+)
+# Role-based priority used when ranking candidate ion matches: SM/DP
+# (substrate/product) preferred over reactants/reagents, which beat byproducts.
+# Lower number = preferred.  match_ions_to_species() looks roles up with a
+# default of 1, so a role missing from this table ranks with reactants.
+ROLE_PRIORITY = {
+    "substrate": 0,
+    "product": 0,
+    "reactant": 1,
+    "reagent": 1,
+    "byproduct": 2,
+}
+# ---------------------------------------------------------------------------
+# Data structures
+# ---------------------------------------------------------------------------
+@dataclass
+class IdentifiedCompound:
+    """A multi-LCMS compound matched to an expected species."""
+    compound: MultiCompound   # the tracked compound whose ions were matched
+    species: ExpectedSpecies  # expected species it was assigned to
+    adduct: str          # adduct name that matched, e.g. "[M+H]+"
+    matched_mz: float    # observed m/z that matched
+@dataclass
+class TrackingAnalysis:
+    """Results of multi-LCMS tracking analysis with species identification."""
+    result: Optional[AnalysisResult] = None  # None when no analysis was produced
+    identified: List[IdentifiedCompound] = field(default_factory=list)  # compounds assigned to a species
+    unidentified: List[MultiCompound] = field(default_factory=list)  # compounds left without an assignment
+    files: List[MultiFileEntry] = field(default_factory=list)  # file entries behind this analysis
+@dataclass
+class IdentifiedPeak:
+    """A single-report chromatographic peak matched to an expected species."""
+    peak: ChromPeak           # peak taken from the parsed report
+    species: ExpectedSpecies  # expected species the peak's ions matched
+    adduct: str               # adduct name that matched, e.g. "[M+H]+"
+    matched_mz: float         # observed m/z that matched
+@dataclass
+class PurifiedAnalysis:
+    """Results of purified product LCMS analysis."""
+    report: Optional[LCMSReport] = None  # parsed report; None if no file was found or parsing failed
+    file_info: Optional[object] = None   # the LCMS file object that was selected for analysis
+    identified: List[IdentifiedPeak] = field(default_factory=list)  # peaks matched to an expected species
+    # Per-detector purity of the product peak (None = not detected)
+    purity_tac: Optional[float] = None
+    purity_220nm: Optional[float] = None
+    purity_254nm: Optional[float] = None
+    # True when no "final" file was found and a workup file was used instead
+    is_crude_fallback: bool = False
+# ---------------------------------------------------------------------------
+# Ion matching
+# ---------------------------------------------------------------------------
+def match_ions_to_species(
+    ions: List[Tuple[str, float, int]],
+    expected: List[ExpectedSpecies],
+    tolerance: float = MASS_TOLERANCE,
+) -> Optional[Tuple[ExpectedSpecies, str, float]]:
+    """
+    Match observed ions against expected species adducts.
+    Candidate matches are ranked by:
+      1. Adduct priority — [M+H]+/[M-H]- preferred over [M+Na]+/[M+formate]-
+      2. Role priority — SM/DP preferred over reactants, over byproducts
+      3. Ion rank — lower rank = more intense = preferred
+      4. ESI mode — ESI+ preferred over ESI- (tiebreaker)
+      5. Mass accuracy — closer delta preferred (final tiebreaker)
+    Args:
+        ions: list of (mode, m/z, rank) tuples.
+              rank 0 = base peak (most intense).
+        expected: list of ExpectedSpecies with computed adducts
+        tolerance: matching tolerance in Da
+    Returns:
+        (species, adduct_name, matched_mz) or None
+    """
+    candidates = []
+    for obs_mode, obs_mz, obs_rank in ions:
+        for species in expected:
+            for adduct_name, expected_mz in species.adducts.items():
+                adduct_mode = ADDUCTS[adduct_name][0]
+                if obs_mode != adduct_mode:
+                    continue
+                delta = abs(obs_mz - expected_mz)
+                if delta < tolerance:
+                    candidates.append((
+                        ADDUCT_PRIORITY[adduct_name],   # 0=primary, 1=secondary
+                        ROLE_PRIORITY.get(species.role, 1),  # 0=SM/DP, 2=byproduct
+                        obs_rank,                       # 0=base peak
+                        MODE_PREFERENCE.get(obs_mode, 1),  # 0=ES+, 1=ES-
+                        delta,                          # mass accuracy
+                        species, adduct_name, obs_mz,
+                    ))
+    if not candidates:
+        return None
+    candidates.sort(key=lambda c: c[:5])
+    best = candidates[0]
+    return (best[5], best[6], best[7])
+def _try_assign_species(match, used_species):
+    """Assign a matched species, with isomer fallback for products.
+    When a compound's ions match a product species (e.g. "DP") that has
+    already been assigned to a larger peak, this creates a "{name}-isomer"
+    variant instead of dropping the compound to unidentified.  Handles
+    regioisomeric / diastereomeric products that share the same exact mass.
+    Args:
+        match: result from match_ions_to_species(), or None
+        used_species: set of already-assigned species names (modified in-place)
+    Returns:
+        (species, adduct, mz) if assigned, or None
+    """
+    if match is None:
+        return None
+    species, adduct, mz = match
+    if species.name not in used_species:
+        used_species.add(species.name)
+        return (species, adduct, mz)
+    # Isomer fallback for products
+    if species.role == "product":
+        isomer_name = f"{species.name}-isomer"
+        if isomer_name not in used_species:
+            isomer_sp = ExpectedSpecies(
+                name=isomer_name,
+                role=species.role,
+                exact_mass=species.exact_mass,
+                smiles=species.smiles,
+                adducts=dict(species.adducts),
+                source_file=species.source_file,
+            )
+            used_species.add(isomer_name)
+            return (isomer_sp, adduct, mz)
+    return None
+# ---------------------------------------------------------------------------
+# Tracking LCMS analysis (via multi_lcms_analyzer)
+# ---------------------------------------------------------------------------
+def _run_single_file_tracking(
+    lf,
+    expected: List[ExpectedSpecies],
+) -> TrackingAnalysis:
+    """
+    Analyze a single tracking LCMS file without multi_lcms_analyzer.
+    Parses the report, matches peaks to expected species, and wraps results
+    in TrackingAnalysis-compatible structures.  No cross-file trending or
+    ion-recurrence filtering — those require multiple files.
+    """
+    try:
+        report = parse_report(lf.path)
+        lf.report = report
+        print(f"  Parsed tracking: {lf.filename} "
+              f"({len(report.peaks)} peaks)", file=sys.stderr)
+    except Exception as e:
+        print(f"  Warning: Could not parse {lf.filename}: {e}",
+              file=sys.stderr)
+        return TrackingAnalysis()
+    # Build FileEntry for compatibility with notes builder
+    fe = MultiFileEntry(
+        path=os.path.abspath(lf.path),
+        filename=lf.filename,
+        category=lf.category,
+        sort_key=lf.sort_key,
+        report=report,
+    )
+    # Match peaks to expected species (same approach as purified analysis)
+    identified = []
+    unidentified_compounds = []
+    used_species = set()
+    next_id = 1
+    # Sort peaks by area descending — match larger peaks first
+    sorted_peaks = sorted(report.peaks,
+                          key=lambda p: p.area_pct or 0, reverse=True)
+    for peak in sorted_peaks:
+        # Build ions list from peak's mass spectra
+        ions = []
+        for spec in peak.ms_spectra:
+            for rank, mz in enumerate(spec.top_ions):
+                ions.append((spec.mode, mz, rank))
+        # Build a MultiCompound wrapper for this peak
+        mc = MultiCompound(compound_id=next_id, canonical_rt=peak.rt)
+        mc.max_area = peak.area_pct or 0.0
+        mc.uv_lambda_max = list(peak.uv_lambda_max) if peak.uv_lambda_max else []
+        mc.trend = "stable"
+        mc.trend_detail = "single file"
+        # area maps keyed by file index (only index 0 for single file)
+        if peak.area_pct is not None:
+            mc.area_pct_by_file[0] = peak.area_pct
+        if peak.area_pct_220nm is not None:
+            mc.area_pct_220_by_file[0] = peak.area_pct_220nm
+        if peak.area_pct_254nm is not None:
+            mc.area_pct_254_by_file[0] = peak.area_pct_254nm
+        next_id += 1
+        match = match_ions_to_species(ions, expected)
+        assigned = _try_assign_species(match, used_species)
+        if assigned:
+            species, adduct, mz = assigned
+            identified.append(IdentifiedCompound(
+                compound=mc, species=species, adduct=adduct, matched_mz=mz,
+            ))
+        else:
+            unidentified_compounds.append(mc)
+    # Build AnalysisResult for compatibility with characterization builder
+    all_compounds = [ic.compound for ic in identified] + unidentified_compounds
+    result = AnalysisResult(
+        instrument=report.instrument or "Unknown",
+        method_short=report.method_short or "Unknown",
+        files=[fe],
+        compounds=all_compounds,
+    )
+    return TrackingAnalysis(
+        result=result,
+        identified=identified,
+        unidentified=unidentified_compounds,
+        files=[fe],
+    )
+def _cross_validate_method(file_entries: List[MultiFileEntry]) -> List[str]:
+    """
+    Cross-validate filename method modifier (e.g. -AmB) against the actual
+    PDF method path.  Returns a list of warning strings for mismatches.
+    """
+    warnings = []
+    for fe in file_entries:
+        if not fe.method_variant or not fe.report:
+            continue
+        pdf_method = fe.report.method_path.lower()
+        # Map modifier to the substring expected in the method path
+        variant = fe.method_variant.lower()
+        # Strip 'foc' suffix — "-AmBfoc" still means buffer is AmB
+        core_variant = variant.replace('foc', '')
+        if core_variant and core_variant not in pdf_method:
+            warnings.append(
+                f"Method mismatch: {fe.filename} has filename modifier "
+                f"'-{fe.method_variant}' but PDF method is "
+                f"'{os.path.basename(fe.report.method_path)}'"
+            )
+    return warnings
+def run_tracking_analysis(
+    exp,
+    expected: List[ExpectedSpecies],
+) -> TrackingAnalysis:
+    """
+    Analyze tracking LCMS files and identify compounds.
+    Single file  → direct parse + ion matching (no multi_lcms_analyzer).
+    Multiple files → multi_lcms_analyzer for cross-file compound tracking.
+    Groups files by (instrument, method) and picks the largest group.
+    Uses hybrid sort keys: filename tokens for group 1, PDF acquisition
+    timestamps for groups 2+.
+    """
+    tracking_files = [lf for lf in exp.lcms_files if lf.category == "tracking"]
+    if not tracking_files:
+        return TrackingAnalysis()
+    if len(tracking_files) == 1:
+        return _run_single_file_tracking(tracking_files[0], expected)
+    # --- Multiple tracking files: use multi_lcms_analyzer ---
+    file_entries = []
+    for lf in tracking_files:
+        try:
+            report = parse_report(lf.path)
+            run_dt = extract_run_datetime(lf.path)
+            fe = MultiFileEntry(
+                path=os.path.abspath(lf.path),
+                filename=lf.filename,
+                category=lf.category,
+                sort_key=lf.sort_key,
+                report=report,
+                run_datetime=run_dt,
+                group_prefix=getattr(lf, 'group_prefix', None),
+                method_variant=getattr(lf, 'method_variant', None),
+            )
+            file_entries.append(fe)
+            print(f"  Parsed tracking: {lf.filename} "
+                  f"({len(report.peaks)} peaks)", file=sys.stderr)
+        except Exception as e:
+            print(f"  Warning: Could not parse {lf.filename}: {e}",
+                  file=sys.stderr)
+    if not file_entries:
+        return TrackingAnalysis(files=file_entries)
+    if len(file_entries) == 1:
+        # Only one file parsed successfully — fall back to single-file
+        lf = tracking_files[0]
+        lf.report = file_entries[0].report
+        return _run_single_file_tracking(lf, expected)
+    # --- Hybrid sort key recalibration ---
+    # Recalibrate groups 2+ using real PDF acquisition timestamps.
+    # Group 1 keeps filename-derived sort keys (chemist controls submission
+    # order at the start of a reaction).
+    #
+    # Recover the tracking group info from the batch categorizer.
+    # We need it to know which files belong to which prefix-group.
+    tracking_filenames = [lf.filename for lf in tracking_files]
+    batch = categorize_lcms_files_batch(
+        tracking_filenames,
+        exp.experiment_name if hasattr(exp, 'experiment_name') else "")
+    if batch.tracking_groups and len(batch.tracking_groups) > 1:
+        run_dts = {fe.filename: fe.run_datetime
+                   for fe in file_entries if fe.run_datetime}
+        if run_dts:
+            calibrate_sort_keys_hybrid(
+                batch.tracking_groups, batch, run_dts)
+            # Update FileEntry sort_keys from recalibrated batch result
+            for fe in file_entries:
+                fc = batch.files.get(fe.filename)
+                if fc is not None:
+                    fe.sort_key = fc.sort_key
+    # --- Method cross-validation ---
+    method_warnings = _cross_validate_method(file_entries)
+    # --- Run multi-LCMS analysis ---
+    # Group by (instrument, method); pick only the biggest group.
+    results = multi_analyze(
+        files=file_entries,
+        rt_tol=0.02,
+        mz_tol=0.5,
+        trend_threshold=0.2,
+        ignore_instrument=False,
+        use_run_time=False,           # sort_key is now the single source of truth
+        pick_biggest_group=True,
+    )
+    if not results:
+        return TrackingAnalysis(files=file_entries)
+    # Take the (now single) result from the biggest group
+    analysis = results[0]
+    # Append method cross-validation warnings
+    if method_warnings:
+        analysis.warnings.extend(method_warnings)
+    # Match compounds to expected species
+    identified = []
+    unidentified = []
+    used_species = set()
+    # Sort compounds by max_area descending (match larger compounds first)
+    sorted_compounds = sorted(
+        analysis.compounds, key=lambda c: c.max_area, reverse=True)
+    for compound in sorted_compounds:
+        # Collect all ions as (mode, mz, rank) tuples — recurring first
+        ions = []
+        for ic in compound.recurring_ions:
+            ions.append((ic.mode, ic.mean_mz, ic.best_rank))
+        for ic in compound.other_ions:
+            ions.append((ic.mode, ic.mean_mz, ic.best_rank))
+        match = match_ions_to_species(ions, expected)
+        assigned = _try_assign_species(match, used_species)
+        if assigned:
+            species, adduct, mz = assigned
+            identified.append(IdentifiedCompound(
+                compound=compound,
+                species=species,
+                adduct=adduct,
+                matched_mz=mz,
+            ))
+        else:
+            unidentified.append(compound)
+    return TrackingAnalysis(
+        result=analysis,
+        identified=identified,
+        unidentified=unidentified,
+        files=file_entries,
+    )
+def run_tracking_from_result(
+    analysis: AnalysisResult,
+    expected: List[ExpectedSpecies],
+) -> TrackingAnalysis:
+    """
+    Identify compounds in a pre-computed AnalysisResult.
+    Same species-matching logic as run_tracking_analysis(), but skips PDF
+    parsing and multi_analyze() — accepts an already-computed result
+    (e.g. loaded from JSON via load_analysis_from_json()).
+    """
+    identified = []
+    unidentified = []
+    used_species = set()
+    sorted_compounds = sorted(
+        analysis.compounds, key=lambda c: c.max_area, reverse=True)
+    for compound in sorted_compounds:
+        ions = []
+        for ic in compound.recurring_ions:
+            ions.append((ic.mode, ic.mean_mz, ic.best_rank))
+        for ic in compound.other_ions:
+            ions.append((ic.mode, ic.mean_mz, ic.best_rank))
+        match = match_ions_to_species(ions, expected)
+        assigned = _try_assign_species(match, used_species)
+        if assigned:
+            species, adduct, mz = assigned
+            identified.append(IdentifiedCompound(
+                compound=compound,
+                species=species,
+                adduct=adduct,
+                matched_mz=mz,
+            ))
+        else:
+            unidentified.append(compound)
+    return TrackingAnalysis(
+        result=analysis,
+        identified=identified,
+        unidentified=unidentified,
+        files=analysis.files,
+    )
+# ---------------------------------------------------------------------------
+# Purified product LCMS analysis
+# ---------------------------------------------------------------------------
+def run_purified_analysis(
+    exp,
+    expected: List[ExpectedSpecies],
+) -> PurifiedAnalysis:
+    """Parse and analyze the purified product LCMS file.
+    Selection order:
+    1. Files categorized as "final" (e.g. NPpurified, C18-purified)
+    2. Fallback: last workup file chronologically (e.g. crude, wash)
+    """
+    final_files = [lf for lf in exp.lcms_files if lf.category == "final"]
+    crude_fallback = False
+    if not final_files:
+        # Fallback: use the chronologically last workup file
+        workup_files = [lf for lf in exp.lcms_files
+                        if lf.category == "workup"]
+        if workup_files:
+            crude_fallback = True
+            # Sort by actual LCMS run datetime (preferred) then sort_key
+            for wf in workup_files:
+                wf._run_dt = extract_run_datetime(wf.path)
+            # Files with run_datetime sort after those without; among
+            # those with datetime, latest wins; ties break by sort_key.
+            workup_files.sort(
+                key=lambda f: (f._run_dt or "", f.sort_key))
+            lf = workup_files[-1]
+            print(f"  No purified-product LCMS file found — "
+                  f"using last workup file: {lf.filename}"
+                  f"{' (run ' + lf._run_dt + ')' if lf._run_dt else ''}",
+                  file=sys.stderr)
+        else:
+            return PurifiedAnalysis()
+    else:
+        # Use the last final file (most relevant)
+        lf = final_files[-1]
+    try:
+        report = parse_report(lf.path)
+        lf.report = report
+        print(f"  Parsed purified: {lf.filename} "
+              f"({len(report.peaks)} peaks)", file=sys.stderr)
+    except Exception as e:
+        print(f"  Warning: Could not parse {lf.filename}: {e}",
+              file=sys.stderr)
+        return PurifiedAnalysis(file_info=lf)
+    # Match peaks to expected species
+    identified = []
+    for peak in report.peaks:
+        # Build ions list from peak's mass spectra (with rank)
+        ions = []
+        for spec in peak.ms_spectra:
+            for rank, mz in enumerate(spec.top_ions):
+                ions.append((spec.mode, mz, rank))
+        match = match_ions_to_species(ions, expected)
+        if match:
+            species, adduct, mz = match
+            identified.append(IdentifiedPeak(
+                peak=peak, species=species, adduct=adduct, matched_mz=mz,
+            ))
+    # Product purity: area% of the product peak on each detector.
+    # If multiple peaks match the product, use the highest-area one.
+    purity_tac = None
+    purity_220 = None
+    purity_254 = None
+    for ip in identified:
+        if ip.species.role == "product":
+            if ip.peak.area_pct is not None and (
+                    purity_tac is None or ip.peak.area_pct > purity_tac):
+                purity_tac = ip.peak.area_pct
+            if ip.peak.area_pct_220nm is not None and (
+                    purity_220 is None or ip.peak.area_pct_220nm > purity_220):
+                purity_220 = ip.peak.area_pct_220nm
+            if ip.peak.area_pct_254nm is not None and (
+                    purity_254 is None or ip.peak.area_pct_254nm > purity_254):
+                purity_254 = ip.peak.area_pct_254nm
+    return PurifiedAnalysis(
+        report=report,
+        file_info=lf,
+        identified=identified,
+        purity_tac=purity_tac,
+        purity_220nm=purity_220,
+        purity_254nm=purity_254,
+        is_crude_fallback=crude_fallback,
+    )
+# ---------------------------------------------------------------------------
+# CLI placeholder
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    # Library module: running it as a script only prints this hint.
+    print("lcms_identifier: no standalone CLI — "
+          "import from procedure_writer.py or use directly")