masster 0.5.18__tar.gz → 0.5.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic.

Files changed (98)
  1. {masster-0.5.18 → masster-0.5.20}/PKG-INFO +1 -1
  2. {masster-0.5.18 → masster-0.5.20}/pyproject.toml +1 -1
  3. {masster-0.5.18 → masster-0.5.20}/src/masster/_version.py +1 -1
  4. {masster-0.5.18 → masster-0.5.20}/src/masster/lib/lib.py +137 -0
  5. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/find_ms2_def.py +5 -5
  6. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/h5.py +8 -8
  7. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/processing.py +2 -2
  8. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/sciex.py +2 -2
  9. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/thermo.py +1 -1
  10. {masster-0.5.18 → masster-0.5.20}/src/masster/study/id.py +17 -9
  11. {masster-0.5.18 → masster-0.5.20}/src/masster/wizard/README.md +15 -15
  12. {masster-0.5.18 → masster-0.5.20}/src/masster/wizard/wizard.py +82 -28
  13. masster-0.5.20/uv.lock +3664 -0
  14. masster-0.5.18/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  15. masster-0.5.18/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  16. masster-0.5.18/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  17. masster-0.5.18/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  18. masster-0.5.18/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  19. masster-0.5.18/src/masster/data/libs/hilic.csv +0 -4812
  20. masster-0.5.18/uv.lock +0 -3370
  21. {masster-0.5.18 → masster-0.5.20}/.github/workflows/publish.yml +0 -0
  22. {masster-0.5.18 → masster-0.5.20}/.github/workflows/security.yml +0 -0
  23. {masster-0.5.18 → masster-0.5.20}/.github/workflows/test.yml +0 -0
  24. {masster-0.5.18 → masster-0.5.20}/.gitignore +0 -0
  25. {masster-0.5.18 → masster-0.5.20}/.pre-commit-config.yaml +0 -0
  26. {masster-0.5.18 → masster-0.5.20}/LICENSE +0 -0
  27. {masster-0.5.18 → masster-0.5.20}/Makefile +0 -0
  28. {masster-0.5.18 → masster-0.5.20}/README.md +0 -0
  29. {masster-0.5.18 → masster-0.5.20}/TESTING.md +0 -0
  30. {masster-0.5.18 → masster-0.5.20}/demo/example_batch_process.py +0 -0
  31. {masster-0.5.18 → masster-0.5.20}/demo/example_sample_process.py +0 -0
  32. {masster-0.5.18 → masster-0.5.20}/src/masster/__init__.py +0 -0
  33. {masster-0.5.18 → masster-0.5.20}/src/masster/chromatogram.py +0 -0
  34. {masster-0.5.18 → masster-0.5.20}/src/masster/data/libs/aa.csv +0 -0
  35. {masster-0.5.18 → masster-0.5.20}/src/masster/data/libs/ccm.csv +0 -0
  36. {masster-0.5.18 → masster-0.5.20}/src/masster/data/libs/urine.csv +0 -0
  37. {masster-0.5.18 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  38. {masster-0.5.18 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  39. {masster-0.5.18 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  40. {masster-0.5.18 → masster-0.5.20}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  41. {masster-0.5.18 → masster-0.5.20}/src/masster/lib/__init__.py +0 -0
  42. {masster-0.5.18 → masster-0.5.20}/src/masster/logger.py +0 -0
  43. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/__init__.py +0 -0
  44. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/adducts.py +0 -0
  45. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/__init__.py +0 -0
  46. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  47. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/find_features_def.py +0 -0
  48. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  49. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/defaults/sample_def.py +0 -0
  50. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/helpers.py +0 -0
  51. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/lib.py +0 -0
  52. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/load.py +0 -0
  53. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/parameters.py +0 -0
  54. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/plot.py +0 -0
  55. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/quant.py +0 -0
  56. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/sample.py +0 -0
  57. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/sample5_schema.json +0 -0
  58. {masster-0.5.18 → masster-0.5.20}/src/masster/sample/save.py +0 -0
  59. {masster-0.5.18 → masster-0.5.20}/src/masster/spectrum.py +0 -0
  60. {masster-0.5.18 → masster-0.5.20}/src/masster/study/__init__.py +0 -0
  61. {masster-0.5.18 → masster-0.5.20}/src/masster/study/analysis.py +0 -0
  62. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/__init__.py +0 -0
  63. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/align_def.py +0 -0
  64. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/export_def.py +0 -0
  65. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/fill_def.py +0 -0
  66. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/find_consensus_def.py +0 -0
  67. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/find_ms2_def.py +0 -0
  68. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/identify_def.py +0 -0
  69. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  70. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/integrate_def.py +0 -0
  71. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/merge_def.py +0 -0
  72. {masster-0.5.18 → masster-0.5.20}/src/masster/study/defaults/study_def.py +0 -0
  73. {masster-0.5.18 → masster-0.5.20}/src/masster/study/export.py +0 -0
  74. {masster-0.5.18 → masster-0.5.20}/src/masster/study/h5.py +0 -0
  75. {masster-0.5.18 → masster-0.5.20}/src/masster/study/helpers.py +0 -0
  76. {masster-0.5.18 → masster-0.5.20}/src/masster/study/importers.py +0 -0
  77. {masster-0.5.18 → masster-0.5.20}/src/masster/study/load.py +0 -0
  78. {masster-0.5.18 → masster-0.5.20}/src/masster/study/merge.py +0 -0
  79. {masster-0.5.18 → masster-0.5.20}/src/masster/study/parameters.py +0 -0
  80. {masster-0.5.18 → masster-0.5.20}/src/masster/study/plot.py +0 -0
  81. {masster-0.5.18 → masster-0.5.20}/src/masster/study/processing.py +0 -0
  82. {masster-0.5.18 → masster-0.5.20}/src/masster/study/save.py +0 -0
  83. {masster-0.5.18 → masster-0.5.20}/src/masster/study/study.py +0 -0
  84. {masster-0.5.18 → masster-0.5.20}/src/masster/study/study5_schema.json +0 -0
  85. {masster-0.5.18 → masster-0.5.20}/src/masster/wizard/__init__.py +0 -0
  86. {masster-0.5.18 → masster-0.5.20}/src/masster/wizard/example.py +0 -0
  87. {masster-0.5.18 → masster-0.5.20}/tests/conftest.py +0 -0
  88. {masster-0.5.18 → masster-0.5.20}/tests/test_chromatogram.py +0 -0
  89. {masster-0.5.18 → masster-0.5.20}/tests/test_defaults.py +0 -0
  90. {masster-0.5.18 → masster-0.5.20}/tests/test_imports.py +0 -0
  91. {masster-0.5.18 → masster-0.5.20}/tests/test_integration.py +0 -0
  92. {masster-0.5.18 → masster-0.5.20}/tests/test_logger.py +0 -0
  93. {masster-0.5.18 → masster-0.5.20}/tests/test_parameters.py +0 -0
  94. {masster-0.5.18 → masster-0.5.20}/tests/test_sample.py +0 -0
  95. {masster-0.5.18 → masster-0.5.20}/tests/test_spectrum.py +0 -0
  96. {masster-0.5.18 → masster-0.5.20}/tests/test_study.py +0 -0
  97. {masster-0.5.18 → masster-0.5.20}/tests/test_version.py +0 -0
  98. {masster-0.5.18 → masster-0.5.20}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.18
3
+ Version: 0.5.20
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.18"
4
+ version = "0.5.20"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.18"
4
+ __version__ = "0.5.19"
5
5
 
6
6
 
7
7
  def get_version():
@@ -46,6 +46,7 @@ annotations = lib.annotate_features(sample.features_df)
46
46
  """
47
47
 
48
48
  import os
49
+ import json
49
50
  from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
50
51
  import warnings
51
52
 
@@ -685,6 +686,142 @@ class Lib:
685
686
  if skipped_compounds > 0:
686
687
  print(f"All {total_compounds} compounds were skipped due to invalid formulas")
687
688
 
689
+ def import_json(self,
690
+ jsonfile: str,
691
+ polarity: Optional[str] = None,
692
+ adducts: Optional[List[str]] = None,
693
+ min_probability: float = 0.03) -> None:
694
+ """
695
+ Import compound library from a JSON file created by csv_to_json.py.
696
+
697
+ This method reads a JSON file with the structure created by csv_to_json.py
698
+ and generates adduct variants for each compound.
699
+
700
+ Args:
701
+ jsonfile: Path to the JSON file
702
+ polarity: Ionization polarity ("positive", "negative", or None for positive)
703
+ adducts: Specific adducts to generate. If None, generates defaults for the polarity
704
+ min_probability: Minimum probability threshold for adduct filtering
705
+
706
+ Expected JSON structure:
707
+ {
708
+ "version": "1.0",
709
+ "creation_date": "2025-10-07T09:17:06.142290",
710
+ "description": "Converted from CSV file...",
711
+ "source_file": "filename.csv",
712
+ "record_count": 123,
713
+ "data": [
714
+ {
715
+ "name": "compound name",
716
+ "smiles": "SMILES string",
717
+ "inchikey": "InChI key",
718
+ "formula": "molecular formula",
719
+ "db_id": "database ID",
720
+ "db": "database name"
721
+ },
722
+ ...
723
+ ]
724
+ }
725
+
726
+ Raises:
727
+ FileNotFoundError: If JSON file doesn't exist
728
+ ValueError: If JSON structure is invalid or required data is missing
729
+ """
730
+ if not os.path.exists(jsonfile):
731
+ raise FileNotFoundError(f"JSON file not found: {jsonfile}")
732
+
733
+ # Read and parse JSON file
734
+ try:
735
+ with open(jsonfile, 'r', encoding='utf-8') as f:
736
+ json_data = json.load(f)
737
+ except json.JSONDecodeError as e:
738
+ raise ValueError(f"Invalid JSON file: {e}") from e
739
+ except Exception as e:
740
+ raise ValueError(f"Error reading JSON file: {e}") from e
741
+
742
+ # Validate JSON structure
743
+ if not isinstance(json_data, dict):
744
+ raise ValueError("JSON file must contain a dictionary at root level")
745
+
746
+ if "data" not in json_data:
747
+ raise ValueError("JSON file must contain a 'data' field with compound records")
748
+
749
+ data = json_data["data"]
750
+ if not isinstance(data, list):
751
+ raise ValueError("'data' field must be a list of compound records")
752
+
753
+ # Extract metadata for reporting
754
+ version = json_data.get("version", "unknown")
755
+ source_file = json_data.get("source_file", "unknown")
756
+ record_count = json_data.get("record_count", len(data))
757
+
758
+ print(f"Loading JSON library: version {version}, source: {source_file}, records: {record_count}")
759
+
760
+ # Process each compound
761
+ all_variants = []
762
+ cmpd_id_counter = 1
763
+ lib_id_counter = 1
764
+ total_compounds = 0
765
+ skipped_compounds = 0
766
+
767
+ for compound_record in data:
768
+ total_compounds += 1
769
+
770
+ # Validate required fields
771
+ if not isinstance(compound_record, dict):
772
+ skipped_compounds += 1
773
+ continue
774
+
775
+ formula = compound_record.get("formula", "")
776
+ if not formula or not isinstance(formula, str):
777
+ skipped_compounds += 1
778
+ continue
779
+
780
+ # Extract compound data, handling both CSV column names and JSON field names
781
+ compound_level_uid = cmpd_id_counter
782
+ cmpd_id_counter += 1
783
+
784
+ compound_data = {
785
+ "name": compound_record.get("name", compound_record.get("Name", "")),
786
+ "shortname": compound_record.get("shortname", ""),
787
+ "class": compound_record.get("class", ""),
788
+ "smiles": compound_record.get("smiles", compound_record.get("SMILES", "")),
789
+ "inchi": compound_record.get("inchi", compound_record.get("InChI", "")),
790
+ "inchikey": compound_record.get("inchikey", compound_record.get("InChIKey", "")),
791
+ "formula": formula,
792
+ "rt": self._safe_float_conversion(compound_record.get("rt", compound_record.get("RT", None))),
793
+ "db_id": compound_record.get("db_id", compound_record.get("database_id", None)),
794
+ "db": compound_record.get("db", compound_record.get("database", None)),
795
+ "cmpd_uid": compound_level_uid,
796
+ }
797
+
798
+ # Generate adduct variants
799
+ variants, lib_id_counter = self._generate_adduct_variants(
800
+ compound_data, adducts=adducts, polarity=polarity,
801
+ lib_id_counter=lib_id_counter, min_probability=min_probability
802
+ )
803
+ all_variants.extend(variants)
804
+
805
+ # Track if compound was skipped due to invalid formula
806
+ if len(variants) == 0:
807
+ skipped_compounds += 1
808
+
809
+ # Convert to DataFrame and store
810
+ if all_variants:
811
+ new_lib_df = pl.DataFrame(all_variants)
812
+
813
+ # Combine with existing data if any
814
+ if self.lib_df is not None and len(self.lib_df) > 0:
815
+ self.lib_df = pl.concat([self.lib_df, new_lib_df])
816
+ else:
817
+ self.lib_df = new_lib_df
818
+
819
+ print(f"Imported {len(all_variants)} library entries from {jsonfile}")
820
+ else:
821
+ print(f"No valid compounds found in {jsonfile}")
822
+ if skipped_compounds > 0:
823
+ print(f"All {total_compounds} compounds were skipped due to invalid formulas")
824
+
688
825
  def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
689
826
  """
690
827
  Map CSV column names to standardized internal names (case-insensitive).
@@ -42,7 +42,7 @@ class find_ms2_defaults:
42
42
  - get_description(param_name): Get parameter description
43
43
  - get_info(param_name): Get full parameter metadata
44
44
  - list_parameters(): Get list of all parameter names
45
- - get_mz_tolerance(file_type): Get appropriate m/z tolerance based on file type
45
+ - get_mz_tolerance(type): Get appropriate m/z tolerance based on type
46
46
  """
47
47
 
48
48
  # Core MS2 linking parameters
@@ -270,16 +270,16 @@ class find_ms2_defaults:
270
270
 
271
271
  return len(invalid_params) == 0, invalid_params
272
272
 
273
- def get_mz_tolerance(self, file_type=None):
273
+ def get_mz_tolerance(self, type=None):
274
274
  """
275
- Get the appropriate m/z tolerance based on file type.
275
+ Get the appropriate m/z tolerance based on type.
276
276
 
277
277
  Args:
278
- file_type (str, optional): File type ('ztscan', 'dia', or other)
278
+ type (str, optional): Acquisition type ('ztscan', 'dia', or other)
279
279
 
280
280
  Returns:
281
281
  float: Appropriate m/z tolerance value
282
282
  """
283
- if file_type is not None and file_type.lower() in ["ztscan", "dia"]:
283
+ if type is not None and type.lower() in ["ztscan", "dia"]:
284
284
  return self.get("mz_tol_ztscan")
285
285
  return self.get("mz_tol")
@@ -1,4 +1,4 @@
1
- import json
1
+ import json
2
2
  import os
3
3
 
4
4
  import h5py
@@ -94,8 +94,8 @@ def _save_sample5(
94
94
  metadata_group.attrs["file_source"] = str(self.file_source)
95
95
  else:
96
96
  metadata_group.attrs["file_source"] = ""
97
- if hasattr(self, 'file_type') and self.file_type is not None:
98
- metadata_group.attrs["file_type"] = str(self.file_type)
97
+ if hasattr(self, 'type') and self.type is not None:
98
+ metadata_group.attrs["file_type"] = str(self.type)
99
99
  else:
100
100
  metadata_group.attrs["file_type"] = ""
101
101
  if self.label is not None:
@@ -393,7 +393,7 @@ def _load_sample5(self, filename: str, map: bool = False):
393
393
  else:
394
394
  self.file_source = self.file_path
395
395
 
396
- self.file_type = decode_metadata_attr(
396
+ self.type = decode_metadata_attr(
397
397
  metadata_group.attrs.get("file_type", ""),
398
398
  )
399
399
  self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -1160,7 +1160,7 @@ def _load_sample5_study(self, filename: str, map: bool = False):
1160
1160
  else:
1161
1161
  self.file_source = self.file_path
1162
1162
 
1163
- self.file_type = decode_metadata_attr(
1163
+ self.type = decode_metadata_attr(
1164
1164
  metadata_group.attrs.get("file_type", ""),
1165
1165
  )
1166
1166
  self.label = decode_metadata_attr(metadata_group.attrs.get("label", ""))
@@ -2302,7 +2302,7 @@ def create_h5_metadata_group(
2302
2302
  f: h5py.File,
2303
2303
  file_path: Optional[str],
2304
2304
  file_source: Optional[str],
2305
- file_type: Optional[str],
2305
+ type: Optional[str],
2306
2306
  label: Optional[str],
2307
2307
  ) -> None:
2308
2308
  """
@@ -2312,7 +2312,7 @@ def create_h5_metadata_group(
2312
2312
  f: The HDF5 file object
2313
2313
  file_path: Source file path
2314
2314
  file_source: Original source file path
2315
- file_type: Source file type
2315
+ type: Source file type
2316
2316
  label: Sample label
2317
2317
  """
2318
2318
  metadata_group = f.create_group("metadata")
@@ -2321,5 +2321,5 @@ def create_h5_metadata_group(
2321
2321
  metadata_group.attrs["file_source"] = (
2322
2322
  str(file_source) if file_source is not None else ""
2323
2323
  )
2324
- metadata_group.attrs["file_type"] = str(file_type) if file_type is not None else ""
2324
+ metadata_group.attrs["file_type"] = str(type) if type is not None else ""
2325
2325
  metadata_group.attrs["label"] = str(label) if label is not None else ""
@@ -1028,7 +1028,7 @@ def find_ms2(self, **kwargs):
1028
1028
 
1029
1029
  - mz_tol (float):
1030
1030
  Precursor m/z tolerance used for matching. The effective tolerance may be
1031
- adjusted by file type (the defaults class provides ``get_mz_tolerance(file_type)``).
1031
+ adjusted by type (the defaults class provides ``get_mz_tolerance(type)``).
1032
1032
  Default: 0.5 (ztscan/DIA defaults may be larger).
1033
1033
 
1034
1034
  - centroid (bool):
@@ -1077,7 +1077,7 @@ def find_ms2(self, **kwargs):
1077
1077
 
1078
1078
  # Extract parameter values
1079
1079
  features = params.get("features")
1080
- mz_tol = params.get_mz_tolerance(self.file_type)
1080
+ mz_tol = params.get_mz_tolerance(self.type)
1081
1081
  centroid = params.get("centroid")
1082
1082
  deisotope = params.get("deisotope")
1083
1083
  dia_stats = params.get("dia_stats")
@@ -379,7 +379,7 @@ class SciexWiffData:
379
379
  self._raw_file_path = ""
380
380
  self.centroided = centroided
381
381
  self.creation_time = ""
382
- self.file_type = "sciex"
382
+ self.type = "sciex"
383
383
  self.instrument = "sciex"
384
384
 
385
385
  if self.centroided:
@@ -616,7 +616,7 @@ if __name__ == "__main__":
616
616
  print(f" - Number of spectra: {len(wiff_data.spectrum_df)}")
617
617
  print(f" - Number of peaks: {len(wiff_data.peak_df)}")
618
618
  print(f" - Creation time: {wiff_data.creation_time}")
619
- print(f" - File type: {wiff_data.file_type}")
619
+ print(f" - File type: {wiff_data.type}")
620
620
  print(f" - Instrument: {wiff_data.instrument}")
621
621
 
622
622
  # Test getting peaks from first spectrum
@@ -524,7 +524,7 @@ class ThermoRawData:
524
524
  # File and instrument information
525
525
  self._raw_file_path = ""
526
526
  self.creation_time = ""
527
- self.file_type = "thermo"
527
+ self.type = "thermo"
528
528
  self.instrument = "thermo"
529
529
 
530
530
  # Processing parameters
@@ -21,10 +21,10 @@ def lib_load(
21
21
 
22
22
  Args:
23
23
  study: Study instance
24
- lib_source: either a CSV file path (str) or a Lib instance
25
- polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path.
24
+ lib_source: either a CSV/JSON file path (str) or a Lib instance
25
+ polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
26
26
  If None, uses study.polarity automatically.
27
- adducts: specific adducts to generate - used when lib_source is a CSV path
27
+ adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
28
28
  iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)
29
29
 
30
30
  Side effects:
@@ -38,7 +38,7 @@ def lib_load(
38
38
  Lib = None
39
39
 
40
40
  if lib_source is None:
41
- raise ValueError("lib_source must be a CSV file path (str) or a Lib instance")
41
+ raise ValueError("lib_source must be a CSV/JSON file path (str) or a Lib instance")
42
42
 
43
43
  # Use study polarity if not explicitly provided
44
44
  if polarity is None:
@@ -52,15 +52,23 @@ def lib_load(
52
52
  polarity = "positive" # Default fallback
53
53
  study.logger.debug(f"Using study polarity: {polarity}")
54
54
 
55
- # Handle string input (CSV file path)
55
+ # Handle string input (CSV or JSON file path)
56
56
  if isinstance(lib_source, str):
57
57
  if Lib is None:
58
58
  raise ImportError(
59
- "Could not import masster.lib.lib.Lib - required for CSV loading",
59
+ "Could not import masster.lib.lib.Lib - required for CSV/JSON loading",
60
60
  )
61
61
 
62
62
  lib_obj = Lib()
63
- lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
63
+
64
+ # Determine file type by extension
65
+ if lib_source.lower().endswith('.json'):
66
+ lib_obj.import_json(lib_source, polarity=polarity, adducts=adducts)
67
+ elif lib_source.lower().endswith('.csv'):
68
+ lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
69
+ else:
70
+ # Default to CSV behavior for backward compatibility
71
+ lib_obj.import_csv(lib_source, polarity=polarity, adducts=adducts)
64
72
 
65
73
  # Handle Lib instance
66
74
  elif Lib is not None and isinstance(lib_source, Lib):
@@ -72,7 +80,7 @@ def lib_load(
72
80
 
73
81
  else:
74
82
  raise TypeError(
75
- "lib_source must be a CSV file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
83
+ "lib_source must be a CSV/JSON file path (str), a masster.lib.Lib instance, or have a 'lib_df' attribute",
76
84
  )
77
85
 
78
86
  # Ensure lib_df is populated
@@ -101,7 +109,7 @@ def lib_load(
101
109
  # Store pointer and DataFrame on study
102
110
  study._lib = lib_obj
103
111
 
104
- # Add source_id column with filename (without path) if loading from CSV
112
+ # Add source_id column with filename (without path) if loading from CSV/JSON
105
113
  if isinstance(lib_source, str):
106
114
  import os
107
115
  filename_only = os.path.basename(lib_source)
@@ -12,7 +12,7 @@ from masster import Wizard
12
12
  # Create wizard with minimal configuration
13
13
  wizard = Wizard(
14
14
  data_source="./raw_data", # Directory with raw files
15
- study_folder="./processed", # Output directory
15
+ study_folder="./processed", # Output directory
16
16
  polarity="positive", # or "negative"
17
17
  num_cores=4 # CPU cores to use
18
18
  )
@@ -35,22 +35,22 @@ params = wizard_def(
35
35
  study_folder="./processed_advanced",
36
36
  polarity="negative",
37
37
  num_cores=8,
38
-
38
+
39
39
  # File discovery
40
40
  file_extensions=[".wiff", ".raw", ".mzML"],
41
41
  search_subfolders=True,
42
42
  skip_patterns=["blank", "QC", "test"],
43
-
43
+
44
44
  # Processing parameters
45
45
  adducts=["H-1:-:0.95", "Cl:-:0.05", "CH2O2:0:0.2"],
46
46
  chrom_fwhm=0.15,
47
47
  noise_threshold=5e4,
48
-
48
+
49
49
  # Study assembly
50
50
  rt_tolerance=1.0,
51
51
  mz_tolerance=0.008,
52
52
  min_samples_for_merge=30,
53
-
53
+
54
54
  # Output options
55
55
  export_formats=["csv", "xlsx", "mgf", "parquet"],
56
56
  generate_plots=True,
@@ -73,7 +73,7 @@ wizard.run_full_pipeline()
73
73
 
74
74
  ### 💾 Intelligent Resume
75
75
  - **Checkpoint System**: Automatically saves progress at key points
76
- - **File Tracking**: Remembers which files have been processed successfully
76
+ - **File Tracking**: Remembers which files have been processed successfully
77
77
  - **Smart Recovery**: Resumes from last successful step after interruption
78
78
  - **Validation**: Verifies existing outputs before skipping
79
79
 
@@ -112,7 +112,7 @@ wizard.run_full_pipeline()
112
112
 
113
113
  ### 4. Feature Alignment
114
114
  - **RT Alignment**: Corrects retention time shifts between samples
115
- - **Mass Alignment**: Accounts for mass calibration differences
115
+ - **Mass Alignment**: Accounts for mass calibration differences
116
116
  - **Algorithm Selection**: Supports KD-tree, QT-clustering, and chunked methods
117
117
  - **Validation**: Reports alignment statistics and quality metrics
118
118
 
@@ -232,7 +232,7 @@ Returns detailed status dictionary with current step, processed files, timing, a
232
232
  - Ensure sufficient disk space in output directory
233
233
  - Close any applications that might lock files
234
234
 
235
- **Processing Failures**
235
+ **Processing Failures**
236
236
  - Check individual file integrity
237
237
  - Review `skip_patterns` to exclude problematic files
238
238
  - Examine detailed logs in `wizard.log` and `processing.log`
@@ -249,7 +249,7 @@ The Wizard includes built-in validation at each step:
249
249
 
250
250
  - **File Validation**: Checks file accessibility and format compatibility
251
251
  - **Processing Validation**: Verifies sample5 outputs can be loaded
252
- - **Study Validation**: Ensures study assembly completed successfully
252
+ - **Study Validation**: Ensures study assembly completed successfully
253
253
  - **Alignment Validation**: Reports alignment statistics and warnings
254
254
  - **Export Validation**: Confirms all requested outputs were created
255
255
 
@@ -257,7 +257,7 @@ The Wizard includes built-in validation at each step:
257
257
 
258
258
  ### System Requirements
259
259
  - **Minimum**: 4 CPU cores, 8 GB RAM
260
- - **Recommended**: 8+ CPU cores, 16+ GB RAM
260
+ - **Recommended**: 8+ CPU cores, 16+ GB RAM
261
261
  - **Large Studies**: 16+ CPU cores, 32+ GB RAM
262
262
  - **Storage**: SSD recommended, ~2-3x raw data size free space
263
263
 
@@ -265,7 +265,7 @@ The Wizard includes built-in validation at each step:
265
265
 
266
266
  **For Small Studies (< 50 samples)**
267
267
  - Use `num_cores = 4-6`
268
- - Set `batch_size = 4-8`
268
+ - Set `batch_size = 4-8`
269
269
  - Use `merge_method = "kd"`
270
270
  - Enable all export formats
271
271
 
@@ -297,7 +297,7 @@ if not wizard.study_folder_path.glob("*.sample5"):
297
297
 
298
298
  # Continue with study-level processing
299
299
  wizard.assemble_study()
300
- wizard.align_and_merge()
300
+ wizard.align_and_merge()
301
301
  wizard.export_results()
302
302
  ```
303
303
 
@@ -312,7 +312,7 @@ studies = [
312
312
  for study_config in studies:
313
313
  wizard = Wizard(**study_config, num_cores=8)
314
314
  success = wizard.run_full_pipeline()
315
-
315
+
316
316
  if success:
317
317
  print(f"✅ {study_config['output']} completed")
318
318
  else:
@@ -338,7 +338,7 @@ if hasattr(wizard.study, 'features_filter'):
338
338
  )
339
339
  wizard.study.features_filter(selection)
340
340
 
341
- # Continue with standard pipeline
341
+ # Continue with standard pipeline
342
342
  wizard.align_and_merge()
343
343
  wizard.generate_plots()
344
344
  ```
@@ -370,4 +370,4 @@ The Wizard generates several types of output files:
370
370
  - `sample_name.mgf` - Individual sample MS2 spectra
371
371
  - `sample_name_2d.html` - Individual sample 2D plot
372
372
 
373
- The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.
373
+ The Wizard provides a complete, automated solution for mass spectrometry data processing while maintaining flexibility for custom workflows and providing robust error handling and recovery capabilities.