msreport 0.0.29__tar.gz → 0.0.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {msreport-0.0.29 → msreport-0.0.30}/PKG-INFO +10 -2
  2. {msreport-0.0.29 → msreport-0.0.30}/README.md +8 -0
  3. {msreport-0.0.29 → msreport-0.0.30}/msreport/__init__.py +1 -1
  4. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/distribution.py +2 -1
  5. {msreport-0.0.29 → msreport-0.0.30}/msreport/qtable.py +21 -11
  6. {msreport-0.0.29 → msreport-0.0.30}/msreport/reader.py +108 -20
  7. {msreport-0.0.29 → msreport-0.0.30}/msreport.egg-info/PKG-INFO +10 -2
  8. {msreport-0.0.29 → msreport-0.0.30}/msreport.egg-info/requires.txt +1 -1
  9. {msreport-0.0.29 → msreport-0.0.30}/pyproject.toml +1 -1
  10. {msreport-0.0.29 → msreport-0.0.30}/tests/test_analyze.py +1 -1
  11. {msreport-0.0.29 → msreport-0.0.30}/tests/test_plot.py +11 -1
  12. {msreport-0.0.29 → msreport-0.0.30}/tests/test_qtable.py +17 -31
  13. {msreport-0.0.29 → msreport-0.0.30}/LICENSE.txt +0 -0
  14. {msreport-0.0.29 → msreport-0.0.30}/msreport/aggregate/__init__.py +0 -0
  15. {msreport-0.0.29 → msreport-0.0.30}/msreport/aggregate/condense.py +0 -0
  16. {msreport-0.0.29 → msreport-0.0.30}/msreport/aggregate/pivot.py +0 -0
  17. {msreport-0.0.29 → msreport-0.0.30}/msreport/aggregate/summarize.py +0 -0
  18. {msreport-0.0.29 → msreport-0.0.30}/msreport/analyze.py +0 -0
  19. {msreport-0.0.29 → msreport-0.0.30}/msreport/errors.py +0 -0
  20. {msreport-0.0.29 → msreport-0.0.30}/msreport/export.py +0 -0
  21. {msreport-0.0.29 → msreport-0.0.30}/msreport/fasta.py +0 -0
  22. {msreport-0.0.29 → msreport-0.0.30}/msreport/helper/__init__.py +0 -0
  23. {msreport-0.0.29 → msreport-0.0.30}/msreport/helper/calc.py +0 -0
  24. {msreport-0.0.29 → msreport-0.0.30}/msreport/helper/maxlfq.py +0 -0
  25. {msreport-0.0.29 → msreport-0.0.30}/msreport/helper/table.py +0 -0
  26. {msreport-0.0.29 → msreport-0.0.30}/msreport/helper/temp.py +0 -0
  27. {msreport-0.0.29 → msreport-0.0.30}/msreport/impute.py +0 -0
  28. {msreport-0.0.29 → msreport-0.0.30}/msreport/isobar.py +0 -0
  29. {msreport-0.0.29 → msreport-0.0.30}/msreport/normalize.py +0 -0
  30. {msreport-0.0.29 → msreport-0.0.30}/msreport/peptidoform.py +0 -0
  31. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/__init__.py +0 -0
  32. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/_partial_plots.py +0 -0
  33. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/comparison.py +0 -0
  34. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/multivariate.py +0 -0
  35. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/quality.py +0 -0
  36. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/style.py +0 -0
  37. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/style_sheets/msreport-notebook.mplstyle +0 -0
  38. {msreport-0.0.29 → msreport-0.0.30}/msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +0 -0
  39. {msreport-0.0.29 → msreport-0.0.30}/msreport/rinterface/__init__.py +0 -0
  40. {msreport-0.0.29 → msreport-0.0.30}/msreport/rinterface/limma.py +0 -0
  41. {msreport-0.0.29 → msreport-0.0.30}/msreport/rinterface/rinstaller.py +0 -0
  42. {msreport-0.0.29 → msreport-0.0.30}/msreport/rinterface/rscripts/limma.R +0 -0
  43. {msreport-0.0.29 → msreport-0.0.30}/msreport.egg-info/SOURCES.txt +0 -0
  44. {msreport-0.0.29 → msreport-0.0.30}/msreport.egg-info/dependency_links.txt +0 -0
  45. {msreport-0.0.29 → msreport-0.0.30}/msreport.egg-info/top_level.txt +0 -0
  46. {msreport-0.0.29 → msreport-0.0.30}/setup.cfg +0 -0
  47. {msreport-0.0.29 → msreport-0.0.30}/setup.py +0 -0
  48. {msreport-0.0.29 → msreport-0.0.30}/tests/test_export.py +0 -0
  49. {msreport-0.0.29 → msreport-0.0.30}/tests/test_helper.py +0 -0
  50. {msreport-0.0.29 → msreport-0.0.30}/tests/test_impute.py +0 -0
  51. {msreport-0.0.29 → msreport-0.0.30}/tests/test_isobar.py +0 -0
  52. {msreport-0.0.29 → msreport-0.0.30}/tests/test_maxlfq.py +0 -0
  53. {msreport-0.0.29 → msreport-0.0.30}/tests/test_peptidoform.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: msreport
3
- Version: 0.0.29
3
+ Version: 0.0.30
4
4
  Summary: Post processing and analysis of quantitative proteomics data
5
5
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -29,7 +29,7 @@ Requires-Dist: seaborn>=0.12.0
29
29
  Requires-Dist: statsmodels>=0.13.2
30
30
  Requires-Dist: typing_extensions>=4
31
31
  Provides-Extra: r
32
- Requires-Dist: rpy2!=3.5.13,>=3.5.3; extra == "r"
32
+ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
33
33
  Provides-Extra: dev
34
34
  Requires-Dist: mypy>=1.15.0; extra == "dev"
35
35
  Requires-Dist: pytest>=8.3.5; extra == "dev"
@@ -40,6 +40,7 @@ Dynamic: license-file
40
40
  # MsReport
41
41
 
42
42
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
43
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
43
44
  ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
44
45
  [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
45
46
 
@@ -55,6 +56,7 @@ bottom-up mass spectrometry experiments.
55
56
  - [Additional requirements](#additional-requirements)
56
57
  - [Optional Dependencies](#optional-dependencies)
57
58
  - [Development status](#development-status)
59
+ - [How to cite](#how-to-cite)
58
60
 
59
61
  ## What is MsReport?
60
62
 
@@ -134,3 +136,9 @@ For example, the R home directory might look like this on Windows: `C:\Program F
134
136
  ## Development status
135
137
 
136
138
  MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
139
+
140
+ ## How to cite
141
+
142
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
143
+
144
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
@@ -1,6 +1,7 @@
1
1
  # MsReport
2
2
 
3
3
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
4
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
4
5
  ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
5
6
  [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
6
7
 
@@ -16,6 +17,7 @@ bottom-up mass spectrometry experiments.
16
17
  - [Additional requirements](#additional-requirements)
17
18
  - [Optional Dependencies](#optional-dependencies)
18
19
  - [Development status](#development-status)
20
+ - [How to cite](#how-to-cite)
19
21
 
20
22
  ## What is MsReport?
21
23
 
@@ -95,3 +97,9 @@ For example, the R home directory might look like this on Windows: `C:\Program F
95
97
  ## Development status
96
98
 
97
99
  MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
100
+
101
+ ## How to cite
102
+
103
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
104
+
105
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
@@ -8,4 +8,4 @@ from msreport.fasta import import_protein_database
8
8
  from msreport.qtable import Qtable
9
9
  from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
10
10
 
11
- __version__ = "0.0.29"
11
+ __version__ = "0.0.30"
@@ -204,7 +204,8 @@ def experiment_ratios(
204
204
  mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
205
205
  if exclude_invalid:
206
206
  mask = mask & qtable["Valid"]
207
- experiment_data = experiment_data[mask]
207
+ # Use `mask.to_numpy` to solve issue with different indices of mask and dataframe
208
+ experiment_data = experiment_data[mask.to_numpy()]
208
209
  pseudo_reference = np.nanmean(experiment_data, axis=1)
209
210
  ratio_data = experiment_data.subtract(pseudo_reference, axis=0)
210
211
 
@@ -27,13 +27,11 @@ class Qtable:
27
27
  design: A pandas.DataFrame describing the experimental design.
28
28
  """
29
29
 
30
- _default_id_column = "Representative protein"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  data: pd.DataFrame,
35
- design: Optional[pd.DataFrame] = None,
36
- id_column: str = "Representative protein",
33
+ design: pd.DataFrame,
34
+ id_column: str,
37
35
  ):
38
36
  """Initializes the Qtable.
39
37
 
@@ -42,12 +40,13 @@ class Qtable:
42
40
 
43
41
  Args:
44
42
  data: A dataframe containing quantitative proteomics data in a wide format.
43
+ The index of the dataframe must contain unique values.
45
44
  design: A dataframe describing the experimental design that must at least
46
45
  contain the columns "Sample" and "Experiment". The "Sample" entries
47
46
  should correspond to the Sample names present in the quantitative
48
47
  columns of the data.
49
48
  id_column: The name of the column that contains the unique identifiers for
50
- the entries in the data table. Default is "Representative protein".
49
+ the entries in the data table.
51
50
 
52
51
  Raises:
53
52
  KeyError: If the specified id_column is not found in data.
@@ -76,8 +75,7 @@ class Qtable:
76
75
  self._id_column = id_column
77
76
  if "Valid" not in self.data.columns:
78
77
  self.data["Valid"] = True
79
- if design is not None:
80
- self.add_design(design)
78
+ self.add_design(design)
81
79
 
82
80
  self._expression_columns: list[str] = []
83
81
  self._expression_features: list[str] = []
@@ -438,6 +436,11 @@ class Qtable:
438
436
 
439
437
  Returns:
440
438
  An instance of Qtable loaded from the specified files.
439
+
440
+ Raises:
441
+ ValueError: If the loaded config file does not contain the
442
+ "Unique ID column" key. This is due to the qtable being saved with a
443
+ version of msreport <= 0.0.27.
441
444
  """
442
445
  filepaths = _get_qtable_export_filepaths(directory, basename)
443
446
  with open(filepaths["config"]) as openfile:
@@ -458,13 +461,20 @@ class Qtable:
458
461
  filepaths["design"], sep="\t", index_col=0, keep_default_na=True
459
462
  )
460
463
 
461
- qtable = Qtable(data, design)
464
+ if "Unique ID column" not in config_data:
465
+ # The qtable was likely saved with a version of msreport <= 0.0.27, which did not store this key.
466
+ raise ValueError(
467
+ "The qtable config file does not contain the 'Unique ID column' key. "
468
+ "This is likely due to the qtable being saved with a version of "
469
+ "msreport <= 0.0.27."
470
+ )
471
+ id_column = config_data["Unique ID column"]
472
+
473
+ qtable = Qtable(data, design, id_column)
462
474
  qtable._expression_columns = config_data["Expression columns"]
463
475
  qtable._expression_features = config_data["Expression features"]
464
476
  qtable._expression_sample_mapping = config_data["Expression sample mapping"]
465
477
  # This check is required for backwards compatibility with msreport <= 0.0.27
466
- if "Unique ID column" in config_data:
467
- qtable._id_column = config_data["Unique ID column"]
468
478
  return qtable
469
479
 
470
480
  def to_tsv(self, path: str, index: bool = False):
@@ -570,7 +580,7 @@ class Qtable:
570
580
  self._expression_sample_mapping = {}
571
581
 
572
582
  def __copy__(self) -> Qtable:
573
- new_instance = Qtable(self.data, self.design)
583
+ new_instance = Qtable(self.data, self.design, self.id_column)
574
584
  # Copy all private attributes
575
585
  for attr in dir(self):
576
586
  if (
@@ -541,6 +541,8 @@ class FragPipeReader(ResultReader):
541
541
  """FragPipe result reader.
542
542
 
543
543
  Methods:
544
+ import_design: Reads a "fragpipe-files.fp-manifest" file and returns a
545
+ processed design dataframe.
544
546
  import_proteins: Reads a "combined_protein.tsv" or "protein.tsv" file and
545
547
  returns a processed dataframe, conforming to the MsReport naming
546
548
  convention.
@@ -583,12 +585,19 @@ class FragPipeReader(ResultReader):
583
585
  "ions": "combined_ion.tsv",
584
586
  "ion_evidence": "ion.tsv",
585
587
  "psm_evidence": "psm.tsv",
588
+ "design": "fragpipe-files.fp-manifest",
586
589
  }
587
590
  isobar_filenames: dict[str, str] = {
588
591
  "proteins": "protein.tsv",
589
592
  "peptides": "peptide.tsv",
590
593
  "ions": "ion.tsv",
591
594
  }
595
+ sil_filenames: dict[str, str] = {
596
+ "proteins": "combined_protein_label_quant.tsv",
597
+ "peptides": "combined_modified_peptide_label_quant.tsv",
598
+ "ions": "combined_ion_label_quant.tsv",
599
+ }
600
+
592
601
  protected_columns: list[str] = []
593
602
  sample_column_tags: list[str] = [
594
603
  "Spectral Count",
@@ -609,6 +618,7 @@ class FragPipeReader(ResultReader):
609
618
  "Modified Sequence": "Modified sequence", # Modified peptide and ion
610
619
  "Start": "Start position", # Peptide and ion
611
620
  "End": "End position", # Peptide and ion
621
+ "Mapped Proteins": "Mapped proteins", # All PSM, ion, and peptide tables
612
622
  "Combined Total Peptides": "Total peptides", # From LFQ
613
623
  "Total Peptides": "Total peptides", # From TMT
614
624
  "Description": "Protein name",
@@ -638,7 +648,11 @@ class FragPipeReader(ResultReader):
638
648
  protein_info_tags: list[str] = []
639
649
 
640
650
  def __init__(
641
- self, directory: str, isobar: bool = False, contaminant_tag: str = "contam_"
651
+ self,
652
+ directory: str,
653
+ isobar: bool = False,
654
+ sil: bool = False,
655
+ contaminant_tag: str = "contam_",
642
656
  ) -> None:
643
657
  """Initializes the FragPipeReader.
644
658
 
@@ -646,16 +660,69 @@ class FragPipeReader(ResultReader):
646
660
  directory: Location of the FragPipe result folder
647
661
  isobar: Set to True if quantification strategy was TMT, iTRAQ or similar;
648
662
  default False.
663
+ sil: Set to True if the FragPipe result files are from a stable isotope
664
+ labeling experiment, such as SILAC; default False.
649
665
  contaminant_tag: Prefix of Protein ID entries to identify contaminants;
650
666
  default "contam_".
651
667
  """
668
+ if sil and isobar:
669
+ raise ValueError("Cannot set both 'isobar' and 'sil' to True.")
652
670
  self._add_data_directory(directory)
653
671
  self._isobar: bool = isobar
672
+ self._sil: bool = sil
654
673
  self._contaminant_tag: str = contaminant_tag
655
- if not isobar:
674
+ if isobar:
675
+ self.filenames = self.isobar_filenames
676
+ elif sil:
677
+ self.filenames = self.sil_filenames
678
+ else:
656
679
  self.filenames = self.default_filenames
680
+
681
+ def import_design(
682
+ self, filename: Optional[str] = None, sort: bool = False
683
+ ) -> pd.DataFrame:
684
+ """Reads a 'fp-manifest' file and returns a processed design dataframe.
685
+
686
+ Args:
687
+ filename: Allows specifying an alternative filename, otherwise the default
688
+ filename is used.
689
+ sort: If True, the design dataframe is sorted by "Experiment" and
690
+ "Replicate"; default False.
691
+
692
+ Returns:
693
+ A dataframe containing the processed design table with columns:
694
+ "Sample", "Experiment", "Replicate", "Rawfile".
695
+
696
+ Raises:
697
+ FileNotFoundError: If the specified manifest file does not exist.
698
+ """
699
+ if filename is None:
700
+ filepath = os.path.join(self.data_directory, self.filenames["design"])
657
701
  else:
658
- self.filenames = self.isobar_filenames
702
+ filepath = os.path.join(self.data_directory, filename)
703
+ if not os.path.exists(filepath):
704
+ raise FileNotFoundError(
705
+ f"File '{filepath}' does not exist. Please check the file path."
706
+ )
707
+ fp_manifest = pd.read_csv(filepath, sep="\t", header=None, dtype=str)
708
+ fp_manifest.columns = ["Path", "Experiment", "Bioreplicate", "Data type"]
709
+
710
+ design = pd.DataFrame(
711
+ {
712
+ "Sample": fp_manifest["Experiment"] + "_" + fp_manifest["Bioreplicate"],
713
+ "Experiment": fp_manifest["Experiment"],
714
+ "Replicate": fp_manifest["Bioreplicate"],
715
+ "Rawfile": fp_manifest["Path"].apply(
716
+ # Required to handle Windows and Unix style paths on either system
717
+ lambda x: x.replace("\\", "/").split("/")[-1]
718
+ ),
719
+ }
720
+ )
721
+
722
+ if sort:
723
+ design.sort_values(by=["Experiment", "Replicate"], inplace=True)
724
+ design.reset_index(drop=True, inplace=True)
725
+ return design
659
726
 
660
727
  def import_proteins(
661
728
  self,
@@ -737,6 +804,7 @@ class FragPipeReader(ResultReader):
737
804
  df = self._read_file("peptides" if filename is None else filename)
738
805
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
739
806
  df["Representative protein"] = df["Protein reported by software"]
807
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
740
808
  # Note that _add_protein_entries would need to be adapted for the peptide table.
741
809
  # df = self._add_protein_entries(df)
742
810
  if rename_columns:
@@ -793,6 +861,8 @@ class FragPipeReader(ResultReader):
793
861
  # 'Indistinguishable Proteins' to the ion table.
794
862
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
795
863
  df["Representative protein"] = df["Protein reported by software"]
864
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
865
+
796
866
  if rename_columns:
797
867
  df = self._rename_columns(df, prefix_column_tags)
798
868
  if rewrite_modifications and rename_columns:
@@ -879,6 +949,8 @@ class FragPipeReader(ResultReader):
879
949
  # 'Indistinguishable Proteins' to the ion table.
880
950
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
881
951
  df["Representative protein"] = df["Protein reported by software"]
952
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
953
+
882
954
  if rename_columns:
883
955
  df = self._rename_columns(df, prefix_column_tags)
884
956
  if rewrite_modifications and rename_columns:
@@ -928,23 +1000,7 @@ class FragPipeReader(ResultReader):
928
1000
 
929
1001
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
930
1002
  df["Representative protein"] = df["Protein reported by software"]
931
- df["Mapped Proteins"] = df["Mapped Proteins"].astype(str).replace("nan", "")
932
-
933
- # FP only lists additional mapped proteins in the "Mapped Proteins" column
934
- # MsReport reports all matching proteins in the "Mapped proteins" column
935
- mapped_proteins_entries = []
936
- for protein, mapped_protein_fp in zip(
937
- df["Representative protein"], df["Mapped Proteins"], strict=True
938
- ):
939
- if mapped_protein_fp == "":
940
- mapped_proteins = [protein]
941
- else:
942
- additional_mapped_proteins = msreport.reader._extract_protein_ids(
943
- mapped_protein_fp.split(", ")
944
- )
945
- mapped_proteins = [protein] + additional_mapped_proteins
946
- mapped_proteins_entries.append(";".join(mapped_proteins))
947
- df["Mapped proteins"] = mapped_proteins_entries
1003
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
948
1004
 
949
1005
  if rename_columns:
950
1006
  df = self._rename_columns(df, prefix_tag=True)
@@ -980,6 +1036,35 @@ class FragPipeReader(ResultReader):
980
1036
  df[key] = protein_entry_table[key]
981
1037
  return df
982
1038
 
1039
+ def _collect_mapped_proteins(self, df: pd.DataFrame) -> list[str]:
1040
+ """Generates a list of mapped proteins entries.
1041
+
1042
+ This method extracts protein IDs from the 'Representative protein' and the
1043
+ 'Mapped Proteins' column and combines them into a single string for each row,
1044
+ where multiple protein IDs are separated by semicolons.
1045
+
1046
+ Args:
1047
+ df: DataFrame containing the 'Mapped Proteins' column.
1048
+
1049
+ Returns:
1050
+ A list of mapped proteins entries.
1051
+ """
1052
+ mapped_proteins_entries = []
1053
+ for protein, mapped_protein_fp in zip(
1054
+ df["Representative protein"],
1055
+ df["Mapped Proteins"].astype(str).replace("nan", ""),
1056
+ strict=True,
1057
+ ):
1058
+ if mapped_protein_fp == "":
1059
+ mapped_proteins = [protein]
1060
+ else:
1061
+ additional_mapped_proteins = msreport.reader._extract_protein_ids(
1062
+ mapped_protein_fp.split(", ")
1063
+ )
1064
+ mapped_proteins = [protein] + additional_mapped_proteins
1065
+ mapped_proteins_entries.append(";".join(mapped_proteins))
1066
+ return mapped_proteins_entries
1067
+
983
1068
  def _collect_leading_protein_entries(self, df: pd.DataFrame) -> list[list[str]]:
984
1069
  """Generates a list of leading protein entries.
985
1070
 
@@ -995,6 +1080,9 @@ class FragPipeReader(ResultReader):
995
1080
  A list of the same length as the input dataframe. Each position contains a
996
1081
  list of leading protein entries, with a minimum of one entry.
997
1082
  """
1083
+ if self._sil: # No "Indistinguishable Proteins" columns in 'SIL' data
1084
+ return [[p] for p in df["Protein"]]
1085
+
998
1086
  leading_protein_entries = []
999
1087
  for protein_entry, indist_protein_entry in zip(
1000
1088
  df["Protein"], df["Indistinguishable Proteins"].fillna("").astype(str)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: msreport
3
- Version: 0.0.29
3
+ Version: 0.0.30
4
4
  Summary: Post processing and analysis of quantitative proteomics data
5
5
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -29,7 +29,7 @@ Requires-Dist: seaborn>=0.12.0
29
29
  Requires-Dist: statsmodels>=0.13.2
30
30
  Requires-Dist: typing_extensions>=4
31
31
  Provides-Extra: r
32
- Requires-Dist: rpy2!=3.5.13,>=3.5.3; extra == "r"
32
+ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
33
33
  Provides-Extra: dev
34
34
  Requires-Dist: mypy>=1.15.0; extra == "dev"
35
35
  Requires-Dist: pytest>=8.3.5; extra == "dev"
@@ -40,6 +40,7 @@ Dynamic: license-file
40
40
  # MsReport
41
41
 
42
42
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
43
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
43
44
  ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
44
45
  [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
45
46
 
@@ -55,6 +56,7 @@ bottom-up mass spectrometry experiments.
55
56
  - [Additional requirements](#additional-requirements)
56
57
  - [Optional Dependencies](#optional-dependencies)
57
58
  - [Development status](#development-status)
59
+ - [How to cite](#how-to-cite)
58
60
 
59
61
  ## What is MsReport?
60
62
 
@@ -134,3 +136,9 @@ For example, the R home directory might look like this on Windows: `C:\Program F
134
136
  ## Development status
135
137
 
136
138
  MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
139
+
140
+ ## How to cite
141
+
142
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
143
+
144
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
@@ -12,7 +12,7 @@ statsmodels>=0.13.2
12
12
  typing_extensions>=4
13
13
 
14
14
  [R]
15
- rpy2!=3.5.13,>=3.5.3
15
+ rpy2<3.5.13,>=3.5.3
16
16
 
17
17
  [dev]
18
18
  mypy>=1.15.0
@@ -47,7 +47,7 @@ changelog = "https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md"
47
47
 
48
48
  [project.optional-dependencies]
49
49
  R = [
50
- "rpy2>=3.5.3,!=3.5.13",
50
+ "rpy2>=3.5.3,<3.5.13",
51
51
  ]
52
52
  dev = [
53
53
  "mypy>=1.15.0",
@@ -46,7 +46,7 @@ def example_data():
46
46
 
47
47
  @pytest.fixture
48
48
  def example_qtable(example_data):
49
- qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"])
49
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
50
50
  qtable.set_expression_by_tag("Intensity")
51
51
  return qtable
52
52
 
@@ -49,11 +49,21 @@ def example_data():
49
49
 
50
50
  @pytest.fixture
51
51
  def example_qtable(example_data):
52
- qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"])
52
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
53
53
  qtable.set_expression_by_tag("Intensity")
54
54
  return qtable
55
55
 
56
56
 
57
+ class TestExperimentRatios:
58
+ @pytest.fixture(autouse=True)
59
+ def _init_qtable(self, example_qtable):
60
+ self.qtable = example_qtable
61
+
62
+ def test_non_default_df_index_does_not_raise_error(self):
63
+ self.qtable.data.index = range(2, len(self.qtable.data) + 2)
64
+ fig, axes = msreport.plot.experiment_ratios(self.qtable)
65
+
66
+
57
67
  class TestVolcanoMa:
58
68
  @pytest.fixture(autouse=True)
59
69
  def _init_qtable(self, example_qtable):
@@ -68,43 +68,41 @@ def example_data():
68
68
 
69
69
  @pytest.fixture
70
70
  def example_qtable(example_data):
71
- qtable = msreport.qtable.Qtable(
72
- example_data["data"], design=example_data["design"], id_column="id"
73
- )
71
+ qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"], id_column="id") # fmt: skip
74
72
  qtable.set_expression_by_tag("Intensity")
75
73
  return qtable
76
74
 
77
75
 
78
76
  class TestQtableInitialization:
79
77
  def test_data_is_added_to_qtable(self, example_data):
80
- qtable = msreport.qtable.Qtable(example_data["data"])
78
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
81
79
  assert qtable.data.equals(example_data["data"])
82
80
 
83
81
  def test_design_is_added_to_qtable(self, example_data):
84
- qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"]) # fmt: skip
82
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
85
83
  assert qtable.design.equals(example_data["design"])
86
84
 
87
85
  def test_id_column_is_added_to_qtable(self, example_data):
88
- qtable = msreport.qtable.Qtable(example_data["data"], id_column="id")
89
- assert qtable.id_column == "id"
86
+ qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"], id_column="id") # fmt: skip
87
+ assert qtable._id_column == "id"
90
88
 
91
89
  def test_non_unique_data_index_raises_error(self, example_data):
92
90
  example_data["data"].index = [0 for _ in range(len(example_data["data"]))]
93
91
  with pytest.raises(ValueError):
94
- msreport.qtable.Qtable(example_data["data"])
92
+ msreport.qtable.Qtable(example_data["data"], design=example_data["design"], id_column="Representative protein") # fmt: skip
95
93
 
96
94
  def test_non_existing_id_column_raises_error(self, example_data):
97
95
  with pytest.raises(KeyError):
98
- msreport.qtable.Qtable(example_data["data"], id_column="non_existing_column") # fmt: skip
96
+ msreport.qtable.Qtable(example_data["data"], design=example_data["design"], id_column="non_existing_column") # fmt: skip
99
97
 
100
98
  def test_id_column_containing_non_unique_values_raises_error(self, example_data):
101
99
  example_data["data"]["id"] = "1"
102
100
  with pytest.raises(ValueError):
103
- msreport.qtable.Qtable(example_data["data"], id_column="id")
101
+ msreport.qtable.Qtable(example_data["data"], design=example_data["design"], id_column="id") # fmt: skip
104
102
 
105
103
 
106
104
  def test_qtable_add_design(example_data):
107
- qtable = msreport.qtable.Qtable(example_data["data"])
105
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
108
106
  qtable.add_design(example_data["design"])
109
107
  assert qtable.design.equals(example_data["design"])
110
108
 
@@ -192,9 +190,7 @@ class TestMatchSamplesToTagColumns:
192
190
  class TestQtableGetData:
193
191
  @pytest.fixture(autouse=True)
194
192
  def _init_qtable(self, example_data):
195
- self.qtable = msreport.qtable.Qtable(
196
- example_data["data"], design=example_data["design"]
197
- )
193
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
198
194
 
199
195
  def test_get_data(self, example_data):
200
196
  assert self.qtable.get_data().equals(example_data["data"])
@@ -229,7 +225,7 @@ def test_qtable_contains(example_qtable, key, is_present):
229
225
 
230
226
 
231
227
  def test_qtable_get_design(example_data):
232
- qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"])
228
+ qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
233
229
  assert qtable.get_design().equals(example_data["design"])
234
230
 
235
231
 
@@ -265,9 +261,7 @@ class TestQtableGetExperiments:
265
261
  class TestQtableResetExpression:
266
262
  @pytest.fixture(autouse=True)
267
263
  def _init_qtable(self, example_data):
268
- self.qtable = msreport.qtable.Qtable(
269
- example_data["data"], design=example_data["design"]
270
- )
264
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
271
265
 
272
266
  def test_reset_of_parameters(self):
273
267
  self.qtable._expression_columns = ["test"]
@@ -285,7 +279,7 @@ class TestQtableResetExpression:
285
279
  self.qtable._reset_expression()
286
280
  data_columns = self.qtable.data.columns
287
281
  all_expression_columns_absent_in_data = not any(
288
- [c in data_columns for c in example_data["expression_columns"]]
282
+ c in data_columns for c in example_data["expression_columns"]
289
283
  )
290
284
  assert all_expression_columns_absent_in_data
291
285
 
@@ -303,9 +297,7 @@ class TestQtableResetExpression:
303
297
  class TestQtableSetExpression:
304
298
  @pytest.fixture(autouse=True)
305
299
  def _init_qtable(self, example_data):
306
- self.qtable = msreport.qtable.Qtable(
307
- example_data["data"], design=example_data["design"]
308
- )
300
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
309
301
 
310
302
  def test_correct_setting_of_private_variables(self, example_data):
311
303
  self.qtable._set_expression(example_data["intensity_cols_to_samples"])
@@ -377,9 +369,7 @@ class TestQtableSetExpression:
377
369
  class TestQtableSetExpressionByTag:
378
370
  @pytest.fixture(autouse=True)
379
371
  def _init_qtable(self, example_data):
380
- self.qtable = msreport.qtable.Qtable(
381
- example_data["data"], design=example_data["design"]
382
- )
372
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
383
373
 
384
374
  def test_set_expression_by_tag(self, example_data):
385
375
  self.qtable.set_expression_by_tag(example_data["expression_tag"])
@@ -413,9 +403,7 @@ class TestQtableSetExpressionByTag:
413
403
  class TestQtableSetExpressionByColumn:
414
404
  @pytest.fixture(autouse=True)
415
405
  def _init_qtable(self, example_data):
416
- self.qtable = msreport.qtable.Qtable(
417
- example_data["data"], design=example_data["design"]
418
- )
406
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
419
407
 
420
408
  def test_set_expression_by_column(self, example_data):
421
409
  self.qtable.set_expression_by_column(example_data["intensity_cols_to_samples"])
@@ -450,9 +438,7 @@ class TestQtableSetExpressionByColumn:
450
438
  class TestQtableAddExpressionFeature:
451
439
  @pytest.fixture(autouse=True)
452
440
  def _init_qtable(self, example_data):
453
- self.qtable = msreport.qtable.Qtable(
454
- example_data["data"], design=example_data["design"]
455
- )
441
+ self.qtable = msreport.qtable.Qtable(example_data["data"], example_data["design"], id_column="Representative protein") # fmt: skip
456
442
 
457
443
  def test_with_series(self):
458
444
  new_data = self.qtable.data["id"].copy()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes