msreport 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/__init__.py CHANGED
@@ -8,4 +8,4 @@ from msreport.fasta import import_protein_database
8
8
  from msreport.qtable import Qtable
9
9
  from msreport.reader import FragPipeReader, MaxQuantReader, SpectronautReader
10
10
 
11
- __version__ = "0.0.28"
11
+ __version__ = "0.0.30"
msreport/analyze.py CHANGED
@@ -9,10 +9,19 @@ import numpy as np
9
9
  import pandas as pd
10
10
 
11
11
  import msreport.normalize
12
- import msreport.rinterface
12
+ from msreport.errors import OptionalDependencyError
13
13
  from msreport.helper import find_sample_columns
14
14
  from msreport.qtable import Qtable
15
15
 
16
+ try:
17
+ import msreport.rinterface
18
+
19
+ _rinterface_available = True
20
+ _rinterface_error = ""
21
+ except OptionalDependencyError as err:
22
+ _rinterface_available = False
23
+ _rinterface_error = str(err)
24
+
16
25
 
17
26
  class Transformer(Protocol):
18
27
  def fit(self, table: pd.DataFrame) -> Transformer:
@@ -528,8 +537,10 @@ def calculate_multi_group_limma(
528
537
  ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
529
538
  is set to True.
530
539
  """
531
- _validate_experiment_pairs(qtable, experiment_pairs)
540
+ if not _rinterface_available:
541
+ raise OptionalDependencyError(_rinterface_error)
532
542
 
543
+ _validate_experiment_pairs(qtable, experiment_pairs)
533
544
  # TODO: not tested #
534
545
  if batch and "Batch" not in qtable.get_design():
535
546
  raise KeyError(
@@ -618,8 +629,10 @@ def calculate_two_group_limma(
618
629
  must have exactly two entries and the two entries must not be the same. Both
619
630
  experiments must be present in qtable.design.
620
631
  """
621
- _validate_experiment_pair(qtable, experiment_pair)
632
+ if not _rinterface_available:
633
+ raise OptionalDependencyError(_rinterface_error)
622
634
 
635
+ _validate_experiment_pair(qtable, experiment_pair)
623
636
  # TODO: LIMMA function not tested #
624
637
  table = qtable.make_expression_table(samples_as_columns=True)
625
638
  comparison_tag = " vs "
msreport/errors.py CHANGED
@@ -7,3 +7,7 @@ class NotFittedError(ValueError, AttributeError):
7
7
 
8
8
  class ProteinsNotInFastaWarning(UserWarning):
9
9
  """Warning raised when queried proteins are absent from a FASTA file."""
10
+
11
+
12
+ class OptionalDependencyError(ImportError):
13
+ """Raised when an optional dependency is required but not installed."""
@@ -204,7 +204,8 @@ def experiment_ratios(
204
204
  mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
205
205
  if exclude_invalid:
206
206
  mask = mask & qtable["Valid"]
207
- experiment_data = experiment_data[mask]
207
+ # Use `mask.to_numpy` to solve issue with different indices of mask and dataframe
208
+ experiment_data = experiment_data[mask.to_numpy()]
208
209
  pseudo_reference = np.nanmean(experiment_data, axis=1)
209
210
  ratio_data = experiment_data.subtract(pseudo_reference, axis=0)
210
211
 
msreport/qtable.py CHANGED
@@ -27,13 +27,11 @@ class Qtable:
27
27
  design: A pandas.DataFrame describing the experimental design.
28
28
  """
29
29
 
30
- _default_id_column = "Representative protein"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  data: pd.DataFrame,
35
- design: Optional[pd.DataFrame] = None,
36
- id_column: str = "Representative protein",
33
+ design: pd.DataFrame,
34
+ id_column: str,
37
35
  ):
38
36
  """Initializes the Qtable.
39
37
 
@@ -42,12 +40,13 @@ class Qtable:
42
40
 
43
41
  Args:
44
42
  data: A dataframe containing quantitative proteomics data in a wide format.
43
+ The index of the dataframe must contain unique values.
45
44
  design: A dataframe describing the experimental design that must at least
46
45
  contain the columns "Sample" and "Experiment". The "Sample" entries
47
46
  should correspond to the Sample names present in the quantitative
48
47
  columns of the data.
49
48
  id_column: The name of the column that contains the unique identifiers for
50
- the entries in the data table. Default is "Representative protein".
49
+ the entries in the data table.
51
50
 
52
51
  Raises:
53
52
  KeyError: If the specified id_column is not found in data.
@@ -76,8 +75,7 @@ class Qtable:
76
75
  self._id_column = id_column
77
76
  if "Valid" not in self.data.columns:
78
77
  self.data["Valid"] = True
79
- if design is not None:
80
- self.add_design(design)
78
+ self.add_design(design)
81
79
 
82
80
  self._expression_columns: list[str] = []
83
81
  self._expression_features: list[str] = []
@@ -438,6 +436,11 @@ class Qtable:
438
436
 
439
437
  Returns:
440
438
  An instance of Qtable loaded from the specified files.
439
+
440
+ Raises:
441
+ ValueError: If the loaded config file does not contain the
442
+ "Unique ID column" key. This is due to the qtable being saved with a
443
+ version of msreport <= 0.0.27.
441
444
  """
442
445
  filepaths = _get_qtable_export_filepaths(directory, basename)
443
446
  with open(filepaths["config"]) as openfile:
@@ -458,13 +461,20 @@ class Qtable:
458
461
  filepaths["design"], sep="\t", index_col=0, keep_default_na=True
459
462
  )
460
463
 
461
- qtable = Qtable(data, design)
464
+ if "Unique ID column" not in config_data:
465
+ # Mention that the qtable was likely saved with a version of msreport <= 0.0.27
466
+ raise ValueError(
467
+ "The qtable config file does not contain the 'Unique ID column' key. "
468
+ "This is likely due to the qtable being saved with a version of "
469
+ "msreport <= 0.0.27."
470
+ )
471
+ id_column = config_data["Unique ID column"]
472
+
473
+ qtable = Qtable(data, design, id_column)
462
474
  qtable._expression_columns = config_data["Expression columns"]
463
475
  qtable._expression_features = config_data["Expression features"]
464
476
  qtable._expression_sample_mapping = config_data["Expression sample mapping"]
465
477
  # This check is required for backwards compatibility with msreport <= 0.0.27
466
- if "Unique ID column" in config_data:
467
- qtable._id_column = config_data["Unique ID column"]
468
478
  return qtable
469
479
 
470
480
  def to_tsv(self, path: str, index: bool = False):
@@ -570,7 +580,7 @@ class Qtable:
570
580
  self._expression_sample_mapping = {}
571
581
 
572
582
  def __copy__(self) -> Qtable:
573
- new_instance = Qtable(self.data, self.design)
583
+ new_instance = Qtable(self.data, self.design, self.id_column)
574
584
  # Copy all private attributes
575
585
  for attr in dir(self):
576
586
  if (
msreport/reader.py CHANGED
@@ -343,7 +343,9 @@ class MaxQuantReader(ResultReader):
343
343
  Adds new columns to comply with the MsReport convention. "Modified sequence",
344
344
  "Modifications columns", "Modification localization string". "Protein reported
345
345
  by software" and "Representative protein", both contain the first entry from
346
- "Leading razor protein".
346
+ "Leading razor protein". "Ion ID" contains unique entries for each ion, which
347
+ are generated by concatenating the "Modified sequence" and "Charge" columns, and
348
+ if present, the "Compensation voltage" column.
347
349
 
348
350
  "Modified sequence" entries contain modifications within square brackets.
349
351
  "Modification" entries are strings in the form of "position:modification_tag",
@@ -376,15 +378,19 @@ class MaxQuantReader(ResultReader):
376
378
  df["Leading razor protein"]
377
379
  )
378
380
  df["Representative protein"] = df["Protein reported by software"]
381
+
379
382
  if drop_decoy:
380
383
  df = self._drop_decoy(df)
381
384
  if rename_columns:
382
- df = self._rename_columns(
383
- df, True
384
- ) # Actually there are no column tags as the table is in long format
385
+ # Actually there are no column tags as the table is in long format
386
+ df = self._rename_columns(df, prefix_tag=True)
385
387
  if rewrite_modifications and rename_columns:
386
388
  df = self._add_peptide_modification_entries(df)
387
389
  df = self._add_modification_localization_string(df)
390
+ df["Ion ID"] = df["Modified sequence"] + "_c" + df["Charge"].astype(str)
391
+ if "Compensation voltage" in df.columns:
392
+ _cv = df["Compensation voltage"].astype(str)
393
+ df["Ion ID"] = df["Ion ID"] + "_cv" + _cv
388
394
  return df
389
395
 
390
396
  def _add_protein_entries(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -535,6 +541,8 @@ class FragPipeReader(ResultReader):
535
541
  """FragPipe result reader.
536
542
 
537
543
  Methods:
544
+ import_design: Reads a "fragpipe-files.fp-manifest" file and returns a
545
+ processed design dataframe.
538
546
  import_proteins: Reads a "combined_protein.tsv" or "protein.tsv" file and
539
547
  returns a processed dataframe, conforming to the MsReport naming
540
548
  convention.
@@ -576,12 +584,20 @@ class FragPipeReader(ResultReader):
576
584
  "peptides": "combined_peptide.tsv",
577
585
  "ions": "combined_ion.tsv",
578
586
  "ion_evidence": "ion.tsv",
587
+ "psm_evidence": "psm.tsv",
588
+ "design": "fragpipe-files.fp-manifest",
579
589
  }
580
590
  isobar_filenames: dict[str, str] = {
581
591
  "proteins": "protein.tsv",
582
592
  "peptides": "peptide.tsv",
583
593
  "ions": "ion.tsv",
584
594
  }
595
+ sil_filenames: dict[str, str] = {
596
+ "proteins": "combined_protein_label_quant.tsv",
597
+ "peptides": "combined_modified_peptide_label_quant.tsv",
598
+ "ions": "combined_ion_label_quant.tsv",
599
+ }
600
+
585
601
  protected_columns: list[str] = []
586
602
  sample_column_tags: list[str] = [
587
603
  "Spectral Count",
@@ -591,10 +607,18 @@ class FragPipeReader(ResultReader):
591
607
  "MaxLFQ Intensity",
592
608
  ]
593
609
  column_mapping: dict[str, str] = {
610
+ "Peptide": "Peptide sequence", # PSM
611
+ "Modified Peptide": "Modified sequence", # PSM
612
+ "Protein Start": "Start position", # PSM
613
+ "Protein End": "End position", # PSM
614
+ "Number of Missed Cleavages": "Missed cleavage", # PSM
615
+ "PeptideProphet Probability": "Probability", # PSM
616
+ "Compensation Voltage": "Compensation voltage", # PSM and ion
594
617
  "Peptide Sequence": "Peptide sequence", # Peptide and ion
595
618
  "Modified Sequence": "Modified sequence", # Modified peptide and ion
596
619
  "Start": "Start position", # Peptide and ion
597
620
  "End": "End position", # Peptide and ion
621
+ "Mapped Proteins": "Mapped proteins", # All PSM, ion, and peptide tables
598
622
  "Combined Total Peptides": "Total peptides", # From LFQ
599
623
  "Total Peptides": "Total peptides", # From TMT
600
624
  "Description": "Protein name",
@@ -624,7 +648,11 @@ class FragPipeReader(ResultReader):
624
648
  protein_info_tags: list[str] = []
625
649
 
626
650
  def __init__(
627
- self, directory: str, isobar: bool = False, contaminant_tag: str = "contam_"
651
+ self,
652
+ directory: str,
653
+ isobar: bool = False,
654
+ sil: bool = False,
655
+ contaminant_tag: str = "contam_",
628
656
  ) -> None:
629
657
  """Initializes the FragPipeReader.
630
658
 
@@ -632,16 +660,69 @@ class FragPipeReader(ResultReader):
632
660
  directory: Location of the FragPipe result folder
633
661
  isobar: Set to True if quantification strategy was TMT, iTRAQ or similar;
634
662
  default False.
663
+ sil: Set to True if the FragPipe result files are from a stable isotope
664
+ labeling experiment, such as SILAC; default False.
635
665
  contaminant_tag: Prefix of Protein ID entries to identify contaminants;
636
666
  default "contam_".
637
667
  """
668
+ if sil and isobar:
669
+ raise ValueError("Cannot set both 'isobar' and 'sil' to True.")
638
670
  self._add_data_directory(directory)
639
671
  self._isobar: bool = isobar
672
+ self._sil: bool = sil
640
673
  self._contaminant_tag: str = contaminant_tag
641
- if not isobar:
674
+ if isobar:
675
+ self.filenames = self.isobar_filenames
676
+ elif sil:
677
+ self.filenames = self.sil_filenames
678
+ else:
642
679
  self.filenames = self.default_filenames
680
+
681
+ def import_design(
682
+ self, filename: Optional[str] = None, sort: bool = False
683
+ ) -> pd.DataFrame:
684
+ """Reads a 'fp-manifest' file and returns a processed design dataframe.
685
+
686
+ Args:
687
+ filename: Allows specifying an alternative filename, otherwise the default
688
+ filename is used.
689
+ sort: If True, the design dataframe is sorted by "Experiment" and
690
+ "Replicate"; default False.
691
+
692
+ Returns:
693
+ A dataframe containing the processed design table with columns:
694
+ "Sample", "Experiment", "Replicate", "Rawfile".
695
+
696
+ Raises:
697
+ FileNotFoundError: If the specified manifest file does not exist.
698
+ """
699
+ if filename is None:
700
+ filepath = os.path.join(self.data_directory, self.filenames["design"])
643
701
  else:
644
- self.filenames = self.isobar_filenames
702
+ filepath = os.path.join(self.data_directory, filename)
703
+ if not os.path.exists(filepath):
704
+ raise FileNotFoundError(
705
+ f"File '{filepath}' does not exist. Please check the file path."
706
+ )
707
+ fp_manifest = pd.read_csv(filepath, sep="\t", header=None, dtype=str)
708
+ fp_manifest.columns = ["Path", "Experiment", "Bioreplicate", "Data type"]
709
+
710
+ design = pd.DataFrame(
711
+ {
712
+ "Sample": fp_manifest["Experiment"] + "_" + fp_manifest["Bioreplicate"],
713
+ "Experiment": fp_manifest["Experiment"],
714
+ "Replicate": fp_manifest["Bioreplicate"],
715
+ "Rawfile": fp_manifest["Path"].apply(
716
+ # Required to handle Windows and Unix style paths on either system
717
+ lambda x: x.replace("\\", "/").split("/")[-1]
718
+ ),
719
+ }
720
+ )
721
+
722
+ if sort:
723
+ design.sort_values(by=["Experiment", "Replicate"], inplace=True)
724
+ design.reset_index(drop=True, inplace=True)
725
+ return design
645
726
 
646
727
  def import_proteins(
647
728
  self,
@@ -723,6 +804,7 @@ class FragPipeReader(ResultReader):
723
804
  df = self._read_file("peptides" if filename is None else filename)
724
805
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
725
806
  df["Representative protein"] = df["Protein reported by software"]
807
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
726
808
  # Note that _add_protein_entries would need to be adapted for the peptide table.
727
809
  # df = self._add_protein_entries(df)
728
810
  if rename_columns:
@@ -741,7 +823,10 @@ class FragPipeReader(ResultReader):
741
823
 
742
824
  Adds new columns to comply with the MsReport convention. "Modified sequence"
743
825
  and "Modifications columns". "Protein reported by software" and "Representative
744
- protein", both contain the first entry from "Leading razor protein".
826
+ protein", both contain the first entry from "Leading razor protein". "Ion ID"
827
+ contains unique entries for each ion, which are generated by concatenating the
828
+ "Modified sequence" and "Charge" columns, and if present, the
829
+ "Compensation voltage" column.
745
830
 
746
831
  "Modified sequence" entries contain modifications within square brackets.
747
832
  "Modification" entries are strings in the form of "position:modification_text",
@@ -776,11 +861,18 @@ class FragPipeReader(ResultReader):
776
861
  # 'Indistinguishable Proteins' to the ion table.
777
862
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
778
863
  df["Representative protein"] = df["Protein reported by software"]
864
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
865
+
779
866
  if rename_columns:
780
867
  df = self._rename_columns(df, prefix_column_tags)
781
868
  if rewrite_modifications and rename_columns:
782
869
  df = self._add_peptide_modification_entries(df)
783
870
  df = self._add_modification_localization_string(df, prefix_column_tags)
871
+ df["Ion ID"] = df["Modified sequence"] + "_c" + df["Charge"].astype(str)
872
+ if "Compensation voltage" in df.columns:
873
+ _cv = df["Compensation voltage"].astype(str)
874
+ df["Ion ID"] = df["Ion ID"] + "_cv" + _cv
875
+
784
876
  return df
785
877
 
786
878
  def import_ion_evidence(
@@ -795,7 +887,9 @@ class FragPipeReader(ResultReader):
795
887
  Adds new columns to comply with the MsReport convention. "Modified sequence",
796
888
  "Modifications", and "Modification localization string" columns. "Protein
797
889
  reported by software" and "Representative protein", both contain the first entry
798
- from "Leading razor protein".
890
+ from "Leading razor protein". "Ion ID" contains unique entries for each ion,
891
+ which are generated by concatenating the "Modified sequence" and "Charge"
892
+ columns, and if present, the "Compensation voltage" column.
799
893
 
800
894
  "Modified sequence" entries contain modifications within square brackets.
801
895
  "Modification" entries are strings in the form of "position:modification_text",
@@ -848,10 +942,15 @@ class FragPipeReader(ResultReader):
848
942
  df = pd.concat(ion_tables, ignore_index=True)
849
943
 
850
944
  # --- Process dataframe --- #
945
+ df["Ion ID"] = df["Modified Sequence"] + "_c" + df["Charge"].astype(str)
946
+ if "Compensation Voltage" in df.columns:
947
+ df["Ion ID"] = df["Ion ID"] + "_cv" + df["Compensation Voltage"].astype(str)
851
948
  # FUTURE: replace this by _add_protein_entries(df, False) if FragPipe adds
852
949
  # 'Indistinguishable Proteins' to the ion table.
853
950
  df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
854
951
  df["Representative protein"] = df["Protein reported by software"]
952
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
953
+
855
954
  if rename_columns:
856
955
  df = self._rename_columns(df, prefix_column_tags)
857
956
  if rewrite_modifications and rename_columns:
@@ -859,6 +958,60 @@ class FragPipeReader(ResultReader):
859
958
  df = self._add_modification_localization_string(df, prefix_column_tags)
860
959
  return df
861
960
 
961
+ def import_psm_evidence(
962
+ self,
963
+ filename: Optional[str] = None,
964
+ rename_columns: bool = True,
965
+ rewrite_modifications: bool = True,
966
+ ):
967
+ """Concatenate all "psm.tsv" files and return a processed dataframe.
968
+
969
+ Args:
970
+ filename: Allows specifying an alternative filename, otherwise the default
971
+ filename is used.
972
+ rename_columns: If True, columns are renamed according to the MsReport
973
+ convention; default True.
974
+ rewrite_modifications: If True, the peptide format in "Modified sequence" is
975
+ changed according to the MsReport convention, and a "Modifications" column
976
+ is added that contains the amino acid position for all modifications.
977
+ Requires 'rename_columns' to be true. Default True.
978
+
979
+ Returns:
980
+ A DataFrame containing the processed psm evidence tables.
981
+ """
982
+ if filename is None:
983
+ filename = self.default_filenames["psm_evidence"]
984
+
985
+ psm_table_paths = []
986
+ for path in pathlib.Path(self.data_directory).iterdir():
987
+ psm_table_path = path / filename
988
+ if path.is_dir() and psm_table_path.exists():
989
+ psm_table_paths.append(psm_table_path)
990
+
991
+ psm_tables = []
992
+ for filepath in psm_table_paths:
993
+ table = pd.read_csv(filepath, sep="\t", low_memory=False)
994
+ str_cols = table.select_dtypes(include=["object"]).columns
995
+ table.loc[:, str_cols] = table.loc[:, str_cols].fillna("")
996
+
997
+ table["Sample"] = filepath.parent.name
998
+ psm_tables.append(table)
999
+ df = pd.concat(psm_tables, ignore_index=True)
1000
+
1001
+ df["Protein reported by software"] = _extract_protein_ids(df["Protein"])
1002
+ df["Representative protein"] = df["Protein reported by software"]
1003
+ df["Mapped Proteins"] = self._collect_mapped_proteins(df)
1004
+
1005
+ if rename_columns:
1006
+ df = self._rename_columns(df, prefix_tag=True)
1007
+ if rewrite_modifications and rename_columns:
1008
+ mod_entries = _generate_modification_entries_from_assigned_modifications(
1009
+ df["Peptide sequence"], df["Assigned Modifications"]
1010
+ )
1011
+ df["Modified sequence"] = mod_entries["Modified sequence"]
1012
+ df["Modifications"] = mod_entries["Modifications"]
1013
+ return df
1014
+
862
1015
  def _add_protein_entries(self, df: pd.DataFrame) -> pd.DataFrame:
863
1016
  """Adds standardized protein entry columns to the data frame.
864
1017
 
@@ -883,6 +1036,35 @@ class FragPipeReader(ResultReader):
883
1036
  df[key] = protein_entry_table[key]
884
1037
  return df
885
1038
 
1039
+ def _collect_mapped_proteins(self, df: pd.DataFrame) -> list[str]:
1040
+ """Generates a list of mapped proteins entries.
1041
+
1042
+ This method extracts protein IDs from the 'Representative protein' and the
1043
+ 'Mapped Proteins' column and combines them into a single string for each row,
1044
+ where multiple protein IDs are separated by semicolons.
1045
+
1046
+ Args:
1047
+ df: DataFrame containing the 'Mapped Proteins' column.
1048
+
1049
+ Returns:
1050
+ A list of mapped proteins entries.
1051
+ """
1052
+ mapped_proteins_entries = []
1053
+ for protein, mapped_protein_fp in zip(
1054
+ df["Representative protein"],
1055
+ df["Mapped Proteins"].astype(str).replace("nan", ""),
1056
+ strict=True,
1057
+ ):
1058
+ if mapped_protein_fp == "":
1059
+ mapped_proteins = [protein]
1060
+ else:
1061
+ additional_mapped_proteins = msreport.reader._extract_protein_ids(
1062
+ mapped_protein_fp.split(", ")
1063
+ )
1064
+ mapped_proteins = [protein] + additional_mapped_proteins
1065
+ mapped_proteins_entries.append(";".join(mapped_proteins))
1066
+ return mapped_proteins_entries
1067
+
886
1068
  def _collect_leading_protein_entries(self, df: pd.DataFrame) -> list[list[str]]:
887
1069
  """Generates a list of leading protein entries.
888
1070
 
@@ -898,6 +1080,9 @@ class FragPipeReader(ResultReader):
898
1080
  A list of the same length as the input dataframe. Each position contains a
899
1081
  list of leading protein entries, with a minimum of one entry.
900
1082
  """
1083
+ if self._sil: # No "Indistinguishable Proteins" columns in 'SIL' data
1084
+ return [[p] for p in df["Protein"]]
1085
+
901
1086
  leading_protein_entries = []
902
1087
  for protein_entry, indist_protein_entry in zip(
903
1088
  df["Protein"], df["Indistinguishable Proteins"].fillna("").astype(str)
@@ -1319,7 +1504,9 @@ class SpectronautReader(ResultReader):
1319
1504
 
1320
1505
  Adds new columns to comply with the MsReport convention. "Protein reported
1321
1506
  by software" and "Representative protein", both contain the first entry from
1322
- "PG.ProteinAccessions".
1507
+ "PG.ProteinAccessions". "Ion ID" contains unique entries for each ion, which are
1508
+ generated by concatenating the "Modified sequence" and "Charge" columns, and if
1509
+ present, the "Compensation voltage" column.
1323
1510
 
1324
1511
  (!) Note that the modified sequence and modification localization probabilities
1325
1512
  are currently not processed.
@@ -1357,6 +1544,11 @@ class SpectronautReader(ResultReader):
1357
1544
  df = self._add_protein_entries(df)
1358
1545
  if rename_columns:
1359
1546
  df = self._rename_columns(df, True)
1547
+ df["Ion ID"] = df["Modified sequence"] + "_c" + df["Charge"].astype(str)
1548
+ if "Compensation voltage" in df.columns:
1549
+ _cv = df["Compensation voltage"].astype(str)
1550
+ df["Ion ID"] = df["Ion ID"] + "_cv" + _cv
1551
+
1360
1552
  return df
1361
1553
 
1362
1554
  def _tidy_up_sample_columns(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -2141,6 +2333,57 @@ def _generate_modification_entries(
2141
2333
  return entries
2142
2334
 
2143
2335
 
2336
+ def _generate_modification_entries_from_assigned_modifications(
2337
+ sequences: Iterable[str],
2338
+ assigned_modifications: Iterable[str],
2339
+ ) -> dict[str, list[str]]:
2340
+ modified_sequence_entries = []
2341
+ modification_entries = []
2342
+ for sequence, modifications_entry in zip(sequences, assigned_modifications):
2343
+ modifications = _extract_fragpipe_assigned_modifications(
2344
+ modifications_entry, sequence
2345
+ )
2346
+ modified_sequence = helper.modify_peptide(sequence, modifications)
2347
+ modification_entry = ";".join([f"{pos}:{mod}" for pos, mod in modifications])
2348
+ modified_sequence_entries.append(modified_sequence)
2349
+ modification_entries.append(modification_entry)
2350
+
2351
+ entries = {
2352
+ "Modified sequence": modified_sequence_entries,
2353
+ "Modifications": modification_entries,
2354
+ }
2355
+ return entries
2356
+
2357
+
2358
+ def _extract_fragpipe_assigned_modifications(
2359
+ modifications_entry: str,
2360
+ sequence: str,
2361
+ ) -> list[tuple[int, str]]:
2362
+ """Extracts modifications from a FragPipe "Modifications" entry.
2363
+
2364
+ Example for a modification entry: "N-term(42.0106),8C(57.0215)"
2365
+
2366
+ Returns:
2367
+ A list of tuples, where each tuple contains the position of the modification and
2368
+ the modification text. The position is one-indexed, meaning that the first amino
2369
+ acid position is 1. N-term and C-term are represented as 0 and len(sequence)
2370
+ respectively.
2371
+ """
2372
+ if modifications_entry == "":
2373
+ return []
2374
+ modifications = []
2375
+ for mod_entry in modifications_entry.split(","):
2376
+ position_entry, modification = mod_entry.split(")")[0].split("(")
2377
+ if position_entry == "N-term":
2378
+ position = 0
2379
+ elif position_entry == "C-term":
2380
+ position = len(sequence)
2381
+ else:
2382
+ position = int(position_entry[:-1])
2383
+ modifications.append((position, modification))
2384
+ return modifications
2385
+
2386
+
2144
2387
  def extract_maxquant_localization_probabilities(localization_entry: str) -> dict:
2145
2388
  """Extract localization probabilites from a MaxQuant "Probabilities" entry.
2146
2389
 
@@ -1,4 +1,16 @@
1
1
  """Python interface to custome R scripts."""
2
2
 
3
- from .limma import multi_group_limma, two_group_limma
4
- from .rinstaller import r_package_version
3
+ from msreport.errors import OptionalDependencyError
4
+
5
+ try:
6
+ from .limma import multi_group_limma, two_group_limma
7
+ from .rinstaller import r_package_version
8
+ except ImportError as err:
9
+ raise OptionalDependencyError(
10
+ "R integration is not available. R must be installed and configured before "
11
+ "installing optional R dependencies using 'pip install msreport[R]'. For "
12
+ "more information, see: https://github.com/hollenstein/msreport"
13
+ ) from err
14
+
15
+
16
+ __all__ = ["multi_group_limma", "two_group_limma", "r_package_version"]
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.4
2
+ Name: msreport
3
+ Version: 0.0.30
4
+ Summary: Post processing and analysis of quantitative proteomics data
5
+ Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
+ License-Expression: Apache-2.0
7
+ Project-URL: homepage, https://github.com/hollenstein/msreport
8
+ Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
9
+ Keywords: mass spectrometry,proteomics,post processing,data analysis
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE.txt
19
+ Requires-Dist: adjustText<1.0.0,>=0.7.0
20
+ Requires-Dist: matplotlib>=3.5.2
21
+ Requires-Dist: numpy>=1.21.5
22
+ Requires-Dist: pandas>=1.4.4
23
+ Requires-Dist: profasta>=0.0.4
24
+ Requires-Dist: pyteomics>=4.6.0
25
+ Requires-Dist: pyyaml>=6.0.0
26
+ Requires-Dist: scikit-learn>=1.0.0
27
+ Requires-Dist: scipy>=1.9.1
28
+ Requires-Dist: seaborn>=0.12.0
29
+ Requires-Dist: statsmodels>=0.13.2
30
+ Requires-Dist: typing_extensions>=4
31
+ Provides-Extra: r
32
+ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
33
+ Provides-Extra: dev
34
+ Requires-Dist: mypy>=1.15.0; extra == "dev"
35
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
36
+ Provides-Extra: test
37
+ Requires-Dist: pytest>=8.3.5; extra == "test"
38
+ Dynamic: license-file
39
+
40
+ # MsReport
41
+
42
+ [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
43
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15309090.svg)](https://doi.org/10.5281/zenodo.15309090)
44
+ ![Python Version from PEP 621 TOML](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fhollenstein%2Fmsreport%2Fmain%2Fpyproject.toml)
45
+ [![Run tests](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml/badge.svg)](https://github.com/hollenstein/msreport/actions/workflows/run-tests.yml)
46
+
47
+ **MsReport** is a Python library for post-processing quantitative proteomics data from
48
+ bottom-up mass spectrometry experiments.
49
+
50
+ ## Table of Contents
51
+
52
+ - [What is MsReport?](#what-is-msreport)
53
+ - [Key features of MsReport](#key-features-of-msreport)
54
+ - [Installation](#installation)
55
+ - [Installation when using Anaconda](#installation-when-using-anaconda)
56
+ - [Additional requirements](#additional-requirements)
57
+ - [Optional Dependencies](#optional-dependencies)
58
+ - [Development status](#development-status)
59
+ - [How to cite](#how-to-cite)
60
+
61
+ ## What is MsReport?
62
+
63
+ MsReport is a Python library designed to simplify the post-processing and analysis of quantitative proteomics data from bottom-up mass spectrometry experiments. It provides a high-level, abstraction-focused API for efficient and standardized workflows. The modular design of the library provides the flexibility to meet project-specific data processing needs and to customize workflows as required.
64
+
65
+ The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
66
+
67
+ ### Key features of MsReport
68
+
69
+ #### Data Import and Standardization
70
+
71
+ The `reader` module provides software-specific reader classes for importing data from MaxQuant, FragPipe, and Spectronaut that enable the import of protein, peptide and ion tables. During the import process, these classes transform table column names and table values into a standardized format to ensure that the rest of the library can operate in a tool-agnostic manner.
72
+
73
+ #### Data management
74
+
75
+ The `qtable` module provides a structured approach to managing quantitative data through its central `Qtable` class. This class combines quantitative data with an experimental design table that defines the relationship between samples and experimental conditions. The quantitative data is stored in a wide format, where each sample's measurements are stored in separate columns. The `Qtable` class serves as the foundation for data analysis workflows in MsReport, providing the standardized data structure used by the `analyze`, `plot`, and `export` modules.
76
+
77
+ #### Data processing and analysis
78
+
79
+ The `analyze` module provides tools for post-processing of mass spectrometry data generated by software such as MaxQuant, FragPipe, or Spectronaut. It includes functions for filtering, normalization, imputation of missing values, and statistical testing. The library integrates with the R package LIMMA to enable differential expression analysis.
80
+
81
+ > [!NOTE]
82
+ > In order to use the R integration you need to install msreport with optional dependencies, see [Optional Dependencies](#optional-dependencies) for more information.
83
+
84
+ #### Data visualization
85
+
86
+ The `plot` module supports the generation of visualizations for quality control and data analysis. It includes functions for creating various plots, such as intensity and ratio distributions, heatmaps, volcano plots, and PCA plots.
87
+
88
+ #### Data export
89
+
90
+ Finally, the `export` module enables the conversion and export into formats compatible with external tools. This includes generating input files for [Amica](https://bioapps.maxperutzlabs.ac.at/app/amica) and exporting tables for easier integration with Perseus.
91
+
92
+ ## Installation
93
+
94
+ If you do not already have a Python installation, we recommend installing the [Anaconda distribution](https://www.anaconda.com/download) or [Miniconda](https://docs.anaconda.com/free/miniconda/index.html) distribution from Continuum Analytics, which already contains a large number of popular Python packages for Data Science. Alternatively, you can also get Python from the [Python homepage](https://www.python.org/downloads/windows). Note that MsReport requires Python version 3.10 or higher.
95
+
96
+ The following command will install MsReport and its dependencies by using a wheel file.
97
+
98
+ ```shell
99
+ pip install msreport
100
+ ```
101
+
102
+ To uninstall the MsReport library use:
103
+
104
+ ```shell
105
+ pip uninstall msreport
106
+ ```
107
+
108
+ ### Installation when using Anaconda
109
+
110
+ To install the MsReport library using Anaconda, you need to either activate a custom conda environment or install it into the default base environment. Open the Anaconda Navigator, activate the desired conda environment or use the base environment, and then open a command line by running the "CMD.exe" application. Finally, use the `pip install` command as before.
111
+
112
+ ### Optional Dependencies
113
+
114
+ #### R Integration
115
+
116
+ MsReport provides an interface to the R package LIMMA for differential expression analysis. To use this functionality, you need:
117
+
118
+ - A local installation of **R (version 4.0 or higher)**.
119
+ - The system environment variable R_HOME set to the R home directory.
120
+ An installation of msreport with the optional dependencies for R integration:
121
+
122
+ ```shell
123
+ pip install msreport[R]
124
+ ```
125
+
126
+ #### Setting the R_HOME environment variable
127
+
128
+ On Windows, you may need to restart your computer after modifying the system environment variables for the changes to take effect. To find the R home directory, you can run the following command in R:
129
+
130
+ ```R
131
+ normalizePath(R.home("home"))
132
+ ```
133
+
134
+ For example, the R home directory might look like this on Windows: `C:\Program Files\R\R-4.2.1`
135
+
136
+ ## Development status
137
+
138
+ MsReport is a stable and reliable library that has been used on a daily basis for over two years in the Mass Spectrometry Facility at the Max Perutz Labs and the Mass Spectrometry Facility of IMP/IMBA/GMI. While the current interface of MsReport is stable, the library is still under active development, with new features being added regularly. Please note that a major rewrite is planned, which may introduce changes to the API in the future.
139
+
140
+ ## How to cite
141
+
142
+ If you use MsReport for your research or publications, please include the following citation and consider giving the project a star on GitHub.
143
+
144
+ > Hollenstein, D. M., & Hartl, M. (2025). hollenstein/msreport: v0.0.29 (0.0.29). Zenodo. https://doi.org/10.5281/zenodo.15309090
@@ -1,14 +1,14 @@
1
- msreport/__init__.py,sha256=5-d_i-t9A3MV7hC-3z_vcWzaSAJSGY5T6McCBr4UGfc,339
2
- msreport/analyze.py,sha256=zNs0Vc2ODTfdiX6rSr79jXLJIh-6N11WH-vZpQzKDTE,30889
3
- msreport/errors.py,sha256=algGlR5iD9Q0U6Q3m25IwZryl9smtlPHsfhAL35PChc,295
1
+ msreport/__init__.py,sha256=ajNIgBNRP06cDKDl6tsTeDhWSlJQw922QAxoJFd8Mhs,339
2
+ msreport/analyze.py,sha256=I1sfxvXy02AjFcfLRlvC-F_bg0J8ePKoSIU8yDWxLs0,31313
3
+ msreport/errors.py,sha256=X9yFxMiIOCWQdxuqBGr8L7O3vRV2KElXdX1uHbFcZMk,421
4
4
  msreport/export.py,sha256=YvY3Nly5JC2CUM-JY1gydU1g2eqnennzToZfQQ5phO0,20156
5
5
  msreport/fasta.py,sha256=eXTmA4WGX4dT9wcTw7AdrvybLWG47p7ur48CxIjxjfg,1161
6
6
  msreport/impute.py,sha256=bf2Zy8VQNJ0Oh1sKn84Xp9iV5svi_Hp7iHxwRrFBwsI,10327
7
7
  msreport/isobar.py,sha256=m6NhLaKBiItIXuBhly_z2wEslxQGFC2f3-e1bzYXB78,6575
8
8
  msreport/normalize.py,sha256=K1x3DjL5Rep3t_eDIKIghMr0sAJiROnX6skHnOMPZ_k,20160
9
9
  msreport/peptidoform.py,sha256=26USj6WPrMgMIc7LttQ2n6Oq5jo1o7ayUQLR6gsRmZY,12015
10
- msreport/qtable.py,sha256=0e-TXmuiKBU6W5TL3tz06nNrjtEyT-CI9bvUq8W6qME,26768
11
- msreport/reader.py,sha256=ja4q8XtOHR_A6RL8ho-c6aGCVu1kzyhvil8ymiPx3PY,104612
10
+ msreport/qtable.py,sha256=4bJaWac1ePDZB1q7ssINWPdciqx4BIc6tiYUx5xrCsY,27265
11
+ msreport/reader.py,sha256=ozw6QJ22aC0B3kSeb_frIIjkzGLx2yIV-1ZI9w8WffI,115638
12
12
  msreport/aggregate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  msreport/aggregate/condense.py,sha256=eIh5A3RUvXrmoFUjRXagiPl0m-ucuRwYD8kDBI7voVs,5862
14
14
  msreport/aggregate/pivot.py,sha256=rn8li-FrtOZS4oWA8COk0uV2m71GCEbNu1ALNoMuHOA,5081
@@ -21,18 +21,18 @@ msreport/helper/temp.py,sha256=jNulgDATf9sKXEFWMXAhjflciOZPAqlxg_7QZS7IkW8,3736
21
21
  msreport/plot/__init__.py,sha256=SnoQORfrjgz9SmqPZ-1J1aeVC5xu-cFfZINP4aYVCmY,1488
22
22
  msreport/plot/_partial_plots.py,sha256=tqZTSXEPuruMgVakaGR2tUQl5OrHgo2cROJ0S4cqkR0,5598
23
23
  msreport/plot/comparison.py,sha256=J8zWyQrzx7rxDLxeZQkfAlcSmLY3e_7wwPG-cGuWo2M,18564
24
- msreport/plot/distribution.py,sha256=a2Rw6HxQwGfDwRSy8dwpT7zvEQ968wYHjcVPOdXI3l8,10150
24
+ msreport/plot/distribution.py,sha256=QNFL5vG9p-vqhwEk5WcCSXa2B8u5QgySZlAQIPys0-0,10248
25
25
  msreport/plot/multivariate.py,sha256=0xzxggqbIGQYOfgiij93DTRWfG6GvvhqI9u1GNPHarY,13111
26
26
  msreport/plot/quality.py,sha256=dIo_dpdexEN_vp35WpUTt626E-QJ2qNbJmjUai_8uck,15861
27
27
  msreport/plot/style.py,sha256=67jWf4uA1ub9RJDu4xhuSoXAW0lbLj6SMP4QXQO76Pc,10591
28
28
  msreport/plot/style_sheets/msreport-notebook.mplstyle,sha256=SPYO_7vYT8Ha7tQ0KCTLtykiRQ13-_igAm7kyvsZj1I,1266
29
29
  msreport/plot/style_sheets/seaborn-whitegrid.mplstyle,sha256=eC8Zboy8R7ybBwbHPKvKbMIHACystN6X6I0lqm7B80U,833
30
- msreport/rinterface/__init__.py,sha256=g29j2cIrc71qBdF4Zys51feoXlC0dP6YcTIscPTqPdI,146
30
+ msreport/rinterface/__init__.py,sha256=Zs6STvbDqaVZVPRM6iU0kKjq0TWz_2p2ChvNAveRdTA,616
31
31
  msreport/rinterface/limma.py,sha256=fxYRUkkJKI-JpDvivjWj8bUS0ug7RRTMnaf2UOgRsXQ,5421
32
32
  msreport/rinterface/rinstaller.py,sha256=AGs6NFMSwTLrzrIJz1E5BE5jFUz8eQBHlpM_MWVChzA,1370
33
33
  msreport/rinterface/rscripts/limma.R,sha256=gr_yjMm_YoG45irDhWOo6gkRQSTwj_7uU_p3NBRHPm8,4331
34
- msreport-0.0.28.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
35
- msreport-0.0.28.dist-info/METADATA,sha256=IVyUd3ZATwccffCWbgYYmUmPe8Y4vJvwZC6oMFuBBfw,5497
36
- msreport-0.0.28.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
37
- msreport-0.0.28.dist-info/top_level.txt,sha256=Drl8mCckJHFIw-Ovh5AnyjKnqvLJltDOBUr1JAcHAlI,9
38
- msreport-0.0.28.dist-info/RECORD,,
34
+ msreport-0.0.30.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
35
+ msreport-0.0.30.dist-info/METADATA,sha256=FO20yj_zTnw7F6pcWrWDbLLFwo_0Bz8Qm8djt1eOcWs,8444
36
+ msreport-0.0.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
+ msreport-0.0.30.dist-info/top_level.txt,sha256=Drl8mCckJHFIw-Ovh5AnyjKnqvLJltDOBUr1JAcHAlI,9
38
+ msreport-0.0.30.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,132 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: msreport
3
- Version: 0.0.28
4
- Summary: Post processing and analysis of quantitative proteomics data
5
- Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
- License: Apache-2.0
7
- Keywords: mass spectrometry,proteomics,post processing,data analysis
8
- Classifier: Development Status :: 3 - Alpha
9
- Classifier: License :: OSI Approved :: Apache Software License
10
- Classifier: Programming Language :: Python :: 3.9
11
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
- Requires-Python: >=3.9
13
- Description-Content-Type: text/markdown
14
- License-File: LICENSE.txt
15
- Requires-Dist: adjustText<1.0.0,>=0.7.0
16
- Requires-Dist: matplotlib>=3.5.2
17
- Requires-Dist: numpy>=1.21.5
18
- Requires-Dist: pandas>=1.4.4
19
- Requires-Dist: profasta>=0.0.4
20
- Requires-Dist: pyteomics>=4.6.0
21
- Requires-Dist: pyyaml>=6.0.0
22
- Requires-Dist: rpy2!=3.5.13,>=3.5.3
23
- Requires-Dist: scikit-learn>=1.0.0
24
- Requires-Dist: scipy>=1.9.1
25
- Requires-Dist: seaborn>=0.12.0
26
- Requires-Dist: statsmodels>=0.13.2
27
- Requires-Dist: typing_extensions>=4
28
- Provides-Extra: dev
29
- Requires-Dist: mypy>=1.15.0; extra == "dev"
30
- Requires-Dist: pytest>=8.3.5; extra == "dev"
31
- Dynamic: license-file
32
-
33
- [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
34
-
35
-
36
- # MsReport
37
-
38
-
39
- ## Introduction
40
-
41
- MsReport is a Python library that allows simple and standardized post-processing of
42
- quantitative proteomics data from bottom up, mass spectrometry experiments. Currently
43
- working with label free protein quantification reports from MaxQuant and FragPipe is
44
- fully supported. Other data analysis pipelines can be added by writing a software
45
- specific reader function.
46
-
47
- MsReport is primarily developed as a tool for the Mass Spectrometry Facility at the Max
48
- Perutz Labs (University of Vienna), to allow the generation of Quantitative Protein and
49
- PTM reports, and to facilitate project specific data analysis tasks.
50
-
51
-
52
- ## Release
53
-
54
- Development is currently in early alpha and the interface is not yet stable.
55
-
56
-
57
- ## Scope
58
-
59
- The `reader` module contains software specific reader classes that provide access to the
60
- outputs of the respective software. Reader instances allow importing protein and ion
61
- tables, and provide the ability to standardize column names and data formats during the
62
- import. To do so, reader classes must know the file structure and naming conventions of
63
- the respective software.
64
-
65
- The `Qtable` class allows storing and accessing quantitative data from a particular
66
- level of abstraction, such as proteins or ions, and an experimental design table that
67
- describes to which experiment a sample belongs. The quantitative data are in the wide
68
- format, i.e. the quantification data of each sample is stored in a separate column. The
69
- `Qtable` allows convenient handling and access to quantitative data through information
70
- from the experimental design, and represents the data structure used by the `analyze`,
71
- `plot`, and `export` modules.
72
-
73
- The `analyze` module provides a high-level interface for post-processing of quantitative
74
- data, such as filtering valid values, normalization between samples, imputation of
75
- missing values, and statistical testing with the R package LIMMA.
76
-
77
- The `plot` module allows generation of quality control and data analysis plots.
78
-
79
- Using methods from the `export` module allows conversion and export of quantitative data
80
- into the Amica input format, and generating contaminant tables for the inspection of
81
- potential contaminants.
82
-
83
- Additional scripts
84
-
85
- - The `excel_report` module enables the creation of a formatted excel protein report
86
- by using the XlsxReport library.
87
- - The `benchmark` module contains functions to generate benchmark plots from multiple
88
- `Qtable` instances, and can be used for method or software comparison.
89
-
90
-
91
- ## Install
92
-
93
- If you do not already have a Python installation, we recommend installing the
94
- [Anaconda distribution](https://www.continuum.io/downloads) of Continuum Analytics,
95
- which already contains a large number of popular Python packages for Data Science.
96
- Alternatively, you can also get Python from the
97
- [Python homepage](https://www.python.org/downloads/windows). MsReport requires Python
98
- version 3.9 or higher.
99
-
100
- You can use pip to install MsReport from the distribution file with the following
101
- command:
102
-
103
- ```
104
- pip install msreport-X.Y.Z-py3-none-any.whl
105
- ```
106
-
107
- To uninstall the MsReport library type:
108
-
109
- ```
110
- pip uninstall msreport
111
- ```
112
-
113
-
114
- ### Installation when using Anaconda
115
- If you are using Anaconda, you will need to install the MsReport package into a conda
116
- environment. Open the Anaconda navigator, activate the conda environment you want to
117
- use, run the "CMD.exe" application to open a terminal, and then use the pip install
118
- command as described above.
119
-
120
-
121
- ### Additional requirements
122
-
123
- MsReport provides an interface to the R package LIMMA for differential expression
124
- analysis, which requires a local installation of R (R version 4.0 or higher) and the
125
- system environment variable "R_HOME" to be set to the R home directory. Note that it
126
- might be necessary to restart the computer after adding the "R_HOME" variable. The R
127
- home directory can also be found from within R by using the command below, and might
128
- look similar to "C:\Program Files\R\R-4.2.1" on Windows.
129
-
130
- ```
131
- normalizePath(R.home("home"))
132
- ```