seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ seer_pas_sdk.core.unsupported -- in development
4
4
 
5
5
  import os
6
6
  import shutil
7
+ from pathlib import Path
7
8
 
8
9
  from typing import List as _List
9
10
 
@@ -827,20 +828,29 @@ class _UnsupportedSDK(_SeerSDK):
827
828
  )
828
829
 
829
830
  # Step 1: Check if paths and file extensions are valid.
831
+ invalid_d_zip_files = []
830
832
  for file in ms_data_files:
831
833
  if not valid_ms_data_file(file):
832
834
  raise ValueError(
833
835
  "Invalid file or file format. Please check your file."
834
836
  )
837
+ if file.endswith(".d.zip") and (not validate_d_zip_file(file)):
838
+ invalid_d_zip_files.append(file)
839
+
840
+ if invalid_d_zip_files:
841
+ raise ValueError(
842
+ f"The following .d.zip files are invalid: {', '.join(invalid_d_zip_files)}. Please check your files."
843
+ )
835
844
 
836
845
  extensions = set(
837
- [os.path.splitext(file.lower())[1] for file in ms_data_files]
846
+ ["".join(Path(file).suffixes) for file in ms_data_files]
838
847
  )
839
848
 
840
849
  if filenames and ".d.zip" in extensions:
841
850
  raise ValueError(
842
851
  "Please leave the 'filenames' parameter empty when working with .d.zip files. SeerSDK.rename_d_zip_file() is available for this use case."
843
852
  )
853
+
844
854
  # Step 2: Use active tenant to fetch the tenant_id.
845
855
  tenant_id = self.get_active_tenant_id()
846
856
 
@@ -1461,36 +1471,70 @@ class _UnsupportedSDK(_SeerSDK):
1461
1471
  Get analyte intensities data for a given PAS analysis.
1462
1472
  Args:
1463
1473
  analysis_id (str): ID of the analysis.
1464
- analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', precursor.
1474
+ analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', 'precursor'.
1465
1475
  rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
1466
- norm_method (str): Search engine. Supported engines are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
1476
+ norm_method (str): Normalization method. Supported methods are: raw, engine, median, median80, pepcal, pepcal_batch. Default is 'pepcal'.
1467
1477
 
1468
1478
  Returns:
1469
1479
  pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
1470
1480
  'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
1471
1481
  'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
1472
1482
  """
1473
- # 1. Get msrun data for analysis
1483
+
1484
+ def filepath_to_msrunid(filepath):
1485
+ return os.path.basename(filepath).split(".")[0]
1486
+
1487
+ # 1. Get samples and msrun data for analysis
1474
1488
  samples = self.find_samples(analysis_id=analysis_id)
1475
- sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
1476
- # for np rollup, a row represents an msrun
1477
- msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
1478
- file_to_msrun = {
1479
- os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
1480
- for msrun in msruns
1481
- }
1482
- sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
1483
1489
 
1484
- # for panel rollup, a row represents a sample
1490
+ sample_uuid_to_id = {s["id"]: s["sample_id"] for s in samples}
1491
+ sample_id_to_uuid = {s["sample_id"]: s["id"] for s in samples}
1492
+ # FIXME sample_name is not guaranteed to be unique (within PAS analysis)
1493
+ sample_name_to_uuid = {s["sample_name"]: s["id"] for s in samples}
1494
+
1495
+ msruns = self.find_msruns(sample_ids=[s["id"] for s in samples])
1496
+ msrunid_to_info = {msrun["Run"]: msrun for msrun in msruns}
1485
1497
 
1486
1498
  # 2. Get search results
1487
- # pull the np/panel file, or report.tsv for precursor mode
1499
+ # pull the np/panel file, or the relevant columns from the report.tsv for precursor mode
1500
+ columns = None
1501
+ if analyte_type == "precursor" and rollup == "np":
1502
+ columnsExperiment = ["Run"]
1503
+ columnsProtein = [
1504
+ "Protein.Group",
1505
+ ]
1506
+ columnsPeptide = [
1507
+ "Stripped.Sequence",
1508
+ ]
1509
+ columnsPrecursor = [
1510
+ "Precursor.Id",
1511
+ "Precursor.Charge",
1512
+ "Precursor.Quantity",
1513
+ "RT",
1514
+ "iRT",
1515
+ "IM",
1516
+ "iIM",
1517
+ ]
1518
+ columnsQValue = [
1519
+ "Q.Value",
1520
+ "Protein.Q.Value",
1521
+ ]
1522
+ columns = [
1523
+ *columnsExperiment,
1524
+ *columnsProtein,
1525
+ *columnsPeptide,
1526
+ *columnsPrecursor,
1527
+ *columnsQValue,
1528
+ ]
1488
1529
  search_results = self.get_search_result(
1489
1530
  analysis_id=analysis_id,
1490
1531
  analyte_type=analyte_type,
1491
1532
  rollup=rollup,
1533
+ columns=columns,
1492
1534
  )
1535
+
1493
1536
  if analyte_type in ["protein", "peptide"]:
1537
+ # set the intensity column based on norm_method and PAS analysis protocol version
1494
1538
  intensity_column = None
1495
1539
  if norm_method == "raw":
1496
1540
  intensity_column = (
@@ -1532,140 +1576,171 @@ class _UnsupportedSDK(_SeerSDK):
1532
1576
  raise ValueError(
1533
1577
  "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
1534
1578
  )
1535
-
1536
1579
  intensity_column = "PepCal Intensities Log10"
1537
-
1580
+ elif norm_method == "pepcal_batch":
1581
+ if not (
1582
+ "PepCal Batch Intensities Log10" in search_results.columns
1583
+ ):
1584
+ raise ValueError(
1585
+ "Pepcal normalized batch corrected intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
1586
+ )
1587
+ intensity_column = "PepCal Batch Intensities Log10"
1538
1588
  else:
1539
1589
  raise ValueError(
1540
1590
  f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
1541
1591
  )
1542
- if rollup == "panel":
1543
- search_results.fillna({"Sample Name": ""}, inplace=True)
1544
- search_results["File Name"] = search_results[
1545
- "Sample Name"
1546
- ].apply(
1547
- lambda x: (
1548
- os.path.basename(
1549
- sample_to_msrun[sample_name_to_id[x]][
1550
- "raw_file_path"
1551
- ]
1552
- ).split(".")[0]
1553
- if x
1554
- else None
1555
- )
1556
- )
1557
- search_results["File Name"] = search_results["File Name"].apply(
1558
- lambda x: os.path.basename(x).split(".")[0] if x else None
1559
- )
1560
1592
 
1561
1593
  search_results["Intensity Log10"] = search_results[
1562
1594
  intensity_column
1563
1595
  ]
1564
1596
 
1565
- # 3. Merge report to search results to get Q value and other properties
1566
- report = self.get_search_result(
1567
- analysis_id=analysis_id,
1568
- analyte_type="precursor",
1569
- rollup="np",
1570
- )
1571
- report["File Name"] = report["Run"]
1572
- report["Protein Group"] = report["Protein.Group"]
1573
-
1574
- if analyte_type == "protein":
1575
- report["Protein Q Value"] = report["Protein.Q.Value"]
1597
+ if rollup == "panel":
1598
+ search_results.rename(
1599
+ columns={"Sample ID": "Sample UUID"}, inplace=True
1600
+ )
1601
+ search_results["Sample UUID"] = search_results[
1602
+ "Sample Name"
1603
+ ].map(sample_name_to_uuid)
1604
+ search_results["Sample ID"] = search_results[
1605
+ "Sample UUID"
1606
+ ].map(sample_uuid_to_id)
1607
+ experiment_columns = ["Sample UUID", "Sample ID"]
1608
+
1609
+ # analyte info is limited to the id in the panel rollup
1610
+ if analyte_type == "protein":
1611
+ analyte_id_column = "Protein Group"
1612
+ else:
1613
+ analyte_id_column = "Peptide"
1614
+
1615
+ analyte_columns = [analyte_id_column]
1616
+ df = search_results
1617
+ else:
1618
+ # np rollup, extract basename without extension
1619
+ path_to_msrunid = {
1620
+ path: filepath_to_msrunid(path)
1621
+ for path in search_results["File Name"].unique()
1622
+ }
1623
+ # strip path from the filename to allow merging with the precursor report
1624
+ search_results["Run"] = search_results["File Name"].map(
1625
+ path_to_msrunid
1626
+ )
1576
1627
 
1577
- report = report[
1578
- ["File Name", "Protein Group", "Protein Q Value"]
1579
- ]
1580
- report.drop_duplicates(
1581
- subset=["File Name", "Protein Group"], inplace=True
1628
+ search_results["MsRun UUID"] = search_results["Run"].map(
1629
+ {k: v["id"] for k, v in msrunid_to_info.items()}
1582
1630
  )
1583
- df = pd.merge(
1584
- search_results,
1585
- report,
1586
- on=["File Name", "Protein Group"],
1587
- how="left",
1631
+ search_results["Sample ID"] = search_results["Run"].map(
1632
+ {k: v["sample_id"] for k, v in msrunid_to_info.items()}
1633
+ )
1634
+ search_results["Sample UUID"] = search_results["Run"].map(
1635
+ {k: v["sample_uuid"] for k, v in msrunid_to_info.items()}
1588
1636
  )
1589
- included_columns = [
1590
- "MsRun ID",
1637
+ search_results["Nanoparticle"] = search_results["Run"].map(
1638
+ {k: v["nanoparticle"] for k, v in msrunid_to_info.items()}
1639
+ )
1640
+ experiment_columns = [
1641
+ "MsRun UUID",
1642
+ "Run",
1643
+ "Nanoparticle",
1644
+ "Sample UUID",
1591
1645
  "Sample ID",
1592
- "Protein Group",
1593
- "Intensity Log10",
1594
- "Protein Q Value",
1595
1646
  ]
1596
1647
 
1597
- else:
1598
- report["Peptide"] = report["Stripped.Sequence"]
1599
- # If analyte_type is peptide, attach retention time (RT, iRT)
1600
- report = report[["File Name", "Peptide", "RT", "iRT"]]
1601
- report.drop_duplicates(
1602
- subset=["File Name", "Peptide"], inplace=True
1648
+ # Merge report to search results to get Q value and other properties
1649
+ if analyte_type == "protein":
1650
+ columns = ["Run", "Protein.Group", "Protein.Q.Value"]
1651
+ elif analyte_type == "peptide":
1652
+ columns = ["Run", "Stripped.Sequence", "Protein.Q.Value"]
1653
+ analytes = self.get_search_result(
1654
+ analysis_id=analysis_id,
1655
+ analyte_type="precursor",
1656
+ rollup="np",
1657
+ columns=columns,
1658
+ )
1659
+ # By default, pandas DataFrame.rename() silently ignores mapping keys that are not columns of the data frame.
1660
+ analytes.rename(
1661
+ columns={
1662
+ "Protein.Group": "Protein Group",
1663
+ "Protein.Q.Value": "Protein Q Value",
1664
+ "Stripped.Sequence": "Peptide",
1665
+ },
1666
+ inplace=True,
1667
+ )
1668
+
1669
+ if analyte_type == "protein":
1670
+ analyte_id_column = "Protein Group"
1671
+ analyte_columns = [
1672
+ analyte_id_column,
1673
+ "Protein Q Value",
1674
+ ]
1675
+
1676
+ else:
1677
+ analyte_id_column = "Peptide"
1678
+ analyte_columns = [analyte_id_column]
1679
+ # endif analyte_type
1680
+
1681
+ analytes.drop(
1682
+ columns=[
1683
+ col
1684
+ for col in analytes.columns
1685
+ if col != "Run" and col not in analyte_columns
1686
+ ],
1687
+ inplace=True,
1688
+ )
1689
+ analytes.drop_duplicates(
1690
+ subset=["Run", analyte_id_column], inplace=True
1603
1691
  )
1604
1692
  df = pd.merge(
1605
1693
  search_results,
1606
- report,
1607
- on=["File Name", "Peptide"],
1694
+ analytes,
1695
+ on=["Run", analyte_id_column],
1608
1696
  how="left",
1697
+ validate="one_to_one",
1609
1698
  )
1610
- included_columns = [
1611
- "MsRun ID",
1612
- "Sample ID",
1613
- "Peptide",
1614
- "Protein Group",
1615
- "Intensity Log10",
1616
- "RT",
1617
- "iRT",
1618
- ]
1619
- # endif
1620
1699
 
1621
- if rollup == "np":
1622
- included_columns.insert(
1623
- included_columns.index("Sample ID") + 1, "Nanoparticle"
1624
- )
1700
+ df = df[experiment_columns + analyte_columns + ["Intensity Log10"]]
1625
1701
 
1626
- df["MsRun ID"] = df["File Name"].apply(
1627
- lambda x: (
1628
- file_to_msrun[x]["id"] if x in file_to_msrun else None
1629
- )
1630
- )
1631
- df["Sample ID"] = df["File Name"].apply(
1632
- lambda x: (
1633
- file_to_msrun[x]["sample_id"]
1634
- if x in file_to_msrun
1635
- else None
1636
- )
1637
- )
1638
- df = df[included_columns]
1639
- df.columns = [title_case_to_snake_case(x) for x in df.columns]
1640
- return df
1641
1702
  else:
1642
1703
  # precursor
1643
1704
  # working only in report.tsv
1644
- search_results["Intensity"] = search_results["Precursor.Quantity"]
1645
- search_results["MsRun ID"] = search_results["Run"].apply(
1646
- lambda x: (
1647
- file_to_msrun[x]["id"] if x in file_to_msrun else None
1705
+ if norm_method != "raw":
1706
+ raise ValueError(
1707
+ "For precursor analyte type, only 'raw' norm_method is supported."
1648
1708
  )
1709
+
1710
+ search_results["MsRun UUID"] = search_results["Run"].map(
1711
+ {k: v["id"] for k, v in msrunid_to_info.items()}
1649
1712
  )
1650
- search_results["Sample ID"] = search_results["Run"].apply(
1651
- lambda x: (
1652
- file_to_msrun[x]["sample_id"]
1653
- if x in file_to_msrun
1654
- else None
1655
- )
1713
+ search_results["Sample ID"] = search_results["Run"].map(
1714
+ {k: v["sample_id"] for k, v in msrunid_to_info.items()}
1715
+ )
1716
+ search_results["Sample UUID"] = search_results["Sample ID"].map(
1717
+ sample_id_to_uuid
1656
1718
  )
1657
- search_results["Protein Group"] = search_results["Protein.Group"]
1658
- search_results["Peptide"] = search_results["Stripped.Sequence"]
1659
- search_results["Charge"] = search_results["Precursor.Charge"]
1660
- search_results["Precursor Id"] = search_results["Precursor.Id"]
1661
- search_results["Precursor Q Value"] = search_results["Q.Value"]
1662
- search_results["Protein Q Value"] = search_results[
1663
- "Protein.Q.Value"
1719
+ search_results["Nanoparticle"] = search_results["Run"].map(
1720
+ {k: v["nanoparticle"] for k, v in msrunid_to_info.items()}
1721
+ )
1722
+ experiment_columns = [
1723
+ "MsRun UUID",
1724
+ "Run",
1725
+ "Nanoparticle",
1726
+ "Sample UUID",
1727
+ "Sample ID",
1664
1728
  ]
1665
1729
 
1666
- included_columns = [
1667
- "MsRun ID",
1668
- "Sample ID",
1730
+ search_results.rename(
1731
+ columns={
1732
+ "Protein.Group": "Protein Group",
1733
+ "Stripped.Sequence": "Peptide",
1734
+ "Precursor.Charge": "Charge",
1735
+ "Precursor.Id": "Precursor Id",
1736
+ "Q.Value": "Precursor Q Value",
1737
+ "Protein.Q.Value": "Protein Q Value",
1738
+ "Precursor.Quantity": "Intensity",
1739
+ },
1740
+ inplace=True,
1741
+ )
1742
+
1743
+ analyte_columns = [
1669
1744
  "Protein Group",
1670
1745
  "Protein Q Value",
1671
1746
  "Peptide",
@@ -1678,10 +1753,13 @@ class _UnsupportedSDK(_SeerSDK):
1678
1753
  "IM",
1679
1754
  "iIM",
1680
1755
  ]
1681
- df = search_results[included_columns]
1682
- df.columns = [title_case_to_snake_case(x) for x in df.columns]
1756
+ df = pd.DataFrame(
1757
+ search_results[experiment_columns + analyte_columns]
1758
+ )
1683
1759
 
1684
- return df
1760
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1761
+
1762
+ return df
1685
1763
 
1686
1764
  def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
1687
1765
  if analyte_type not in ["protein", "peptide", "precursor"]:
@@ -1697,10 +1775,6 @@ class _UnsupportedSDK(_SeerSDK):
1697
1775
  analysis_id=analysis_id, analyte_type="protein", rollup="np"
1698
1776
  )
1699
1777
 
1700
- report_results = self.get_search_result(
1701
- analysis_id=analysis_id, analyte_type="precursor", rollup="np"
1702
- )
1703
-
1704
1778
  search_results = search_results[
1705
1779
  [
1706
1780
  "Protein Group",
@@ -1712,18 +1786,87 @@ class _UnsupportedSDK(_SeerSDK):
1712
1786
  ]
1713
1787
  ]
1714
1788
  search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
1715
- report_results["Protein Group"] = report_results["Protein.Group"]
1716
- report_results["Peptide"] = report_results["Stripped.Sequence"]
1717
1789
 
1718
- if analyte_type == "protein":
1719
- report_results = report_results[
1790
+ # 2. fetch precursor report to extract analyte-specific details
1791
+ columnsPG = [
1792
+ "Protein.Group",
1793
+ ]
1794
+ columnsPeptide = [
1795
+ "Protein.Ids",
1796
+ "Stripped.Sequence",
1797
+ "Proteotypic",
1798
+ ]
1799
+ columnsPrecursor = [
1800
+ "Precursor.Id",
1801
+ "Precursor.Charge",
1802
+ "Precursor.Quantity",
1803
+ "Modified.Sequence",
1804
+ ]
1805
+ columnsPGQValue = [
1806
+ "Global.PG.Q.Value",
1807
+ "Lib.PG.Q.Value",
1808
+ ]
1809
+ columnsPrecursorQValue = [
1810
+ "Global.Q.Value",
1811
+ "Lib.Q.Value",
1812
+ ]
1813
+ columns = [
1814
+ *columnsPG,
1815
+ *columnsPGQValue,
1816
+ ]
1817
+ if analyte_type == "peptide":
1818
+ columns += [*columnsPeptide]
1819
+ elif analyte_type == "precursor":
1820
+ columns += [
1821
+ *columnsPeptide,
1822
+ *columnsPrecursor,
1823
+ *columnsPrecursorQValue,
1824
+ ]
1825
+ report_results = self.get_search_result(
1826
+ analysis_id=analysis_id,
1827
+ analyte_type="precursor",
1828
+ rollup="np",
1829
+ columns=columns,
1830
+ )
1831
+ report_results.rename(
1832
+ columns={
1833
+ "Protein.Group": "Protein Group",
1834
+ "Stripped.Sequence": "Peptide",
1835
+ "Modified.Sequence": "Modified.Peptide",
1836
+ },
1837
+ inplace=True,
1838
+ )
1839
+
1840
+ # function to fix the potential bug, where different precursors
1841
+ # of the same peptide map to different protein groups
1842
+ def fix_peptide_to_protein_group_assignment(
1843
+ df: pd.DataFrame,
1844
+ ) -> pd.DataFrame:
1845
+ # for each peptide, sort protein groups by confidence
1846
+ df = df.sort_values(
1720
1847
  [
1721
- "Protein Group",
1722
- "Protein.Ids",
1848
+ "Peptide",
1723
1849
  "Global.PG.Q.Value",
1724
1850
  "Lib.PG.Q.Value",
1851
+ "Protein Group",
1725
1852
  ]
1726
- ]
1853
+ )
1854
+
1855
+ # broadcast the best protein group across all rows with the same peptide
1856
+ # to fix the potential bug, where different precursors of the same peptide
1857
+ # map to different protein groups
1858
+ for col in [
1859
+ "Protein Group",
1860
+ "Protein.Ids",
1861
+ "Protein.Names",
1862
+ "Genes",
1863
+ ]:
1864
+ if col in df.columns:
1865
+ df[col] = df.groupby("Peptide")[col].transform("first")
1866
+
1867
+ return df
1868
+
1869
+ if analyte_type == "protein":
1727
1870
  report_results.drop_duplicates(
1728
1871
  subset=["Protein Group"], inplace=True
1729
1872
  )
@@ -1734,25 +1877,23 @@ class _UnsupportedSDK(_SeerSDK):
1734
1877
  how="left",
1735
1878
  )
1736
1879
  elif analyte_type == "peptide":
1737
- peptide_results = self.get_search_result(
1738
- analysis_id=analysis_id, analyte_type="peptide", rollup="np"
1739
- )
1740
- peptide_results = peptide_results[["Peptide", "Protein Group"]]
1741
- search_results = pd.merge(
1742
- peptide_results,
1743
- search_results,
1744
- on=["Protein Group"],
1745
- how="left",
1746
- )
1747
-
1748
- report_results = report_results[
1749
- ["Peptide", "Protein.Ids", "Protein.Group"]
1880
+ search_results = search_results[
1881
+ [
1882
+ "Protein Group",
1883
+ "Protein Names",
1884
+ "Gene Names",
1885
+ ]
1750
1886
  ]
1887
+ report_results.drop_duplicates(inplace=True)
1888
+ report_results = fix_peptide_to_protein_group_assignment(
1889
+ report_results
1890
+ )
1751
1891
  report_results.drop_duplicates(subset=["Peptide"], inplace=True)
1892
+
1752
1893
  df = pd.merge(
1753
- search_results,
1754
1894
  report_results,
1755
- on=["Peptide"],
1895
+ search_results,
1896
+ on=["Protein Group"],
1756
1897
  how="left",
1757
1898
  )
1758
1899
  else:
@@ -1762,66 +1903,25 @@ class _UnsupportedSDK(_SeerSDK):
1762
1903
  "Protein Group",
1763
1904
  "Protein Names",
1764
1905
  "Gene Names",
1765
- "Biological Process",
1766
- "Molecular Function",
1767
- "Cellular Component",
1768
1906
  ]
1769
1907
  ]
1770
- search_results.drop_duplicates(
1771
- subset=["Protein Group"], inplace=True
1908
+ report_results.drop_duplicates(inplace=True)
1909
+
1910
+ report_results = fix_peptide_to_protein_group_assignment(
1911
+ report_results
1772
1912
  )
1773
- report_results = report_results[
1774
- [
1775
- "Precursor.Id",
1776
- "Precursor.Charge",
1777
- "Peptide",
1778
- "Protein Group",
1779
- "Protein.Ids",
1780
- "Protein.Names",
1781
- "Genes",
1782
- "First.Protein.Description",
1783
- "Modified.Sequence",
1784
- "Proteotypic",
1785
- "Global.Q.Value",
1786
- "Global.PG.Q.Value",
1787
- "Lib.Q.Value",
1788
- "Lib.PG.Q.Value",
1789
- ]
1790
- ]
1791
1913
  report_results.drop_duplicates(
1792
- subset=["Protein Group"], inplace=True
1914
+ subset=["Peptide", "Modified.Peptide", "Precursor.Charge"],
1915
+ inplace=True,
1793
1916
  )
1917
+
1794
1918
  df = pd.merge(
1795
1919
  report_results,
1796
1920
  search_results,
1797
1921
  on=["Protein Group"],
1798
1922
  how="left",
1799
1923
  )
1800
- df = df[
1801
- [
1802
- "Precursor.Id",
1803
- "Precursor.Charge",
1804
- "Peptide",
1805
- "Protein Group",
1806
- "Protein.Ids",
1807
- "Protein.Names",
1808
- "Genes",
1809
- "First.Protein.Description",
1810
- "Modified.Sequence",
1811
- "Proteotypic",
1812
- "Global.Q.Value",
1813
- "Global.PG.Q.Value",
1814
- "Lib.Q.Value",
1815
- "Lib.PG.Q.Value",
1816
- "Gene Names",
1817
- "Biological Process",
1818
- "Molecular Function",
1819
- "Cellular Component",
1820
- ]
1821
- ]
1822
- df.rename(
1823
- columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
1824
- )
1825
1924
  # endif
1826
1925
  df.columns = [title_case_to_snake_case(x) for x in df.columns]
1926
+
1827
1927
  return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: seer-pas-sdk
3
- Version: 1.1.1
3
+ Version: 1.2.1
4
4
  Summary: SDK for Seer Proteograph Analysis Suite (PAS)
5
5
  Author-email: Ryan Sun <rsun@seer.bio>
6
6
  License:
@@ -1,19 +1,19 @@
1
1
  seer_pas_sdk/__init__.py,sha256=Ie6atdmdBV-OmdHHXjhrGhdFGXiyP3JKhKrr3hyvSsA,563
2
2
  seer_pas_sdk/auth/__init__.py,sha256=e_eM4jJnnyKUdg4Nggzi9ypt2MLWcEJ8CmCPkUaQDSs,23
3
3
  seer_pas_sdk/auth/auth.py,sha256=_SI5CdEkfqfr4o5BQ79BuPbxGeI9p7tqxJd7mUqSAkI,8854
4
- seer_pas_sdk/common/__init__.py,sha256=8r-xF0alzIdCRdtpQCWKs6pQFrOi9KYnO7EwP8Pm94A,23475
4
+ seer_pas_sdk/common/__init__.py,sha256=VbtcWOt9_jR5NfETtYTqcUXII98Jj5sphr3lLSdIzdg,24445
5
5
  seer_pas_sdk/common/errors.py,sha256=4HFORWnaQQCMXRE8kwdsJWvQRB_3KFEZ7yMb391e4gA,142
6
6
  seer_pas_sdk/common/groupanalysis.py,sha256=DxB-gbQfYzl7p9MTYWDIqghcH-IeakzdYdrRZrlIHek,1730
7
7
  seer_pas_sdk/core/__init__.py,sha256=rxbKgg-Qe24OaxX2zyHHYPYgDCTEKE_-41bB2wvpvL4,25
8
- seer_pas_sdk/core/sdk.py,sha256=OKN7z11hm0Uzx-1yO3YxiN4OCar_4tjDwpKQBjzXeD0,160092
9
- seer_pas_sdk/core/unsupported.py,sha256=SicgUHz1j-aLfcTvkWiSDMAm9D58VEv0YX-3PGQybec,69047
8
+ seer_pas_sdk/core/sdk.py,sha256=0ukg287lsjlSNoV0WqFbiPMURhVogsy_sTR7gg1fr9Q,161512
9
+ seer_pas_sdk/core/unsupported.py,sha256=WcF_Z6ZUpzOWkWQHaMtm9SnE2NveuRmljVfNe8QSbms,72732
10
10
  seer_pas_sdk/objects/__init__.py,sha256=r-lY7axLTzToAI-Dme019YfcJLDe2ok1f_e6OQx3j64,130
11
11
  seer_pas_sdk/objects/groupanalysis.py,sha256=x3D_5NmYBoPDilNCQqUoCFARIfIeUq4FBY3_N6u8tfM,994
12
12
  seer_pas_sdk/objects/headers.py,sha256=RilNzB_Nhid3U8j93BxJYcRrgDmd_1bAuI0P465xd0g,2727
13
13
  seer_pas_sdk/objects/platemap.py,sha256=8IvJPAecs_e_FyqibzhCw-O4zjCFnf-zMUp_5krTEsg,5864
14
14
  seer_pas_sdk/objects/volcanoplot.py,sha256=lTrTOVg74nT3uo-P1edQJC1ZbdoiLMtQ3VJd9CnzmoM,9396
15
- seer_pas_sdk-1.1.1.dist-info/licenses/LICENSE.txt,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
16
- seer_pas_sdk-1.1.1.dist-info/METADATA,sha256=iJFudniVKIcZBOqPmBtG2-3CC08RVN702_xBjXCciyg,13413
17
- seer_pas_sdk-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
- seer_pas_sdk-1.1.1.dist-info/top_level.txt,sha256=-2kZ-KFMGtXwr8H1O5llMKlcJ8gRKohEmrIvazXB61s,13
19
- seer_pas_sdk-1.1.1.dist-info/RECORD,,
15
+ seer_pas_sdk-1.2.1.dist-info/licenses/LICENSE.txt,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
16
+ seer_pas_sdk-1.2.1.dist-info/METADATA,sha256=_zLtgk1zE8eWRPizPS9h2tEbfhJ3DOAH2ePNk4ptwvw,13413
17
+ seer_pas_sdk-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ seer_pas_sdk-1.2.1.dist-info/top_level.txt,sha256=-2kZ-KFMGtXwr8H1O5llMKlcJ8gRKohEmrIvazXB61s,13
19
+ seer_pas_sdk-1.2.1.dist-info/RECORD,,