mtbls-mhd-integration 0.0.11__tar.gz → 0.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {mtbls_mhd_integration-0.0.11/mtbls_mhd_integration.egg-info → mtbls_mhd_integration-0.0.13}/PKG-INFO +2 -2
  2. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/__init__.py +1 -1
  3. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/builder.py +175 -69
  4. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/convertor.py +1 -1
  5. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13/mtbls_mhd_integration.egg-info}/PKG-INFO +2 -2
  6. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls_mhd_integration.egg-info/requires.txt +1 -1
  7. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/pyproject.toml +7 -3
  8. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/LICENSE +0 -0
  9. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/README.md +0 -0
  10. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/commands/__init__.py +0 -0
  11. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/commands/cli.py +0 -0
  12. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/commands/create.py +0 -0
  13. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/commands/create_mhd_file.py +0 -0
  14. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/commands/validate.py +0 -0
  15. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/config.py +0 -0
  16. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/convertor_factory.py +0 -0
  17. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/__init__.py +0 -0
  18. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/__init__.py +0 -0
  19. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/db_metadata_collector.py +0 -0
  20. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/folder_metadata_collector.py +0 -0
  21. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/legacy/mtbls_study_schema.py +0 -0
  22. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/ms/__init__.py +0 -0
  23. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls2mhd/v0_1/ms/convertor.py +0 -0
  24. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls_mhd_integration.egg-info/SOURCES.txt +0 -0
  25. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls_mhd_integration.egg-info/dependency_links.txt +0 -0
  26. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls_mhd_integration.egg-info/entry_points.txt +0 -0
  27. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/mtbls_mhd_integration.egg-info/top_level.txt +0 -0
  28. {mtbls_mhd_integration-0.0.11 → mtbls_mhd_integration-0.0.13}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.39
12
+ Requires-Dist: mhd-model>=0.1.43
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.11"
1
+ __version__ = "v0.0.13"
2
2
 
3
3
  import pathlib
4
4
  import sys
@@ -53,11 +53,7 @@ logger = logging.getLogger(__name__)
53
53
  MTBLS_ASSAY_TYPES = {
54
54
  "LC-MS": COMMON_ASSAY_TYPES["OBI:0003097S"],
55
55
  "GC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
56
- "CE-MS": CvTerm(
57
- source="OBI",
58
- accession="OBI:0003741",
59
- name="capillary electrophoresis mass spectrometry assay",
60
- ),
56
+ "CE-MS": COMMON_ASSAY_TYPES["OBI:0003741"],
61
57
  "GCxGC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
62
58
  "FIA-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
63
59
  "MALDI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
@@ -594,7 +590,7 @@ class MhdLegacyDatasetBuilder:
594
590
  data: MetabolightsStudyModel,
595
591
  config: Mtbls2MhdConfiguration,
596
592
  ):
597
- result_file_map = {}
593
+ result_file_map: dict[str, mhd_domain.ResultFile] = {}
598
594
  tsv_format = create_cv_term_object(
599
595
  type_="descriptor", accession="EDAM:3475", source="EDAM", name="TSV"
600
596
  )
@@ -1433,7 +1429,10 @@ class MhdLegacyDatasetBuilder:
1433
1429
  return protocols
1434
1430
 
1435
1431
  def add_keywords(
1436
- self, mhd_builder: MhDatasetBuilder, mhd_study: mhd_domain.Study, study: Study
1432
+ self,
1433
+ mhd_builder: MhDatasetBuilder,
1434
+ mhd_study: mhd_domain.Study,
1435
+ study: Study,
1437
1436
  ):
1438
1437
  for item in study.study_design_descriptors.design_types:
1439
1438
  keyword = create_cv_term_object(
@@ -1447,12 +1446,62 @@ class MhdLegacyDatasetBuilder:
1447
1446
  name=item.term or "",
1448
1447
  )
1449
1448
  mhd_builder.add_node(keyword)
1450
- mhd_builder.link(
1451
- mhd_study,
1452
- "has-submitter-keyword",
1453
- keyword,
1454
- reverse_relationship_name="keyword-of",
1455
- )
1449
+
1450
+ if item.source and item.source.lower() in ("data-curation", "workflows"):
1451
+ mhd_builder.link(
1452
+ mhd_study,
1453
+ "has-repository-keyword",
1454
+ keyword,
1455
+ reverse_relationship_name="keyword-of",
1456
+ )
1457
+ else:
1458
+ mhd_builder.link(
1459
+ mhd_study,
1460
+ "has-submitter-keyword",
1461
+ keyword,
1462
+ reverse_relationship_name="keyword-of",
1463
+ )
1464
+
1465
+ def add_assay_keywords(
1466
+ self,
1467
+ mhd_builder: MhDatasetBuilder,
1468
+ assays: dict[str, mhd_domain.Assay],
1469
+ study: Study,
1470
+ ):
1471
+ for assay in study.study_assays.assays:
1472
+ mhd_assay = assays.get(assay.file_name)
1473
+ if not mhd_assay:
1474
+ continue
1475
+ for item in assay.assay_descriptors:
1476
+ keyword = create_cv_term_object(
1477
+ type_="descriptor",
1478
+ source=item.term_source_ref or "",
1479
+ accession=self.convert_to_curie(
1480
+ item.term_source_ref,
1481
+ item.term_accession_number,
1482
+ )
1483
+ or "",
1484
+ name=item.term or "",
1485
+ )
1486
+ mhd_builder.add_node(keyword)
1487
+
1488
+ if item.source and item.source.lower() in (
1489
+ "data-curation",
1490
+ "workflows",
1491
+ ):
1492
+ mhd_builder.link(
1493
+ mhd_assay,
1494
+ "has-repository-keyword",
1495
+ keyword,
1496
+ reverse_relationship_name="keyword-of",
1497
+ )
1498
+ else:
1499
+ mhd_builder.link(
1500
+ mhd_assay,
1501
+ "has-submitter-keyword",
1502
+ keyword,
1503
+ reverse_relationship_name="keyword-of",
1504
+ )
1456
1505
 
1457
1506
  def find_file_format(
1458
1507
  self,
@@ -1650,54 +1699,103 @@ class MhdLegacyDatasetBuilder:
1650
1699
  mhd_builder: MhDatasetBuilder,
1651
1700
  mhd_study: mhd_domain.Study,
1652
1701
  data: MetabolightsStudyModel,
1702
+ result_files: dict[str, mhd_domain.ResultFile],
1653
1703
  ):
1654
- for file_name, maf_file in data.metabolite_assignments.items():
1655
- if maf_file.table.data.get("metabolite_identification"):
1656
- identifiers = maf_file.table.data.get("database_identifier")
1657
- for idx, name in enumerate(
1658
- maf_file.table.data["metabolite_identification"]
1659
- ):
1660
- if not name:
1661
- continue
1662
- met = mhd_domain.Metabolite(
1663
- name=name,
1664
- )
1665
- if identifiers and identifiers[idx]:
1666
- value = identifiers[idx]
1667
- identifier = None
1668
- if value.startswith("CHEBI"):
1669
- identifier = create_cv_term_value_object(
1670
- type_="metabolite-identifier",
1671
- source="CHEMINF",
1672
- accession="CHEMINF:000407",
1673
- name="ChEBI identifier",
1674
- value=value,
1675
- )
1676
- elif value.startswith("HMDB"):
1677
- identifier = create_cv_term_value_object(
1678
- type_="metabolite-identifier",
1679
- source="CHEMINF",
1680
- accession="CHEMINF:000408",
1681
- name="HMDB identifier",
1682
- value=value.replace(":", ""),
1683
- )
1704
+ for maf_filename, maf_file in data.metabolite_assignments.items():
1705
+ if not maf_file.table.data.get("metabolite_identification"):
1706
+ continue
1707
+ result_file = result_files.get(maf_filename)
1708
+ for idx, name in enumerate(
1709
+ maf_file.table.data["metabolite_identification"]
1710
+ ):
1711
+ if not name or not name.strip():
1712
+ continue
1713
+ met = mhd_domain.Metabolite(name=name)
1714
+ assignments = {}
1715
+ data: dict[str, str] = maf_file.table.data
1716
+ submitted_identifiers = []
1717
+ assigned_chebi_identifiers = []
1718
+ assigned_refmet_identifiers = []
1719
+ if maf_file.table.data.get("database_identifier"):
1720
+ submitted_identifiers = [
1721
+ x.strip()
1722
+ for x in data["database_identifier"][idx].split("|")
1723
+ if x
1724
+ ]
1725
+ if maf_file.table.data.get("assigned_chebi_identifier"):
1726
+ assigned_chebi_identifiers = [
1727
+ x.strip()
1728
+ for x in data["assigned_chebi_identifier"][idx].split("|")
1729
+ if x
1730
+ ]
1731
+ if maf_file.table.data.get("assigned_refmet_identifier"):
1732
+ assigned_refmet_identifiers = [
1733
+ x.strip()
1734
+ for x in data["assigned_refmet_identifier"][idx].split("|")
1735
+ if x
1736
+ ]
1684
1737
 
1685
- if identifier:
1686
- mhd_builder.add(identifier)
1687
- # met.identifier_refs = [identifier.id_]
1688
- mhd_builder.link(
1689
- met,
1690
- "identified-as",
1691
- identifier,
1692
- reverse_relationship_name="reported-identifier-of",
1693
- )
1694
- mhd_builder.add(met)
1738
+ for identifiers in [
1739
+ (submitted_identifiers, ""),
1740
+ (assigned_chebi_identifiers, "CHEBI"),
1741
+ (assigned_refmet_identifiers, "REFMET"),
1742
+ ]:
1743
+ for identifiers, compound_source in assignments:
1744
+ if not identifiers:
1745
+ continue
1746
+ for identifier_value in identifiers:
1747
+ identifier = None
1748
+ if (
1749
+ compound_source == "CHEBI"
1750
+ or identifier_value.upper().startswith("CHEBI")
1751
+ ):
1752
+ identifier = create_cv_term_value_object(
1753
+ type_="metabolite-identifier",
1754
+ source="CHEMINF",
1755
+ accession="CHEMINF:000407",
1756
+ name="ChEBI identifier",
1757
+ value=identifier_value,
1758
+ )
1759
+ elif identifier_value.upper().startswith("HMDB"):
1760
+ identifier = create_cv_term_value_object(
1761
+ type_="metabolite-identifier",
1762
+ source="CHEMINF",
1763
+ accession="CHEMINF:000408",
1764
+ name="HMDB identifier",
1765
+ value=identifier_value,
1766
+ )
1767
+ elif compound_source == "REFMET":
1768
+ identifier = create_cv_term_value_object(
1769
+ type_="metabolite-identifier",
1770
+ source="REFMET",
1771
+ accession="",
1772
+ name="RefMet identifier",
1773
+ value=identifier_value,
1774
+ )
1775
+
1776
+ if identifier:
1777
+ mhd_builder.add(identifier)
1778
+ mhd_builder.link(
1779
+ met,
1780
+ "identified-as",
1781
+ identifier,
1782
+ reverse_relationship_name="reported-identifier-of",
1783
+ )
1784
+ mhd_builder.add(met)
1785
+ if result_file:
1695
1786
  mhd_builder.link(
1696
- mhd_study,
1787
+ result_file,
1697
1788
  "reports",
1698
1789
  met,
1699
1790
  reverse_relationship_name="reported-in",
1700
1791
  )
1792
+ result_file
1793
+ mhd_builder.link(
1794
+ mhd_study,
1795
+ "reports",
1796
+ met,
1797
+ reverse_relationship_name="reported-in",
1798
+ )
1701
1799
 
1702
1800
  def add_assays(
1703
1801
  self,
@@ -1708,9 +1806,9 @@ class MhdLegacyDatasetBuilder:
1708
1806
  metadata_files: dict[str, mhd_domain.CvTermObject],
1709
1807
  samples: dict[str, mhd_domain.Sample],
1710
1808
  files_map,
1711
- ) -> mhd_domain.Assay:
1809
+ ) -> dict[str, mhd_domain.Assay]:
1712
1810
  protocol_summaries: OrderedDict[str, ProtocolRunSummary] = OrderedDict()
1713
- assays: list[mhd_domain.Assay] = []
1811
+ assays = dict[str, mhd_domain.Assay] = OrderedDict()
1714
1812
  for assay in selected_assays:
1715
1813
  if assay.file_name not in data.assays:
1716
1814
  continue
@@ -1724,7 +1822,7 @@ class MhdLegacyDatasetBuilder:
1724
1822
  )
1725
1823
 
1726
1824
  mhd_builder.add(mhd_assay)
1727
- assays.append(mhd_assay)
1825
+ assays[assay.file_name] = mhd_assay
1728
1826
  mhd_builder.link(
1729
1827
  mhd_study, "has-assay", mhd_assay, reverse_relationship_name="part-of"
1730
1828
  )
@@ -1833,8 +1931,7 @@ class MhdLegacyDatasetBuilder:
1833
1931
  samples,
1834
1932
  protocol_summaries,
1835
1933
  )
1836
-
1837
- for mhd_assay in assays:
1934
+ for _, mhd_assay in assays.items():
1838
1935
  self.add_assay_protocols(mhd_builder, mhd_study, data, mhd_assay)
1839
1936
  return assays
1840
1937
 
@@ -1971,20 +2068,28 @@ class MhdLegacyDatasetBuilder:
1971
2068
  data.study_db_metadata.release_date,
1972
2069
  )
1973
2070
  # actual or estimated
1974
- public_release_date_str = (
1975
- db_metadata.first_public_date or db_metadata.release_date or None
1976
- )
2071
+ submission_date_str = None
2072
+ public_release_date_str = None
2073
+ if db_metadata:
2074
+ if db_metadata.first_private_date:
2075
+ submission_date_str = db_metadata.first_private_date
2076
+ elif db_metadata.submission_date:
2077
+ submission_date_str = db_metadata.submission_date
2078
+ if db_metadata.first_public_date:
2079
+ public_release_date_str = db_metadata.first_public_date
2080
+ elif db_metadata.release_date:
2081
+ public_release_date_str = db_metadata.release_date
2082
+
1977
2083
  public_release_date = (
1978
2084
  datetime.datetime.strptime(public_release_date_str, "%Y-%m-%d")
1979
2085
  if public_release_date_str
1980
2086
  else None
1981
2087
  )
1982
2088
  submission_date = (
1983
- datetime.datetime.strptime(db_metadata.first_private_date, "%Y-%m-%d")
1984
- if db_metadata and db_metadata.first_private_date
2089
+ datetime.datetime.strptime(submission_date_str, "%Y-%m-%d")
2090
+ if submission_date_str
1985
2091
  else None
1986
2092
  )
1987
-
1988
2093
  mhd_study = mhd_domain.Study(
1989
2094
  repository_identifier=study.identifier,
1990
2095
  created_by_ref=dataset_provider.id_,
@@ -2029,12 +2134,11 @@ class MhdLegacyDatasetBuilder:
2029
2134
  self.add_publications(data, mhd_builder, mhd_study)
2030
2135
  self.add_protocols(mhd_builder, mhd_study, study)
2031
2136
 
2032
- self.add_keywords(mhd_builder, mhd_study, study)
2033
- self.add_reported_metabolites(mhd_builder, mhd_study, data)
2034
-
2035
2137
  result_files = self.add_result_files(
2036
2138
  mhd_builder, mhd_study, data, config=config
2037
2139
  )
2140
+ self.add_reported_metabolites(mhd_builder, mhd_study, data, result_files)
2141
+
2038
2142
  files_map = self.add_data_files(
2039
2143
  mhd_builder,
2040
2144
  mhd_study,
@@ -2043,7 +2147,7 @@ class MhdLegacyDatasetBuilder:
2043
2147
  result_files,
2044
2148
  config=config,
2045
2149
  )
2046
- self.add_assays(
2150
+ mhd_assays = self.add_assays(
2047
2151
  mhd_builder,
2048
2152
  mhd_study,
2049
2153
  data,
@@ -2052,6 +2156,8 @@ class MhdLegacyDatasetBuilder:
2052
2156
  samples,
2053
2157
  files_map,
2054
2158
  )
2159
+ self.add_keywords(mhd_builder, mhd_study, study)
2160
+ self.add_assay_keywords(mhd_builder, mhd_assays, study)
2055
2161
 
2056
2162
  mhd_dataset: MhDatasetBaseProfile = mhd_builder.create_dataset(
2057
2163
  start_item_refs=[mhd_study.id_], dataset_class=MhDatasetLegacyProfile
@@ -38,7 +38,7 @@ class LegacyProfileV01Convertor(BaseMhdConvertor):
38
38
  )
39
39
  try:
40
40
  success, message = mhd_dataset_builder.build(
41
- mhd_id=mhd_identifier,
41
+ mhd_id=None,
42
42
  mtbls_study_id=repository_identifier,
43
43
  mtbls_study_path=mtbls_study_path,
44
44
  mtbls_study_repository_url=mtbls_study_repository_url,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.11
3
+ Version: 0.0.13
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.39
12
+ Requires-Dist: mhd-model>=0.1.43
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  asyncpg>=0.30.0
2
2
  metabolights-utils>=1.4.16
3
- mhd-model>=0.1.39
3
+ mhd-model>=0.1.43
4
4
  psycopg[binary,pool]>=3.3.2
5
5
  pydantic>=2.12.4
6
6
  pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mtbls-mhd-integration"
3
- version = "0.0.11"
3
+ version = "0.0.13"
4
4
  description = "MetaboLights - MetabolomicsHub Integration"
5
5
  authors = [{"name" = "MetaboLights Team", "email" = "metabolights-help@ebi.ac.uk"}]
6
6
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ requires-python = ">=3.12,<4.0"
9
9
  dependencies = [
10
10
  "asyncpg>=0.30.0",
11
11
  "metabolights-utils>=1.4.16",
12
- "mhd-model>=0.1.39",
12
+ "mhd-model>=0.1.43",
13
13
  "psycopg[binary,pool]>=3.3.2",
14
14
  "pydantic>=2.12.4",
15
15
  "pydantic-settings>=2.10.1",
@@ -31,6 +31,10 @@ test = [
31
31
  "pytest-cov>=6.2.1",
32
32
  ]
33
33
 
34
+ [tool.uv]
35
+ default-groups = []
36
+
37
+
34
38
  [project.scripts]
35
39
  mtbls-mhd-cli = "mtbls2mhd.commands.cli:cli"
36
40
 
@@ -50,7 +54,7 @@ exclude = ["tests*", "docs*"]
50
54
  [tool.commitizen]
51
55
  name = "cz_conventional_commits"
52
56
  version_provider = "uv"
53
- version = "0.0.113"
57
+ version = "0.0.133"
54
58
  tag_format = "v$major.$minor.$patch"
55
59
  version_files = [
56
60
  "pyproject.toml:version",