mtbls-mhd-integration 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mtbls2mhd/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.11"
1
+ __version__ = "v0.0.12"
2
2
 
3
3
  import pathlib
4
4
  import sys
@@ -594,7 +594,7 @@ class MhdLegacyDatasetBuilder:
594
594
  data: MetabolightsStudyModel,
595
595
  config: Mtbls2MhdConfiguration,
596
596
  ):
597
- result_file_map = {}
597
+ result_file_map: dict[str, mhd_domain.ResultFile] = {}
598
598
  tsv_format = create_cv_term_object(
599
599
  type_="descriptor", accession="EDAM:3475", source="EDAM", name="TSV"
600
600
  )
@@ -1650,54 +1650,103 @@ class MhdLegacyDatasetBuilder:
1650
1650
  mhd_builder: MhDatasetBuilder,
1651
1651
  mhd_study: mhd_domain.Study,
1652
1652
  data: MetabolightsStudyModel,
1653
+ result_files: dict[str, mhd_domain.ResultFile],
1653
1654
  ):
1654
- for file_name, maf_file in data.metabolite_assignments.items():
1655
- if maf_file.table.data.get("metabolite_identification"):
1656
- identifiers = maf_file.table.data.get("database_identifier")
1657
- for idx, name in enumerate(
1658
- maf_file.table.data["metabolite_identification"]
1659
- ):
1660
- if not name:
1661
- continue
1662
- met = mhd_domain.Metabolite(
1663
- name=name,
1664
- )
1665
- if identifiers and identifiers[idx]:
1666
- value = identifiers[idx]
1667
- identifier = None
1668
- if value.startswith("CHEBI"):
1669
- identifier = create_cv_term_value_object(
1670
- type_="metabolite-identifier",
1671
- source="CHEMINF",
1672
- accession="CHEMINF:000407",
1673
- name="ChEBI identifier",
1674
- value=value,
1675
- )
1676
- elif value.startswith("HMDB"):
1677
- identifier = create_cv_term_value_object(
1678
- type_="metabolite-identifier",
1679
- source="CHEMINF",
1680
- accession="CHEMINF:000408",
1681
- name="HMDB identifier",
1682
- value=value.replace(":", ""),
1683
- )
1655
+ for maf_filename, maf_file in data.metabolite_assignments.items():
1656
+ if not maf_file.table.data.get("metabolite_identification"):
1657
+ continue
1658
+ result_file = result_files.get(maf_filename)
1659
+ for idx, name in enumerate(
1660
+ maf_file.table.data["metabolite_identification"]
1661
+ ):
1662
+ if not name or not name.strip():
1663
+ continue
1664
+ met = mhd_domain.Metabolite(name=name)
1665
+ assignments = {}
1666
+ data: dict[str, str] = maf_file.table.data
1667
+ submitted_identifiers = []
1668
+ assigned_chebi_identifiers = []
1669
+ assigned_refmet_identifiers = []
1670
+ if maf_file.table.data.get("database_identifier"):
1671
+ submitted_identifiers = [
1672
+ x.strip()
1673
+ for x in data["database_identifier"][idx].split("|")
1674
+ if x
1675
+ ]
1676
+ if maf_file.table.data.get("assigned_chebi_identifier"):
1677
+ assigned_chebi_identifiers = [
1678
+ x.strip()
1679
+ for x in data["assigned_chebi_identifier"][idx].split("|")
1680
+ if x
1681
+ ]
1682
+ if maf_file.table.data.get("assigned_refmet_identifier"):
1683
+ assigned_refmet_identifiers = [
1684
+ x.strip()
1685
+ for x in data["assigned_refmet_identifier"][idx].split("|")
1686
+ if x
1687
+ ]
1684
1688
 
1685
- if identifier:
1686
- mhd_builder.add(identifier)
1687
- # met.identifier_refs = [identifier.id_]
1688
- mhd_builder.link(
1689
- met,
1690
- "identified-as",
1691
- identifier,
1692
- reverse_relationship_name="reported-identifier-of",
1693
- )
1694
- mhd_builder.add(met)
1689
+ for identifiers in [
1690
+ (submitted_identifiers, ""),
1691
+ (assigned_chebi_identifiers, "CHEBI"),
1692
+ (assigned_refmet_identifiers, "REFMET"),
1693
+ ]:
1694
+ for identifiers, compound_source in assignments:
1695
+ if not identifiers:
1696
+ continue
1697
+ for identifier_value in identifiers:
1698
+ identifier = None
1699
+ if (
1700
+ compound_source == "CHEBI"
1701
+ or identifier_value.upper().startswith("CHEBI")
1702
+ ):
1703
+ identifier = create_cv_term_value_object(
1704
+ type_="metabolite-identifier",
1705
+ source="CHEMINF",
1706
+ accession="CHEMINF:000407",
1707
+ name="ChEBI identifier",
1708
+ value=identifier_value,
1709
+ )
1710
+ elif identifier_value.upper().startswith("HMDB"):
1711
+ identifier = create_cv_term_value_object(
1712
+ type_="metabolite-identifier",
1713
+ source="CHEMINF",
1714
+ accession="CHEMINF:000408",
1715
+ name="HMDB identifier",
1716
+ value=identifier_value,
1717
+ )
1718
+ elif compound_source == "REFMET":
1719
+ identifier = create_cv_term_value_object(
1720
+ type_="metabolite-identifier",
1721
+ source="REFMET",
1722
+ accession="",
1723
+ name="RefMet identifier",
1724
+ value=identifier_value,
1725
+ )
1726
+
1727
+ if identifier:
1728
+ mhd_builder.add(identifier)
1729
+ mhd_builder.link(
1730
+ met,
1731
+ "identified-as",
1732
+ identifier,
1733
+ reverse_relationship_name="reported-identifier-of",
1734
+ )
1735
+ mhd_builder.add(met)
1736
+ if result_file:
1695
1737
  mhd_builder.link(
1696
- mhd_study,
1738
+ result_file,
1697
1739
  "reports",
1698
1740
  met,
1699
1741
  reverse_relationship_name="reported-in",
1700
1742
  )
1743
+ result_file
1744
+ mhd_builder.link(
1745
+ mhd_study,
1746
+ "reports",
1747
+ met,
1748
+ reverse_relationship_name="reported-in",
1749
+ )
1701
1750
 
1702
1751
  def add_assays(
1703
1752
  self,
@@ -2030,11 +2079,12 @@ class MhdLegacyDatasetBuilder:
2030
2079
  self.add_protocols(mhd_builder, mhd_study, study)
2031
2080
 
2032
2081
  self.add_keywords(mhd_builder, mhd_study, study)
2033
- self.add_reported_metabolites(mhd_builder, mhd_study, data)
2034
2082
 
2035
2083
  result_files = self.add_result_files(
2036
2084
  mhd_builder, mhd_study, data, config=config
2037
2085
  )
2086
+ self.add_reported_metabolites(mhd_builder, mhd_study, data, result_files)
2087
+
2038
2088
  files_map = self.add_data_files(
2039
2089
  mhd_builder,
2040
2090
  mhd_study,
@@ -38,7 +38,7 @@ class LegacyProfileV01Convertor(BaseMhdConvertor):
38
38
  )
39
39
  try:
40
40
  success, message = mhd_dataset_builder.build(
41
- mhd_id=mhd_identifier,
41
+ mhd_id=None,
42
42
  mtbls_study_id=repository_identifier,
43
43
  mtbls_study_path=mtbls_study_path,
44
44
  mtbls_study_repository_url=mtbls_study_repository_url,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.39
12
+ Requires-Dist: mhd-model>=0.1.41
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,4 +1,4 @@
1
- mtbls2mhd/__init__.py,sha256=0BFPsjHigJ2_UsW5db4g-LyFf8QbDFBUqvXv4BwGiFw,158
1
+ mtbls2mhd/__init__.py,sha256=lndk-uwz1jRMVCVy3G7BTa0Mz9464n8uyyOOlNSzYSs,158
2
2
  mtbls2mhd/config.py,sha256=BjOqAyfDhp9byoFjJz70xh4HRR8pu1yrm_5jweqygSI,2310
3
3
  mtbls2mhd/convertor_factory.py,sha256=4loatqIRIvIhcaeIS0cSonJNYJu47o56ZllX6593ypk,1133
4
4
  mtbls2mhd/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -8,16 +8,16 @@ mtbls2mhd/commands/create_mhd_file.py,sha256=0sDr-Cm0JhhEB5V1g66uoag3rlcaAnGP8Md
8
8
  mtbls2mhd/commands/validate.py,sha256=iwIKegviRxdH0r8scRXbDISlwQUzAq5uVoCHinU7x6Q,473
9
9
  mtbls2mhd/v0_1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  mtbls2mhd/v0_1/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- mtbls2mhd/v0_1/legacy/builder.py,sha256=IN3z8I3VsDkmLv_XMeDaLVkq88d9yl6hRWilNtNKo-U,85914
12
- mtbls2mhd/v0_1/legacy/convertor.py,sha256=M4-3NB56C0DN8H5vmM0yC0J_qW87ZImjjL1vzDEBc5s,2208
11
+ mtbls2mhd/v0_1/legacy/builder.py,sha256=ou_YOtRNFjlLCvZYJc7JphczEbpOCrX5rqndCMxRFXI,88325
12
+ mtbls2mhd/v0_1/legacy/convertor.py,sha256=Nu6xJvEk8WsRQJQFoxM5eo-y46tVfWV8EkedVoqI9rI,2198
13
13
  mtbls2mhd/v0_1/legacy/db_metadata_collector.py,sha256=UGk1AeST1NQ9lWwy_sYZxaaWs0ajgaKELDJtXJ4-Uco,13071
14
14
  mtbls2mhd/v0_1/legacy/folder_metadata_collector.py,sha256=QwtXI9rBvdh6pxILQDHymIwYDqzGuMxqdOcqtdAObME,7538
15
15
  mtbls2mhd/v0_1/legacy/mtbls_study_schema.py,sha256=gUTbRmI8GfHI5leLiw8dxsmWnV3NnWw5RPX_LQWRFRQ,3162
16
16
  mtbls2mhd/v0_1/ms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  mtbls2mhd/v0_1/ms/convertor.py,sha256=kLIUpxOrH6hcs2Y9Bq1D0Mdvypg40pLyEJpHtGj6H_g,89
18
- mtbls_mhd_integration-0.0.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
- mtbls_mhd_integration-0.0.11.dist-info/METADATA,sha256=e8AHuzd119EKD4I_3au3GbJ2zPNGzQ4e6MZhB5HLsz4,688
20
- mtbls_mhd_integration-0.0.11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
21
- mtbls_mhd_integration-0.0.11.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
- mtbls_mhd_integration-0.0.11.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
- mtbls_mhd_integration-0.0.11.dist-info/RECORD,,
18
+ mtbls_mhd_integration-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
+ mtbls_mhd_integration-0.0.12.dist-info/METADATA,sha256=p307N8kU4TvWHnKv_28VXUSp7N2fi0ZAh5JN_7ILx08,688
20
+ mtbls_mhd_integration-0.0.12.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
21
+ mtbls_mhd_integration-0.0.12.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
+ mtbls_mhd_integration-0.0.12.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
+ mtbls_mhd_integration-0.0.12.dist-info/RECORD,,