mtbls-mhd-integration 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {mtbls_mhd_integration-0.0.10/mtbls_mhd_integration.egg-info → mtbls_mhd_integration-0.0.12}/PKG-INFO +2 -2
  2. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/__init__.py +1 -1
  3. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/builder.py +93 -43
  4. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/convertor.py +1 -1
  5. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/db_metadata_collector.py +5 -2
  6. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/folder_metadata_collector.py +2 -0
  7. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12/mtbls_mhd_integration.egg-info}/PKG-INFO +2 -2
  8. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls_mhd_integration.egg-info/requires.txt +1 -1
  9. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/pyproject.toml +7 -3
  10. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/LICENSE +0 -0
  11. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/README.md +0 -0
  12. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/commands/__init__.py +0 -0
  13. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/commands/cli.py +0 -0
  14. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/commands/create.py +0 -0
  15. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/commands/create_mhd_file.py +0 -0
  16. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/commands/validate.py +0 -0
  17. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/config.py +0 -0
  18. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/convertor_factory.py +0 -0
  19. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/__init__.py +0 -0
  20. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/__init__.py +0 -0
  21. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/legacy/mtbls_study_schema.py +0 -0
  22. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/ms/__init__.py +0 -0
  23. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls2mhd/v0_1/ms/convertor.py +0 -0
  24. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls_mhd_integration.egg-info/SOURCES.txt +0 -0
  25. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls_mhd_integration.egg-info/dependency_links.txt +0 -0
  26. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls_mhd_integration.egg-info/entry_points.txt +0 -0
  27. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/mtbls_mhd_integration.egg-info/top_level.txt +0 -0
  28. {mtbls_mhd_integration-0.0.10 → mtbls_mhd_integration-0.0.12}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.39
12
+ Requires-Dist: mhd-model>=0.1.41
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.10"
1
+ __version__ = "v0.0.12"
2
2
 
3
3
  import pathlib
4
4
  import sys
@@ -594,7 +594,7 @@ class MhdLegacyDatasetBuilder:
594
594
  data: MetabolightsStudyModel,
595
595
  config: Mtbls2MhdConfiguration,
596
596
  ):
597
- result_file_map = {}
597
+ result_file_map: dict[str, mhd_domain.ResultFile] = {}
598
598
  tsv_format = create_cv_term_object(
599
599
  type_="descriptor", accession="EDAM:3475", source="EDAM", name="TSV"
600
600
  )
@@ -1650,54 +1650,103 @@ class MhdLegacyDatasetBuilder:
1650
1650
  mhd_builder: MhDatasetBuilder,
1651
1651
  mhd_study: mhd_domain.Study,
1652
1652
  data: MetabolightsStudyModel,
1653
+ result_files: dict[str, mhd_domain.ResultFile],
1653
1654
  ):
1654
- for file_name, maf_file in data.metabolite_assignments.items():
1655
- if maf_file.table.data.get("metabolite_identification"):
1656
- identifiers = maf_file.table.data.get("database_identifier")
1657
- for idx, name in enumerate(
1658
- maf_file.table.data["metabolite_identification"]
1659
- ):
1660
- if not name:
1661
- continue
1662
- met = mhd_domain.Metabolite(
1663
- name=name,
1664
- )
1665
- if identifiers and identifiers[idx]:
1666
- value = identifiers[idx]
1667
- identifier = None
1668
- if value.startswith("CHEBI"):
1669
- identifier = create_cv_term_value_object(
1670
- type_="metabolite-identifier",
1671
- source="CHEMINF",
1672
- accession="CHEMINF:000407",
1673
- name="ChEBI identifier",
1674
- value=value,
1675
- )
1676
- elif value.startswith("HMDB"):
1677
- identifier = create_cv_term_value_object(
1678
- type_="metabolite-identifier",
1679
- source="CHEMINF",
1680
- accession="CHEMINF:000408",
1681
- name="HMDB identifier",
1682
- value=value.replace(":", ""),
1683
- )
1655
+ for maf_filename, maf_file in data.metabolite_assignments.items():
1656
+ if not maf_file.table.data.get("metabolite_identification"):
1657
+ continue
1658
+ result_file = result_files.get(maf_filename)
1659
+ for idx, name in enumerate(
1660
+ maf_file.table.data["metabolite_identification"]
1661
+ ):
1662
+ if not name or not name.strip():
1663
+ continue
1664
+ met = mhd_domain.Metabolite(name=name)
1665
+ assignments = {}
1666
+ data: dict[str, str] = maf_file.table.data
1667
+ submitted_identifiers = []
1668
+ assigned_chebi_identifiers = []
1669
+ assigned_refmet_identifiers = []
1670
+ if maf_file.table.data.get("database_identifier"):
1671
+ submitted_identifiers = [
1672
+ x.strip()
1673
+ for x in data["database_identifier"][idx].split("|")
1674
+ if x
1675
+ ]
1676
+ if maf_file.table.data.get("assigned_chebi_identifier"):
1677
+ assigned_chebi_identifiers = [
1678
+ x.strip()
1679
+ for x in data["assigned_chebi_identifier"][idx].split("|")
1680
+ if x
1681
+ ]
1682
+ if maf_file.table.data.get("assigned_refmet_identifier"):
1683
+ assigned_refmet_identifiers = [
1684
+ x.strip()
1685
+ for x in data["assigned_refmet_identifier"][idx].split("|")
1686
+ if x
1687
+ ]
1684
1688
 
1685
- if identifier:
1686
- mhd_builder.add(identifier)
1687
- # met.identifier_refs = [identifier.id_]
1688
- mhd_builder.link(
1689
- met,
1690
- "identified-as",
1691
- identifier,
1692
- reverse_relationship_name="reported-identifier-of",
1693
- )
1694
- mhd_builder.add(met)
1689
+ for identifiers in [
1690
+ (submitted_identifiers, ""),
1691
+ (assigned_chebi_identifiers, "CHEBI"),
1692
+ (assigned_refmet_identifiers, "REFMET"),
1693
+ ]:
1694
+ for identifiers, compound_source in assignments:
1695
+ if not identifiers:
1696
+ continue
1697
+ for identifier_value in identifiers:
1698
+ identifier = None
1699
+ if (
1700
+ compound_source == "CHEBI"
1701
+ or identifier_value.upper().startswith("CHEBI")
1702
+ ):
1703
+ identifier = create_cv_term_value_object(
1704
+ type_="metabolite-identifier",
1705
+ source="CHEMINF",
1706
+ accession="CHEMINF:000407",
1707
+ name="ChEBI identifier",
1708
+ value=identifier_value,
1709
+ )
1710
+ elif identifier_value.upper().startswith("HMDB"):
1711
+ identifier = create_cv_term_value_object(
1712
+ type_="metabolite-identifier",
1713
+ source="CHEMINF",
1714
+ accession="CHEMINF:000408",
1715
+ name="HMDB identifier",
1716
+ value=identifier_value,
1717
+ )
1718
+ elif compound_source == "REFMET":
1719
+ identifier = create_cv_term_value_object(
1720
+ type_="metabolite-identifier",
1721
+ source="REFMET",
1722
+ accession="",
1723
+ name="RefMet identifier",
1724
+ value=identifier_value,
1725
+ )
1726
+
1727
+ if identifier:
1728
+ mhd_builder.add(identifier)
1729
+ mhd_builder.link(
1730
+ met,
1731
+ "identified-as",
1732
+ identifier,
1733
+ reverse_relationship_name="reported-identifier-of",
1734
+ )
1735
+ mhd_builder.add(met)
1736
+ if result_file:
1695
1737
  mhd_builder.link(
1696
- mhd_study,
1738
+ result_file,
1697
1739
  "reports",
1698
1740
  met,
1699
1741
  reverse_relationship_name="reported-in",
1700
1742
  )
1743
+ result_file
1744
+ mhd_builder.link(
1745
+ mhd_study,
1746
+ "reports",
1747
+ met,
1748
+ reverse_relationship_name="reported-in",
1749
+ )
1701
1750
 
1702
1751
  def add_assays(
1703
1752
  self,
@@ -2030,11 +2079,12 @@ class MhdLegacyDatasetBuilder:
2030
2079
  self.add_protocols(mhd_builder, mhd_study, study)
2031
2080
 
2032
2081
  self.add_keywords(mhd_builder, mhd_study, study)
2033
- self.add_reported_metabolites(mhd_builder, mhd_study, data)
2034
2082
 
2035
2083
  result_files = self.add_result_files(
2036
2084
  mhd_builder, mhd_study, data, config=config
2037
2085
  )
2086
+ self.add_reported_metabolites(mhd_builder, mhd_study, data, result_files)
2087
+
2038
2088
  files_map = self.add_data_files(
2039
2089
  mhd_builder,
2040
2090
  mhd_study,
@@ -38,7 +38,7 @@ class LegacyProfileV01Convertor(BaseMhdConvertor):
38
38
  )
39
39
  try:
40
40
  success, message = mhd_dataset_builder.build(
41
- mhd_id=mhd_identifier,
41
+ mhd_id=None,
42
42
  mtbls_study_id=repository_identifier,
43
43
  mtbls_study_path=mtbls_study_path,
44
44
  mtbls_study_repository_url=mtbls_study_repository_url,
@@ -266,7 +266,9 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
266
266
  study_db_metadata.first_private_date = self._get_date_string(
267
267
  study["first_private_date"]
268
268
  )
269
- study_db_metadata.submission_date = self.get_date(study["submissiondate"])
269
+ study_db_metadata.submission_date = self._get_date_string(
270
+ study["submissiondate"]
271
+ )
270
272
 
271
273
  study_db_metadata.curation_request = CurationRequest.get_from_int(
272
274
  study["curation_request"]
@@ -274,7 +276,7 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
274
276
  study_db_metadata.first_public_date = self._get_date_string(
275
277
  study["first_public_date"]
276
278
  )
277
- study_db_metadata.release_date = self.get_date(study["releasedate"])
279
+ study_db_metadata.release_date = self._get_date_string(study["releasedate"])
278
280
  study_db_metadata.update_date = self._get_date_time_string(study["updatedate"])
279
281
  study_db_metadata.status_date = self._get_date_time_string(study["status_date"])
280
282
  study_db_metadata.submitters = self._create_submitters(submitters)
@@ -294,6 +296,7 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
294
296
  study_db_metadata.dataset_license_version.upper(),
295
297
  ),
296
298
  )
299
+
297
300
  study_db_metadata.study_category = StudyCategory(study["study_category"])
298
301
  study_db_metadata.mhd_model_version = study["mhd_model_version"]
299
302
  study_db_metadata.reserved_mhd_accession = study["mhd_accession"] or ""
@@ -118,6 +118,8 @@ class LocalFolderMetadataCollector(AbstractFolderMetadataCollector):
118
118
  study_path,
119
119
  calculate_data_folder_size: bool = False,
120
120
  calculate_metadata_size: bool = False,
121
+ data_files_path: str = "FILES",
122
+ data_files_mapping_folder_name: None | str = None,
121
123
  ) -> Tuple[Union[None, StudyFolderMetadata], List[GenericMessage]]:
122
124
  messages: List[GenericMessage] = []
123
125
  study_folder_metadata = StudyFolderMetadata()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.39
12
+ Requires-Dist: mhd-model>=0.1.41
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  asyncpg>=0.30.0
2
2
  metabolights-utils>=1.4.16
3
- mhd-model>=0.1.39
3
+ mhd-model>=0.1.41
4
4
  psycopg[binary,pool]>=3.3.2
5
5
  pydantic>=2.12.4
6
6
  pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mtbls-mhd-integration"
3
- version = "0.0.10"
3
+ version = "0.0.12"
4
4
  description = "MetaboLights - MetabolomicsHub Integration"
5
5
  authors = [{"name" = "MetaboLights Team", "email" = "metabolights-help@ebi.ac.uk"}]
6
6
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ requires-python = ">=3.12,<4.0"
9
9
  dependencies = [
10
10
  "asyncpg>=0.30.0",
11
11
  "metabolights-utils>=1.4.16",
12
- "mhd-model>=0.1.39",
12
+ "mhd-model>=0.1.41",
13
13
  "psycopg[binary,pool]>=3.3.2",
14
14
  "pydantic>=2.12.4",
15
15
  "pydantic-settings>=2.10.1",
@@ -31,6 +31,10 @@ test = [
31
31
  "pytest-cov>=6.2.1",
32
32
  ]
33
33
 
34
+ [tool.uv]
35
+ default-groups = []
36
+
37
+
34
38
  [project.scripts]
35
39
  mtbls-mhd-cli = "mtbls2mhd.commands.cli:cli"
36
40
 
@@ -50,7 +54,7 @@ exclude = ["tests*", "docs*"]
50
54
  [tool.commitizen]
51
55
  name = "cz_conventional_commits"
52
56
  version_provider = "uv"
53
- version = "0.0.103"
57
+ version = "0.0.123"
54
58
  tag_format = "v$major.$minor.$patch"
55
59
  version_files = [
56
60
  "pyproject.toml:version",