mtbls-mhd-integration 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
mtbls2mhd/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.12"
1
+ __version__ = "v0.0.13"
2
2
 
3
3
  import pathlib
4
4
  import sys
@@ -53,11 +53,7 @@ logger = logging.getLogger(__name__)
53
53
  MTBLS_ASSAY_TYPES = {
54
54
  "LC-MS": COMMON_ASSAY_TYPES["OBI:0003097S"],
55
55
  "GC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
56
- "CE-MS": CvTerm(
57
- source="OBI",
58
- accession="OBI:0003741",
59
- name="capillary electrophoresis mass spectrometry assay",
60
- ),
56
+ "CE-MS": COMMON_ASSAY_TYPES["OBI:0003741"],
61
57
  "GCxGC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
62
58
  "FIA-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
63
59
  "MALDI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
@@ -1433,7 +1429,10 @@ class MhdLegacyDatasetBuilder:
1433
1429
  return protocols
1434
1430
 
1435
1431
  def add_keywords(
1436
- self, mhd_builder: MhDatasetBuilder, mhd_study: mhd_domain.Study, study: Study
1432
+ self,
1433
+ mhd_builder: MhDatasetBuilder,
1434
+ mhd_study: mhd_domain.Study,
1435
+ study: Study,
1437
1436
  ):
1438
1437
  for item in study.study_design_descriptors.design_types:
1439
1438
  keyword = create_cv_term_object(
@@ -1447,12 +1446,62 @@ class MhdLegacyDatasetBuilder:
1447
1446
  name=item.term or "",
1448
1447
  )
1449
1448
  mhd_builder.add_node(keyword)
1450
- mhd_builder.link(
1451
- mhd_study,
1452
- "has-submitter-keyword",
1453
- keyword,
1454
- reverse_relationship_name="keyword-of",
1455
- )
1449
+
1450
+ if item.source and item.source.lower() in ("data-curation", "workflows"):
1451
+ mhd_builder.link(
1452
+ mhd_study,
1453
+ "has-repository-keyword",
1454
+ keyword,
1455
+ reverse_relationship_name="keyword-of",
1456
+ )
1457
+ else:
1458
+ mhd_builder.link(
1459
+ mhd_study,
1460
+ "has-submitter-keyword",
1461
+ keyword,
1462
+ reverse_relationship_name="keyword-of",
1463
+ )
1464
+
1465
+ def add_assay_keywords(
1466
+ self,
1467
+ mhd_builder: MhDatasetBuilder,
1468
+ assays: dict[str, mhd_domain.Assay],
1469
+ study: Study,
1470
+ ):
1471
+ for assay in study.study_assays.assays:
1472
+ mhd_assay = assays.get(assay.file_name)
1473
+ if not mhd_assay:
1474
+ continue
1475
+ for item in assay.assay_descriptors:
1476
+ keyword = create_cv_term_object(
1477
+ type_="descriptor",
1478
+ source=item.term_source_ref or "",
1479
+ accession=self.convert_to_curie(
1480
+ item.term_source_ref,
1481
+ item.term_accession_number,
1482
+ )
1483
+ or "",
1484
+ name=item.term or "",
1485
+ )
1486
+ mhd_builder.add_node(keyword)
1487
+
1488
+ if item.source and item.source.lower() in (
1489
+ "data-curation",
1490
+ "workflows",
1491
+ ):
1492
+ mhd_builder.link(
1493
+ mhd_assay,
1494
+ "has-repository-keyword",
1495
+ keyword,
1496
+ reverse_relationship_name="keyword-of",
1497
+ )
1498
+ else:
1499
+ mhd_builder.link(
1500
+ mhd_assay,
1501
+ "has-submitter-keyword",
1502
+ keyword,
1503
+ reverse_relationship_name="keyword-of",
1504
+ )
1456
1505
 
1457
1506
  def find_file_format(
1458
1507
  self,
@@ -1757,9 +1806,9 @@ class MhdLegacyDatasetBuilder:
1757
1806
  metadata_files: dict[str, mhd_domain.CvTermObject],
1758
1807
  samples: dict[str, mhd_domain.Sample],
1759
1808
  files_map,
1760
- ) -> mhd_domain.Assay:
1809
+ ) -> dict[str, mhd_domain.Assay]:
1761
1810
  protocol_summaries: OrderedDict[str, ProtocolRunSummary] = OrderedDict()
1762
- assays: list[mhd_domain.Assay] = []
1811
+ assays = dict[str, mhd_domain.Assay] = OrderedDict()
1763
1812
  for assay in selected_assays:
1764
1813
  if assay.file_name not in data.assays:
1765
1814
  continue
@@ -1773,7 +1822,7 @@ class MhdLegacyDatasetBuilder:
1773
1822
  )
1774
1823
 
1775
1824
  mhd_builder.add(mhd_assay)
1776
- assays.append(mhd_assay)
1825
+ assays[assay.file_name] = mhd_assay
1777
1826
  mhd_builder.link(
1778
1827
  mhd_study, "has-assay", mhd_assay, reverse_relationship_name="part-of"
1779
1828
  )
@@ -1882,8 +1931,7 @@ class MhdLegacyDatasetBuilder:
1882
1931
  samples,
1883
1932
  protocol_summaries,
1884
1933
  )
1885
-
1886
- for mhd_assay in assays:
1934
+ for _, mhd_assay in assays.items():
1887
1935
  self.add_assay_protocols(mhd_builder, mhd_study, data, mhd_assay)
1888
1936
  return assays
1889
1937
 
@@ -2020,20 +2068,28 @@ class MhdLegacyDatasetBuilder:
2020
2068
  data.study_db_metadata.release_date,
2021
2069
  )
2022
2070
  # actual or estimated
2023
- public_release_date_str = (
2024
- db_metadata.first_public_date or db_metadata.release_date or None
2025
- )
2071
+ submission_date_str = None
2072
+ public_release_date_str = None
2073
+ if db_metadata:
2074
+ if db_metadata.first_private_date:
2075
+ submission_date_str = db_metadata.first_private_date
2076
+ elif db_metadata.submission_date:
2077
+ submission_date_str = db_metadata.submission_date
2078
+ if db_metadata.first_public_date:
2079
+ public_release_date_str = db_metadata.first_public_date
2080
+ elif db_metadata.release_date:
2081
+ public_release_date_str = db_metadata.release_date
2082
+
2026
2083
  public_release_date = (
2027
2084
  datetime.datetime.strptime(public_release_date_str, "%Y-%m-%d")
2028
2085
  if public_release_date_str
2029
2086
  else None
2030
2087
  )
2031
2088
  submission_date = (
2032
- datetime.datetime.strptime(db_metadata.first_private_date, "%Y-%m-%d")
2033
- if db_metadata and db_metadata.first_private_date
2089
+ datetime.datetime.strptime(submission_date_str, "%Y-%m-%d")
2090
+ if submission_date_str
2034
2091
  else None
2035
2092
  )
2036
-
2037
2093
  mhd_study = mhd_domain.Study(
2038
2094
  repository_identifier=study.identifier,
2039
2095
  created_by_ref=dataset_provider.id_,
@@ -2078,8 +2134,6 @@ class MhdLegacyDatasetBuilder:
2078
2134
  self.add_publications(data, mhd_builder, mhd_study)
2079
2135
  self.add_protocols(mhd_builder, mhd_study, study)
2080
2136
 
2081
- self.add_keywords(mhd_builder, mhd_study, study)
2082
-
2083
2137
  result_files = self.add_result_files(
2084
2138
  mhd_builder, mhd_study, data, config=config
2085
2139
  )
@@ -2093,7 +2147,7 @@ class MhdLegacyDatasetBuilder:
2093
2147
  result_files,
2094
2148
  config=config,
2095
2149
  )
2096
- self.add_assays(
2150
+ mhd_assays = self.add_assays(
2097
2151
  mhd_builder,
2098
2152
  mhd_study,
2099
2153
  data,
@@ -2102,6 +2156,8 @@ class MhdLegacyDatasetBuilder:
2102
2156
  samples,
2103
2157
  files_map,
2104
2158
  )
2159
+ self.add_keywords(mhd_builder, mhd_study, study)
2160
+ self.add_assay_keywords(mhd_builder, mhd_assays, study)
2105
2161
 
2106
2162
  mhd_dataset: MhDatasetBaseProfile = mhd_builder.create_dataset(
2107
2163
  start_item_refs=[mhd_study.id_], dataset_class=MhDatasetLegacyProfile
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.12
3
+ Version: 0.0.13
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.41
12
+ Requires-Dist: mhd-model>=0.1.43
13
13
  Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,4 +1,4 @@
1
- mtbls2mhd/__init__.py,sha256=lndk-uwz1jRMVCVy3G7BTa0Mz9464n8uyyOOlNSzYSs,158
1
+ mtbls2mhd/__init__.py,sha256=9NntcMjl9WIHQAy6xlxCyDazWlWBRKDTjy517m_kTgc,158
2
2
  mtbls2mhd/config.py,sha256=BjOqAyfDhp9byoFjJz70xh4HRR8pu1yrm_5jweqygSI,2310
3
3
  mtbls2mhd/convertor_factory.py,sha256=4loatqIRIvIhcaeIS0cSonJNYJu47o56ZllX6593ypk,1133
4
4
  mtbls2mhd/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -8,16 +8,16 @@ mtbls2mhd/commands/create_mhd_file.py,sha256=0sDr-Cm0JhhEB5V1g66uoag3rlcaAnGP8Md
8
8
  mtbls2mhd/commands/validate.py,sha256=iwIKegviRxdH0r8scRXbDISlwQUzAq5uVoCHinU7x6Q,473
9
9
  mtbls2mhd/v0_1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  mtbls2mhd/v0_1/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- mtbls2mhd/v0_1/legacy/builder.py,sha256=ou_YOtRNFjlLCvZYJc7JphczEbpOCrX5rqndCMxRFXI,88325
11
+ mtbls2mhd/v0_1/legacy/builder.py,sha256=4V0jxjeXrKRFv9ojC0lg1l8aO1FDVTugrW1zh8L2VL4,90582
12
12
  mtbls2mhd/v0_1/legacy/convertor.py,sha256=Nu6xJvEk8WsRQJQFoxM5eo-y46tVfWV8EkedVoqI9rI,2198
13
13
  mtbls2mhd/v0_1/legacy/db_metadata_collector.py,sha256=UGk1AeST1NQ9lWwy_sYZxaaWs0ajgaKELDJtXJ4-Uco,13071
14
14
  mtbls2mhd/v0_1/legacy/folder_metadata_collector.py,sha256=QwtXI9rBvdh6pxILQDHymIwYDqzGuMxqdOcqtdAObME,7538
15
15
  mtbls2mhd/v0_1/legacy/mtbls_study_schema.py,sha256=gUTbRmI8GfHI5leLiw8dxsmWnV3NnWw5RPX_LQWRFRQ,3162
16
16
  mtbls2mhd/v0_1/ms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  mtbls2mhd/v0_1/ms/convertor.py,sha256=kLIUpxOrH6hcs2Y9Bq1D0Mdvypg40pLyEJpHtGj6H_g,89
18
- mtbls_mhd_integration-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
- mtbls_mhd_integration-0.0.12.dist-info/METADATA,sha256=p307N8kU4TvWHnKv_28VXUSp7N2fi0ZAh5JN_7ILx08,688
20
- mtbls_mhd_integration-0.0.12.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
21
- mtbls_mhd_integration-0.0.12.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
- mtbls_mhd_integration-0.0.12.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
- mtbls_mhd_integration-0.0.12.dist-info/RECORD,,
18
+ mtbls_mhd_integration-0.0.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
+ mtbls_mhd_integration-0.0.13.dist-info/METADATA,sha256=pww_KrAeo8l_Ehfxht5GJIGyvPC-5bvJjNEr95sTOU4,688
20
+ mtbls_mhd_integration-0.0.13.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
21
+ mtbls_mhd_integration-0.0.13.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
+ mtbls_mhd_integration-0.0.13.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
+ mtbls_mhd_integration-0.0.13.dist-info/RECORD,,