mtbls-mhd-integration 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {mtbls_mhd_integration-0.0.3/mtbls_mhd_integration.egg-info → mtbls_mhd_integration-0.0.4}/PKG-INFO +2 -2
  2. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/__init__.py +1 -1
  3. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/config.py +1 -0
  4. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/builder.py +48 -12
  5. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/db_metadata_collector.py +69 -33
  6. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4/mtbls_mhd_integration.egg-info}/PKG-INFO +2 -2
  7. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls_mhd_integration.egg-info/requires.txt +1 -1
  8. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/pyproject.toml +3 -3
  9. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/LICENSE +0 -0
  10. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/README.md +0 -0
  11. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/commands/__init__.py +0 -0
  12. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/commands/cli.py +0 -0
  13. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/commands/create.py +0 -0
  14. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/commands/create_mhd_file.py +0 -0
  15. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/commands/validate.py +0 -0
  16. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/convertor_factory.py +0 -0
  17. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/__init__.py +0 -0
  18. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/__init__.py +0 -0
  19. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/convertor.py +0 -0
  20. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/folder_metadata_collector.py +0 -0
  21. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/legacy/mtbls_study_schema.py +0 -0
  22. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/ms/__init__.py +0 -0
  23. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls2mhd/v0_1/ms/convertor.py +0 -0
  24. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls_mhd_integration.egg-info/SOURCES.txt +0 -0
  25. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls_mhd_integration.egg-info/dependency_links.txt +0 -0
  26. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls_mhd_integration.egg-info/entry_points.txt +0 -0
  27. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/mtbls_mhd_integration.egg-info/top_level.txt +0 -0
  28. {mtbls_mhd_integration-0.0.3 → mtbls_mhd_integration-0.0.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.37
12
+ Requires-Dist: mhd-model>=0.1.38
13
13
  Requires-Dist: psycopg2-binary>=2.9.11
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.3"
1
+ __version__ = "v0.0.4"
2
2
 
3
3
  import pathlib
4
4
  import sys
@@ -24,6 +24,7 @@ class Mtbls2MhdConfiguration(BaseSettings):
24
24
  default_dataset_licence_url: str = (
25
25
  "https://creativecommons.org/publicdomain/zero/1.0"
26
26
  )
27
+ default_mhd_model_version: str = "0.1"
27
28
  mtbls_studies_root_path: str
28
29
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
29
30
 
@@ -1,3 +1,4 @@
1
+ import datetime
1
2
  import enum
2
3
  import json
3
4
  import logging
@@ -52,7 +53,16 @@ logger = logging.getLogger(__name__)
52
53
  MTBLS_ASSAY_TYPES = {
53
54
  "LC-MS": COMMON_ASSAY_TYPES["OBI:0003097S"],
54
55
  "GC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
55
- # TODO: Add CE-MS, FIA-MS, DI-MS for MetaboLights
56
+ "CE-MS": CvTerm(
57
+ source="OBI",
58
+ accession="OBI:0003741",
59
+ name="capillary electrophoresis mass spectrometry assay",
60
+ ),
61
+ "GCxGC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
62
+ "FIA-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
63
+ "MALDI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
64
+ "DI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
65
+ "MS": COMMON_ASSAY_TYPES["OBI:0000470"],
56
66
  }
57
67
  MTBLS_MEASUREMENT_TYPES = {
58
68
  "targeted": COMMON_MEASUREMENT_TYPES["MSIO:0000100"],
@@ -73,13 +83,20 @@ COMMON_PROTOCOLS_MAP = {
73
83
  "Treatment": COMMON_PROTOCOLS["EFO:0003969"],
74
84
  "Flow Injection Analysis": COMMON_PROTOCOLS["MS:1000058"],
75
85
  "Capillary Electrophoresis": COMMON_PROTOCOLS["CHMO:0001024"],
76
- "Direct infusion": COMMON_PROTOCOLS.get(
77
- "CHMO:0001024"
78
- ), # TODO: Update after adding to managed CV terms
86
+ # TODO: Update after adding to managed CV terms
79
87
  }
80
88
 
81
89
  MTBLS_PROTOCOLS_MAP = COMMON_PROTOCOLS_MAP.copy()
82
90
 
91
+ MTBLS_PROTOCOLS_MAP.update(
92
+ {
93
+ "Direct infusion": CvTerm(
94
+ source="MS",
95
+ accession="MS:1000060",
96
+ name="infusion",
97
+ ),
98
+ }
99
+ )
83
100
  MANAGED_CHARACTERISTICS_MAP = {
84
101
  "organism": COMMON_CHARACTERISTIC_DEFINITIONS["NCIT:C14250"],
85
102
  "organism part": COMMON_CHARACTERISTIC_DEFINITIONS["NCIT:C103199"],
@@ -1877,7 +1894,23 @@ class MhdLegacyDatasetBuilder:
1877
1894
  error = f"{data.investigation_file_path} file does not have any study. Skipping..."
1878
1895
  logger.warning(error)
1879
1896
  return False, error
1880
-
1897
+ if not revision:
1898
+ db_metadata = data.study_db_metadata
1899
+ revision_date = (
1900
+ datetime.datetime.strptime(db_metadata.revision_date, "%Y-%m-%d")
1901
+ if db_metadata and db_metadata.revision_date
1902
+ else None
1903
+ )
1904
+ if revision_date:
1905
+ revision = Revision(
1906
+ revision_datetime=revision_date,
1907
+ revision=db_metadata.revision_number
1908
+ if db_metadata.revision_number
1909
+ else 0,
1910
+ comment=db_metadata.revision_comment
1911
+ if db_metadata.revision_comment
1912
+ else "",
1913
+ )
1881
1914
  selected_assays: list[Assay] = []
1882
1915
  study = data.investigation.studies[0]
1883
1916
 
@@ -1913,9 +1946,14 @@ class MhdLegacyDatasetBuilder:
1913
1946
  repository_identifier=study.identifier,
1914
1947
  schema_name=target_mhd_model_schema_uri,
1915
1948
  profile_uri=target_mhd_model_profile_uri,
1916
- repository_revision=revision.revision if revision else 1,
1949
+ repository_revision=revision.revision
1950
+ if revision and revision and revision.revision
1951
+ else 0,
1917
1952
  repository_revision_datetime=revision.revision_datetime
1918
- if revision
1953
+ if revision and revision.revision_datetime
1954
+ else None,
1955
+ repository_revision_comment=revision.comment
1956
+ if revision and revision.comment
1919
1957
  else None,
1920
1958
  change_log=[revision] if revision else None,
1921
1959
  )
@@ -1970,11 +2008,9 @@ class MhdLegacyDatasetBuilder:
1970
2008
  self.add_study_factor_definitions(mhd_builder, mhd_study, data, build_type)
1971
2009
  samples = self.add_samples(mhd_builder, mhd_study, sample_file, build_type)
1972
2010
  if build_type in (BuildType.FULL, BuildType.FULL_AND_CUSTOM_NODES):
1973
- mhd_study.license = (
1974
- HttpUrl(config.default_dataset_licence_url)
1975
- if config.default_dataset_licence_url
1976
- else None
1977
- )
2011
+ mhd_study.license = data.study_db_metadata.dataset_license_url
2012
+ if not mhd_study.license:
2013
+ mhd_study.license = HttpUrl(config.default_dataset_licence_url) or ""
1978
2014
 
1979
2015
  self.add_publications(data, mhd_builder, mhd_study)
1980
2016
  self.add_protocols(mhd_builder, mhd_study, study)
@@ -8,6 +8,7 @@ import psycopg2
8
8
  from metabolights_utils.models.common import ErrorMessage
9
9
  from metabolights_utils.models.metabolights.model import (
10
10
  CurationRequest,
11
+ StudyCategory,
11
12
  StudyDBMetadata,
12
13
  StudyStatus,
13
14
  Submitter,
@@ -59,22 +60,6 @@ def get_session_factory(mtbls2mhd_config: Mtbls2MhdConfiguration):
59
60
  return AsyncSessionFactory
60
61
 
61
62
 
62
- STUDY_FIELDS = [
63
- "id",
64
- "acc",
65
- "obfuscationcode",
66
- "submissiondate",
67
- "releasedate",
68
- "updatedate",
69
- "studysize",
70
- "status_date",
71
- "studytype",
72
- "status",
73
- "override",
74
- "comment",
75
- "curation_request",
76
- ]
77
-
78
63
  SUBMITTER_FIELDS = [
79
64
  "id",
80
65
  "orcid",
@@ -118,10 +103,18 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
118
103
  def get_study_metadata_from_db(self, study_id: str, connection):
119
104
  try:
120
105
  study = self._get_study_from_db(study_id, connection)
106
+ revision = None
107
+ if study["revision_number"] and study["revision_number"] > 0:
108
+ revision = self._get_study_revision_from_db(
109
+ study_id, study["revision_number"], connection
110
+ )
121
111
  submitters = self._get_study_submitters_from_db(study_id, connection)
122
- study_db_metadata = self._create_study_db_metadata(study, submitters)
112
+ study_db_metadata = self._create_study_db_metadata(
113
+ study, revision, submitters
114
+ )
123
115
  return study_db_metadata, []
124
116
  except Exception as ex:
117
+ logger.exception(ex)
125
118
  return StudyDBMetadata(), [
126
119
  ErrorMessage(short="Error while loading db metadata", detail=str(ex))
127
120
  ]
@@ -201,9 +194,7 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
201
194
  return study_ids
202
195
 
203
196
  def _get_study_from_db(self, study_id: str, connection):
204
- _input = (
205
- f"select {', '.join(STUDY_FIELDS)} from studies where acc = %(study_id)s;"
206
- )
197
+ _input = "select * from studies where acc = %(study_id)s;"
207
198
  try:
208
199
  cursor = connection.cursor(cursor_factory=DictCursor)
209
200
  cursor.execute(_input, {"study_id": study_id})
@@ -213,6 +204,17 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
213
204
  except Exception as ex:
214
205
  raise ex
215
206
 
207
+ def _get_study_revision_from_db(self, study_id: str, revision: int, connection):
208
+ _input = "select * from study_revisions where accession_number = %(study_id)s and revision_number = %(revision)s;"
209
+ try:
210
+ cursor = connection.cursor(cursor_factory=DictCursor)
211
+ cursor.execute(_input, {"study_id": study_id, "revision": revision})
212
+ data = cursor.fetchone()
213
+ return data
214
+
215
+ except Exception as ex:
216
+ raise ex
217
+
216
218
  def _get_study_submitters_from_db(self, study_id: str, connection):
217
219
  submitter_fields = [f"u.{field}" for field in SUBMITTER_FIELDS]
218
220
 
@@ -229,8 +231,19 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
229
231
  except Exception as ex:
230
232
  raise ex
231
233
 
234
+ LICENSE_URLS = {
235
+ (
236
+ "CC0 1.0 UNIVERSAL",
237
+ "1.0",
238
+ ): "https://creativecommons.org/publicdomain/zero/1.0/",
239
+ (
240
+ "EMBL-EBI TERMS OF USE",
241
+ "5TH FEBRUARY 2024",
242
+ ): "https://www.ebi.ac.uk/about/terms-of-use/",
243
+ }
244
+
232
245
  def _create_study_db_metadata(
233
- self, study, submitters: List[Dict[str, Any]]
246
+ self, study, revision, submitters: List[Dict[str, Any]]
234
247
  ) -> StudyDBMetadata:
235
248
  study_db_metadata: StudyDBMetadata = StudyDBMetadata()
236
249
  study_db_metadata.db_id = study["id"] or -1
@@ -248,26 +261,49 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
248
261
  if study["studytype"] and len(study["studytype"].strip()) > 0:
249
262
  study_db_metadata.study_types = study["studytype"].strip().split(";")
250
263
 
251
- if study["override"] and len(study["override"].strip()) > 0:
252
- override_list = study["override"].strip().split("|")
253
- overrides = {}
254
- for item in override_list:
255
- if item:
256
- key_value = item.split(":")
257
- if len(key_value) > 1:
258
- overrides[key_value[0]] = key_value[1] or ""
259
- study_db_metadata.overrides = overrides
260
264
  study_db_metadata.study_size = int(study["studysize"])
261
- study_db_metadata.submission_date = self._get_date_string(
262
- study["submissiondate"]
265
+
266
+ study_db_metadata.first_private_date = self._get_date_string(
267
+ study["first_private_date"]
263
268
  )
269
+ study_db_metadata.submission_date = study_db_metadata.first_private_date
270
+
264
271
  study_db_metadata.curation_request = CurationRequest.get_from_int(
265
272
  study["curation_request"]
266
273
  )
267
- study_db_metadata.release_date = self._get_date_string(study["releasedate"])
274
+ study_db_metadata.first_public_date = self._get_date_string(
275
+ study["first_public_date"]
276
+ )
277
+ study_db_metadata.release_date = study_db_metadata.first_public_date
268
278
  study_db_metadata.update_date = self._get_date_time_string(study["updatedate"])
269
279
  study_db_metadata.status_date = self._get_date_time_string(study["status_date"])
270
280
  study_db_metadata.submitters = self._create_submitters(submitters)
281
+ if revision:
282
+ study_db_metadata.revision_number = revision.get("revision_number", 0)
283
+ study_db_metadata.revision_comment = revision.get("revision_comment", "")
284
+ study_db_metadata.revision_date = self._get_date_string(
285
+ revision.get("revision_datetime", "")
286
+ )
287
+ study_db_metadata.dataset_license = study["dataset_license"] or ""
288
+ study_db_metadata.dataset_license_version = (
289
+ study["dataset_license_version"] or ""
290
+ )
291
+ study_db_metadata.dataset_license_url = self.LICENSE_URLS.get(
292
+ (
293
+ study_db_metadata.dataset_license.upper(),
294
+ study_db_metadata.dataset_license_version.upper(),
295
+ ),
296
+ )
297
+ study_db_metadata.study_category = StudyCategory(study["study_category"])
298
+ study_db_metadata.mhd_model_version = study["mhd_model_version"]
299
+ study_db_metadata.reserved_mhd_accession = study["mhd_accession"] or ""
300
+ study_db_metadata.created_at = (
301
+ self._get_date_time_string(study["created_at"]) or ""
302
+ )
303
+ study_db_metadata.study_template = study["study_template"] or ""
304
+ study_db_metadata.sample_template = study["sample_type"] or ""
305
+ study_db_metadata.template_version = study["template_version"] or ""
306
+
271
307
  return study_db_metadata
272
308
 
273
309
  def _create_submitters(self, submitters: List[Dict[str, Any]]) -> List[Submitter]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,7 +9,7 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.37
12
+ Requires-Dist: mhd-model>=0.1.38
13
13
  Requires-Dist: psycopg2-binary>=2.9.11
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  asyncpg>=0.30.0
2
2
  metabolights-utils>=1.4.16
3
- mhd-model>=0.1.37
3
+ mhd-model>=0.1.38
4
4
  psycopg2-binary>=2.9.11
5
5
  pydantic>=2.12.4
6
6
  pydantic-settings>=2.10.1
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mtbls-mhd-integration"
3
- version = "0.0.3"
3
+ version = "0.0.4"
4
4
  description = "MetaboLights - MetabolomicsHub Integration"
5
5
  authors = [{"name" = "MetaboLights Team", "email" = "metabolights-help@ebi.ac.uk"}]
6
6
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ requires-python = ">=3.12"
9
9
  dependencies = [
10
10
  "asyncpg>=0.30.0",
11
11
  "metabolights-utils>=1.4.16",
12
- "mhd-model>=0.1.37",
12
+ "mhd-model>=0.1.38",
13
13
  "psycopg2-binary>=2.9.11",
14
14
  "pydantic>=2.12.4",
15
15
  "pydantic-settings>=2.10.1",
@@ -50,7 +50,7 @@ exclude = ["tests*", "docs*"]
50
50
  [tool.commitizen]
51
51
  name = "cz_conventional_commits"
52
52
  version_provider = "uv"
53
- version = "0.0.33"
53
+ version = "0.0.43"
54
54
  tag_format = "v$major.$minor.$patch"
55
55
  version_files = [
56
56
  "pyproject.toml:version",