mtbls-mhd-integration 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mtbls2mhd/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "v0.0.3"
1
+ __version__ = "v0.0.5"
2
2
 
3
3
  import pathlib
4
4
  import sys
mtbls2mhd/config.py CHANGED
@@ -24,6 +24,7 @@ class Mtbls2MhdConfiguration(BaseSettings):
24
24
  default_dataset_licence_url: str = (
25
25
  "https://creativecommons.org/publicdomain/zero/1.0"
26
26
  )
27
+ default_mhd_model_version: str = "0.1"
27
28
  mtbls_studies_root_path: str
28
29
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
29
30
 
mtbls2mhd/v0_1/legacy/builder.py CHANGED
@@ -1,3 +1,4 @@
1
+ import datetime
1
2
  import enum
2
3
  import json
3
4
  import logging
@@ -52,7 +53,16 @@ logger = logging.getLogger(__name__)
52
53
  MTBLS_ASSAY_TYPES = {
53
54
  "LC-MS": COMMON_ASSAY_TYPES["OBI:0003097S"],
54
55
  "GC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
55
- # TODO: Add CE-MS, FIA-MS, DI-MS for MetaboLights
56
+ "CE-MS": CvTerm(
57
+ source="OBI",
58
+ accession="OBI:0003741",
59
+ name="capillary electrophoresis mass spectrometry assay",
60
+ ),
61
+ "GCxGC-MS": COMMON_ASSAY_TYPES["OBI:0003110"],
62
+ "FIA-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
63
+ "MALDI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
64
+ "DI-MS": COMMON_ASSAY_TYPES["OBI:0000470"],
65
+ "MS": COMMON_ASSAY_TYPES["OBI:0000470"],
56
66
  }
57
67
  MTBLS_MEASUREMENT_TYPES = {
58
68
  "targeted": COMMON_MEASUREMENT_TYPES["MSIO:0000100"],
@@ -73,13 +83,20 @@ COMMON_PROTOCOLS_MAP = {
73
83
  "Treatment": COMMON_PROTOCOLS["EFO:0003969"],
74
84
  "Flow Injection Analysis": COMMON_PROTOCOLS["MS:1000058"],
75
85
  "Capillary Electrophoresis": COMMON_PROTOCOLS["CHMO:0001024"],
76
- "Direct infusion": COMMON_PROTOCOLS.get(
77
- "CHMO:0001024"
78
- ), # TODO: Update after adding to managed CV terms
86
+ # TODO: Update after adding to managed CV terms
79
87
  }
80
88
 
81
89
  MTBLS_PROTOCOLS_MAP = COMMON_PROTOCOLS_MAP.copy()
82
90
 
91
+ MTBLS_PROTOCOLS_MAP.update(
92
+ {
93
+ "Direct infusion": CvTerm(
94
+ source="MS",
95
+ accession="MS:1000060",
96
+ name="infusion",
97
+ ),
98
+ }
99
+ )
83
100
  MANAGED_CHARACTERISTICS_MAP = {
84
101
  "organism": COMMON_CHARACTERISTIC_DEFINITIONS["NCIT:C14250"],
85
102
  "organism part": COMMON_CHARACTERISTIC_DEFINITIONS["NCIT:C103199"],
@@ -1877,7 +1894,23 @@ class MhdLegacyDatasetBuilder:
1877
1894
  error = f"{data.investigation_file_path} file does not have any study. Skipping..."
1878
1895
  logger.warning(error)
1879
1896
  return False, error
1880
-
1897
+ if not revision:
1898
+ db_metadata = data.study_db_metadata
1899
+ revision_date = (
1900
+ datetime.datetime.strptime(db_metadata.revision_date, "%Y-%m-%d")
1901
+ if db_metadata and db_metadata.revision_date
1902
+ else None
1903
+ )
1904
+ if revision_date:
1905
+ revision = Revision(
1906
+ revision_datetime=revision_date,
1907
+ revision=db_metadata.revision_number
1908
+ if db_metadata.revision_number
1909
+ else 0,
1910
+ comment=db_metadata.revision_comment
1911
+ if db_metadata.revision_comment
1912
+ else "",
1913
+ )
1881
1914
  selected_assays: list[Assay] = []
1882
1915
  study = data.investigation.studies[0]
1883
1916
 
@@ -1913,9 +1946,14 @@ class MhdLegacyDatasetBuilder:
1913
1946
  repository_identifier=study.identifier,
1914
1947
  schema_name=target_mhd_model_schema_uri,
1915
1948
  profile_uri=target_mhd_model_profile_uri,
1916
- repository_revision=revision.revision if revision else 1,
1949
+ repository_revision=revision.revision
1950
+ if revision and revision and revision.revision
1951
+ else 0,
1917
1952
  repository_revision_datetime=revision.revision_datetime
1918
- if revision
1953
+ if revision and revision.revision_datetime
1954
+ else None,
1955
+ repository_revision_comment=revision.comment
1956
+ if revision and revision.comment
1919
1957
  else None,
1920
1958
  change_log=[revision] if revision else None,
1921
1959
  )
@@ -1970,11 +2008,9 @@ class MhdLegacyDatasetBuilder:
1970
2008
  self.add_study_factor_definitions(mhd_builder, mhd_study, data, build_type)
1971
2009
  samples = self.add_samples(mhd_builder, mhd_study, sample_file, build_type)
1972
2010
  if build_type in (BuildType.FULL, BuildType.FULL_AND_CUSTOM_NODES):
1973
- mhd_study.license = (
1974
- HttpUrl(config.default_dataset_licence_url)
1975
- if config.default_dataset_licence_url
1976
- else None
1977
- )
2011
+ mhd_study.license = data.study_db_metadata.dataset_license_url
2012
+ if not mhd_study.license:
2013
+ mhd_study.license = HttpUrl(config.default_dataset_licence_url) or ""
1978
2014
 
1979
2015
  self.add_publications(data, mhd_builder, mhd_study)
1980
2016
  self.add_protocols(mhd_builder, mhd_study, study)
mtbls2mhd/v0_1/legacy/db_metadata_collector.py CHANGED
@@ -4,10 +4,11 @@ from functools import lru_cache
4
4
  from logging import getLogger
5
5
  from typing import Any, Dict, List
6
6
 
7
- import psycopg2
7
+ import psycopg
8
8
  from metabolights_utils.models.common import ErrorMessage
9
9
  from metabolights_utils.models.metabolights.model import (
10
10
  CurationRequest,
11
+ StudyCategory,
11
12
  StudyDBMetadata,
12
13
  StudyStatus,
13
14
  Submitter,
@@ -15,7 +16,7 @@ from metabolights_utils.models.metabolights.model import (
15
16
  UserStatus,
16
17
  )
17
18
  from metabolights_utils.provider.study_provider import AbstractDbMetadataCollector
18
- from psycopg2.extras import DictCursor
19
+ from psycopg.rows import dict_row
19
20
  from sqlalchemy import or_, select
20
21
  from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
21
22
  from sqlalchemy.orm import sessionmaker
@@ -59,22 +60,6 @@ def get_session_factory(mtbls2mhd_config: Mtbls2MhdConfiguration):
59
60
  return AsyncSessionFactory
60
61
 
61
62
 
62
- STUDY_FIELDS = [
63
- "id",
64
- "acc",
65
- "obfuscationcode",
66
- "submissiondate",
67
- "releasedate",
68
- "updatedate",
69
- "studysize",
70
- "status_date",
71
- "studytype",
72
- "status",
73
- "override",
74
- "comment",
75
- "curation_request",
76
- ]
77
-
78
63
  SUBMITTER_FIELDS = [
79
64
  "id",
80
65
  "orcid",
@@ -98,15 +83,16 @@ def create_postgresql_connection(mtbls2mhd_config: Mtbls2MhdConfiguration):
98
83
  Creates and returns a PostgreSQL connection.
99
84
  """
100
85
  try:
101
- connection = psycopg2.connect(
86
+ connection = psycopg.connect(
102
87
  dbname=mtbls2mhd_config.database_name,
103
88
  user=mtbls2mhd_config.database_user,
104
89
  password=mtbls2mhd_config.database_user_password,
105
90
  host=mtbls2mhd_config.database_host,
106
91
  port=mtbls2mhd_config.database_host_port,
92
+ row_factory=dict_row,
107
93
  )
108
94
  return connection
109
- except psycopg2.Error as e:
95
+ except psycopg.Error as e:
110
96
  logger.exception(e)
111
97
  raise e
112
98
 
@@ -118,10 +104,18 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
118
104
  def get_study_metadata_from_db(self, study_id: str, connection):
119
105
  try:
120
106
  study = self._get_study_from_db(study_id, connection)
107
+ revision = None
108
+ if study["revision_number"] and study["revision_number"] > 0:
109
+ revision = self._get_study_revision_from_db(
110
+ study_id, study["revision_number"], connection
111
+ )
121
112
  submitters = self._get_study_submitters_from_db(study_id, connection)
122
- study_db_metadata = self._create_study_db_metadata(study, submitters)
113
+ study_db_metadata = self._create_study_db_metadata(
114
+ study, revision, submitters
115
+ )
123
116
  return study_db_metadata, []
124
117
  except Exception as ex:
118
+ logger.exception(ex)
125
119
  return StudyDBMetadata(), [
126
120
  ErrorMessage(short="Error while loading db metadata", detail=str(ex))
127
121
  ]
@@ -178,7 +172,7 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
178
172
  where_clause = " and ".join(_filter)
179
173
  _input = f"select acc from studies where {where_clause};"
180
174
  try:
181
- cursor = connection.cursor(cursor_factory=DictCursor)
175
+ cursor = connection.cursor()
182
176
  cursor.execute(_input)
183
177
  data = cursor.fetchall()
184
178
  return data
@@ -201,11 +195,9 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
201
195
  return study_ids
202
196
 
203
197
  def _get_study_from_db(self, study_id: str, connection):
204
- _input = (
205
- f"select {', '.join(STUDY_FIELDS)} from studies where acc = %(study_id)s;"
206
- )
198
+ _input = "select * from studies where acc = %(study_id)s;"
207
199
  try:
208
- cursor = connection.cursor(cursor_factory=DictCursor)
200
+ cursor = connection.cursor()
209
201
  cursor.execute(_input, {"study_id": study_id})
210
202
  data = cursor.fetchone()
211
203
  return data
@@ -213,13 +205,24 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
213
205
  except Exception as ex:
214
206
  raise ex
215
207
 
208
+ def _get_study_revision_from_db(self, study_id: str, revision: int, connection):
209
+ _input = "select * from study_revisions where accession_number = %(study_id)s and revision_number = %(revision)s;"
210
+ try:
211
+ cursor = connection.cursor()
212
+ cursor.execute(_input, {"study_id": study_id, "revision": revision})
213
+ data = cursor.fetchone()
214
+ return data
215
+
216
+ except Exception as ex:
217
+ raise ex
218
+
216
219
  def _get_study_submitters_from_db(self, study_id: str, connection):
217
220
  submitter_fields = [f"u.{field}" for field in SUBMITTER_FIELDS]
218
221
 
219
222
  _input = f"select {', '.join(submitter_fields)} from studies as s, study_user as su, \
220
223
  users as u where su.userid = u.id and su.studyid = s.id and s.acc = %(study_id)s;"
221
224
  try:
222
- cursor = connection.cursor(cursor_factory=DictCursor)
225
+ cursor = connection.cursor()
223
226
  cursor.execute(_input, {"study_id": study_id})
224
227
  data = cursor.fetchall()
225
228
  if data:
@@ -229,8 +232,19 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
229
232
  except Exception as ex:
230
233
  raise ex
231
234
 
235
+ LICENSE_URLS = {
236
+ (
237
+ "CC0 1.0 UNIVERSAL",
238
+ "1.0",
239
+ ): "https://creativecommons.org/publicdomain/zero/1.0/",
240
+ (
241
+ "EMBL-EBI TERMS OF USE",
242
+ "5TH FEBRUARY 2024",
243
+ ): "https://www.ebi.ac.uk/about/terms-of-use/",
244
+ }
245
+
232
246
  def _create_study_db_metadata(
233
- self, study, submitters: List[Dict[str, Any]]
247
+ self, study, revision, submitters: List[Dict[str, Any]]
234
248
  ) -> StudyDBMetadata:
235
249
  study_db_metadata: StudyDBMetadata = StudyDBMetadata()
236
250
  study_db_metadata.db_id = study["id"] or -1
@@ -248,26 +262,49 @@ class DbMetadataCollector(AbstractDbMetadataCollector):
248
262
  if study["studytype"] and len(study["studytype"].strip()) > 0:
249
263
  study_db_metadata.study_types = study["studytype"].strip().split(";")
250
264
 
251
- if study["override"] and len(study["override"].strip()) > 0:
252
- override_list = study["override"].strip().split("|")
253
- overrides = {}
254
- for item in override_list:
255
- if item:
256
- key_value = item.split(":")
257
- if len(key_value) > 1:
258
- overrides[key_value[0]] = key_value[1] or ""
259
- study_db_metadata.overrides = overrides
260
265
  study_db_metadata.study_size = int(study["studysize"])
261
- study_db_metadata.submission_date = self._get_date_string(
262
- study["submissiondate"]
266
+
267
+ study_db_metadata.first_private_date = self._get_date_string(
268
+ study["first_private_date"]
263
269
  )
270
+ study_db_metadata.submission_date = study_db_metadata.first_private_date
271
+
264
272
  study_db_metadata.curation_request = CurationRequest.get_from_int(
265
273
  study["curation_request"]
266
274
  )
267
- study_db_metadata.release_date = self._get_date_string(study["releasedate"])
275
+ study_db_metadata.first_public_date = self._get_date_string(
276
+ study["first_public_date"]
277
+ )
278
+ study_db_metadata.release_date = study_db_metadata.first_public_date
268
279
  study_db_metadata.update_date = self._get_date_time_string(study["updatedate"])
269
280
  study_db_metadata.status_date = self._get_date_time_string(study["status_date"])
270
281
  study_db_metadata.submitters = self._create_submitters(submitters)
282
+ if revision:
283
+ study_db_metadata.revision_number = revision.get("revision_number", 0)
284
+ study_db_metadata.revision_comment = revision.get("revision_comment", "")
285
+ study_db_metadata.revision_date = self._get_date_string(
286
+ revision.get("revision_datetime", "")
287
+ )
288
+ study_db_metadata.dataset_license = study["dataset_license"] or ""
289
+ study_db_metadata.dataset_license_version = (
290
+ study["dataset_license_version"] or ""
291
+ )
292
+ study_db_metadata.dataset_license_url = self.LICENSE_URLS.get(
293
+ (
294
+ study_db_metadata.dataset_license.upper(),
295
+ study_db_metadata.dataset_license_version.upper(),
296
+ ),
297
+ )
298
+ study_db_metadata.study_category = StudyCategory(study["study_category"])
299
+ study_db_metadata.mhd_model_version = study["mhd_model_version"]
300
+ study_db_metadata.reserved_mhd_accession = study["mhd_accession"] or ""
301
+ study_db_metadata.created_at = (
302
+ self._get_date_time_string(study["created_at"]) or ""
303
+ )
304
+ study_db_metadata.study_template = study["study_template"] or ""
305
+ study_db_metadata.sample_template = study["sample_type"] or ""
306
+ study_db_metadata.template_version = study["template_version"] or ""
307
+
271
308
  return study_db_metadata
272
309
 
273
310
  def _create_submitters(self, submitters: List[Dict[str, Any]]) -> List[Submitter]:
mtbls_mhd_integration-0.0.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mtbls-mhd-integration
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: MetaboLights - MetabolomicsHub Integration
5
5
  Author-email: MetaboLights Team <metabolights-help@ebi.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -9,8 +9,8 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: asyncpg>=0.30.0
11
11
  Requires-Dist: metabolights-utils>=1.4.16
12
- Requires-Dist: mhd-model>=0.1.37
13
- Requires-Dist: psycopg2-binary>=2.9.11
12
+ Requires-Dist: mhd-model>=0.1.39
13
+ Requires-Dist: psycopg[binary,pool]>=3.3.2
14
14
  Requires-Dist: pydantic>=2.12.4
15
15
  Requires-Dist: pydantic-settings>=2.10.1
16
16
  Requires-Dist: pyyaml>=6.0.3
mtbls_mhd_integration-0.0.5.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
- mtbls2mhd/__init__.py,sha256=4oQxeAlcyP87VQOUq168Fi3sYGp0s98KvT6gDZltZWw,157
2
- mtbls2mhd/config.py,sha256=z_iN3rUolcLXz_Q55DraGmyOUhJhP0xK4zlmaj-VWxw,2267
1
+ mtbls2mhd/__init__.py,sha256=3byMWcFYU6S6BKe-liUPRtmlSwQrjbuEljIatokiM3s,157
2
+ mtbls2mhd/config.py,sha256=BjOqAyfDhp9byoFjJz70xh4HRR8pu1yrm_5jweqygSI,2310
3
3
  mtbls2mhd/convertor_factory.py,sha256=4loatqIRIvIhcaeIS0cSonJNYJu47o56ZllX6593ypk,1133
4
4
  mtbls2mhd/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  mtbls2mhd/commands/cli.py,sha256=XhLcqBxmBNX_nSEc4aU9q2D9VoYvqZKeRuYLbt-JwZ8,662
@@ -8,16 +8,16 @@ mtbls2mhd/commands/create_mhd_file.py,sha256=0sDr-Cm0JhhEB5V1g66uoag3rlcaAnGP8Md
8
8
  mtbls2mhd/commands/validate.py,sha256=iwIKegviRxdH0r8scRXbDISlwQUzAq5uVoCHinU7x6Q,473
9
9
  mtbls2mhd/v0_1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  mtbls2mhd/v0_1/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- mtbls2mhd/v0_1/legacy/builder.py,sha256=SdNvxZMMyDz0UteGn9b-syqrfVge7_sU4xQH38rXFm0,84029
11
+ mtbls2mhd/v0_1/legacy/builder.py,sha256=Kz4bHoGIUFTaj3nJaHs5OFG095sgbHmOxd5r-rlCeL0,85414
12
12
  mtbls2mhd/v0_1/legacy/convertor.py,sha256=gewPleS8FH3TS63dALqtIUuX26dAkIFo701UMlKiOE8,2607
13
- mtbls2mhd/v0_1/legacy/db_metadata_collector.py,sha256=OFpF0L0ueFC90r3jq27RdHo86L7myJ0ah7CcAPwGLZQ,11346
13
+ mtbls2mhd/v0_1/legacy/db_metadata_collector.py,sha256=4OyA_KD2X2zr7AHn8pQZJ0Y2_bW5r2_2wgTtQ93LM6A,13193
14
14
  mtbls2mhd/v0_1/legacy/folder_metadata_collector.py,sha256=1lELGwTsr12nBGwTog_Z8qi9dLt4awma56vBYoI678k,7439
15
15
  mtbls2mhd/v0_1/legacy/mtbls_study_schema.py,sha256=gUTbRmI8GfHI5leLiw8dxsmWnV3NnWw5RPX_LQWRFRQ,3162
16
16
  mtbls2mhd/v0_1/ms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  mtbls2mhd/v0_1/ms/convertor.py,sha256=kLIUpxOrH6hcs2Y9Bq1D0Mdvypg40pLyEJpHtGj6H_g,89
18
- mtbls_mhd_integration-0.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
- mtbls_mhd_integration-0.0.3.dist-info/METADATA,sha256=J88s4MwzG1rogEzcJrgeBxwIv461t7UHD3If6ow8yiU,678
20
- mtbls_mhd_integration-0.0.3.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
21
- mtbls_mhd_integration-0.0.3.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
- mtbls_mhd_integration-0.0.3.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
- mtbls_mhd_integration-0.0.3.dist-info/RECORD,,
18
+ mtbls_mhd_integration-0.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
19
+ mtbls_mhd_integration-0.0.5.dist-info/METADATA,sha256=2BQEjtVChMbUMm737D2dUGEWB2kJoh0oUvmM0Dl57Hs,682
20
+ mtbls_mhd_integration-0.0.5.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
21
+ mtbls_mhd_integration-0.0.5.dist-info/entry_points.txt,sha256=WQjM4flaYMyvHyv9zGKjCVk1i1_FGdNlhTmFVGgLgxs,61
22
+ mtbls_mhd_integration-0.0.5.dist-info/top_level.txt,sha256=b7pI95n6HIQMFXDD0yL1NwldiDc-XdeWql4Iw-uYygQ,10
23
+ mtbls_mhd_integration-0.0.5.dist-info/RECORD,,