nmdc-runtime 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

nmdc_runtime/site/changesheets/base.py CHANGED
@@ -4,14 +4,17 @@ base.py: Provides data classes for creating changesheets for NMDC database objec
4
4
  """
5
5
 
6
6
  import logging
7
+ import os
7
8
  import time
8
9
  from dataclasses import dataclass, field
10
+ from dotenv import load_dotenv
9
11
  from pathlib import Path
10
12
  import requests
11
13
  from typing import Any, ClassVar, Dict, TypeAlias, Optional
12
14
 
13
- from nmdc_runtime.site.resources import RuntimeApiUserClient
15
+ from nmdc_runtime.site.resources import GoldApiClient, RuntimeApiUserClient
14
16
 
17
+ load_dotenv()
15
18
  logging.basicConfig(
16
19
  level=logging.INFO, format="%(asctime)s %(levelname)s %(" "message)s"
17
20
  )
@@ -83,3 +86,25 @@ class Changesheet:
83
86
  f.write(self.header + "\n")
84
87
  for line_item in self.line_items:
85
88
  f.write(line_item.line + "\n")
89
+
90
+
91
+ def get_runtime_client(use_dev_api):
92
+ if use_dev_api:
93
+ base_url = os.getenv("API_HOST_DEV")
94
+ logging.info("using Dev API...")
95
+ else:
96
+ base_url = os.getenv("API_HOST")
97
+ logging.info("using prod API...")
98
+ return RuntimeApiUserClient(
99
+ base_url=base_url,
100
+ username=os.getenv("API_QUERY_USER"),
101
+ password=os.getenv("API_QUERY_PASS"),
102
+ )
103
+
104
+
105
+ def get_gold_client():
106
+ return GoldApiClient(
107
+ base_url=os.getenv("GOLD_API_BASE_URL"),
108
+ username=os.getenv("GOLD_API_USERNAME"),
109
+ password=os.getenv("GOLD_API_PASSWORD"),
110
+ )
nmdc_runtime/site/resources.py CHANGED
@@ -17,7 +17,7 @@ from dagster import (
17
17
  from fastjsonschema import JsonSchemaValueException
18
18
  from frozendict import frozendict
19
19
  from linkml_runtime.dumpers import json_dumper
20
- from pydantic import BaseModel
20
+ from pydantic import BaseModel, AnyUrl
21
21
  from pymongo import MongoClient, ReplaceOne, InsertOne
22
22
  from terminusdb_client import WOQLClient
23
23
  from toolz import get_in
@@ -27,6 +27,7 @@ from nmdc_runtime.api.core.util import expiry_dt_from_now, has_passed
27
27
  from nmdc_runtime.api.models.object import DrsObject, AccessURL, DrsObjectIn
28
28
  from nmdc_runtime.api.models.operation import ListOperationsResponse
29
29
  from nmdc_runtime.api.models.util import ListRequest
30
+ from nmdc_runtime.site.normalization.gold import normalize_gold_id
30
31
  from nmdc_runtime.util import unfreeze, nmdc_jsonschema_validator_noidpatterns
31
32
  from nmdc_schema import nmdc
32
33
 
@@ -95,7 +96,8 @@ class RuntimeApiUserClient(RuntimeApiClient):
95
96
  return self.request("GET", f"/runs/{run_id}")
96
97
 
97
98
  def get_biosamples_by_gold_biosample_id(self, gold_biosample_id: str):
98
- return self.request(
99
+ gold_biosample_id = normalize_gold_id(gold_biosample_id)
100
+ response = self.request(
99
101
  "POST",
100
102
  f"/queries:run",
101
103
  {
@@ -107,9 +109,40 @@ class RuntimeApiUserClient(RuntimeApiClient):
107
109
  },
108
110
  },
109
111
  )
112
+ response.raise_for_status()
113
+ return response.json()["cursor"]["firstBatch"]
114
+
115
+ def get_omics_processing_records_by_gold_project_id(self, gold_project_id: str):
116
+ gold_project_id = normalize_gold_id(gold_project_id)
117
+ response = self.request(
118
+ "POST",
119
+ f"/queries:run",
120
+ {
121
+ "find": "omics_processing_set",
122
+ "filter": {
123
+ "gold_sequencing_project_identifiers": {
124
+ "$elemMatch": {"$eq": gold_project_id}
125
+ }
126
+ },
127
+ },
128
+ )
129
+ response.raise_for_status()
130
+ return response.json()["cursor"]["firstBatch"]
131
+
132
+ def get_biosamples_for_study(self, study_id: str):
133
+ response = self.request(
134
+ "POST",
135
+ f"/queries:run",
136
+ {
137
+ "find": "biosample_set",
138
+ "filter": {"part_of": {"$elemMatch": {"$eq": study_id}}},
139
+ },
140
+ )
141
+ response.raise_for_status()
142
+ return response.json()["cursor"]["firstBatch"]
110
143
 
111
144
  def get_omics_processing_by_name(self, name: str):
112
- return self.request(
145
+ response = self.request(
113
146
  "POST",
114
147
  f"/queries:run",
115
148
  {
@@ -117,6 +150,8 @@ class RuntimeApiUserClient(RuntimeApiClient):
117
150
  "filter": {"name": {"$regex": name, "$options": "i"}},
118
151
  },
119
152
  )
153
+ response.raise_for_status()
154
+ return response.json()["cursor"]["firstBatch"]
120
155
 
121
156
 
122
157
  class RuntimeApiSiteClient(RuntimeApiClient):
@@ -194,15 +229,17 @@ class RuntimeApiSiteClient(RuntimeApiClient):
194
229
  access = AccessURL(
195
230
  **self.get_object_access(object_id, method.access_id).json()
196
231
  )
197
- if access.url.startswith(
232
+ if str(access.url).startswith(
198
233
  os.getenv("API_HOST_EXTERNAL")
199
234
  ) and self.base_url == os.getenv("API_HOST"):
200
- access.url = access.url.replace(
201
- os.getenv("API_HOST_EXTERNAL"), os.getenv("API_HOST")
235
+ access.url = AnyUrl(
236
+ str(access.url).replace(
237
+ os.getenv("API_HOST_EXTERNAL"), os.getenv("API_HOST")
238
+ )
202
239
  )
203
240
  else:
204
241
  access = AccessURL(url=method.access_url.url)
205
- return requests.get(access.url)
242
+ return requests.get(str(access.url))
206
243
 
207
244
  def list_jobs(self, list_request=None):
208
245
  if list_request is None:
nmdc_runtime/util.py CHANGED
@@ -10,6 +10,7 @@ from functools import lru_cache
10
10
  from io import BytesIO
11
11
  from pathlib import Path
12
12
  from uuid import uuid4
13
+ from typing import List, Optional, Set, Dict
13
14
 
14
15
  import fastjsonschema
15
16
  import requests
@@ -27,13 +28,67 @@ from nmdc_runtime.api.models.object import DrsObjectIn
27
28
  from typing_extensions import Annotated
28
29
 
29
30
 
31
+ def get_class_names_from_collection_spec(
32
+ spec: dict, prefix: Optional[str] = None
33
+ ) -> List[str]:
34
+ """
35
+ Returns the list of classes referenced by the `$ref` values in a JSON Schema snippet describing a collection,
36
+ applying an optional prefix to each class name.
37
+
38
+ >>> get_class_names_from_collection_spec({"items": {"foo": "#/$defs/A"}})
39
+ []
40
+ >>> get_class_names_from_collection_spec({"items": {"$ref": "#/$defs/A"}})
41
+ ['A']
42
+ >>> get_class_names_from_collection_spec({"items": {"$ref": "#/$defs/A"}}, "p:")
43
+ ['p:A']
44
+ >>> get_class_names_from_collection_spec({"items": {"anyOf": "not-a-list"}})
45
+ []
46
+ >>> get_class_names_from_collection_spec({"items": {"anyOf": []}})
47
+ []
48
+ >>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}]}})
49
+ ['A']
50
+ >>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}, {"$ref": "#/$defs/B"}]}})
51
+ ['A', 'B']
52
+ >>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}, {"$ref": "#/$defs/B"}]}}, "p:")
53
+ ['p:A', 'p:B']
54
+ """
55
+
56
+ class_names = []
57
+ if "items" in spec:
58
+ # If the `items` dictionary has a key named `$ref`, get the single class name from it.
59
+ if "$ref" in spec["items"]:
60
+ ref_dict = spec["items"]["$ref"]
61
+ class_name = ref_dict.split("/")[-1] # e.g. `#/$defs/Foo` --> `Foo`
62
+ class_names.append(class_name)
63
+
64
+ # Else, if it has a key named `anyOf` whose value is a list, get the class name from each ref in the list.
65
+ elif "anyOf" in spec["items"] and isinstance(spec["items"]["anyOf"], list):
66
+ for element in spec["items"]["anyOf"]:
67
+ ref_dict = element["$ref"]
68
+ class_name = ref_dict.split("/")[-1] # e.g. `#/$defs/Foo` --> `Foo`
69
+ class_names.append(class_name)
70
+
71
+ # Apply the specified prefix, if any, to each class name.
72
+ if isinstance(prefix, str):
73
+ class_names = list(map(lambda name: f"{prefix}{name}", class_names))
74
+
75
+ return class_names
76
+
77
+
30
78
  @lru_cache
31
- def get_type_collections():
32
- return {
33
- f'nmdc:{spec["items"]["$ref"].split("/")[-1]}': collection_name
34
- for collection_name, spec in nmdc_jsonschema["properties"].items()
35
- if collection_name.endswith("_set")
36
- }
79
+ def get_type_collections() -> dict:
80
+ """Returns a dictionary mapping class names to Mongo collection names."""
81
+
82
+ mappings = {}
83
+
84
+ # Process the `items` dictionary of each collection whose name ends with `_set`.
85
+ for collection_name, spec in nmdc_jsonschema["properties"].items():
86
+ if collection_name.endswith("_set"):
87
+ class_names = get_class_names_from_collection_spec(spec, "nmdc:")
88
+ for class_name in class_names:
89
+ mappings[class_name] = collection_name
90
+
91
+ return mappings
37
92
 
38
93
 
39
94
  def without_id_patterns(nmdc_jsonschema):
@@ -82,7 +137,7 @@ def put_object(filepath, url, mime_type=None):
82
137
  return requests.put(url, data=f, headers={"Content-Type": mime_type})
83
138
 
84
139
 
85
- def drs_metadata_for(filepath, base=None):
140
+ def drs_metadata_for(filepath, base=None, timestamp=None):
86
141
  """given file path, get drs metadata
87
142
 
88
143
  required: size, created_time, and at least one checksum.
@@ -96,7 +151,7 @@ def drs_metadata_for(filepath, base=None):
96
151
  )
97
152
  if "checksums" not in base:
98
153
  base["checksums"] = [
99
- {"type": "sha256", "checksum": sha256hash_from_file(filepath)}
154
+ {"type": "sha256", "checksum": sha256hash_from_file(filepath, timestamp)}
100
155
  ]
101
156
  if "mime_type" not in base:
102
157
  base["mime_type"] = mimetypes.guess_type(filepath)[0]
@@ -312,22 +367,30 @@ def specialize_activity_set_docs(docs):
312
367
  return docs, validation_errors
313
368
 
314
369
 
315
- collection_name_to_class_name = {
316
- db_prop: db_prop_spec["items"]["$ref"].split("/")[-1]
317
- for db_prop, db_prop_spec in get_nmdc_jsonschema_dict()["$defs"]["Database"][
370
+ # Define a mapping from collection name to a list of class names allowable for that collection's documents.
371
+ collection_name_to_class_names: Dict[str, List[str]] = {
372
+ collection_name: get_class_names_from_collection_spec(spec)
373
+ for collection_name, spec in nmdc_jsonschema["$defs"]["Database"][
318
374
  "properties"
319
375
  ].items()
320
- if "items" in db_prop_spec and "$ref" in db_prop_spec["items"]
321
376
  }
322
377
 
323
378
 
324
379
  @lru_cache
325
- def schema_collection_names_with_id_field():
326
- return {
327
- coll_name
328
- for coll_name, class_name in collection_name_to_class_name.items()
329
- if "id" in get_nmdc_jsonschema_dict()["$defs"][class_name].get("properties", {})
330
- }
380
+ def schema_collection_names_with_id_field() -> Set[str]:
381
+ """
382
+ Returns the set of collection names with which _any_ of the associated classes contains an `id` field.
383
+ """
384
+
385
+ target_collection_names = set()
386
+
387
+ for collection_name, class_names in collection_name_to_class_names.items():
388
+ for class_name in class_names:
389
+ if "id" in nmdc_jsonschema["$defs"][class_name].get("properties", {}):
390
+ target_collection_names.add(collection_name)
391
+ break
392
+
393
+ return target_collection_names
331
394
 
332
395
 
333
396
  def ensure_unique_id_indexes(mdb: MongoDatabase):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nmdc-runtime
3
- Version: 1.0.8
3
+ Version: 1.0.10
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -1,7 +1,7 @@
1
1
  nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  nmdc_runtime/containers.py,sha256=WBzPue0PRoyKXFxgLR-aQcuHetTa8yC5JjI0dGBblYA,419
3
3
  nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- nmdc_runtime/util.py,sha256=Ai9mmRPiiyK0-Y5QnI71aWWzR08fEt42LSdkpGOJeAU,17088
4
+ nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
5
5
  nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -38,14 +38,14 @@ nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
38
38
  nmdc_runtime/site/graphs.py,sha256=AiCQzwrITZvpfGwvm2PAr2pL96juZduBw2e4M_2N1LM,5965
39
39
  nmdc_runtime/site/ops.py,sha256=i3Oy3_yEaFxsYpDRpnUmWMJSyL2W_2wL1XEHPxGojyA,28779
40
40
  nmdc_runtime/site/repository.py,sha256=DFvF0PqMK0TCuZJfjtpVQqCUl9F5rMcEo5_M6Dw0wYk,23304
41
- nmdc_runtime/site/resources.py,sha256=kqAnVPVrEHLQBsGvNVsKDznJDmwVGEm96-drkQToLag,16401
41
+ nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
42
42
  nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
43
43
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
45
45
  nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=XIFI_AI3zl0dFr-ELOEmwvT41MyRKBGFaAT3RcamTNE,4166
46
46
  nmdc_runtime/site/backup/nmdcdb_mongoimport.py,sha256=k6w5yscMNYoMBVkaAA9soWS0Dj2CB0FRBSFlifRO3Ro,1739
47
47
  nmdc_runtime/site/changesheets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- nmdc_runtime/site/changesheets/base.py,sha256=ODUNd0Ivpv0Jl_qPeb_-XBSvWOaTF6j1v0YdY6qG9ek,2501
48
+ nmdc_runtime/site/changesheets/base.py,sha256=lZT6WCsEBl-FsTr7Ki8_ploT93uMiVyIWWKM36aOrRk,3171
49
49
  nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
51
51
  nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
@@ -71,9 +71,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=TgckqKkFquHDLso77sn-jZRu5ZaBevGCt5p8
71
71
  nmdc_runtime/site/validation/gold.py,sha256=kJ1L081SZb-8qKpF731r5aQOueM206SUfUYMTTNTFMc,802
72
72
  nmdc_runtime/site/validation/jgi.py,sha256=lBo-FCtEYedT74CpW-Kdj512Ib963ik-4YIYmY5puDo,1298
73
73
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
74
- nmdc_runtime-1.0.8.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
75
- nmdc_runtime-1.0.8.dist-info/METADATA,sha256=UsGWNojU5Bfzi3-awRbUPOq7leHL-yuoJzGu-HAwfxo,6806
76
- nmdc_runtime-1.0.8.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
77
- nmdc_runtime-1.0.8.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
78
- nmdc_runtime-1.0.8.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
79
- nmdc_runtime-1.0.8.dist-info/RECORD,,
74
+ nmdc_runtime-1.0.10.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
75
+ nmdc_runtime-1.0.10.dist-info/METADATA,sha256=SrqcOdwVvwblF8WwVYjpZapxCwvcijms5vNaOuK5eng,6807
76
+ nmdc_runtime-1.0.10.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
77
+ nmdc_runtime-1.0.10.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
78
+ nmdc_runtime-1.0.10.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
79
+ nmdc_runtime-1.0.10.dist-info/RECORD,,