nmdc-runtime 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. More details are available via the link on the original page.
- nmdc_runtime/site/changesheets/base.py +26 -1
- nmdc_runtime/site/resources.py +44 -7
- nmdc_runtime/util.py +81 -18
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/METADATA +1 -1
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/RECORD +9 -9
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/WHEEL +0 -0
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/top_level.txt +0 -0
|
@@ -4,14 +4,17 @@ base.py: Provides data classes for creating changesheets for NMDC database objec
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
import os
|
|
7
8
|
import time
|
|
8
9
|
from dataclasses import dataclass, field
|
|
10
|
+
from dotenv import load_dotenv
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
import requests
|
|
11
13
|
from typing import Any, ClassVar, Dict, TypeAlias, Optional
|
|
12
14
|
|
|
13
|
-
from nmdc_runtime.site.resources import RuntimeApiUserClient
|
|
15
|
+
from nmdc_runtime.site.resources import GoldApiClient, RuntimeApiUserClient
|
|
14
16
|
|
|
17
|
+
load_dotenv()
|
|
15
18
|
logging.basicConfig(
|
|
16
19
|
level=logging.INFO, format="%(asctime)s %(levelname)s %(" "message)s"
|
|
17
20
|
)
|
|
@@ -83,3 +86,25 @@ class Changesheet:
|
|
|
83
86
|
f.write(self.header + "\n")
|
|
84
87
|
for line_item in self.line_items:
|
|
85
88
|
f.write(line_item.line + "\n")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_runtime_client(use_dev_api):
|
|
92
|
+
if use_dev_api:
|
|
93
|
+
base_url = os.getenv("API_HOST_DEV")
|
|
94
|
+
logging.info("using Dev API...")
|
|
95
|
+
else:
|
|
96
|
+
base_url = os.getenv("API_HOST")
|
|
97
|
+
logging.info("using prod API...")
|
|
98
|
+
return RuntimeApiUserClient(
|
|
99
|
+
base_url=base_url,
|
|
100
|
+
username=os.getenv("API_QUERY_USER"),
|
|
101
|
+
password=os.getenv("API_QUERY_PASS"),
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_gold_client():
|
|
106
|
+
return GoldApiClient(
|
|
107
|
+
base_url=os.getenv("GOLD_API_BASE_URL"),
|
|
108
|
+
username=os.getenv("GOLD_API_USERNAME"),
|
|
109
|
+
password=os.getenv("GOLD_API_PASSWORD"),
|
|
110
|
+
)
|
nmdc_runtime/site/resources.py
CHANGED
|
@@ -17,7 +17,7 @@ from dagster import (
|
|
|
17
17
|
from fastjsonschema import JsonSchemaValueException
|
|
18
18
|
from frozendict import frozendict
|
|
19
19
|
from linkml_runtime.dumpers import json_dumper
|
|
20
|
-
from pydantic import BaseModel
|
|
20
|
+
from pydantic import BaseModel, AnyUrl
|
|
21
21
|
from pymongo import MongoClient, ReplaceOne, InsertOne
|
|
22
22
|
from terminusdb_client import WOQLClient
|
|
23
23
|
from toolz import get_in
|
|
@@ -27,6 +27,7 @@ from nmdc_runtime.api.core.util import expiry_dt_from_now, has_passed
|
|
|
27
27
|
from nmdc_runtime.api.models.object import DrsObject, AccessURL, DrsObjectIn
|
|
28
28
|
from nmdc_runtime.api.models.operation import ListOperationsResponse
|
|
29
29
|
from nmdc_runtime.api.models.util import ListRequest
|
|
30
|
+
from nmdc_runtime.site.normalization.gold import normalize_gold_id
|
|
30
31
|
from nmdc_runtime.util import unfreeze, nmdc_jsonschema_validator_noidpatterns
|
|
31
32
|
from nmdc_schema import nmdc
|
|
32
33
|
|
|
@@ -95,7 +96,8 @@ class RuntimeApiUserClient(RuntimeApiClient):
|
|
|
95
96
|
return self.request("GET", f"/runs/{run_id}")
|
|
96
97
|
|
|
97
98
|
def get_biosamples_by_gold_biosample_id(self, gold_biosample_id: str):
|
|
98
|
-
|
|
99
|
+
gold_biosample_id = normalize_gold_id(gold_biosample_id)
|
|
100
|
+
response = self.request(
|
|
99
101
|
"POST",
|
|
100
102
|
f"/queries:run",
|
|
101
103
|
{
|
|
@@ -107,9 +109,40 @@ class RuntimeApiUserClient(RuntimeApiClient):
|
|
|
107
109
|
},
|
|
108
110
|
},
|
|
109
111
|
)
|
|
112
|
+
response.raise_for_status()
|
|
113
|
+
return response.json()["cursor"]["firstBatch"]
|
|
114
|
+
|
|
115
|
+
def get_omics_processing_records_by_gold_project_id(self, gold_project_id: str):
|
|
116
|
+
gold_project_id = normalize_gold_id(gold_project_id)
|
|
117
|
+
response = self.request(
|
|
118
|
+
"POST",
|
|
119
|
+
f"/queries:run",
|
|
120
|
+
{
|
|
121
|
+
"find": "omics_processing_set",
|
|
122
|
+
"filter": {
|
|
123
|
+
"gold_sequencing_project_identifiers": {
|
|
124
|
+
"$elemMatch": {"$eq": gold_project_id}
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
},
|
|
128
|
+
)
|
|
129
|
+
response.raise_for_status()
|
|
130
|
+
return response.json()["cursor"]["firstBatch"]
|
|
131
|
+
|
|
132
|
+
def get_biosamples_for_study(self, study_id: str):
|
|
133
|
+
response = self.request(
|
|
134
|
+
"POST",
|
|
135
|
+
f"/queries:run",
|
|
136
|
+
{
|
|
137
|
+
"find": "biosample_set",
|
|
138
|
+
"filter": {"part_of": {"$elemMatch": {"$eq": study_id}}},
|
|
139
|
+
},
|
|
140
|
+
)
|
|
141
|
+
response.raise_for_status()
|
|
142
|
+
return response.json()["cursor"]["firstBatch"]
|
|
110
143
|
|
|
111
144
|
def get_omics_processing_by_name(self, name: str):
|
|
112
|
-
|
|
145
|
+
response = self.request(
|
|
113
146
|
"POST",
|
|
114
147
|
f"/queries:run",
|
|
115
148
|
{
|
|
@@ -117,6 +150,8 @@ class RuntimeApiUserClient(RuntimeApiClient):
|
|
|
117
150
|
"filter": {"name": {"$regex": name, "$options": "i"}},
|
|
118
151
|
},
|
|
119
152
|
)
|
|
153
|
+
response.raise_for_status()
|
|
154
|
+
return response.json()["cursor"]["firstBatch"]
|
|
120
155
|
|
|
121
156
|
|
|
122
157
|
class RuntimeApiSiteClient(RuntimeApiClient):
|
|
@@ -194,15 +229,17 @@ class RuntimeApiSiteClient(RuntimeApiClient):
|
|
|
194
229
|
access = AccessURL(
|
|
195
230
|
**self.get_object_access(object_id, method.access_id).json()
|
|
196
231
|
)
|
|
197
|
-
if access.url.startswith(
|
|
232
|
+
if str(access.url).startswith(
|
|
198
233
|
os.getenv("API_HOST_EXTERNAL")
|
|
199
234
|
) and self.base_url == os.getenv("API_HOST"):
|
|
200
|
-
access.url =
|
|
201
|
-
|
|
235
|
+
access.url = AnyUrl(
|
|
236
|
+
str(access.url).replace(
|
|
237
|
+
os.getenv("API_HOST_EXTERNAL"), os.getenv("API_HOST")
|
|
238
|
+
)
|
|
202
239
|
)
|
|
203
240
|
else:
|
|
204
241
|
access = AccessURL(url=method.access_url.url)
|
|
205
|
-
return requests.get(access.url)
|
|
242
|
+
return requests.get(str(access.url))
|
|
206
243
|
|
|
207
244
|
def list_jobs(self, list_request=None):
|
|
208
245
|
if list_request is None:
|
nmdc_runtime/util.py
CHANGED
|
@@ -10,6 +10,7 @@ from functools import lru_cache
|
|
|
10
10
|
from io import BytesIO
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from uuid import uuid4
|
|
13
|
+
from typing import List, Optional, Set, Dict
|
|
13
14
|
|
|
14
15
|
import fastjsonschema
|
|
15
16
|
import requests
|
|
@@ -27,13 +28,67 @@ from nmdc_runtime.api.models.object import DrsObjectIn
|
|
|
27
28
|
from typing_extensions import Annotated
|
|
28
29
|
|
|
29
30
|
|
|
31
|
+
def get_class_names_from_collection_spec(
|
|
32
|
+
spec: dict, prefix: Optional[str] = None
|
|
33
|
+
) -> List[str]:
|
|
34
|
+
"""
|
|
35
|
+
Returns the list of classes referenced by the `$ref` values in a JSON Schema snippet describing a collection,
|
|
36
|
+
applying an optional prefix to each class name.
|
|
37
|
+
|
|
38
|
+
>>> get_class_names_from_collection_spec({"items": {"foo": "#/$defs/A"}})
|
|
39
|
+
[]
|
|
40
|
+
>>> get_class_names_from_collection_spec({"items": {"$ref": "#/$defs/A"}})
|
|
41
|
+
['A']
|
|
42
|
+
>>> get_class_names_from_collection_spec({"items": {"$ref": "#/$defs/A"}}, "p:")
|
|
43
|
+
['p:A']
|
|
44
|
+
>>> get_class_names_from_collection_spec({"items": {"anyOf": "not-a-list"}})
|
|
45
|
+
[]
|
|
46
|
+
>>> get_class_names_from_collection_spec({"items": {"anyOf": []}})
|
|
47
|
+
[]
|
|
48
|
+
>>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}]}})
|
|
49
|
+
['A']
|
|
50
|
+
>>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}, {"$ref": "#/$defs/B"}]}})
|
|
51
|
+
['A', 'B']
|
|
52
|
+
>>> get_class_names_from_collection_spec({"items": {"anyOf": [{"$ref": "#/$defs/A"}, {"$ref": "#/$defs/B"}]}}, "p:")
|
|
53
|
+
['p:A', 'p:B']
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
class_names = []
|
|
57
|
+
if "items" in spec:
|
|
58
|
+
# If the `items` dictionary has a key named `$ref`, get the single class name from it.
|
|
59
|
+
if "$ref" in spec["items"]:
|
|
60
|
+
ref_dict = spec["items"]["$ref"]
|
|
61
|
+
class_name = ref_dict.split("/")[-1] # e.g. `#/$defs/Foo` --> `Foo`
|
|
62
|
+
class_names.append(class_name)
|
|
63
|
+
|
|
64
|
+
# Else, if it has a key named `anyOf` whose value is a list, get the class name from each ref in the list.
|
|
65
|
+
elif "anyOf" in spec["items"] and isinstance(spec["items"]["anyOf"], list):
|
|
66
|
+
for element in spec["items"]["anyOf"]:
|
|
67
|
+
ref_dict = element["$ref"]
|
|
68
|
+
class_name = ref_dict.split("/")[-1] # e.g. `#/$defs/Foo` --> `Foo`
|
|
69
|
+
class_names.append(class_name)
|
|
70
|
+
|
|
71
|
+
# Apply the specified prefix, if any, to each class name.
|
|
72
|
+
if isinstance(prefix, str):
|
|
73
|
+
class_names = list(map(lambda name: f"{prefix}{name}", class_names))
|
|
74
|
+
|
|
75
|
+
return class_names
|
|
76
|
+
|
|
77
|
+
|
|
30
78
|
@lru_cache
|
|
31
|
-
def get_type_collections():
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
79
|
+
def get_type_collections() -> dict:
|
|
80
|
+
"""Returns a dictionary mapping class names to Mongo collection names."""
|
|
81
|
+
|
|
82
|
+
mappings = {}
|
|
83
|
+
|
|
84
|
+
# Process the `items` dictionary of each collection whose name ends with `_set`.
|
|
85
|
+
for collection_name, spec in nmdc_jsonschema["properties"].items():
|
|
86
|
+
if collection_name.endswith("_set"):
|
|
87
|
+
class_names = get_class_names_from_collection_spec(spec, "nmdc:")
|
|
88
|
+
for class_name in class_names:
|
|
89
|
+
mappings[class_name] = collection_name
|
|
90
|
+
|
|
91
|
+
return mappings
|
|
37
92
|
|
|
38
93
|
|
|
39
94
|
def without_id_patterns(nmdc_jsonschema):
|
|
@@ -82,7 +137,7 @@ def put_object(filepath, url, mime_type=None):
|
|
|
82
137
|
return requests.put(url, data=f, headers={"Content-Type": mime_type})
|
|
83
138
|
|
|
84
139
|
|
|
85
|
-
def drs_metadata_for(filepath, base=None):
|
|
140
|
+
def drs_metadata_for(filepath, base=None, timestamp=None):
|
|
86
141
|
"""given file path, get drs metadata
|
|
87
142
|
|
|
88
143
|
required: size, created_time, and at least one checksum.
|
|
@@ -96,7 +151,7 @@ def drs_metadata_for(filepath, base=None):
|
|
|
96
151
|
)
|
|
97
152
|
if "checksums" not in base:
|
|
98
153
|
base["checksums"] = [
|
|
99
|
-
{"type": "sha256", "checksum": sha256hash_from_file(filepath)}
|
|
154
|
+
{"type": "sha256", "checksum": sha256hash_from_file(filepath, timestamp)}
|
|
100
155
|
]
|
|
101
156
|
if "mime_type" not in base:
|
|
102
157
|
base["mime_type"] = mimetypes.guess_type(filepath)[0]
|
|
@@ -312,22 +367,30 @@ def specialize_activity_set_docs(docs):
|
|
|
312
367
|
return docs, validation_errors
|
|
313
368
|
|
|
314
369
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
370
|
+
# Define a mapping from collection name to a list of class names allowable for that collection's documents.
|
|
371
|
+
collection_name_to_class_names: Dict[str, List[str]] = {
|
|
372
|
+
collection_name: get_class_names_from_collection_spec(spec)
|
|
373
|
+
for collection_name, spec in nmdc_jsonschema["$defs"]["Database"][
|
|
318
374
|
"properties"
|
|
319
375
|
].items()
|
|
320
|
-
if "items" in db_prop_spec and "$ref" in db_prop_spec["items"]
|
|
321
376
|
}
|
|
322
377
|
|
|
323
378
|
|
|
324
379
|
@lru_cache
|
|
325
|
-
def schema_collection_names_with_id_field():
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
380
|
+
def schema_collection_names_with_id_field() -> Set[str]:
|
|
381
|
+
"""
|
|
382
|
+
Returns the set of collection names with which _any_ of the associated classes contains an `id` field.
|
|
383
|
+
"""
|
|
384
|
+
|
|
385
|
+
target_collection_names = set()
|
|
386
|
+
|
|
387
|
+
for collection_name, class_names in collection_name_to_class_names.items():
|
|
388
|
+
for class_name in class_names:
|
|
389
|
+
if "id" in nmdc_jsonschema["$defs"][class_name].get("properties", {}):
|
|
390
|
+
target_collection_names.add(collection_name)
|
|
391
|
+
break
|
|
392
|
+
|
|
393
|
+
return target_collection_names
|
|
331
394
|
|
|
332
395
|
|
|
333
396
|
def ensure_unique_id_indexes(mdb: MongoDatabase):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
nmdc_runtime/containers.py,sha256=WBzPue0PRoyKXFxgLR-aQcuHetTa8yC5JjI0dGBblYA,419
|
|
3
3
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
nmdc_runtime/util.py,sha256=
|
|
4
|
+
nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
|
|
5
5
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
|
|
@@ -38,14 +38,14 @@ nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
38
38
|
nmdc_runtime/site/graphs.py,sha256=AiCQzwrITZvpfGwvm2PAr2pL96juZduBw2e4M_2N1LM,5965
|
|
39
39
|
nmdc_runtime/site/ops.py,sha256=i3Oy3_yEaFxsYpDRpnUmWMJSyL2W_2wL1XEHPxGojyA,28779
|
|
40
40
|
nmdc_runtime/site/repository.py,sha256=DFvF0PqMK0TCuZJfjtpVQqCUl9F5rMcEo5_M6Dw0wYk,23304
|
|
41
|
-
nmdc_runtime/site/resources.py,sha256=
|
|
41
|
+
nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
|
|
42
42
|
nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
|
|
43
43
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
45
45
|
nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=XIFI_AI3zl0dFr-ELOEmwvT41MyRKBGFaAT3RcamTNE,4166
|
|
46
46
|
nmdc_runtime/site/backup/nmdcdb_mongoimport.py,sha256=k6w5yscMNYoMBVkaAA9soWS0Dj2CB0FRBSFlifRO3Ro,1739
|
|
47
47
|
nmdc_runtime/site/changesheets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
nmdc_runtime/site/changesheets/base.py,sha256=
|
|
48
|
+
nmdc_runtime/site/changesheets/base.py,sha256=lZT6WCsEBl-FsTr7Ki8_ploT93uMiVyIWWKM36aOrRk,3171
|
|
49
49
|
nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
50
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
51
51
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
@@ -71,9 +71,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=TgckqKkFquHDLso77sn-jZRu5ZaBevGCt5p8
|
|
|
71
71
|
nmdc_runtime/site/validation/gold.py,sha256=kJ1L081SZb-8qKpF731r5aQOueM206SUfUYMTTNTFMc,802
|
|
72
72
|
nmdc_runtime/site/validation/jgi.py,sha256=lBo-FCtEYedT74CpW-Kdj512Ib963ik-4YIYmY5puDo,1298
|
|
73
73
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
74
|
-
nmdc_runtime-1.0.
|
|
75
|
-
nmdc_runtime-1.0.
|
|
76
|
-
nmdc_runtime-1.0.
|
|
77
|
-
nmdc_runtime-1.0.
|
|
78
|
-
nmdc_runtime-1.0.
|
|
79
|
-
nmdc_runtime-1.0.
|
|
74
|
+
nmdc_runtime-1.0.10.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
75
|
+
nmdc_runtime-1.0.10.dist-info/METADATA,sha256=SrqcOdwVvwblF8WwVYjpZapxCwvcijms5vNaOuK5eng,6807
|
|
76
|
+
nmdc_runtime-1.0.10.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
|
77
|
+
nmdc_runtime-1.0.10.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
|
|
78
|
+
nmdc_runtime-1.0.10.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
79
|
+
nmdc_runtime-1.0.10.dist-info/RECORD,,
|
|
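{nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/LICENSE: file without changes
|
|
{nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/WHEEL: file without changes
|
|
{nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/entry_points.txt: file without changes
|
|
{nmdc_runtime-1.0.8.dist-info → nmdc_runtime-1.0.10.dist-info}/top_level.txt: file without changes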
|