nmdc-runtime 2.8.0__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/api/__init__.py +0 -0
- nmdc_runtime/api/analytics.py +70 -0
- nmdc_runtime/api/boot/__init__.py +0 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/__init__.py +0 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +170 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/__init__.py +0 -0
- nmdc_runtime/api/db/mongo.py +447 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/__init__.py +0 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +794 -0
- nmdc_runtime/api/endpoints/ids.py +192 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +277 -0
- nmdc_runtime/api/endpoints/operations.py +105 -0
- nmdc_runtime/api/endpoints/queries.py +679 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +229 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +774 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/main.py +401 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/__init__.py +0 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/__init__.py +0 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/minter.py +0 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +253 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +242 -0
- nmdc_runtime/config.py +55 -4
- nmdc_runtime/core/db/Database.py +1 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -9
- nmdc_runtime/lib/extract_nmdc_data.py +0 -8
- nmdc_runtime/lib/nmdc_dataframes.py +3 -7
- nmdc_runtime/lib/nmdc_etl_class.py +1 -7
- nmdc_runtime/minter/adapters/repository.py +1 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +35 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/export/ncbi_xml.py +1 -2
- nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
- nmdc_runtime/site/graphs.py +33 -28
- nmdc_runtime/site/ops.py +97 -237
- nmdc_runtime/site/repair/database_updater.py +8 -0
- nmdc_runtime/site/repository.py +7 -117
- nmdc_runtime/site/resources.py +4 -4
- nmdc_runtime/site/translation/gold_translator.py +22 -21
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +64 -54
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/util.py +9 -321
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
- nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.8.0.dist-info/RECORD +0 -84
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
import base32_lib as base32
|
|
5
|
+
from pymongo.database import Database as MongoDatabase
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate_id(length: int = 10, split_every: int = 4, checksum: bool = True) -> str:
    """Generate a random base32 string: a user-shareable ID for a database entity.

    Uses Douglas Crockford Base32 encoding: <https://www.crockford.com/base32.html>

    With the defaults (``length=10``, ``checksum=True``) the identifier consists of
    8 random characters (5 bits each) plus 2 digit characters for the ISO 7064
    checksum, so there are 2**40 ~ 1 trillion possible values, *much* larger than
    the number of statements feasibly storable by the database.

    Hyphen splits are optional and exist only for human readability. They are
    controlled by ``split_every`` (default: 4), which is forwarded unchanged to
    ``base32.generate``. ``generate_ids`` in this module passes ``split_every=0``
    when it needs identifiers without hyphens.
    NOTE(review): with the default of 4, a 10-character identifier presumably
    comes back grouped as 4-4-2 -- confirm against base32_lib.

    :param length: non-hyphen identifier length *including* checksum
    :param split_every: hyphenates every that many characters
    :param checksum: computes and appends ISO-7064 checksum
    :returns: identifier as a string
    """
    return base32.generate(length=length, split_every=split_every, checksum=checksum)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def decode_id(encoded: str, checksum=True) -> int:
    """Turn a string ID (as produced by `generate_id`) back into its number.

    Before decoding, the string is normalized: it is lowercased, hyphens are
    removed, and common user typos are corrected ({I,i,l,L} become 1 and
    {O,o} become 0).

    When `checksum` is enabled, a checksum error raises a ValueError.

    :param encoded: the string to decode
    :param checksum: whether to extract the checksum and validate it
    :returns: the original number
    """
    decoded = base32.decode(encoded=encoded, checksum=checksum)
    return decoded
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def encode_id(number: int, split_every=4, min_length=10, checksum=True) -> str:
    """Encode `number` as a URI-friendly Douglas Crockford base32 string.

    This is a thin wrapper around ``base32.encode``; all arguments are forwarded
    unchanged. The result is derived from `number` (it is not random).

    :param number: number to encode
    :param split_every: if provided, insert '-' every `split_every` characters
                        going from left to right
    :param min_length: 0-pad beginning of string to obtain minimum desired length
    :param checksum: append modulo 97-10 (ISO 7064) checksum to string
    :returns: the Douglas Crockford base32 encoding of `number`, composed only
              of valid URI characters.
    """
    return base32.encode(
        number, split_every=split_every, min_length=min_length, checksum=checksum
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# sping: "semi-opaque string" (https://n2t.net/e/n2t_apidoc.html).
#
# Each entry is a pair `(n, threshold)` where `threshold` is half of 2 ** (5 * n),
# i.e. 2 ** (5 * n - 1), for n = 2, 4, 6, 8, 10.
#
# Note: The result is always the following list of tuples:
# ```
# [
#     ( 2, 512),
#     ( 4, 524288),
#     ( 6, 536870912),
#     ( 8, 549755813888),
#     (10, 562949953421312)
# ]
# ```
SPING_SIZE_THRESHOLDS = [
    (n_chars, 2 ** (5 * n_chars - 1)) for n_chars in range(2, 11, 2)
]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def collection_name(naa, shoulder):
    r"""
    Build a string designed to be used as a MongoDB collection name.

    The name has the form "ids_<naa>_<shoulder>".

    TODO: Document the function parameters, including expanding the "naa" acronym.
    """
    name = f"ids_{naa}_{shoulder}"
    return name
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def generate_ids(
    mdb: MongoDatabase,
    owner: str,
    populator: str,
    number: int,
    ns: str = "",
    naa: str = "nmdc",
    shoulder: str = "fk4",
) -> List[str]:
    r"""
    Reserve `number` new identifiers and return them as strings.

    Candidate IDs are generated randomly, decoded to integers, and checked against
    the collection named by `collection_name(naa, shoulder)`. Candidates whose
    decoded value is not already present as an `_id` are inserted as "reserved"
    documents. This repeats until `number` identifiers have been inserted.

    The number of random characters per ID is the smallest `n` in
    `SPING_SIZE_THRESHOLDS` whose threshold exceeds `number` plus the collection's
    estimated document count (12 if none qualifies). Two more characters are
    requested from `generate_id` for the checksum.

    NOTE(review): the availability check (`find`) and the `insert_many` are
    separate operations; if another writer inserts the same `_id` in between,
    `insert_many` presumably raises a duplicate-key error -- confirm how callers
    are expected to handle that.

    :param mdb: database holding the identifier collections
    :param owner: stored in each document's "__ao" field
    :param populator: stored in each document's "who" field
    :param number: how many identifiers to reserve; must not be negative
    :param ns: optional namespace; when non-empty, "what" is set to "<ns>/<id>"
    :param naa: prefix of the returned identifiers; also part of the collection name
    :param shoulder: placed between "<naa>:" and the generated ID; also part of
                     the collection name
    :returns: the "where" values ("<naa>:<shoulder><id>") of the inserted documents
    :raises ValueError: if `number` is negative
    """
    if number < 0:
        # A negative count can never be satisfied; without this guard the loop
        # below would never terminate.
        raise ValueError(f"number must be non-negative, got {number}")
    if number == 0:
        return []

    collection = mdb.get_collection(collection_name(naa, shoulder))
    estimated_document_count = collection.estimated_document_count()
    n_chars = next(
        (
            n
            for n, t in SPING_SIZE_THRESHOLDS
            if (number + estimated_document_count) < t
        ),
        12,
    )
    collected = []

    while len(collected) < number:
        n_to_generate = number - len(collected)
        # A set de-duplicates candidates generated within the same batch.
        eids = set()
        while len(eids) < n_to_generate:
            eids.add(generate_id(length=(n_chars + 2), split_every=0, checksum=True))
        eids = list(eids)
        deids = [decode_id(eid) for eid in eids]
        taken = {d["_id"] for d in collection.find({"_id": {"$in": deids}}, {"_id": 1})}
        not_taken = [
            (eid, eid_decoded)
            for eid, eid_decoded in zip(eids, deids)
            if eid_decoded not in taken
        ]
        if not not_taken:
            # Every candidate collided with an existing ID; try a fresh batch.
            continue

        # One timestamp per batch so that "when" and "__ac" always agree.
        created_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
        # All attribute names beginning with "__a" are reserved...
        # https://github.com/jkunze/n2t-eggnog/blob/0f0f4c490e6dece507dba710d3557e29b8f6627e/egg#L1882
        # XXX mongo is a pain with '.'s in field names, so not using e.g. "_.e" names.
        docs = [
            {
                "@context": "https://n2t.net/e/n2t_apidoc.html#identifier-metadata",
                "_id": eid_decoded,
                "who": populator,
                "what": (f"{ns}/{eid}" if ns else "(:tba) Work in progress"),
                "when": created_at,
                "how": shoulder,
                "where": f"{naa}:{shoulder}{eid}",
                "__as": "reserved",  # status, public|reserved|unavailable
                "__ao": owner,  # owner
                "__ac": created_at,  # created
            }
            for eid, eid_decoded in not_taken
        ]
        collection.insert_many(docs)
        collected.extend(docs)

    return [d["where"] for d in collected]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def generate_one_id(
    mdb: MongoDatabase = None,
    ns: str = "",
    shoulder: str = "sys0",
) -> str:
    """Generate one unique Crockford Base32-encoded ID for the mdb repository.

    Delegates to `generate_ids`, requesting a single ID with "_system" as both
    owner and populator and "nmdc" as the naa, and returns that one ID.

    The ID can be associated with namespace `ns` to facilitate ID
    deletion/recycling.
    """
    system_user = "_system"
    ids = generate_ids(
        mdb,
        owner=system_user,
        populator=system_user,
        number=1,
        ns=ns,
        naa="nmdc",
        shoulder=shoulder,
    )
    return ids[0]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def local_part(id_):
    """Return everything after the first colon, e.g. nmdc:fk0123 -> fk0123"""
    pieces = id_.split(":", 1)
    return pieces[1]
|