nmdc-runtime 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (98) hide show
  1. nmdc_runtime/api/__init__.py +0 -0
  2. nmdc_runtime/api/analytics.py +70 -0
  3. nmdc_runtime/api/boot/__init__.py +0 -0
  4. nmdc_runtime/api/boot/capabilities.py +9 -0
  5. nmdc_runtime/api/boot/object_types.py +126 -0
  6. nmdc_runtime/api/boot/triggers.py +84 -0
  7. nmdc_runtime/api/boot/workflows.py +116 -0
  8. nmdc_runtime/api/core/__init__.py +0 -0
  9. nmdc_runtime/api/core/auth.py +208 -0
  10. nmdc_runtime/api/core/idgen.py +170 -0
  11. nmdc_runtime/api/core/metadata.py +788 -0
  12. nmdc_runtime/api/core/util.py +109 -0
  13. nmdc_runtime/api/db/__init__.py +0 -0
  14. nmdc_runtime/api/db/mongo.py +447 -0
  15. nmdc_runtime/api/db/s3.py +37 -0
  16. nmdc_runtime/api/endpoints/__init__.py +0 -0
  17. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  18. nmdc_runtime/api/endpoints/find.py +794 -0
  19. nmdc_runtime/api/endpoints/ids.py +192 -0
  20. nmdc_runtime/api/endpoints/jobs.py +143 -0
  21. nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
  22. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  23. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  24. nmdc_runtime/api/endpoints/metadata.py +260 -0
  25. nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
  26. nmdc_runtime/api/endpoints/object_types.py +38 -0
  27. nmdc_runtime/api/endpoints/objects.py +277 -0
  28. nmdc_runtime/api/endpoints/operations.py +105 -0
  29. nmdc_runtime/api/endpoints/queries.py +679 -0
  30. nmdc_runtime/api/endpoints/runs.py +98 -0
  31. nmdc_runtime/api/endpoints/search.py +38 -0
  32. nmdc_runtime/api/endpoints/sites.py +229 -0
  33. nmdc_runtime/api/endpoints/triggers.py +25 -0
  34. nmdc_runtime/api/endpoints/users.py +214 -0
  35. nmdc_runtime/api/endpoints/util.py +774 -0
  36. nmdc_runtime/api/endpoints/workflows.py +353 -0
  37. nmdc_runtime/api/main.py +401 -0
  38. nmdc_runtime/api/middleware.py +43 -0
  39. nmdc_runtime/api/models/__init__.py +0 -0
  40. nmdc_runtime/api/models/capability.py +14 -0
  41. nmdc_runtime/api/models/id.py +92 -0
  42. nmdc_runtime/api/models/job.py +37 -0
  43. nmdc_runtime/api/models/lib/__init__.py +0 -0
  44. nmdc_runtime/api/models/lib/helpers.py +78 -0
  45. nmdc_runtime/api/models/metadata.py +11 -0
  46. nmdc_runtime/api/models/minter.py +0 -0
  47. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  48. nmdc_runtime/api/models/object.py +180 -0
  49. nmdc_runtime/api/models/object_type.py +20 -0
  50. nmdc_runtime/api/models/operation.py +66 -0
  51. nmdc_runtime/api/models/query.py +246 -0
  52. nmdc_runtime/api/models/query_continuation.py +111 -0
  53. nmdc_runtime/api/models/run.py +161 -0
  54. nmdc_runtime/api/models/site.py +87 -0
  55. nmdc_runtime/api/models/trigger.py +13 -0
  56. nmdc_runtime/api/models/user.py +140 -0
  57. nmdc_runtime/api/models/util.py +253 -0
  58. nmdc_runtime/api/models/workflow.py +15 -0
  59. nmdc_runtime/api/openapi.py +242 -0
  60. nmdc_runtime/config.py +7 -8
  61. nmdc_runtime/core/db/Database.py +1 -3
  62. nmdc_runtime/infrastructure/database/models/user.py +0 -9
  63. nmdc_runtime/lib/extract_nmdc_data.py +0 -8
  64. nmdc_runtime/lib/nmdc_dataframes.py +3 -7
  65. nmdc_runtime/lib/nmdc_etl_class.py +1 -7
  66. nmdc_runtime/minter/adapters/repository.py +1 -2
  67. nmdc_runtime/minter/config.py +2 -0
  68. nmdc_runtime/minter/domain/model.py +35 -1
  69. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  70. nmdc_runtime/mongo_util.py +1 -2
  71. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  72. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  73. nmdc_runtime/site/export/ncbi_xml.py +1 -2
  74. nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
  75. nmdc_runtime/site/graphs.py +1 -22
  76. nmdc_runtime/site/ops.py +60 -152
  77. nmdc_runtime/site/repository.py +0 -112
  78. nmdc_runtime/site/translation/gold_translator.py +4 -12
  79. nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
  80. nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
  81. nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
  82. nmdc_runtime/site/translation/submission_portal_translator.py +2 -54
  83. nmdc_runtime/site/translation/translator.py +63 -1
  84. nmdc_runtime/site/util.py +8 -3
  85. nmdc_runtime/site/validation/util.py +10 -5
  86. nmdc_runtime/util.py +3 -47
  87. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
  88. nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
  89. nmdc_runtime/site/translation/emsl.py +0 -43
  90. nmdc_runtime/site/translation/gold.py +0 -53
  91. nmdc_runtime/site/translation/jgi.py +0 -32
  92. nmdc_runtime/site/translation/util.py +0 -132
  93. nmdc_runtime/site/validation/jgi.py +0 -43
  94. nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
  95. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
  96. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
  97. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
  98. {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,170 @@
1
+ from datetime import datetime, timezone
2
+ from typing import List
3
+
4
+ import base32_lib as base32
5
+ from pymongo.database import Database as MongoDatabase
6
+
7
+
8
def generate_id(length=10, split_every=4, checksum=True) -> str:
    """Create a random base32 string: a user-shareable ID for a database entity.

    The string uses the Douglas Crockford Base32 encoding
    (<https://www.crockford.com/base32.html>); the actual generation is
    delegated to `base32.generate`.

    With the defaults (`length=10`, `checksum=True`) the identifier consists of
    8 characters (5 bits each) plus 2 digit characters for the ISO 7064
    checksum, so there are 2**40 ~ 1 trillion possible values -- *much* more
    than the number of statements feasibly storable by the database.

    Hyphen splits are optional and exist only for human readability; the
    default here is to hyphenate every 4 characters.

    :param length: non-hyphen identifier length *including* checksum
    :param split_every: hyphenates every that many characters
    :param checksum: computes and appends ISO-7064 checksum
    :returns: identifier as a string
    """
    options = {
        "length": length,
        "split_every": split_every,
        "checksum": checksum,
    }
    return base32.generate(**options)
25
+
26
+
27
def decode_id(encoded: str, checksum=True) -> int:
    """Turn a string ID (as produced by `generate_id`) back into its number.

    Before decoding, the string is normalized: it is lowercased, hyphens are
    removed, and common user typos are corrected ({I,i,l,L}=>1 and {O,o}=>0).

    When `checksum` is enabled, a checksum mismatch raises a ValueError.

    :param encoded: string to decode
    :param checksum: extract checksum and validate
    :returns: original number.
    """
    number = base32.decode(encoded=encoded, checksum=checksum)
    return number
40
+
41
+
42
def encode_id(number: int, split_every=4, min_length=10, checksum=True) -> str:
    """Encodes `number` to URI-friendly Douglas Crockford base32 string.

    This is the counterpart of `decode_id`. The result is derived from
    `number` (it is not random), and the encoding itself is delegated to
    `base32.encode`.

    :param number: number to encode
    :param split_every: if provided, insert '-' every `split_every` characters
        going from left to right
    :param min_length: 0-pad beginning of string to obtain minimum desired length
    :param checksum: append modulo 97-10 (ISO 7064) checksum to string
    :returns: A Douglas Crockford base32 encoded string composed only
        of valid URI characters.
    """
    # The return annotation is `str`: the function hands back the encoded
    # identifier, not a number.
    return base32.encode(
        number, split_every=split_every, min_length=min_length, checksum=checksum
    )
56
+
57
+
58
# sping: "semi-opaque string" (https://n2t.net/e/n2t_apidoc.html).
#
# Each entry pairs an identifier length `n` (a count of base32 characters,
# 5 bits apiece) with half of the number of values expressible in `n`
# characters, i.e. (2 ** (5 * n)) // 2 == 2 ** (5 * n - 1).
#
# Note: The result is always the following list of tuples:
# ```
# [
#     ( 2,             512),
#     ( 4,          524288),
#     ( 6,       536870912),
#     ( 8,    549755813888),
#     (10, 562949953421312)
# ]
# ```
SPING_SIZE_THRESHOLDS = [
    (n_chars, 2 ** (5 * n_chars - 1)) for n_chars in (2, 4, 6, 8, 10)
]
71
+
72
+
73
def collection_name(naa, shoulder):
    r"""
    Build a string designed to be used as a MongoDB collection name.

    The name has the form ``ids_<naa>_<shoulder>``.

    :param naa: first component of the name; presumably the ARK-style
        "name assigning authority" (e.g. "nmdc") -- TODO confirm and expand.
    :param shoulder: second component of the name; presumably the identifier
        "shoulder" (prefix) the collection belongs to -- TODO confirm.
    :returns: the collection name as a string
    """
    return "ids_{}_{}".format(naa, shoulder)
80
+
81
+
82
def generate_ids(
    mdb: MongoDatabase,
    owner: str,
    populator: str,
    number: int,
    ns: str = "",
    naa: str = "nmdc",
    shoulder: str = "fk4",
) -> List[str]:
    r"""
    Reserve `number` new identifiers in MongoDB and return them.

    Random candidate IDs are generated, candidates whose decoded value already
    exists as an `_id` in the collection named by `collection_name(naa, shoulder)`
    are discarded, and the remainder are inserted as "reserved" identifier
    documents. This repeats until `number` identifiers have been inserted.

    :param mdb: MongoDB database holding the identifier collections
    :param owner: stored in each document's `__ao` ("owner") field
    :param populator: stored in each document's `who` field
    :param number: how many identifiers to reserve; must not be negative
    :param ns: optional namespace; when non-empty, each document's `what`
        field is `"{ns}/{eid}"`, otherwise a "work in progress" placeholder
    :param naa: prefix of the returned identifiers (the part before the ":")
        and part of the collection name
    :param shoulder: placed between the ":" and the generated ID in the
        returned identifiers; also part of the collection name
    :returns: the `where` values of the inserted documents, i.e. strings of
        the form `"{naa}:{shoulder}{eid}"`
    :raises ValueError: if `number` is negative
    """
    # A negative `number` could never be satisfied and would otherwise make
    # the loop below spin forever; zero needs no database round trip.
    if number < 0:
        raise ValueError(f"number must be non-negative, got {number}")
    if number == 0:
        return []

    collection = mdb.get_collection(collection_name(naa, shoulder))
    estimated_document_count = collection.estimated_document_count()
    # Pick the shortest ID length whose threshold still exceeds the expected
    # collection size after this call; fall back to 12 beyond the last threshold.
    n_chars = next(
        (
            n
            for n, t in SPING_SIZE_THRESHOLDS
            if (number + estimated_document_count) < t
        ),
        12,
    )
    collected = []

    while len(collected) < number:
        # Generate exactly as many distinct candidates as are still missing,
        # so `collected` can never overshoot `number`.
        eids = set()
        n_to_generate = number - len(collected)
        while len(eids) < n_to_generate:
            # Two extra characters are requested for the checksum.
            eids.add(generate_id(length=(n_chars + 2), split_every=0, checksum=True))
        eids = list(eids)
        deids = [decode_id(eid) for eid in eids]
        taken = {d["_id"] for d in collection.find({"_id": {"$in": deids}}, {"_id": 1})}
        not_taken = [
            (eid, eid_decoded)
            for eid, eid_decoded in zip(eids, deids)
            if eid_decoded not in taken
        ]
        if not not_taken:
            # Every candidate collided with an existing ID; try a fresh batch.
            continue

        # One timestamp per batch keeps `when` and `__ac` identical.
        timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
        # All attribute names beginning with "__a" are reserved...
        # https://github.com/jkunze/n2t-eggnog/blob/0f0f4c490e6dece507dba710d3557e29b8f6627e/egg#L1882
        # XXX mongo is a pain with '.'s in field names, so not using e.g. "_.e" names.
        docs = [
            {
                "@context": "https://n2t.net/e/n2t_apidoc.html#identifier-metadata",
                "_id": eid_decoded,
                "who": populator,
                "what": (f"{ns}/{eid}" if ns else "(:tba) Work in progress"),
                "when": timestamp,
                "how": shoulder,
                "where": f"{naa}:{shoulder}{eid}",
                "__as": "reserved",  # status, public|reserved|unavailable
                "__ao": owner,  # owner
                "__ac": timestamp,  # created
            }
            for eid, eid_decoded in not_taken
        ]
        collection.insert_many(docs)
        collected.extend(docs)
    return [d["where"] for d in collected]
145
+
146
+
147
def generate_one_id(
    mdb: MongoDatabase = None,
    ns: str = "",
    shoulder: str = "sys0",
) -> str:
    """Reserve a single unique Crockford Base32-encoded ID in the mdb repository.

    The ID is requested from `generate_ids` with "_system" as both owner and
    populator and "nmdc" as the `naa`.

    The ID can be associated with namespace `ns` to facilitate ID
    deletion/recycling.

    """
    system_agent = "_system"
    minted = generate_ids(
        mdb,
        owner=system_agent,
        populator=system_agent,
        number=1,
        ns=ns,
        naa="nmdc",
        shoulder=shoulder,
    )
    return minted[0]
166
+
167
+
168
def local_part(id_):
    """Return everything after the first ":" of an ID (nmdc:fk0123 -> fk0123)."""
    pieces = id_.split(":", maxsplit=1)
    # pieces[0] is the prefix before the first colon; pieces[1] is the rest.
    return pieces[1]