nmdc-runtime 2.7.0__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/config.py +57 -1
- nmdc_runtime/mongo_util.py +90 -0
- nmdc_runtime/site/export/ncbi_xml.py +98 -27
- nmdc_runtime/site/export/ncbi_xml_utils.py +27 -25
- nmdc_runtime/site/graphs.py +72 -9
- nmdc_runtime/site/ops.py +408 -65
- nmdc_runtime/site/repair/database_updater.py +210 -1
- nmdc_runtime/site/repository.py +107 -6
- nmdc_runtime/site/resources.py +17 -4
- nmdc_runtime/site/translation/gold_translator.py +18 -9
- nmdc_runtime/site/translation/neon_benthic_translator.py +1 -0
- nmdc_runtime/site/translation/neon_soil_translator.py +1 -0
- nmdc_runtime/site/translation/neon_surface_water_translator.py +1 -0
- nmdc_runtime/site/translation/submission_portal_translator.py +62 -0
- nmdc_runtime/util.py +53 -267
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/METADATA +18 -3
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/RECORD +21 -20
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.7.0.dist-info → nmdc_runtime-2.9.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/util.py
CHANGED
|
@@ -3,36 +3,28 @@ import mimetypes
|
|
|
3
3
|
import os
|
|
4
4
|
import pkgutil
|
|
5
5
|
from collections.abc import Iterable
|
|
6
|
-
from contextlib import AbstractContextManager
|
|
7
6
|
from copy import deepcopy
|
|
8
7
|
from datetime import datetime, timezone
|
|
9
8
|
from functools import lru_cache
|
|
10
9
|
from io import BytesIO
|
|
11
10
|
from itertools import chain
|
|
12
11
|
from pathlib import Path
|
|
13
|
-
from uuid import uuid4
|
|
14
|
-
from typing import List, Optional, Set, Dict
|
|
12
|
+
from typing import Callable, List, Optional, Set, Dict
|
|
15
13
|
|
|
16
14
|
import fastjsonschema
|
|
17
15
|
import requests
|
|
18
16
|
from frozendict import frozendict
|
|
19
|
-
from jsonschema.validators import Draft7Validator
|
|
20
17
|
from linkml_runtime import linkml_model
|
|
21
18
|
from linkml_runtime.utils.schemaview import SchemaView
|
|
22
|
-
from nmdc_schema.nmdc import Database as NMDCDatabase
|
|
23
19
|
from nmdc_schema.get_nmdc_view import ViewGetter
|
|
24
|
-
from pydantic import Field, BaseModel
|
|
25
20
|
from pymongo.database import Database as MongoDatabase
|
|
26
21
|
from pymongo.errors import OperationFailure
|
|
27
22
|
from refscan.lib.helpers import identify_references
|
|
28
|
-
from refscan.lib.Finder import Finder
|
|
29
23
|
from refscan.lib.ReferenceList import ReferenceList
|
|
30
|
-
from refscan.scanner import scan_outgoing_references
|
|
31
|
-
from toolz import merge, unique
|
|
24
|
+
from toolz import merge
|
|
32
25
|
|
|
33
26
|
from nmdc_runtime.api.core.util import sha256hash_from_file
|
|
34
27
|
from nmdc_runtime.api.models.object import DrsObjectIn
|
|
35
|
-
from typing_extensions import Annotated
|
|
36
28
|
|
|
37
29
|
|
|
38
30
|
def get_names_of_classes_in_effective_range_of_slot(
|
|
@@ -499,6 +491,11 @@ def populated_schema_collection_names_with_id_field(mdb: MongoDatabase) -> List[
|
|
|
499
491
|
|
|
500
492
|
def ensure_unique_id_indexes(mdb: MongoDatabase):
|
|
501
493
|
"""Ensure that any collections with an "id" field have an index on "id"."""
|
|
494
|
+
|
|
495
|
+
# Note: The pipe (i.e. `|`) operator performs a union of the two sets. In this case,
|
|
496
|
+
# it creates a set (i.e. `candidate_names`) consisting of the names of both
|
|
497
|
+
# (a) all collections in the real database, and (b) all collections that
|
|
498
|
+
# the NMDC schema says can contain instances of classes that have an "id" slot.
|
|
502
499
|
candidate_names = (
|
|
503
500
|
set(mdb.list_collection_names()) | schema_collection_names_with_id_field()
|
|
504
501
|
)
|
|
@@ -510,269 +507,58 @@ def ensure_unique_id_indexes(mdb: MongoDatabase):
|
|
|
510
507
|
collection_name in schema_collection_names_with_id_field()
|
|
511
508
|
or all_docs_have_unique_id(mdb[collection_name])
|
|
512
509
|
):
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
upsert: bool = False
|
|
520
|
-
multi: bool = False
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
class DeleteStatement(BaseModel):
|
|
524
|
-
q: dict
|
|
525
|
-
limit: Annotated[int, Field(ge=0, le=1)] = 1
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
class OverlayDBError(Exception):
|
|
529
|
-
pass
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
class OverlayDB(AbstractContextManager):
|
|
533
|
-
"""Provides a context whereby a base Database is overlaid with a temporary one.
|
|
534
|
-
|
|
535
|
-
If you need to run basic simulations of updates to a base database,
|
|
536
|
-
you don't want to actually commit transactions to the base database.
|
|
537
|
-
|
|
538
|
-
For example, to insert or replace (matching on "id") many documents into a collection in order
|
|
539
|
-
to then validate the resulting total set of collection documents, an OverlayDB writes to
|
|
540
|
-
an overlay collection that "shadows" the base collection during a "find" query
|
|
541
|
-
(the "merge_find" method of an OverlayDB object): if a document with `id0` is found in the
|
|
542
|
-
overlay collection, that id is marked as "seen" and will not also be returned when
|
|
543
|
-
subsequently scanning the (unmodified) base-database collection.
|
|
544
|
-
|
|
545
|
-
Note: The OverlayDB object does not provide a means to perform arbitrary MongoDB queries on the virtual "merged"
|
|
546
|
-
database. Callers can access the real database via `overlay_db._bottom_db` and the overlaying database via
|
|
547
|
-
`overlay_db._top_db` and perform arbitrary MongoDB queries on the individual databases that way. Access to
|
|
548
|
-
the virtual "merged" database is limited to the methods of the `OverlayDB` class, which simulates the
|
|
549
|
-
"merging" just-in-time to process the method invocation. You can see an example of this in the implementation
|
|
550
|
-
of the `merge_find` method, which internally accesses both the real database and the overlaying database.
|
|
551
|
-
|
|
552
|
-
Mongo "update" commands (as the "apply_updates" method) are simulated by first copying affected
|
|
553
|
-
documents from a base collection to the overlay, and then applying the updates to the overlay,
|
|
554
|
-
so that again, base collections are unmodified, and a "merge_find" call will produce a result
|
|
555
|
-
*as if* the base collection(s) were modified.
|
|
510
|
+
# Check if index already exists, and if so, drop it if not unique
|
|
511
|
+
try:
|
|
512
|
+
existing_indexes = list(mdb[collection_name].list_indexes())
|
|
513
|
+
id_index = next(
|
|
514
|
+
(idx for idx in existing_indexes if idx["name"] == "id_1"), None
|
|
515
|
+
)
|
|
556
516
|
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
517
|
+
if id_index:
|
|
518
|
+
# If index exists but isn't unique, drop it so we can recreate
|
|
519
|
+
if not id_index.get("unique", False):
|
|
520
|
+
mdb[collection_name].drop_index("id_1")
|
|
521
|
+
|
|
522
|
+
# Create index with unique constraint
|
|
523
|
+
mdb[collection_name].create_index("id", unique=True)
|
|
524
|
+
except OperationFailure as e:
|
|
525
|
+
# If error is about index with same name, just continue
|
|
526
|
+
if "An existing index has the same name" in str(e):
|
|
527
|
+
continue
|
|
528
|
+
else:
|
|
529
|
+
# Re-raise other errors
|
|
530
|
+
raise
|
|
561
531
|
|
|
562
|
-
Usage:
|
|
563
|
-
````
|
|
564
|
-
with OverlayDB(mdb) as odb:
|
|
565
|
-
# do stuff, e.g. `odb.replace_or_insert_many(...)`
|
|
566
|
-
```
|
|
567
|
-
"""
|
|
568
532
|
|
|
569
|
-
|
|
570
|
-
self._bottom_db = mdb
|
|
571
|
-
self._top_db = self._bottom_db.client.get_database(f"overlay-{uuid4()}")
|
|
572
|
-
ensure_unique_id_indexes(self._top_db)
|
|
573
|
-
|
|
574
|
-
def __enter__(self):
|
|
575
|
-
return self
|
|
576
|
-
|
|
577
|
-
def __exit__(self, exc_type, exc_value, traceback):
|
|
578
|
-
self._bottom_db.client.drop_database(self._top_db.name)
|
|
579
|
-
|
|
580
|
-
def replace_or_insert_many(self, coll_name, documents: list):
|
|
581
|
-
try:
|
|
582
|
-
self._top_db[coll_name].insert_many(documents)
|
|
583
|
-
except OperationFailure as e:
|
|
584
|
-
raise OverlayDBError(str(e.details))
|
|
585
|
-
|
|
586
|
-
def apply_updates(self, coll_name, updates: list):
|
|
587
|
-
"""prepare overlay db and apply updates to it."""
|
|
588
|
-
assert all(UpdateStatement(**us) for us in updates)
|
|
589
|
-
for update_spec in updates:
|
|
590
|
-
for bottom_doc in self._bottom_db[coll_name].find(update_spec["q"]):
|
|
591
|
-
self._top_db[coll_name].insert_one(bottom_doc)
|
|
592
|
-
try:
|
|
593
|
-
self._top_db.command({"update": coll_name, "updates": updates})
|
|
594
|
-
except OperationFailure as e:
|
|
595
|
-
raise OverlayDBError(str(e.details))
|
|
596
|
-
|
|
597
|
-
def delete(self, coll_name, deletes: list):
|
|
598
|
-
""" "apply" delete command by flagging docs in overlay database"""
|
|
599
|
-
assert all(DeleteStatement(**us) for us in deletes)
|
|
600
|
-
for delete_spec in deletes:
|
|
601
|
-
for bottom_doc in self._bottom_db[coll_name].find(
|
|
602
|
-
delete_spec["q"], limit=delete_spec.get("limit", 1)
|
|
603
|
-
):
|
|
604
|
-
bottom_doc["_deleted"] = True
|
|
605
|
-
self._top_db[coll_name].insert_one(bottom_doc)
|
|
606
|
-
|
|
607
|
-
def merge_find(self, coll_name, find_spec: dict):
|
|
608
|
-
"""Yield docs first from overlay and then from base db, minding deletion flags."""
|
|
609
|
-
# ensure projection of "id" and "_deleted"
|
|
610
|
-
if "projection" in find_spec:
|
|
611
|
-
proj = find_spec["projection"]
|
|
612
|
-
if isinstance(proj, dict):
|
|
613
|
-
proj = merge(proj, {"id": 1, "_deleted": 1})
|
|
614
|
-
elif isinstance(proj, list):
|
|
615
|
-
proj = list(unique(proj + ["id", "_deleted"]))
|
|
616
|
-
|
|
617
|
-
top_docs = self._top_db[coll_name].find(**find_spec)
|
|
618
|
-
bottom_docs = self._bottom_db[coll_name].find(**find_spec)
|
|
619
|
-
top_seen_ids = set()
|
|
620
|
-
for doc in top_docs:
|
|
621
|
-
if not doc.get("_deleted"):
|
|
622
|
-
yield doc
|
|
623
|
-
top_seen_ids.add(doc["id"])
|
|
624
|
-
|
|
625
|
-
for doc in bottom_docs:
|
|
626
|
-
if doc["id"] not in top_seen_ids:
|
|
627
|
-
yield doc
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
def validate_json(
|
|
631
|
-
in_docs: dict, mdb: MongoDatabase, check_inter_document_references: bool = False
|
|
632
|
-
):
|
|
533
|
+
def decorate_if(condition: bool = False) -> Callable:
|
|
633
534
|
r"""
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
is referenced by any of the documents passed in would, indeed, exist
|
|
653
|
-
in the database, if the documents passed in were to be inserted into
|
|
654
|
-
the database. In other words, set this to `True` if you want this
|
|
655
|
-
function to perform referential integrity checks.
|
|
535
|
+
Decorator that applies another decorator only when `condition` is `True`.
|
|
536
|
+
|
|
537
|
+
Note: We implemented this so we could conditionally register
|
|
538
|
+
endpoints with FastAPI's `@router`.
|
|
539
|
+
|
|
540
|
+
Example usages:
|
|
541
|
+
A. Apply the `@router.get` decorator:
|
|
542
|
+
```python
|
|
543
|
+
@decorate_if(True)(router.get("/me"))
|
|
544
|
+
def get_me(...):
|
|
545
|
+
...
|
|
546
|
+
```
|
|
547
|
+
B. Bypass the `@router.get` decorator:
|
|
548
|
+
```python
|
|
549
|
+
@decorate_if(False)(router.get("/me"))
|
|
550
|
+
def get_me(...):
|
|
551
|
+
...
|
|
552
|
+
```
|
|
656
553
|
"""
|
|
657
|
-
validator = Draft7Validator(get_nmdc_jsonschema_dict())
|
|
658
|
-
docs = deepcopy(in_docs)
|
|
659
|
-
validation_errors = {}
|
|
660
554
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
# See: https://github.com/microbiomedata/nmdc-runtime/discussions/858
|
|
666
|
-
if coll_name == "@type" and coll_docs in ("Database", "nmdc:Database"):
|
|
667
|
-
continue
|
|
555
|
+
def apply_original_decorator(original_decorator: Callable) -> Callable:
|
|
556
|
+
def check_condition(original_function: Callable) -> Callable:
|
|
557
|
+
if condition:
|
|
558
|
+
return original_decorator(original_function)
|
|
668
559
|
else:
|
|
669
|
-
|
|
670
|
-
f"'{coll_name}' is not a known schema collection name"
|
|
671
|
-
]
|
|
672
|
-
continue
|
|
560
|
+
return original_function
|
|
673
561
|
|
|
674
|
-
|
|
675
|
-
validation_errors[coll_name] = [e.message for e in errors]
|
|
676
|
-
if coll_docs:
|
|
677
|
-
if not isinstance(coll_docs, list):
|
|
678
|
-
validation_errors[coll_name].append("value must be a list")
|
|
679
|
-
elif not all(isinstance(d, dict) for d in coll_docs):
|
|
680
|
-
validation_errors[coll_name].append(
|
|
681
|
-
"all elements of list must be dicts"
|
|
682
|
-
)
|
|
683
|
-
if not validation_errors[coll_name]:
|
|
684
|
-
try:
|
|
685
|
-
with OverlayDB(mdb) as odb:
|
|
686
|
-
odb.replace_or_insert_many(coll_name, coll_docs)
|
|
687
|
-
except OverlayDBError as e:
|
|
688
|
-
validation_errors[coll_name].append(str(e))
|
|
689
|
-
|
|
690
|
-
if all(len(v) == 0 for v in validation_errors.values()):
|
|
691
|
-
# Second pass. Try instantiating linkml-sourced dataclass
|
|
692
|
-
in_docs.pop("@type", None)
|
|
693
|
-
try:
|
|
694
|
-
NMDCDatabase(**in_docs)
|
|
695
|
-
except Exception as e:
|
|
696
|
-
return {"result": "errors", "detail": str(e)}
|
|
697
|
-
|
|
698
|
-
# Third pass (if enabled): Check inter-document references.
|
|
699
|
-
if check_inter_document_references is True:
|
|
700
|
-
# Prepare to use `refscan`.
|
|
701
|
-
#
|
|
702
|
-
# Note: We check the inter-document references in two stages, which are:
|
|
703
|
-
# 1. For each document in the JSON payload, check whether each document it references already exists
|
|
704
|
-
# (in the collections the schema says it can exist in) in the database. We use the
|
|
705
|
-
# `refscan` package to do this, which returns violation details we'll use in the second stage.
|
|
706
|
-
# 2. For each violation found in the first stage (i.e. each reference to a not-found document), we
|
|
707
|
-
# check whether that document exists (in the collections the schema says it can exist in) in the
|
|
708
|
-
# JSON payload. If it does, then we "waive" (i.e. discard) that violation.
|
|
709
|
-
# The violations that remain after those two stages are the ones we return to the caller.
|
|
710
|
-
#
|
|
711
|
-
# Note: The reason we do not insert documents into an `OverlayDB` and scan _that_, is that the `OverlayDB`
|
|
712
|
-
# does not provide a means to perform arbitrary queries against its virtual "merged" database. It
|
|
713
|
-
# is not a drop-in replacement for a pymongo's `Database` class, which is the only thing that
|
|
714
|
-
# `refscan`'s `Finder` class accepts.
|
|
715
|
-
#
|
|
716
|
-
finder = Finder(database=mdb)
|
|
717
|
-
references = get_allowed_references()
|
|
718
|
-
reference_field_names_by_source_class_name = (
|
|
719
|
-
references.get_reference_field_names_by_source_class_name()
|
|
720
|
-
)
|
|
562
|
+
return check_condition
|
|
721
563
|
|
|
722
|
-
|
|
723
|
-
for source_collection_name, documents in in_docs.items():
|
|
724
|
-
for document in documents:
|
|
725
|
-
# Add an `_id` field to the document, since `refscan` requires the document to have one.
|
|
726
|
-
source_document = dict(document, _id=None)
|
|
727
|
-
violations = scan_outgoing_references(
|
|
728
|
-
document=source_document,
|
|
729
|
-
schema_view=nmdc_schema_view(),
|
|
730
|
-
reference_field_names_by_source_class_name=reference_field_names_by_source_class_name,
|
|
731
|
-
references=references,
|
|
732
|
-
finder=finder,
|
|
733
|
-
collection_names=nmdc_database_collection_names(),
|
|
734
|
-
source_collection_name=source_collection_name,
|
|
735
|
-
user_wants_to_locate_misplaced_documents=False,
|
|
736
|
-
)
|
|
737
|
-
|
|
738
|
-
# For each violation, check whether the misplaced document is in the JSON payload, itself.
|
|
739
|
-
for violation in violations:
|
|
740
|
-
can_waive_violation = False
|
|
741
|
-
# Determine which collections can contain the referenced document, based upon
|
|
742
|
-
# the schema class of which this source document is an instance.
|
|
743
|
-
target_collection_names = (
|
|
744
|
-
references.get_target_collection_names(
|
|
745
|
-
source_class_name=violation.source_class_name,
|
|
746
|
-
source_field_name=violation.source_field_name,
|
|
747
|
-
)
|
|
748
|
-
)
|
|
749
|
-
# Check whether the referenced document exists in any of those collections in the JSON payload.
|
|
750
|
-
for json_coll_name, json_coll_docs in in_docs.items():
|
|
751
|
-
if json_coll_name in target_collection_names:
|
|
752
|
-
for json_coll_doc in json_coll_docs:
|
|
753
|
-
if json_coll_doc["id"] == violation.target_id:
|
|
754
|
-
can_waive_violation = True
|
|
755
|
-
break # stop checking
|
|
756
|
-
if can_waive_violation:
|
|
757
|
-
break # stop checking
|
|
758
|
-
if not can_waive_violation:
|
|
759
|
-
violation_as_str = (
|
|
760
|
-
f"Document '{violation.source_document_id}' "
|
|
761
|
-
f"in collection '{violation.source_collection_name}' "
|
|
762
|
-
f"has a field '{violation.source_field_name}' that "
|
|
763
|
-
f"references a document having id "
|
|
764
|
-
f"'{violation.target_id}', but the latter document "
|
|
765
|
-
f"does not exist in any of the collections the "
|
|
766
|
-
f"NMDC Schema says it can exist in."
|
|
767
|
-
)
|
|
768
|
-
validation_errors[source_collection_name].append(
|
|
769
|
-
violation_as_str
|
|
770
|
-
)
|
|
771
|
-
|
|
772
|
-
# If any collection's error list is not empty, return an error response.
|
|
773
|
-
if any(len(v) > 0 for v in validation_errors.values()):
|
|
774
|
-
return {"result": "errors", "detail": validation_errors}
|
|
775
|
-
|
|
776
|
-
return {"result": "All Okay!"}
|
|
777
|
-
else:
|
|
778
|
-
return {"result": "errors", "detail": validation_errors}
|
|
564
|
+
return apply_original_decorator
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nmdc_runtime
|
|
3
|
-
Version: 2.7.0
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: A runtime system for NMDC data management and orchestration
|
|
5
5
|
Home-page: https://github.com/microbiomedata/nmdc-runtime
|
|
6
6
|
Author: Donny Winston
|
|
@@ -127,7 +127,8 @@ source .env
|
|
|
127
127
|
set +a
|
|
128
128
|
```
|
|
129
129
|
|
|
130
|
-
If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on
|
|
130
|
+
If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on
|
|
131
|
+
the NERSC network—set up the SSH tunnel.
|
|
131
132
|
|
|
132
133
|
The following command could be useful to you, either directly or as a template (see `Makefile`).
|
|
133
134
|
|
|
@@ -149,6 +150,19 @@ The Dagit web server is viewable at http://127.0.0.1:3000/.
|
|
|
149
150
|
The FastAPI service is viewable at http://127.0.0.1:8000/ -- e.g., rendered documentation at
|
|
150
151
|
http://127.0.0.1:8000/redoc/.
|
|
151
152
|
|
|
153
|
+
|
|
154
|
+
* NOTE: Any time you add or change requirements in requirements/main.in or requirements/dev.in, you must run:
|
|
155
|
+
```
|
|
156
|
+
pip-compile --build-isolation --allow-unsafe --resolver=backtracking --strip-extras --output-file requirements/[main|dev].txt requirements/[main|dev].in
|
|
157
|
+
```
|
|
158
|
+
to generate main.txt and dev.txt files respectively. main.in is kind of like a poetry dependency stanza, dev.in is kind
|
|
159
|
+
of like poetry dev.dependencies stanza. main.txt and dev.txt are kind of like poetry.lock files to specify the exact
|
|
160
|
+
versions of dependencies to use. main.txt and dev.txt are combined in the docker compose build process to create the
|
|
161
|
+
final requirements.txt file and import the dependencies into the Docker image.
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
|
|
152
166
|
## Local Testing
|
|
153
167
|
|
|
154
168
|
Tests can be found in `tests` and are run with the following commands:
|
|
@@ -160,12 +174,13 @@ make test
|
|
|
160
174
|
# Run a Specific test file eg. tests/test_api/test_endpoints.py
|
|
161
175
|
make test ARGS="tests/test_api/test_endpoints.py"
|
|
162
176
|
```
|
|
177
|
+
docker compose --file docker-compose.test.yml run test
|
|
163
178
|
|
|
164
179
|
As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
|
|
165
180
|
desired and does not break over time.
|
|
166
181
|
|
|
167
182
|
[For hints on how to write tests for solids and pipelines in Dagster, see their documentation
|
|
168
|
-
tutorial on Testing](https://docs.dagster.io/
|
|
183
|
+
tutorial on Testing](https://docs.dagster.io/guides/test/unit-testing-assets-and-ops).
|
|
169
184
|
|
|
170
185
|
### RAM usage
|
|
171
186
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
nmdc_runtime/config.py,sha256=
|
|
2
|
+
nmdc_runtime/config.py,sha256=CW6LnN8Idsbra_mZnHU-kcWsYBZWbgivqVEp8rpOMi4,1989
|
|
3
3
|
nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
|
|
4
4
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
nmdc_runtime/
|
|
5
|
+
nmdc_runtime/mongo_util.py,sha256=7NRvqFE8W2CUcpcXAA4KElUACIdAkBehZ9TBG4k7zNE,3000
|
|
6
|
+
nmdc_runtime/util.py,sha256=Rw-OiQDHrz4cNX3ZdC-cgfHYUMq1qsk-_Mv81UrDlC8,19823
|
|
6
7
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
9
|
nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
|
|
@@ -36,10 +37,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
36
37
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
38
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
38
39
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
40
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
41
|
-
nmdc_runtime/site/repository.py,sha256=
|
|
42
|
-
nmdc_runtime/site/resources.py,sha256=
|
|
40
|
+
nmdc_runtime/site/graphs.py,sha256=CWbLLtoaakmNgSoaQWylXvcOY6qS7qwkTexEUDiMNfM,18295
|
|
41
|
+
nmdc_runtime/site/ops.py,sha256=y6bBJhAytrSqt0COkOqXVKgfSGVdgQ7uByUP8S-zUB4,63935
|
|
42
|
+
nmdc_runtime/site/repository.py,sha256=g0bZytvCrUjLpWuvkAzzmI16mChsrYPbWcvVFPNZFnM,47687
|
|
43
|
+
nmdc_runtime/site/resources.py,sha256=dLNtNa4FfSKN_6b21eItn-i8e0ZHyveoBsexl2I6zmo,20144
|
|
43
44
|
nmdc_runtime/site/util.py,sha256=h70UJCT9g-I63EJn0drZjv1iaQ8LHJTbG29R9kqJ04c,1821
|
|
44
45
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
@@ -51,23 +52,23 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
51
52
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
52
53
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
53
54
|
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
nmdc_runtime/site/export/ncbi_xml.py,sha256=
|
|
55
|
-
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=
|
|
55
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=iZQHBr3LL5Q32I2L_Xpfp9n4ZtgAz_MwrlxIF5do7Pw,29715
|
|
56
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=RnoAW0HQwBG6JR63d9muI18RIC114wnX3iYPqOllw44,10700
|
|
56
57
|
nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
|
|
57
58
|
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
59
|
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
59
60
|
nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
-
nmdc_runtime/site/repair/database_updater.py,sha256=
|
|
61
|
+
nmdc_runtime/site/repair/database_updater.py,sha256=a6POYZcLEl0JvnuWxPjaOJtwZjkJhhvvUg1ABhnBiP8,21268
|
|
61
62
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
63
|
nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
|
|
63
64
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
64
|
-
nmdc_runtime/site/translation/gold_translator.py,sha256=
|
|
65
|
+
nmdc_runtime/site/translation/gold_translator.py,sha256=n7PrAyZb6ODG1uaZ0cay91DygAHIefOL2qXLuukOyIM,33075
|
|
65
66
|
nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
|
|
66
|
-
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=
|
|
67
|
-
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=
|
|
68
|
-
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=
|
|
67
|
+
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=8_QF75Gf-dc2xVeO6jzTmdDrlGdh1-QrLJKG2SwUhCA,23797
|
|
68
|
+
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=IMeq4ABgWaSUbB_gmG8vBCMeynQSlbCUw9p2be6o8kE,38620
|
|
69
|
+
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=Js8_r6vHBW8b-_BpFySTUuYOFe7r51k8HwaNCQ7nAAg,30587
|
|
69
70
|
nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
|
|
70
|
-
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=
|
|
71
|
+
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=d5ycQhd-I07iUeuqN0vcHvMkOHqrwB67j2Q64aFkKBw,44147
|
|
71
72
|
nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
|
|
72
73
|
nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
|
|
73
74
|
nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -75,9 +76,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
|
|
|
75
76
|
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
76
77
|
nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
|
|
77
78
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
78
|
-
nmdc_runtime-2.
|
|
79
|
-
nmdc_runtime-2.
|
|
80
|
-
nmdc_runtime-2.
|
|
81
|
-
nmdc_runtime-2.
|
|
82
|
-
nmdc_runtime-2.
|
|
83
|
-
nmdc_runtime-2.
|
|
79
|
+
nmdc_runtime-2.9.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
80
|
+
nmdc_runtime-2.9.0.dist-info/METADATA,sha256=4NgNI-Et3t1WLDfZPbSFT18JnMBVEuSCoFAZbm_V0xk,8953
|
|
81
|
+
nmdc_runtime-2.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
82
|
+
nmdc_runtime-2.9.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
83
|
+
nmdc_runtime-2.9.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
84
|
+
nmdc_runtime-2.9.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|