nmdc-runtime 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

@@ -42,6 +42,8 @@ from nmdc_runtime.site.graphs import (
42
42
  ingest_neon_soil_metadata,
43
43
  ingest_neon_benthic_metadata,
44
44
  ingest_neon_surface_water_metadata,
45
+ ensure_alldocs,
46
+ nmdc_study_to_ncbi_submission_export,
45
47
  )
46
48
  from nmdc_runtime.site.resources import (
47
49
  get_mongo,
@@ -50,7 +52,6 @@ from nmdc_runtime.site.resources import (
50
52
  nmdc_portal_api_client_resource,
51
53
  gold_api_client_resource,
52
54
  neon_api_client_resource,
53
- terminus_resource,
54
55
  mongo_resource,
55
56
  )
56
57
  from nmdc_runtime.site.resources import (
@@ -68,7 +69,6 @@ resource_defs = {
68
69
  "nmdc_portal_api_client": nmdc_portal_api_client_resource,
69
70
  "gold_api_client": gold_api_client_resource,
70
71
  "neon_api_client": neon_api_client_resource,
71
- "terminus": terminus_resource,
72
72
  "mongo": mongo_resource,
73
73
  }
74
74
 
@@ -451,6 +451,7 @@ def repo():
451
451
  ensure_jobs.to_job(**preset_normal),
452
452
  apply_metadata_in.to_job(**preset_normal),
453
453
  export_study_biosamples_metadata.to_job(**preset_normal),
454
+ ensure_alldocs.to_job(**preset_normal),
454
455
  ]
455
456
  schedules = [housekeeping_weekly]
456
457
  sensors = [
@@ -515,8 +516,8 @@ def biosample_submission_ingest():
515
516
  "nmdc_portal_api_client": {
516
517
  "config": {
517
518
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
518
- "session_cookie": {
519
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
519
+ "refresh_token": {
520
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
520
521
  },
521
522
  }
522
523
  }
@@ -555,8 +556,8 @@ def biosample_submission_ingest():
555
556
  "nmdc_portal_api_client": {
556
557
  "config": {
557
558
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
558
- "session_cookie": {
559
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
559
+ "refresh_token": {
560
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
560
561
  },
561
562
  }
562
563
  }
@@ -852,6 +853,57 @@ def biosample_submission_ingest():
852
853
  ]
853
854
 
854
855
 
856
+ @repository
857
+ def biosample_export():
858
+ normal_resources = run_config_frozen__normal_env["resources"]
859
+ return [
860
+ nmdc_study_to_ncbi_submission_export.to_job(
861
+ resource_defs=resource_defs,
862
+ config={
863
+ "resources": merge(
864
+ unfreeze(normal_resources),
865
+ {
866
+ "mongo": {
867
+ "config": {
868
+ "host": {"env": "MONGO_HOST"},
869
+ "username": {"env": "MONGO_USERNAME"},
870
+ "password": {"env": "MONGO_PASSWORD"},
871
+ "dbname": {"env": "MONGO_DBNAME"},
872
+ },
873
+ },
874
+ "runtime_api_site_client": {
875
+ "config": {
876
+ "base_url": {"env": "API_HOST"},
877
+ "client_id": {"env": "API_SITE_CLIENT_ID"},
878
+ "client_secret": {"env": "API_SITE_CLIENT_SECRET"},
879
+ "site_id": {"env": "API_SITE_ID"},
880
+ },
881
+ },
882
+ },
883
+ ),
884
+ "ops": {
885
+ "get_ncbi_export_pipeline_study": {
886
+ "config": {
887
+ "nmdc_study_id": "",
888
+ }
889
+ },
890
+ "get_ncbi_export_pipeline_inputs": {
891
+ "config": {
892
+ "nmdc_ncbi_attribute_mapping_file_url": "",
893
+ "ncbi_submission_metadata": {
894
+ "organization": "",
895
+ },
896
+ "ncbi_biosample_metadata": {
897
+ "organism_name": "",
898
+ },
899
+ }
900
+ },
901
+ },
902
+ },
903
+ ),
904
+ ]
905
+
906
+
855
907
  # @repository
856
908
  # def validation():
857
909
  # graph_jobs = [validate_jgi_job, validate_gold_job, validate_emsl_job]
@@ -19,7 +19,6 @@ from frozendict import frozendict
19
19
  from linkml_runtime.dumpers import json_dumper
20
20
  from pydantic import BaseModel, AnyUrl
21
21
  from pymongo import MongoClient, ReplaceOne, InsertOne
22
- from terminusdb_client import WOQLClient
23
22
  from toolz import get_in
24
23
  from toolz import merge
25
24
 
@@ -372,16 +371,37 @@ def gold_api_client_resource(context: InitResourceContext):
372
371
 
373
372
  @dataclass
374
373
  class NmdcPortalApiClient:
374
+
375
375
  base_url: str
376
- # Using a cookie for authentication is not ideal and should be replaced
377
- # when this API has an another authentication method
378
- session_cookie: str
376
+ refresh_token: str
377
+ access_token: Optional[str] = None
378
+ access_token_expires_at: Optional[datetime] = None
379
+
380
+ def _request(self, method: str, endpoint: str, **kwargs):
381
+ r"""
382
+ Submits a request to the specified API endpoint;
383
+ after refreshing the access token, if necessary.
384
+ """
385
+ if self.access_token is None or datetime.now() > self.access_token_expires_at:
386
+ refresh_response = requests.post(
387
+ f"{self.base_url}/auth/refresh",
388
+ json={"refresh_token": self.refresh_token},
389
+ )
390
+ refresh_response.raise_for_status()
391
+ refresh_body = refresh_response.json()
392
+ self.access_token_expires_at = datetime.now() + timedelta(
393
+ seconds=refresh_body["expires_in"]
394
+ )
395
+ self.access_token = refresh_body["access_token"]
379
396
 
380
- def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
381
- response = requests.get(
382
- f"{self.base_url}/api/metadata_submission/{id}",
383
- cookies={"session": self.session_cookie},
397
+ headers = kwargs.get("headers", {})
398
+ headers["Authorization"] = f"Bearer {self.access_token}"
399
+ return requests.request(
400
+ method, f"{self.base_url}{endpoint}", **kwargs, headers=headers
384
401
  )
402
+
403
+ def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
404
+ response = self._request("GET", f"/api/metadata_submission/{id}")
385
405
  response.raise_for_status()
386
406
  return response.json()
387
407
 
@@ -389,13 +409,13 @@ class NmdcPortalApiClient:
389
409
  @resource(
390
410
  config_schema={
391
411
  "base_url": StringSource,
392
- "session_cookie": StringSource,
412
+ "refresh_token": StringSource,
393
413
  }
394
414
  )
395
415
  def nmdc_portal_api_client_resource(context: InitResourceContext):
396
416
  return NmdcPortalApiClient(
397
417
  base_url=context.resource_config["base_url"],
398
- session_cookie=context.resource_config["session_cookie"],
418
+ refresh_token=context.resource_config["refresh_token"],
399
419
  )
400
420
 
401
421
 
@@ -512,33 +532,3 @@ def get_mongo(run_config: frozendict):
512
532
  )
513
533
  )
514
534
  return mongo_resource(resource_context)
515
-
516
-
517
- class TerminusDB:
518
- def __init__(self, server_url, user, key, account, dbid):
519
- self.client = WOQLClient(server_url=server_url)
520
- self.client.connect(user=user, key=key, account=account)
521
- db_info = self.client.get_database(dbid=dbid, account=account)
522
- if db_info is None:
523
- self.client.create_database(dbid=dbid, accountid=account, label=dbid)
524
- self.client.create_graph(graph_type="inference", graph_id="main")
525
- self.client.connect(user=user, key=key, account=account, db=dbid)
526
-
527
-
528
- @resource(
529
- config_schema={
530
- "server_url": StringSource,
531
- "user": StringSource,
532
- "key": StringSource,
533
- "account": StringSource,
534
- "dbid": StringSource,
535
- }
536
- )
537
- def terminus_resource(context):
538
- return TerminusDB(
539
- server_url=context.resource_config["server_url"],
540
- user=context.resource_config["user"],
541
- key=context.resource_config["key"],
542
- account=context.resource_config["account"],
543
- dbid=context.resource_config["dbid"],
544
- )
@@ -13,6 +13,9 @@ from toolz import get_in, groupby, concat, valmap, dissoc
13
13
  from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
14
14
 
15
15
 
16
+ BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
17
+
18
+
16
19
  @lru_cache
17
20
  def _get_schema_view():
18
21
  """Return a SchemaView instance representing the NMDC schema"""
@@ -98,7 +101,9 @@ class SubmissionPortalTranslator(Translator):
98
101
  self.study_pi_image_url = study_pi_image_url
99
102
  self.study_funding_sources = study_funding_sources
100
103
 
101
- self.biosample_extras = group_dicts_by_key("source_mat_id", biosample_extras)
104
+ self.biosample_extras = group_dicts_by_key(
105
+ BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
106
+ )
102
107
  self.biosample_extras_slot_mapping = group_dicts_by_key(
103
108
  "subject_id", biosample_extras_slot_mapping
104
109
  )
@@ -521,7 +526,7 @@ class SubmissionPortalTranslator(Translator):
521
526
  :param default_env_package: Default value for `env_package` slot
522
527
  :return: nmdc:Biosample
523
528
  """
524
- source_mat_id = sample_data[0].get("source_mat_id", "").strip()
529
+ biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
525
530
  slots = {
526
531
  "id": nmdc_biosample_id,
527
532
  "part_of": nmdc_study_id,
@@ -533,7 +538,7 @@ class SubmissionPortalTranslator(Translator):
533
538
  slots.update(transformed_tab)
534
539
 
535
540
  if self.biosample_extras:
536
- raw_extras = self.biosample_extras.get(source_mat_id)
541
+ raw_extras = self.biosample_extras.get(biosample_key)
537
542
  if raw_extras:
538
543
  transformed_extras = self._transform_dict_for_class(
539
544
  raw_extras, "Biosample", self.biosample_extras_slot_mapping
@@ -564,7 +569,9 @@ class SubmissionPortalTranslator(Translator):
564
569
 
565
570
  sample_data = metadata_submission_data.get("sampleData", {})
566
571
  package_name = metadata_submission_data["packageName"]
567
- sample_data_by_id = groupby("source_mat_id", concat(sample_data.values()))
572
+ sample_data_by_id = groupby(
573
+ BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
574
+ )
568
575
  nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
569
576
  sample_data_to_nmdc_biosample_ids = dict(
570
577
  zip(sample_data_by_id.keys(), nmdc_biosample_ids)
@@ -583,15 +590,15 @@ class SubmissionPortalTranslator(Translator):
583
590
 
584
591
  if self.omics_processing_mapping:
585
592
  # If there is data from an OmicsProcessing mapping file, process it now. This part
586
- # assumes that there is a column in that file with the header __biosample_source_mat_id
593
+ # assumes that there is a column in that file with the header __biosample_samp_name
587
594
  # that can be used to join with the sample data from the submission portal. The
588
- # biosample identified by that `source_mat_id` will be referenced in the `has_input`
595
+ # biosample identified by that `samp_name` will be referenced in the `has_input`
589
596
  # slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
590
597
  # those objects will also be generated and referenced in the `has_output` slot of the
591
- # OmicsProcessing object. By keying off of the `source_mat_id` slot of the submission's
598
+ # OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
592
599
  # sample data there is an implicit 1:1 relationship between Biosample objects and
593
600
  # OmicsProcessing objects generated here.
594
- join_key = "__biosample_source_mat_id"
601
+ join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}"
595
602
  database.omics_processing_set = []
596
603
  database.data_object_set = []
597
604
  data_objects_by_sample_data_id = {}
@@ -617,7 +624,7 @@ class SubmissionPortalTranslator(Translator):
617
624
  or sample_data_id not in sample_data_to_nmdc_biosample_ids
618
625
  ):
619
626
  logging.warning(
620
- f"Unrecognized biosample source_mat_id: {sample_data_id}"
627
+ f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}"
621
628
  )
622
629
  continue
623
630
  nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id]
nmdc_runtime/util.py CHANGED
@@ -16,7 +16,7 @@ import fastjsonschema
16
16
  import requests
17
17
  from frozendict import frozendict
18
18
  from jsonschema.validators import Draft7Validator
19
- from nmdc_schema.nmdc_schema_accepting_legacy_ids import Database as NMDCDatabase
19
+ from nmdc_schema.nmdc import Database as NMDCDatabase
20
20
  from nmdc_schema.get_nmdc_view import ViewGetter
21
21
  from pydantic import Field, BaseModel
22
22
  from pymongo.database import Database as MongoDatabase
@@ -376,6 +376,24 @@ collection_name_to_class_names: Dict[str, List[str]] = {
376
376
  }
377
377
 
378
378
 
379
+ def class_hierarchy_as_list(obj) -> list[str]:
380
+ """
381
+ get list of inherited classes for each concrete class
382
+ """
383
+ rv = []
384
+ current_class = obj.__class__
385
+
386
+ def recurse_through_bases(cls):
387
+ if cls.__name__ == "YAMLRoot":
388
+ return rv
389
+ rv.append(cls.__name__)
390
+ for base in cls.__bases__:
391
+ recurse_through_bases(base)
392
+ return rv
393
+
394
+ return recurse_through_bases(current_class)
395
+
396
+
379
397
  @lru_cache
380
398
  def schema_collection_names_with_id_field() -> Set[str]:
381
399
  """
@@ -393,6 +411,11 @@ def schema_collection_names_with_id_field() -> Set[str]:
393
411
  return target_collection_names
394
412
 
395
413
 
414
+ def populated_schema_collection_names_with_id_field(mdb: MongoDatabase) -> List[str]:
415
+ collection_names = sorted(schema_collection_names_with_id_field())
416
+ return [n for n in collection_names if mdb[n].find_one({"id": {"$exists": True}})]
417
+
418
+
396
419
  def ensure_unique_id_indexes(mdb: MongoDatabase):
397
420
  """Ensure that any collections with an "id" field have an index on "id"."""
398
421
  candidate_names = (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nmdc_runtime
3
- Version: 1.6.0
3
+ Version: 1.8.0
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -77,18 +77,15 @@ The runtime features:
77
77
  - `schedules` trigger recurring pipeline runs based on time
78
78
  - `sensors` trigger pipeline runs based on external state
79
79
  - Each `pipeline` can declare dependencies on any runtime `resources` or additional
80
- configuration. There are TerminusDB and MongoDB `resources` defined, as well as `preset`
80
+ configuration. There are MongoDB `resources` defined, as well as `preset`
81
81
  configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
82
82
  look to a set of known environment variables to load resources configurations, depending on
83
83
  the `mode`.
84
-
85
- 2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
86
- data.
87
84
 
88
- 3. A MongoDB database supporting write-once, high-throughput internal
85
+ 2. A MongoDB database supporting write-once, high-throughput internal
89
86
  data storage by the nmdc-runtime FastAPI instance.
90
87
 
91
- 4. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
88
+ 3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
92
89
  database, as a hub for data management and workflow automation.
93
90
 
94
91
  ## Local Development
@@ -1,7 +1,7 @@
1
1
  nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
3
3
  nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
4
+ nmdc_runtime/util.py,sha256=nfj1MjZzVaxs9pKrHo6A98yGAzL-jHQ0zTGs_sOkBnM,20531
5
5
  nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -35,10 +35,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
35
35
  nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
37
37
  nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- nmdc_runtime/site/graphs.py,sha256=mOWZvT2Rk4X96RmVAvHQwur-FhNuMWAko3jjRLGygEE,11455
39
- nmdc_runtime/site/ops.py,sha256=YjaH2zqzd01cRcqV0E93RoaWt8T4ExESx4SSszmczZ8,33620
40
- nmdc_runtime/site/repository.py,sha256=QI9Gcjr68-DT2MPwOx87Vkxcwp3ZIOVaFZ9uCO13w9U,35502
41
- nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
38
+ nmdc_runtime/site/graphs.py,sha256=QdmNvdtDLCgpJyKviLUj-IIF1gPS_vYzl1Kzv2mSF4g,12122
39
+ nmdc_runtime/site/ops.py,sha256=btdgcGBwNOFnVCzAa-vO4Gs1lMxgnjcRFd8B28X0who,38222
40
+ nmdc_runtime/site/repository.py,sha256=xTHAfokzbZVqlRFG65VuHxTfZfhyKZskOaCSGyrW_hw,37540
41
+ nmdc_runtime/site/resources.py,sha256=ZSH1yvA-li0R7Abc22_v0XLbjBYf5igETr2G01J3hnc,17557
42
42
  nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
43
43
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
@@ -50,13 +50,11 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
50
50
  nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
51
51
  nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
52
52
  nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- nmdc_runtime/site/export/study_metadata.py,sha256=d3q6RV93B0BA64ZkfCPEHdlrC1P8pm9Sj6SxRDcpB7A,4027
53
+ nmdc_runtime/site/export/ncbi_xml.py,sha256=-GflgZO_Q4Y2rm53QIkI7vYY6pWwCf_l7tolGgTXiBg,21026
54
+ nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=CqrtjwzmUbZXEW8aD-KpnCV_PlXVH-Gqp309nw3vbeo,6464
55
+ nmdc_runtime/site/export/study_metadata.py,sha256=WRU0F1ksWfNX3k9LD91Pn2DuLA-IOpGvYPJd6DnguEs,4819
54
56
  nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
57
  nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
56
- nmdc_runtime/site/terminusdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- nmdc_runtime/site/terminusdb/generate.py,sha256=Z3c06LDx3TGw4pvPRO97caQvzc8SuhGmPIr_d5b_E9I,6144
58
- nmdc_runtime/site/terminusdb/ingest.py,sha256=WE_V4vRRnlL6hIBU1TDSUheYOBWS9d5g6FHPS64jzvM,1245
59
- nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9KNj3cH3tUs,77129
60
58
  nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
59
  nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
62
60
  nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
@@ -66,7 +64,7 @@ nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUC
66
64
  nmdc_runtime/site/translation/neon_soil_translator.py,sha256=cJJ_QPva5G5SIT_7DjCSsqbDvgbiKGqUYrxK3nx7_Lw,37634
67
65
  nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=6LaFwBnVx6TN9v1D-G6LFrDxY0TK05AvMklx0E1tTeQ,26590
68
66
  nmdc_runtime/site/translation/neon_utils.py,sha256=mdxJVPb3zbD4DiKW3Fwgk22kjczKMwkcozvy7fwteTE,5203
69
- nmdc_runtime/site/translation/submission_portal_translator.py,sha256=lHcrfPR5wk3BcZ0Uw5zUyWu5XRVikgOzdzSb5nFVS9I,27964
67
+ nmdc_runtime/site/translation/submission_portal_translator.py,sha256=KiVO1vohhrJGfwzLJOumRfyHjcbYfswBIBvkYIdFxv8,28097
70
68
  nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
71
69
  nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
72
70
  nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,9 +72,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
74
72
  nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
75
73
  nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
76
74
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
77
- nmdc_runtime-1.6.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
78
- nmdc_runtime-1.6.0.dist-info/METADATA,sha256=hKgDLZfx14AX3IWIi3C9vHa9YAP-agU7tsmKZ_kg8JY,7424
79
- nmdc_runtime-1.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
80
- nmdc_runtime-1.6.0.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
81
- nmdc_runtime-1.6.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
82
- nmdc_runtime-1.6.0.dist-info/RECORD,,
75
+ nmdc_runtime-1.8.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
76
+ nmdc_runtime-1.8.0.dist-info/METADATA,sha256=lBQzzEEXtwobBObmYmDogAdFKQMLvSJn3wmjG8lHQ5I,7302
77
+ nmdc_runtime-1.8.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
78
+ nmdc_runtime-1.8.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
79
+ nmdc_runtime-1.8.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
80
+ nmdc_runtime-1.8.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -2,4 +2,3 @@
2
2
  nmdcdb-mongodump = nmdc_runtime.site.backup.nmdcdb_mongodump:main
3
3
  nmdcdb-mongoexport = nmdc_runtime.site.backup.nmdcdb_mongoexport:main
4
4
  nmdcdb-mongoimport = nmdc_runtime.site.backup.nmdcdb_mongoimport:main
5
- schemagen-terminusdb = nmdc_runtime.site.terminusdb.generate:cli
File without changes
@@ -1,198 +0,0 @@
1
- """
2
- Example usage:
3
- $ schemagen-terminusdb ../nmdc-schema/src/schema/nmdc.yaml \
4
- > nmdc_runtime/site/terminusdb/nmdc.schema.terminusdb.json
5
- """
6
-
7
- import json
8
- import os
9
- from typing import Union, TextIO, List
10
-
11
- import click
12
- from linkml.utils.generator import Generator, shared_arguments
13
- from linkml_runtime.linkml_model.meta import (
14
- SchemaDefinition,
15
- ClassDefinition,
16
- SlotDefinition,
17
- )
18
- from linkml_runtime.utils.formatutils import camelcase, be, underscore
19
-
20
- # http://books.xmlschemata.org/relaxng/relax-CHP-19.html
21
- XSD_Ok = {
22
- "xsd:anyURI",
23
- "xsd:base64Binary",
24
- "xsd:boolean",
25
- "xsd:byte",
26
- "xsd:date",
27
- "xsd:dateTime",
28
- "xsd:decimal",
29
- "xsd:double",
30
- "xsd:duration",
31
- "xsd:ENTITIES",
32
- "xsd:ENTITY",
33
- "xsd:float",
34
- "xsd:gDay",
35
- "xsd:gMonth",
36
- "xsd:gMonthDay",
37
- "xsd:gYear",
38
- "xsd:gYearMonth",
39
- "xsd:hexBinary",
40
- "xsd:ID",
41
- "xsd:IDREF",
42
- "xsd:IDREFS",
43
- "xsd:int",
44
- "xsd:integer",
45
- "xsd:language",
46
- "xsd:long",
47
- "xsd:Name",
48
- "xsd:NCName",
49
- "xsd:negativeInteger",
50
- "xsd:NMTOKEN",
51
- "xsd:NMTOKENS",
52
- "xsd:nonNegativeInteger",
53
- "xsd:nonPositiveInteger",
54
- "xsd:normalizedString",
55
- "xsd:NOTATION",
56
- "xsd:positiveInteger",
57
- "xsd:short",
58
- "xsd:string",
59
- "xsd:time",
60
- "xsd:token",
61
- "xsd:unsignedByte",
62
- "xsd:unsignedInt",
63
- "xsd:unsignedLong",
64
- "xsd:unsignedShort",
65
- }
66
-
67
-
68
- def as_list(thing) -> list:
69
- return thing if isinstance(thing, list) else [thing]
70
-
71
-
72
- def has_field(graph: List[dict], cls: dict, field: str) -> bool:
73
- if field in cls:
74
- return True
75
- for parent_id in as_list(cls.get("@inherits", [])):
76
- parent_cls = next(
77
- graph_cls for graph_cls in graph if graph_cls.get("@id") == parent_id
78
- )
79
- if parent_cls and has_field(graph, parent_cls, field):
80
- return True
81
- return False
82
-
83
-
84
- class TerminusdbGenerator(Generator):
85
- """Generates JSON file to pass to WOQLClient.insert_document(..., graph_type="schema")`."""
86
-
87
- generatorname = os.path.basename(__file__)
88
- generatorversion = "0.1.0"
89
- valid_formats = ["json"]
90
- visit_all_class_slots = True
91
-
92
- def __init__(self, schema: Union[str, TextIO, SchemaDefinition], **kwargs) -> None:
93
- super().__init__(schema, **kwargs)
94
- self.graph = []
95
- self.cls_json = {}
96
-
97
- def visit_schema(self, inline: bool = False, **kwargs) -> None:
98
- self.graph.append(
99
- {
100
- "@type": "@context",
101
- "@base": "https://api.microbiomedata.org/nmdcschema/ids/",
102
- "@schema": "https://w3id.org/nmdc/",
103
- }
104
- )
105
-
106
- def end_schema(self, **_) -> None:
107
- for cls in self.graph:
108
- if has_field(self.graph, cls, "id"):
109
- cls["@key"] = {"@type": "Lexical", "@fields": ["id"]}
110
- print(json.dumps(self.graph, indent=2))
111
-
112
- def visit_class(self, cls: ClassDefinition) -> bool:
113
- self.cls_json = {
114
- "@type": "Class",
115
- "@id": camelcase(cls.name),
116
- "@documentation": {
117
- "@comment": be(cls.description),
118
- "@properties": {},
119
- },
120
- }
121
- if cls.is_a:
122
- self.cls_json["@inherits"] = camelcase(cls.is_a)
123
- if cls.abstract:
124
- self.cls_json["@abstract"] = []
125
- return True
126
-
127
- def end_class(self, cls: ClassDefinition) -> None:
128
- self.cls_json["@id"] = cls.definition_uri.split(":")[-1].rpartition("/")[-1]
129
- self.graph.append(self.cls_json)
130
-
131
- # sounding board as solist
132
- # safe space to ask questions. more of a whatsapp group.
133
- # both re: business, how to structure proposals, etc.
134
- # And also technical content suggestions. R data pipeline / copy/paste in Figma
135
- # - how far do you go in automation in delivery
136
-
137
- def visit_class_slot(
138
- self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition
139
- ) -> None:
140
- if slot not in self.own_slots(cls):
141
- return
142
- if slot.is_usage_slot:
143
- # TerminusDB does not support calling different things the same name.
144
- # So, ignore usage overrides.
145
- slot = self.schema.slots[aliased_slot_name]
146
-
147
- if slot.range in self.schema.classes:
148
- rng = camelcase(slot.range)
149
- elif slot.range in self.schema.types:
150
- # XXX Why does `linkml.utils.metamodelcore.Identifier` subclass `str`??
151
- rng = str(self.schema.types[slot.range].uri)
152
- else:
153
- rng = "xsd:string"
154
-
155
- # name = (
156
- # f"{cls.name} {aliased_slot_name}"
157
- # if slot.is_usage_slot
158
- # else aliased_slot_name
159
- # )
160
- name = slot.name
161
- # TODO fork nmdc schema and make any slots NOT required in parent class
162
- # also NOT required in child classes. Can have opt-in entity validation logic in code.
163
-
164
- # XXX MAG bin -> bin name goes to "mAGBin__bin_name", etc. Weird.
165
-
166
- # # translate to terminusdb xsd builtins:
167
- # if rng == "xsd:int":
168
- # rng = "xsd:integer"
169
- # elif rng == "xsd:float":
170
- # rng = "xsd:double"
171
- # elif rng == "xsd:language":
172
- # rng = "xsd:string"
173
-
174
- if rng not in XSD_Ok and slot.range not in self.schema.classes:
175
- raise Exception(
176
- f"slot range for {name} must be schema class or supported xsd type. "
177
- f"Range {rng} is of type {type(rng)}."
178
- )
179
-
180
- self.cls_json[underscore(name)] = rng
181
- self.cls_json["@documentation"]["@properties"][
182
- underscore(name)
183
- ] = slot.description
184
- if not slot.required:
185
- self.cls_json[underscore(name)] = {"@type": "Optional", "@class": rng}
186
- if slot.multivalued: # XXX what about an required multivalued field?
187
- self.cls_json[underscore(name)] = {"@type": "Set", "@class": rng}
188
-
189
-
190
- @shared_arguments(TerminusdbGenerator)
191
- @click.command()
192
- def cli(yamlfile, **args):
193
- """Generate graphql representation of a biolink model"""
194
- print(TerminusdbGenerator(yamlfile, **args).serialize(**args))
195
-
196
-
197
- if __name__ == "__main__":
198
- cli()