nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,1671 +0,0 @@
1
- ####
2
- # This is the script for storing the schema of your TerminusDB
3
- # database for your project.
4
- # Use 'terminusdb commit' to commit changes to the database and
5
- # use 'terminusdb sync' to change this file according to
6
- # the existing database schema
7
- ####
8
- from datetime import datetime
9
- from typing import Optional, Set
10
-
11
- from terminusdb_client.woqlschema import DocumentTemplate, LexicalKey
12
-
13
-
14
- class GenomeFeature(DocumentTemplate):
15
- """A feature localized to an interval along a genome
16
-
17
- Attributes
18
- ----------
19
- encodes : Optional['GeneProduct']
20
- The gene product encoded by this feature. Typically this is used for a CDS feature or gene feature which will encode a protein. It can also be used by a nc transcript or gene feature that encodes a ncRNA
21
- end : int
22
- The end of the feature in positive 1-based integer coordinates
23
- feature_type : Optional[str]
24
- TODO: Yuri to write
25
- phase : Optional[int]
26
- The phase for a coding sequence entity. For example, phase of a CDS as represented in a GFF3 with a value of 0, 1 or 2.
27
- seqid : str
28
- The ID of the landmark used to establish the coordinate system for the current feature.
29
- start : int
30
- The start of the feature in positive 1-based integer coordinates
31
- strand : Optional[str]
32
- The strand on which a feature is located. Has a value of '+' (sense strand or forward strand) or '-' (anti-sense strand or reverse strand).
33
- type : Optional[str]
34
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
35
- """
36
-
37
- encodes: Optional["GeneProduct"]
38
- end: int
39
- feature_type: Optional[str]
40
- phase: Optional[int]
41
- seqid: str
42
- start: int
43
- strand: Optional[str]
44
- type: Optional[str]
45
-
46
-
47
- class AttributeValue(DocumentTemplate):
48
- """The value for any value of an attribute for a sample. This object can hold both the un-normalized atomic value and the structured value
49
-
50
- Attributes
51
- ----------
52
- has_raw_value : Optional[str]
53
- The value that was specified for an annotation in raw form, i.e. a string. E.g. "2 cm" or "2-4 cm"
54
- type : Optional[str]
55
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
56
- was_generated_by : Optional['Activity']
57
- null
58
- """
59
-
60
- has_raw_value: Optional[str]
61
- type: Optional[str]
62
- was_generated_by: Optional["Activity"]
63
-
64
-
65
- class PersonValue(AttributeValue):
66
- """An attribute value representing a person
67
-
68
- Attributes
69
- ----------
70
- email : Optional[str]
71
- An email address for an entity such as a person. This should be the primary email address used.
72
- has_raw_value : Optional[str]
73
- The value that was specified for an annotation in raw form, i.e. a string. E.g. "2 cm" or "2-4 cm"
74
- name : Optional[str]
75
- A human readable label for an entity
76
- orcid : Optional[str]
77
- The ORCID of a person.
78
- profile_image_url : Optional[str]
79
- A url that points to an image of a person.
80
- websites : Set[str]
81
- A list of websites that are associated with the entity.
82
- """
83
-
84
- email: Optional[str]
85
- has_raw_value: Optional[str]
86
- name: Optional[str]
87
- orcid: Optional[str]
88
- profile_image_url: Optional[str]
89
- websites: Set[str]
90
-
91
-
92
- class ReactionParticipant(DocumentTemplate):
93
- """Instances of this link a reaction to a chemical entity participant
94
-
95
- Attributes
96
- ----------
97
- chemical : Optional['ChemicalEntity']
98
- null
99
- stoichiometry : Optional[int]
100
- null
101
- """
102
-
103
- chemical: Optional["ChemicalEntity"]
104
- stoichiometry: Optional[int]
105
-
106
-
107
- class NamedThing(DocumentTemplate):
108
- """a databased entity or concept/class
109
-
110
- Attributes
111
- ----------
112
- alternative_identifiers : Set[str]
113
- A list of alternative identifiers for the entity.
114
- description : Optional[str]
115
- a human-readable description of a thing
116
- id : str
117
- A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI
118
- name : Optional[str]
119
- A human readable label for an entity
120
- """
121
-
122
- _key = LexicalKey(["id"])
123
- _abstract = []
124
- alternative_identifiers: Set[str]
125
- description: Optional[str] = None
126
- id: str
127
- name: Optional[str]
128
-
129
-
130
- class Person(NamedThing):
131
- """represents a person, such as a researcher
132
-
133
- Attributes
134
- ----------
135
- id : str
136
- A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI
137
- """
138
-
139
- _key = LexicalKey(["id"])
140
- id: str
141
-
142
-
143
- class Biosample(NamedThing):
144
- """A material sample. It may be environmental (encompassing many organisms) or isolate or tissue. An environmental sample containing genetic material from multiple individuals is commonly referred to as a biosample.
145
-
146
- Attributes
147
- ----------
148
- GOLD_sample_identifiers : Set['xsd:anyURI']
149
- identifiers for corresponding sample in GOLD
150
- INSDC_biosample_identifiers : Set['xsd:anyURI']
151
- identifiers for corresponding sample in INSDC
152
- INSDC_secondary_sample_identifiers : Set['xsd:anyURI']
153
- secondary identifiers for corresponding sample in INSDC
154
- add_date : Optional[str]
155
- The date on which the information was added to the database.
156
- agrochem_addition : Optional['QuantityValue']
157
- Addition of fertilizers, pesticides, etc. - amount and time of applications
158
- al_sat : Optional['QuantityValue']
159
- Aluminum saturation (esp. For tropical soils)
160
- al_sat_meth : Optional['TextValue']
161
- Reference or method used in determining Al saturation
162
- alkalinity : Optional['QuantityValue']
163
- Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate
164
- alkalinity_method : Optional['TextValue']
165
- Method used for alkalinity measurement
166
- alkyl_diethers : Optional['QuantityValue']
167
- Concentration of alkyl diethers
168
- alt : Optional['QuantityValue']
169
- Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air
170
- aminopept_act : Optional['QuantityValue']
171
- Measurement of aminopeptidase activity
172
- ammonium : Optional['QuantityValue']
173
- Concentration of ammonium in the sample
174
- annual_precpt : Optional['QuantityValue']
175
- The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps.
176
- annual_temp : Optional['QuantityValue']
177
- Mean annual temperature
178
- bacteria_carb_prod : Optional['QuantityValue']
179
- Measurement of bacterial carbon production
180
- bishomohopanol : Optional['QuantityValue']
181
- Concentration of bishomohopanol
182
- bromide : Optional['QuantityValue']
183
- Concentration of bromide
184
- calcium : Optional['QuantityValue']
185
- Concentration of calcium in the sample
186
- carb_nitro_ratio : Optional['QuantityValue']
187
- Ratio of amount or concentrations of carbon to nitrogen
188
- chem_administration : Optional['ControlledTermValue']
189
- List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi
190
- chloride : Optional['QuantityValue']
191
- Concentration of chloride in the sample
192
- chlorophyll : Optional['QuantityValue']
193
- Concentration of chlorophyll
194
- collection_date : Optional['TimestampValue']
195
- The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008, all are ISO8601 compliant
196
- community : Optional[str]
197
- null
198
- crop_rotation : Optional['TextValue']
199
- Whether or not crop is rotated, and if yes, rotation schedule
200
- cur_land_use : Optional['TextValue']
201
- Present state of sample site
202
- cur_vegetation : Optional['TextValue']
203
- Vegetation classification from one or more standard classification systems, or agricultural crop
204
- cur_vegetation_meth : Optional['TextValue']
205
- Reference or method used in vegetation classification
206
- density : Optional['QuantityValue']
207
- Density of the sample, which is its mass per unit volume (aka volumetric mass density)
208
- depth : Optional['QuantityValue']
209
- Depth is defined as the vertical distance below local surface, e.g. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples
210
- depth2 : Optional['QuantityValue']
211
- null
212
- diss_carb_dioxide : Optional['QuantityValue']
213
- Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample
214
- diss_hydrogen : Optional['QuantityValue']
215
- Concentration of dissolved hydrogen
216
- diss_inorg_carb : Optional['QuantityValue']
217
- Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter
218
- diss_inorg_phosp : Optional['QuantityValue']
219
- Concentration of dissolved inorganic phosphorus in the sample
220
- diss_org_carb : Optional['QuantityValue']
221
- Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid
222
- diss_org_nitro : Optional['QuantityValue']
223
- Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2
224
- diss_oxygen : Optional['QuantityValue']
225
- Concentration of dissolved oxygen
226
- drainage_class : Optional['TextValue']
227
- Drainage classification from a standard system such as the USDA system
228
- ecosystem : Optional[str]
229
- An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.
230
- ecosystem_category : Optional[str]
231
- Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.
232
- ecosystem_subtype : Optional[str]
233
- Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.
234
- ecosystem_type : Optional[str]
235
- Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. Ecosystem type is in position 3/5 in a GOLD path.
236
- elev : Optional['QuantityValue']
237
- Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit
238
- env_broad_scale : Optional['ControlledTermValue']
239
- In this field, report which major environmental system your sample or specimen came from. The systems identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. were you in the desert or a rainforest?). We recommend using subclasses of ENVO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. Format (one term): termLabel [termID], Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. Example: Annotating a water sample from the photic zone in middle of the Atlantic Ocean, consider: oceanic epipelagic zone biome [ENVO:01000033]. Example: Annotating a sample from the Amazon rainforest consider: tropical moist broadleaf forest biome [ENVO:01000228]. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html
240
- env_local_scale : Optional['ControlledTermValue']
241
- In this field, report the entity or entities which are in your sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. Please use terms that are present in ENVO and which are of smaller spatial grain than your entry for env_broad_scale. Format (one term): termLabel [termID]; Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. Example: Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336]. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html
242
- env_medium : Optional['ControlledTermValue']
243
- In this field, report which environmental material or materials (pipe separated) immediately surrounded your sample or specimen prior to sampling, using one or more subclasses of ENVO's environmental material class: http://purl.obolibrary.org/obo/ENVO_00010483. Format (one term): termLabel [termID]; Format (multiple terms): termLabel [termID]|termLabel [termID]|termLabel [termID]. Example: Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air ENVO_00002005. If needed, request new terms on the ENVO tracker, identified here: http://www.obofoundry.org/ontology/envo.html
244
- env_package : Optional['TextValue']
245
- MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported
246
- extreme_event : Optional['TimestampValue']
247
- Unusual physical events that may have affected microbial populations
248
- fao_class : Optional['TextValue']
249
- Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups
250
- fire : Optional['TimestampValue']
251
- Historical and/or physical evidence of fire
252
- flooding : Optional['TimestampValue']
253
- Historical and/or physical evidence of flooding
254
- geo_loc_name : Optional['TextValue']
255
- The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (v 1.512) (http://purl.bioontology.org/ontology/GAZ)
256
- glucosidase_act : Optional['QuantityValue']
257
- Measurement of glucosidase activity
258
- habitat : Optional[str]
259
- null
260
- heavy_metals : Optional['QuantityValue']
261
- Heavy metals present and their concentrations; can include multiple heavy metals and concentrations
262
- heavy_metals_meth : Optional['TextValue']
263
- Reference or method used in determining heavy metals
264
- horizon : Optional['TextValue']
265
- Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath
266
- horizon_meth : Optional['TextValue']
267
- Reference or method used in determining the horizon
268
- host_name : Optional[str]
269
- null
270
- identifier : Optional[str]
271
- null
272
- lat_lon : Optional['GeolocationValue']
273
- The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system
274
- link_addit_analys : Optional['TextValue']
275
- Link to additional analysis results performed on the sample
276
- link_class_info : Optional['TextValue']
277
- Link to digitized soil maps or other soil classification information
278
- link_climate_info : Optional['TextValue']
279
- Link to climate resource
280
- local_class : Optional['TextValue']
281
- Soil classification based on local soil classification system
282
- local_class_meth : Optional['TextValue']
283
- Reference or method used in determining the local soil classification
284
- location : Optional[str]
285
- null
286
- magnesium : Optional['QuantityValue']
287
- Concentration of magnesium in the sample
288
- mean_frict_vel : Optional['QuantityValue']
289
- Measurement of mean friction velocity
290
- mean_peak_frict_vel : Optional['QuantityValue']
291
- Measurement of mean peak friction velocity
292
- microbial_biomass : Optional['QuantityValue']
293
- The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units
294
- microbial_biomass_meth : Optional['TextValue']
295
- Reference or method used in determining microbial biomass
296
- misc_param : Optional['QuantityValue']
297
- Any other measurement performed or parameter collected, that is not listed here
298
- mod_date : Optional[str]
299
- The last date on which the database information was modified.
300
- n_alkanes : Optional['QuantityValue']
301
- Concentration of n-alkanes; can include multiple n-alkanes
302
- ncbi_taxonomy_name : Optional[str]
303
- null
304
- nitrate : Optional['QuantityValue']
305
- Concentration of nitrate in the sample
306
- nitrite : Optional['QuantityValue']
307
- Concentration of nitrite in the sample
308
- org_matter : Optional['QuantityValue']
309
- Concentration of organic matter
310
- org_nitro : Optional['QuantityValue']
311
- Concentration of organic nitrogen
312
- organism_count : Optional['QuantityValue']
313
- Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)
314
- oxy_stat_samp : Optional['TextValue']
315
- Oxygenation status of sample
316
- part_of : Set['NamedThing']
317
- Links a resource to another resource that either logically or physically includes it.
318
- part_org_carb : Optional['QuantityValue']
319
- Concentration of particulate organic carbon
320
- perturbation : Optional['TextValue']
321
- Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types
322
- petroleum_hydrocarb : Optional['QuantityValue']
323
- Concentration of petroleum hydrocarbon
324
- ph : Optional['QuantityValue']
325
- Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid
326
- ph_meth : Optional['TextValue']
327
- Reference or method used in determining ph
328
- phaeopigments : Optional['QuantityValue']
329
- Concentration of phaeopigments; can include multiple phaeopigments
330
- phosplipid_fatt_acid : Optional['QuantityValue']
331
- Concentration of phospholipid fatty acids; can include multiple values
332
- pool_dna_extracts : Optional['TextValue']
333
- Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given
334
- potassium : Optional['QuantityValue']
335
- Concentration of potassium in the sample
336
- pressure : Optional['QuantityValue']
337
- Pressure to which the sample is subject to, in atmospheres
338
- previous_land_use : Optional['TextValue']
339
- Previous land use and dates
340
- previous_land_use_meth : Optional['TextValue']
341
- Reference or method used in determining previous land use and dates
342
- profile_position : Optional['TextValue']
343
- Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas
344
- proport_woa_temperature : Optional[str]
345
- null
346
- redox_potential : Optional['QuantityValue']
347
- Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential
348
- salinity : Optional['QuantityValue']
349
- Salinity is the total concentration of all dissolved salts in a water sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater
350
- salinity_category : Optional[str]
351
- Categorical description of the sample's salinity. Examples: halophile, halotolerant, hypersaline, euryhaline
352
- salinity_meth : Optional['TextValue']
353
- Reference or method used in determining salinity
354
- samp_collect_device : Optional['TextValue']
355
- The method or device employed for collecting the sample
356
- samp_mat_process : Optional['ControlledTermValue']
357
- Any processing applied to the sample during or after retrieving the sample from environment. This field accepts OBI, for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI
358
- samp_store_dur : Optional['TextValue']
359
- Duration for which the sample was stored
360
- samp_store_loc : Optional['TextValue']
361
- Location at which sample was stored, usually name of a specific freezer/room
362
- samp_store_temp : Optional['QuantityValue']
363
- Temperature at which sample was stored, e.g. -80 degree Celsius
364
- samp_vol_we_dna_ext : Optional['QuantityValue']
365
- Volume (ml), weight (g) of processed sample, or surface area swabbed from sample for DNA extraction
366
- sample_collection_site : Optional[str]
367
- null
368
- season_precpt : Optional['QuantityValue']
369
- The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps.
370
- season_temp : Optional['QuantityValue']
371
- Mean seasonal temperature
372
- sieving : Optional['QuantityValue']
373
- Collection design of pooled samples and/or sieve size and amount of sample sieved
374
- size_frac_low : Optional['QuantityValue']
375
- Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample
376
- size_frac_up : Optional['QuantityValue']
377
- Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample
378
- slope_aspect : Optional['QuantityValue']
379
- The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration.
380
- slope_gradient : Optional['QuantityValue']
381
- Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer
382
- sodium : Optional['QuantityValue']
383
- Sodium concentration in the sample
384
- soil_type : Optional['TextValue']
385
- Soil series name or other lower-level classification
386
- soil_type_meth : Optional['TextValue']
387
- Reference or method used in determining soil series name or other lower-level classification
388
- soluble_iron_micromol : Optional[str]
389
- null
390
- specific_ecosystem : Optional[str]
391
- Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.
392
- store_cond : Optional['TextValue']
393
- Explain how and for how long the soil sample was stored before DNA extraction
394
- subsurface_depth : Optional['QuantityValue']
395
- null
396
- subsurface_depth2 : Optional['QuantityValue']
397
- null
398
- sulfate : Optional['QuantityValue']
399
- Concentration of sulfate in the sample
400
- sulfide : Optional['QuantityValue']
401
- Concentration of sulfide in the sample
402
- temp : Optional['QuantityValue']
403
- Temperature of the sample at the time of sampling
404
- texture : Optional['QuantityValue']
405
- The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional.
406
- texture_meth : Optional['TextValue']
407
- Reference or method used in determining soil texture
408
- tidal_stage : Optional['TextValue']
409
- Stage of tide
410
- tillage : Optional['TextValue']
411
- Note method(s) used for tilling
412
- tot_carb : Optional['QuantityValue']
413
- Total carbon content
414
- tot_depth_water_col : Optional['QuantityValue']
415
- Measurement of total depth of water column
416
- tot_diss_nitro : Optional['QuantityValue']
417
- Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen
418
- tot_nitro_content : Optional['QuantityValue']
419
- Total nitrogen content of the sample
420
- tot_nitro_content_meth : Optional['TextValue']
421
- Reference or method used in determining the total nitrogen
422
- tot_org_c_meth : Optional['TextValue']
423
- Reference or method used in determining total organic carbon
424
- tot_org_carb : Optional['QuantityValue']
425
- Definition for soil: total organic carbon content of the soil, definition otherwise: total organic carbon content
426
- tot_phosp : Optional['QuantityValue']
427
- Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus
428
- type : Optional[str]
429
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
430
- water_content : Optional['QuantityValue']
431
- Water content measurement
432
- water_content_soil_meth : Optional['TextValue']
433
- Reference or method used in determining the water content of soil
434
- """
435
-
436
- _key = LexicalKey(["id"])
437
- GOLD_sample_identifiers: Set["xsd:anyURI"]
438
- INSDC_biosample_identifiers: Set["xsd:anyURI"]
439
- INSDC_secondary_sample_identifiers: Set["xsd:anyURI"]
440
- add_date: Optional[str]
441
- agrochem_addition: Optional["QuantityValue"]
442
- al_sat: Optional["QuantityValue"]
443
- al_sat_meth: Optional["TextValue"]
444
- alkalinity: Optional["QuantityValue"]
445
- alkalinity_method: Optional["TextValue"]
446
- alkyl_diethers: Optional["QuantityValue"]
447
- alt: Optional["QuantityValue"]
448
- aminopept_act: Optional["QuantityValue"]
449
- ammonium: Optional["QuantityValue"]
450
- annual_precpt: Optional["QuantityValue"]
451
- annual_temp: Optional["QuantityValue"]
452
- bacteria_carb_prod: Optional["QuantityValue"]
453
- bishomohopanol: Optional["QuantityValue"]
454
- bromide: Optional["QuantityValue"]
455
- calcium: Optional["QuantityValue"]
456
- carb_nitro_ratio: Optional["QuantityValue"]
457
- chem_administration: Optional["ControlledTermValue"]
458
- chloride: Optional["QuantityValue"]
459
- chlorophyll: Optional["QuantityValue"]
460
- collection_date: Optional["TimestampValue"]
461
- community: Optional[str]
462
- crop_rotation: Optional["TextValue"]
463
- cur_land_use: Optional["TextValue"]
464
- cur_vegetation: Optional["TextValue"]
465
- cur_vegetation_meth: Optional["TextValue"]
466
- density: Optional["QuantityValue"]
467
- depth: Optional["QuantityValue"]
468
- depth2: Optional["QuantityValue"]
469
- diss_carb_dioxide: Optional["QuantityValue"]
470
- diss_hydrogen: Optional["QuantityValue"]
471
- diss_inorg_carb: Optional["QuantityValue"]
472
- diss_inorg_phosp: Optional["QuantityValue"]
473
- diss_org_carb: Optional["QuantityValue"]
474
- diss_org_nitro: Optional["QuantityValue"]
475
- diss_oxygen: Optional["QuantityValue"]
476
- drainage_class: Optional["TextValue"]
477
- ecosystem: Optional[str]
478
- ecosystem_category: Optional[str]
479
- ecosystem_subtype: Optional[str]
480
- ecosystem_type: Optional[str]
481
- elev: Optional["QuantityValue"]
482
- env_broad_scale: Optional["ControlledTermValue"]
483
- env_local_scale: Optional["ControlledTermValue"]
484
- env_medium: Optional["ControlledTermValue"]
485
- env_package: Optional["TextValue"]
486
- extreme_event: Optional["TimestampValue"]
487
- fao_class: Optional["TextValue"]
488
- fire: Optional["TimestampValue"]
489
- flooding: Optional["TimestampValue"]
490
- geo_loc_name: Optional["TextValue"]
491
- glucosidase_act: Optional["QuantityValue"]
492
- habitat: Optional[str]
493
- heavy_metals: Optional["QuantityValue"]
494
- heavy_metals_meth: Optional["TextValue"]
495
- horizon: Optional["TextValue"]
496
- horizon_meth: Optional["TextValue"]
497
- host_name: Optional[str]
498
- identifier: Optional[str]
499
- lat_lon: Optional["GeolocationValue"]
500
- link_addit_analys: Optional["TextValue"]
501
- link_class_info: Optional["TextValue"]
502
- link_climate_info: Optional["TextValue"]
503
- local_class: Optional["TextValue"]
504
- local_class_meth: Optional["TextValue"]
505
- location: Optional[str]
506
- magnesium: Optional["QuantityValue"]
507
- mean_frict_vel: Optional["QuantityValue"]
508
- mean_peak_frict_vel: Optional["QuantityValue"]
509
- microbial_biomass: Optional["QuantityValue"]
510
- microbial_biomass_meth: Optional["TextValue"]
511
- misc_param: Optional["QuantityValue"]
512
- mod_date: Optional[str]
513
- n_alkanes: Optional["QuantityValue"]
514
- ncbi_taxonomy_name: Optional[str]
515
- nitrate: Optional["QuantityValue"]
516
- nitrite: Optional["QuantityValue"]
517
- org_matter: Optional["QuantityValue"]
518
- org_nitro: Optional["QuantityValue"]
519
- organism_count: Optional["QuantityValue"]
520
- oxy_stat_samp: Optional["TextValue"]
521
- part_of: Set["NamedThing"]
522
- part_org_carb: Optional["QuantityValue"]
523
- perturbation: Optional["TextValue"]
524
- petroleum_hydrocarb: Optional["QuantityValue"]
525
- ph: Optional["QuantityValue"]
526
- ph_meth: Optional["TextValue"]
527
- phaeopigments: Optional["QuantityValue"]
528
- phosplipid_fatt_acid: Optional["QuantityValue"]
529
- pool_dna_extracts: Optional["TextValue"]
530
- potassium: Optional["QuantityValue"]
531
- pressure: Optional["QuantityValue"]
532
- previous_land_use: Optional["TextValue"]
533
- previous_land_use_meth: Optional["TextValue"]
534
- profile_position: Optional["TextValue"]
535
- proport_woa_temperature: Optional[str]
536
- redox_potential: Optional["QuantityValue"]
537
- salinity: Optional["QuantityValue"]
538
- salinity_category: Optional[str]
539
- salinity_meth: Optional["TextValue"]
540
- samp_collect_device: Optional["TextValue"]
541
- samp_mat_process: Optional["ControlledTermValue"]
542
- samp_store_dur: Optional["TextValue"]
543
- samp_store_loc: Optional["TextValue"]
544
- samp_store_temp: Optional["QuantityValue"]
545
- samp_vol_we_dna_ext: Optional["QuantityValue"]
546
- sample_collection_site: Optional[str]
547
- season_precpt: Optional["QuantityValue"]
548
- season_temp: Optional["QuantityValue"]
549
- sieving: Optional["QuantityValue"]
550
- size_frac_low: Optional["QuantityValue"]
551
- size_frac_up: Optional["QuantityValue"]
552
- slope_aspect: Optional["QuantityValue"]
553
- slope_gradient: Optional["QuantityValue"]
554
- sodium: Optional["QuantityValue"]
555
- soil_type: Optional["TextValue"]
556
- soil_type_meth: Optional["TextValue"]
557
- soluble_iron_micromol: Optional[str]
558
- specific_ecosystem: Optional[str]
559
- store_cond: Optional["TextValue"]
560
- subsurface_depth: Optional["QuantityValue"]
561
- subsurface_depth2: Optional["QuantityValue"]
562
- sulfate: Optional["QuantityValue"]
563
- sulfide: Optional["QuantityValue"]
564
- temp: Optional["QuantityValue"]
565
- texture: Optional["QuantityValue"]
566
- texture_meth: Optional["TextValue"]
567
- tidal_stage: Optional["TextValue"]
568
- tillage: Optional["TextValue"]
569
- tot_carb: Optional["QuantityValue"]
570
- tot_depth_water_col: Optional["QuantityValue"]
571
- tot_diss_nitro: Optional["QuantityValue"]
572
- tot_nitro_content: Optional["QuantityValue"]
573
- tot_nitro_content_meth: Optional["TextValue"]
574
- tot_org_c_meth: Optional["TextValue"]
575
- tot_org_carb: Optional["QuantityValue"]
576
- tot_phosp: Optional["QuantityValue"]
577
- type: Optional[str]
578
- water_content: Optional["QuantityValue"]
579
- water_content_soil_meth: Optional["TextValue"]
580
-
581
-
582
class Database(DocumentTemplate):
    """Abstract holder for any collection of metadata and data.

    It does not have to correspond to an actual managed database. When the
    schema is translated to JSON-Schema this class is the 'root' object, and
    its attributes point at the other objects of interest.

    Attributes
    ----------
    activity_set : Set['WorkflowExecutionActivity']
        Workflow activities linked to this database object.
    biosample_set : Set['Biosample']
        Samples contained in this database object.
    data_object_set : Set['DataObject']
        Data objects contained in this database object.
    date_created : Optional[str]
        Not yet described (marked TODO).
    etl_software_version : Optional[str]
        Not yet described (marked TODO).
    functional_annotation_set : Set['FunctionalAnnotation']
        All functional annotations linked to this database object.
    genome_feature_set : Set['GenomeFeature']
        All features linked to this database object.
    mags_activity_set : Set['MAGsAnalysisActivity']
        MAGs analysis activities linked to this database object.
    metabolomics_analysis_activity_set : Set['MetabolomicsAnalysisActivity']
        Metabolomics analysis activities linked to this database object.
    metagenome_annotation_activity_set : Set['MetagenomeAnnotationActivity']
        Metagenome annotation activities linked to this database object.
    metagenome_assembly_set : Set['MetagenomeAssembly']
        Metagenome assembly activities linked to this database object.
    metaproteomics_analysis_activity_set : Set['MetaproteomicsAnalysisActivity']
        Metaproteomics analysis activities linked to this database object.
    metatranscriptome_activity_set : Set['MetatranscriptomeActivity']
        Metatranscriptome analysis activities linked to this database object.
    nmdc_schema_version : Optional[str]
        Not yet described (marked TODO).
    nom_analysis_activity_set : Set['NomAnalysisActivity']
        Natural organic matter (NOM) analysis activities linked to this
        database object.
    omics_processing_set : Set['OmicsProcessing']
        Omics processings contained in this database object.
    read_QC_analysis_activity_set : Set['ReadQCAnalysisActivity']
        Read QC analysis activities linked to this database object.
    read_based_analysis_activity_set : Set['ReadBasedAnalysisActivity']
        Read based analysis activities linked to this database object.
    study_set : Set['Study']
        Studies contained in this database object.
    """

    # Declarations follow the same order as the Attributes section above.
    activity_set: Set["WorkflowExecutionActivity"]
    biosample_set: Set["Biosample"]
    data_object_set: Set["DataObject"]
    date_created: Optional[str]
    etl_software_version: Optional[str]
    functional_annotation_set: Set["FunctionalAnnotation"]
    genome_feature_set: Set["GenomeFeature"]
    mags_activity_set: Set["MAGsAnalysisActivity"]
    metabolomics_analysis_activity_set: Set["MetabolomicsAnalysisActivity"]
    metagenome_annotation_activity_set: Set["MetagenomeAnnotationActivity"]
    metagenome_assembly_set: Set["MetagenomeAssembly"]
    metaproteomics_analysis_activity_set: Set["MetaproteomicsAnalysisActivity"]
    metatranscriptome_activity_set: Set["MetatranscriptomeActivity"]
    nmdc_schema_version: Optional[str]
    nom_analysis_activity_set: Set["NomAnalysisActivity"]
    omics_processing_set: Set["OmicsProcessing"]
    read_QC_analysis_activity_set: Set["ReadQCAnalysisActivity"]
    read_based_analysis_activity_set: Set["ReadBasedAnalysisActivity"]
    study_set: Set["Study"]
647
-
648
-
649
class FunctionalAnnotation(DocumentTemplate):
    """Assignment of a function term to a gene product.

    The function term (e.g. a reaction or pathway) is one that the gene
    product executes or plays an active role in. Functional annotations can
    be assigned manually by curators or automatically in workflows. In the
    context of NMDC all function annotation is performed automatically,
    typically using HMM or Blast type methods.

    Attributes
    ----------
    has_function : Optional[str]
        No description provided.
    subject : Optional['GeneProduct']
        No description provided.
    type : Optional[str]
        Optional string naming the type of the object; it allows searches
        for different kinds of objects.
    was_generated_by : Optional['Activity']
        No description provided.
    """

    has_function: Optional[str]
    subject: Optional["GeneProduct"]
    type: Optional[str]
    was_generated_by: Optional["Activity"]
668
-
669
-
670
class BiosampleProcessing(NamedThing):
    """Process that consumes one or more biosamples and yields outputs.

    Examples of outputs include samples cultivated from another sample, or
    data objects created by instrument runs.

    Attributes
    ----------
    has_input : Set['NamedThing']
        An input to a process.
    """

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
    has_input: Set["NamedThing"]
681
-
682
-
683
class MAGBin(DocumentTemplate):
    """Fields recorded for a single bin.

    Covers the bin's name and quality, completeness and contamination
    figures, the ``gtdbtk_*`` labels from domain down to species, and a set
    of integer counts. None of the fields except ``type`` has a description.

    Attributes
    ----------
    bin_name : Optional[str]
        No description provided.
    bin_quality : Optional[str]
        No description provided.
    completeness : Optional['xsd:float']
        No description provided.
    contamination : Optional['xsd:float']
        No description provided.
    gene_count : Optional[int]
        No description provided.
    gtdbtk_class : Optional[str]
        No description provided.
    gtdbtk_domain : Optional[str]
        No description provided.
    gtdbtk_family : Optional[str]
        No description provided.
    gtdbtk_genus : Optional[str]
        No description provided.
    gtdbtk_order : Optional[str]
        No description provided.
    gtdbtk_phylum : Optional[str]
        No description provided.
    gtdbtk_species : Optional[str]
        No description provided.
    num_16s : Optional[int]
        No description provided.
    num_23s : Optional[int]
        No description provided.
    num_5s : Optional[int]
        No description provided.
    num_tRNA : Optional[int]
        No description provided.
    number_of_contig : Optional[int]
        No description provided.
    type : Optional[str]
        Optional string naming the type of the object; it allows searches
        for different kinds of objects.
    """

    bin_name: Optional[str]
    bin_quality: Optional[str]
    completeness: Optional["xsd:float"]
    contamination: Optional["xsd:float"]
    gene_count: Optional[int]
    gtdbtk_class: Optional[str]
    gtdbtk_domain: Optional[str]
    gtdbtk_family: Optional[str]
    gtdbtk_genus: Optional[str]
    gtdbtk_order: Optional[str]
    gtdbtk_phylum: Optional[str]
    gtdbtk_species: Optional[str]
    num_16s: Optional[int]
    num_23s: Optional[int]
    num_5s: Optional[int]
    num_tRNA: Optional[int]
    number_of_contig: Optional[int]
    type: Optional[str]
744
-
745
-
746
class GeneProduct(NamedThing):
    """Molecule that is encoded by a gene and has an evolved function."""

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
750
-
751
-
752
class Instrument(NamedThing):
    """Material entity designed to perform a function in a scientific
    investigation, but which is not a reagent [OBI].
    """

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
756
-
757
-
758
class OntologyClass(NamedThing):
    """Named thing representing an ontology class.

    It adds no fields of its own. ``ChemicalEntity`` and
    ``EnvironmentalMaterialTerm`` derive from it, and
    ``ControlledTermValue.term`` points at it.
    """

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
762
-
763
-
764
class ChemicalEntity(OntologyClass):
    """Atom or molecule that can be represented with a chemical formula.

    Includes lipids, glycans, natural products and drugs. There may be
    different terms for distinct acid-base forms and protonation states.

    Attributes
    ----------
    chemical_formula : Optional[str]
        A generic grouping for molecular formulae and empirical formulae.
    inchi : Optional[str]
        No description provided.
    inchi_key : Optional[str]
        No description provided.
    smiles : Set[str]
        String encodings of a molecular graph, with no chiral or isotopic
        information. A given structure usually has a large number of valid
        SMILES; for example CCO, OCC and C(O)C all specify the structure of
        ethanol.
    """

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
    chemical_formula: Optional[str]
    inchi: Optional[str]
    inchi_key: Optional[str]
    smiles: Set[str]
784
-
785
-
786
class Study(NamedThing):
    """Summary of the overall goal of a research initiative.

    A study also outlines the key objective of the initiative's underlying
    projects.

    Attributes
    ----------
    GOLD_study_identifiers : Set['xsd:anyURI']
        Identifiers for the corresponding project in GOLD.
    INSDC_SRA_ENA_study_identifiers : Set['xsd:anyURI']
        Identifiers for the corresponding project in INSDC SRA / ENA.
    INSDC_bioproject_identifiers : Set['xsd:anyURI']
        Identifiers for the corresponding project in INSDC Bioproject.
    MGnify_project_identifiers : Set['xsd:anyURI']
        Identifiers for the corresponding project in MGnify.
    abstract : Optional[str]
        The abstract of the manuscript/grant associated with the entity,
        i.e. a summary of the resource.
    alternative_descriptions : Set[str]
        Alternative descriptions for the entity. The distinction between
        description and alternative descriptions is application-specific.
    alternative_names : Set[str]
        Alternative names used to refer to the entity. The distinction
        between name and alternative names is application-specific.
    alternative_titles : Set[str]
        Alternative titles for the entity. The distinction between title and
        alternative titles is application-specific.
    doi : Optional['AttributeValue']
        No description provided.
    ecosystem : Optional[str]
        An ecosystem is a combination of a physical environment (abiotic
        factors) and all the organisms (biotic factors) that interact with
        this environment. Ecosystem is in position 1/5 in a GOLD path.
    ecosystem_category : Optional[str]
        Ecosystem categories represent divisions within the ecosystem based
        on specific characteristics of the environment from where an
        organism or sample is isolated. Ecosystem category is in position
        2/5 in a GOLD path.
    ecosystem_subtype : Optional[str]
        Ecosystem subtypes represent further subdivision of Ecosystem types
        into more distinct subtypes. Ecosystem subtype is in position 4/5 in
        a GOLD path.
    ecosystem_type : Optional[str]
        Ecosystem types represent things having common characteristics
        within the Ecosystem Category. This grouping by common
        characteristics is still broad, but specific to the characteristics
        of a given environment. Ecosystem type is in position 3/5 in a GOLD
        path.
    ess_dive_datasets : Set[str]
        List of ESS-DIVE dataset DOIs.
    funding_sources : Set[str]
        No description provided.
    has_credit_associations : Set['CreditAssociation']
        Links a study to a credit association. The credit association is
        linked to a person value and to a CRediT Contributor Roles term.
        Overall semantics: the person should get credit X for their
        participation in the study.
    objective : Optional[str]
        The scientific objectives associated with the entity. It SHOULD
        correspond to scientific norms for the objectives field in a
        structured abstract.
    principal_investigator : Optional['PersonValue']
        Principal Investigator who led the study and/or generated the
        dataset.
    publications : Set[str]
        Publications associated with the entity. Each publication SHOULD be
        given using an identifier, such as a DOI or Pubmed ID, if possible.
    relevant_protocols : Set[str]
        No description provided.
    specific_ecosystem : Optional[str]
        Specific ecosystems represent specific features of the environment,
        like the aphotic zone in an ocean or gastric mucosa within a host
        digestive system. Specific ecosystem is in position 5/5 in a GOLD
        path.
    study_image : Set['ImageValue']
        Links a study to one or more images.
    title : Optional[str]
        A name given to the entity that differs from the name/label
        programmatically assigned to it. For example, when extracting study
        information for GOLD, the GOLD system has assigned a name/label;
        for display purposes we may also wish to capture the title of the
        proposal that was used to fund the study.
    type : Optional[str]
        Optional string naming the type of the object; it allows searches
        for different kinds of objects.
    websites : Set[str]
        Websites associated with the entity.
    """

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
    GOLD_study_identifiers: Set["xsd:anyURI"]
    INSDC_SRA_ENA_study_identifiers: Set["xsd:anyURI"]
    INSDC_bioproject_identifiers: Set["xsd:anyURI"]
    MGnify_project_identifiers: Set["xsd:anyURI"]
    abstract: Optional[str]
    alternative_descriptions: Set[str]
    alternative_names: Set[str]
    alternative_titles: Set[str]
    doi: Optional["AttributeValue"]
    ecosystem: Optional[str]
    ecosystem_category: Optional[str]
    ecosystem_subtype: Optional[str]
    ecosystem_type: Optional[str]
    ess_dive_datasets: Set[str]
    funding_sources: Set[str]
    has_credit_associations: Set["CreditAssociation"]
    objective: Optional[str]
    principal_investigator: Optional["PersonValue"]
    publications: Set[str]
    relevant_protocols: Set[str]
    specific_ecosystem: Optional[str]
    study_image: Set["ImageValue"]
    title: Optional[str]
    type: Optional[str]
    websites: Set[str]
869
-
870
-
871
class IntegerValue(AttributeValue):
    """Attribute value holding an integer.

    Attributes
    ----------
    has_numeric_value : Optional['xsd:float']
        Links a quantity value to a number.
    """

    # NOTE(review): typed as xsd:float although the class models an integer;
    # confirm against the schema before relying on integral values.
    has_numeric_value: Optional["xsd:float"]
881
-
882
-
883
class Activity(DocumentTemplate):
    """A provenance-generating activity.

    Attributes
    ----------
    ended_at_time : Optional[datetime]
        No description provided.
    id : str
        A unique identifier for a thing. Must be either a CURIE shorthand
        for a URI or a complete URI.
    name : Optional[str]
        A human readable label for an entity.
    started_at_time : Optional[datetime]
        No description provided.
    used : Optional[str]
        No description provided.
    was_associated_with : Optional['Agent']
        No description provided.
    was_informed_by : Optional['Activity']
        No description provided.
    """

    # Lexical key over the ``id`` field declared below.
    _key = LexicalKey(["id"])
    ended_at_time: Optional[datetime]
    id: str
    name: Optional[str]
    started_at_time: Optional[datetime]
    used: Optional[str]
    was_associated_with: Optional["Agent"]
    was_informed_by: Optional["Activity"]
912
-
913
-
914
class BooleanValue(AttributeValue):
    """Attribute value holding a boolean.

    Attributes
    ----------
    has_boolean_value : Optional[bool]
        Links a quantity value to a boolean.
    """

    has_boolean_value: Optional[bool]
924
-
925
-
926
class ControlledTermValue(AttributeValue):
    """Attribute value that is a controlled term or class from an ontology.

    Attributes
    ----------
    term : Optional['OntologyClass']
        Pointer to an ontology class.
    """

    term: Optional["OntologyClass"]
936
-
937
-
938
class EnvironmentalMaterialTerm(OntologyClass):
    """Ontology class with no additional fields; only the key is declared."""

    # Lexical key over the ``id`` field.
    _key = LexicalKey(["id"])
942
-
943
-
944
class QuantityValue(AttributeValue):
    """A simple quantity, e.g. 2cm.

    Attributes
    ----------
    has_maximum_numeric_value : Optional['xsd:float']
        The maximum value part of the quantity, expressed as a number, when
        the value covers a range.
    has_minimum_numeric_value : Optional['xsd:float']
        The minimum value part of the quantity, expressed as a number, when
        the value covers a range.
    has_numeric_value : Optional['xsd:float']
        Links a quantity value to a number.
    has_raw_value : Optional[str]
        The value as specified for an annotation in raw form, i.e. a string,
        e.g. "2 cm" or "2-4 cm".
    has_unit : Optional[str]
        Links a quantity value to a unit.
    """

    has_maximum_numeric_value: Optional["xsd:float"]
    has_minimum_numeric_value: Optional["xsd:float"]
    has_numeric_value: Optional["xsd:float"]
    has_raw_value: Optional[str]
    has_unit: Optional[str]
966
-
967
-
968
class Agent(DocumentTemplate):
    """A provenance-generating agent.

    Attributes
    ----------
    acted_on_behalf_of : Optional['Agent']
        No description provided.
    was_informed_by : Optional['Activity']
        No description provided.
    """

    acted_on_behalf_of: Optional["Agent"]
    was_informed_by: Optional["Activity"]
981
-
982
-
983
class GeolocationValue(AttributeValue):
    """Normalized value for a location on the earth's surface.

    Attributes
    ----------
    has_raw_value : Optional[str]
        The value as specified for an annotation in raw form, i.e. a string,
        e.g. "2 cm" or "2-4 cm".
    latitude : Optional[float]
        Latitude.
    longitude : Optional[float]
        Longitude.
    """

    has_raw_value: Optional[str]
    latitude: Optional[float]
    longitude: Optional[float]
999
-
1000
-
1001
class CreditAssociation(DocumentTemplate):
    """Binds associated researchers to studies.

    There will be at least a slot for a CRediT Contributor Role
    (https://casrai.org/credit/) and one for a person value. Specifically,
    see the associated researchers tab on the
    NMDC_SampleMetadata-V4_CommentsForUpdates at
    https://docs.google.com/spreadsheets/d/1INlBo5eoqn2efn4H2P2i8rwRBtnbDVTqXrochJEAPko/edit#gid=0

    Attributes
    ----------
    applied_role : Optional[str]
        No description provided.
    applied_roles : Set[str]
        No description provided.
    applies_to_person : 'PersonValue'
        No description provided.
    type : Optional[str]
        Optional string naming the type of the object; it allows searches
        for different kinds of objects.
    """

    applied_role: Optional[str]
    applied_roles: Set[str]
    # Not wrapped in Optional, unlike the other single-valued fields here.
    applies_to_person: "PersonValue"
    type: Optional[str]
1020
-
1021
-
1022
class PeptideQuantification(DocumentTemplate):
    """Links a metaproteomics analysis workflow to a specific peptide
    sequence and related information.

    Attributes
    ----------
    all_proteins : Set['GeneProduct']
        The protein identifiers associated with the peptide sequence.
    best_protein : Optional['GeneProduct']
        The specific protein identifier most correctly associated with the
        peptide sequence.
    min_q_value : Optional['xsd:float']
        Smallest Q-Value associated with the peptide sequence as provided by
        the MSGFPlus tool.
    peptide_sequence : Optional[str]
        No description provided.
    peptide_spectral_count : Optional[int]
        Sum of filter-passing MS2 spectra associated with the peptide
        sequence within a given LC-MS/MS data file.
    peptide_sum_masic_abundance : Optional[int]
        Combined MS1 extracted ion chromatograms derived from MS2 spectra
        associated with the peptide sequence from a given LC-MS/MS data
        file, using the MASIC tool.
    """

    all_proteins: Set["GeneProduct"]
    best_protein: Optional["GeneProduct"]
    min_q_value: Optional["xsd:float"]
    peptide_sequence: Optional[str]
    peptide_spectral_count: Optional[int]
    peptide_sum_masic_abundance: Optional[int]
1047
-
1048
-
1049
class ProteinQuantification(DocumentTemplate):
    """Links a metaproteomics analysis workflow to a specific protein.

    Attributes
    ----------
    all_proteins : Set['GeneProduct']
        The protein identifiers associated with the peptide sequence.
    best_protein : Optional['GeneProduct']
        The specific protein identifier most correctly associated with the
        peptide sequence.
    peptide_sequence_count : Optional[int]
        Count of peptide sequences grouped to the best_protein.
    protein_spectral_count : Optional[int]
        Sum of filter-passing MS2 spectra associated with the best protein
        within a given LC-MS/MS data file.
    protein_sum_masic_abundance : Optional[int]
        Combined MS1 extracted ion chromatograms derived from MS2 spectra
        associated with the best protein from a given LC-MS/MS data file,
        using the MASIC tool.
    """

    all_proteins: Set["GeneProduct"]
    best_protein: Optional["GeneProduct"]
    peptide_sequence_count: Optional[int]
    protein_spectral_count: Optional[int]
    protein_sum_masic_abundance: Optional[int]
1071
-
1072
-
1073
class MetaboliteQuantification(DocumentTemplate):
    """Links a metabolomics analysis workflow to a specific metabolite.

    Attributes
    ----------
    alternative_identifiers : Set[str]
        Alternative identifiers for the entity.
    highest_similarity_score : Optional['xsd:float']
        Not yet described (marked "TODO: Yuri to fill in").
    metabolite_quantified : Optional['ChemicalEntity']
        The specific metabolite identifier.
    """

    alternative_identifiers: Set[str]
    highest_similarity_score: Optional["xsd:float"]
    metabolite_quantified: Optional["ChemicalEntity"]
1089
-
1090
-
1091
class UrlValue(AttributeValue):
    """Attribute value that is a string conforming to URL syntax."""
1093
-
1094
-
1095
class ImageValue(AttributeValue):
    """Attribute value representing an image.

    Attributes
    ----------
    description : Optional[str]
        A human-readable description of a thing.
    display_order : Optional[str]
        When rendering information, specifies the order in which the
        information should be rendered.
    url : Optional[str]
        No description provided.
    """

    # ``description`` is the only field here with an explicit default.
    description: Optional[str] = None
    display_order: Optional[str]
    url: Optional[str]
1111
-
1112
-
1113
class TextValue(AttributeValue):
    """A basic string value.

    Attributes
    ----------
    language : Optional['xsd:language']
        Should use an ISO 639-1 code, e.g. "en", "fr".
    """

    language: Optional["xsd:language"]
1123
-
1124
-
1125
- class DataObject(NamedThing):
1126
- """An object that primarily consists of symbols that represent information. Files, records, and omics data are examples of data objects.
1127
-
1128
- Attributes
1129
- ----------
1130
- compression_type : Optional[str]
1131
- If provided, specifies the compression type
1132
- data_object_type : Optional[str]
1133
- The type of file represented by the data object.
1134
- description : Optional[str]
1135
- a human-readable description of a thing
1136
- file_size_bytes : Optional['xsd:long']
1137
- Size of the file in bytes
1138
- md5_checksum : Optional[str]
1139
- MD5 checksum of file (pre-compressed)
1140
- name : Optional[str]
1141
- A human readable label for an entity
1142
- type : Optional[str]
1143
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
1144
- url : Optional[str]
1145
- null
1146
- was_generated_by : Optional['Activity']
1147
- null
1148
- """
1149
-
1150
- _key = LexicalKey(["id"])
1151
- compression_type: Optional[str]
1152
- data_object_type: Optional[str]
1153
- description: Optional[str] = None
1154
- file_size_bytes: Optional["xsd:long"]
1155
- md5_checksum: Optional[str]
1156
- name: Optional[str]
1157
- type: Optional[str]
1158
- url: Optional[str]
1159
- was_generated_by: Optional["Activity"]
1160
-
1161
-
1162
- class TimestampValue(AttributeValue):
1163
- """A value that is a timestamp. The range should be ISO-8601"""
1164
-
1165
-
1166
- class OmicsProcessing(BiosampleProcessing):
1167
- """The methods and processes used to generate omics data from a biosample or organism.
1168
-
1169
- Attributes
1170
- ----------
1171
- GOLD_sequencing_project_identifiers : Set['xsd:anyURI']
1172
- identifiers for corresponding sequencing project in GOLD
1173
- INSDC_experiment_identifiers : Set['xsd:anyURI']
1174
- null
1175
- add_date : Optional[str]
1176
- The date on which the information was added to the database.
1177
- chimera_check : Optional['TextValue']
1178
- A chimeric sequence, or chimera for short, is a sequence comprised of two or more phylogenetically distinct parent sequences. Chimeras are usually PCR artifacts thought to occur when a prematurely terminated amplicon reanneals to a foreign DNA strand and is copied to completion in the following PCR cycles. The point at which the chimeric sequence changes from one parent to the next is called the breakpoint or conversion point
1179
- has_input : Set['NamedThing']
1180
- An input to a process.
1181
- has_output : Set['NamedThing']
1182
- An output biosample to a processing step
1183
- instrument_name : Optional[str]
1184
- The name of the instrument that was used for processing the sample.
1185
-
1186
- mod_date : Optional[str]
1187
- The last date on which the database information was modified.
1188
- ncbi_project_name : Optional[str]
1189
- null
1190
- nucl_acid_amp : Optional['TextValue']
1191
- A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids
1192
- nucl_acid_ext : Optional['TextValue']
1193
- A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample
1194
- omics_type : Optional['ControlledTermValue']
1195
- The type of omics data
1196
- part_of : Set['NamedThing']
1197
- Links a resource to another resource that either logically or physically includes it.
1198
- pcr_cond : Optional['TextValue']
1199
- Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'
1200
- pcr_primers : Optional['TextValue']
1201
- PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters
1202
- principal_investigator : Optional['PersonValue']
1203
- Principal Investigator who led the study and/or generated the dataset.
1204
- processing_institution : Optional[str]
1205
- The organization that processed the sample.
1206
- samp_vol_we_dna_ext : Optional['QuantityValue']
1207
- Volume (ml), weight (g) of processed sample, or surface area swabbed from sample for DNA extraction
1208
- seq_meth : Optional['TextValue']
1209
- Sequencing method used; e.g. Sanger, pyrosequencing, ABI-solid
1210
- seq_quality_check : Optional['TextValue']
1211
- Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA
1212
- target_gene : Optional['TextValue']
1213
- Targeted gene or locus name for marker gene studies
1214
- target_subfragment : Optional['TextValue']
1215
- Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA
1216
- type : Optional[str]
1217
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
1218
- """
1219
-
1220
- _key = LexicalKey(["id"])
1221
- GOLD_sequencing_project_identifiers: Set["xsd:anyURI"]
1222
- INSDC_experiment_identifiers: Set["xsd:anyURI"]
1223
- add_date: Optional[str]
1224
- chimera_check: Optional["TextValue"]
1225
- has_input: Set["NamedThing"]
1226
- has_output: Set["NamedThing"]
1227
- instrument_name: Optional[str]
1228
- mod_date: Optional[str]
1229
- ncbi_project_name: Optional[str]
1230
- nucl_acid_amp: Optional["TextValue"]
1231
- nucl_acid_ext: Optional["TextValue"]
1232
- omics_type: Optional["ControlledTermValue"]
1233
- part_of: Set["NamedThing"]
1234
- pcr_cond: Optional["TextValue"]
1235
- pcr_primers: Optional["TextValue"]
1236
- principal_investigator: Optional["PersonValue"]
1237
- processing_institution: Optional[str]
1238
- samp_vol_we_dna_ext: Optional["QuantityValue"]
1239
- seq_meth: Optional["TextValue"]
1240
- seq_quality_check: Optional["TextValue"]
1241
- target_gene: Optional["TextValue"]
1242
- target_subfragment: Optional["TextValue"]
1243
- type: Optional[str]
1244
-
1245
-
1246
- class FunctionalAnnotationTerm(OntologyClass):
1247
- """Abstract grouping class for any term/descriptor that can be applied to a functional unit of a genome (protein, ncRNA, complex)."""
1248
-
1249
- _key = LexicalKey(["id"])
1250
- _abstract = []
1251
-
1252
-
1253
- class Pathway(FunctionalAnnotationTerm):
1254
- """A pathway is a sequence of steps/reactions carried out by an organism or community of organisms
1255
-
1256
- Attributes
1257
- ----------
1258
- has_part : Set['Reaction']
1259
- A pathway can be broken down to a series of reaction step
1260
- """
1261
-
1262
- _key = LexicalKey(["id"])
1263
- has_part: Set["Reaction"]
1264
-
1265
-
1266
- class OrthologyGroup(FunctionalAnnotationTerm):
1267
- """A set of genes or gene products in which all members are orthologous"""
1268
-
1269
- _key = LexicalKey(["id"])
1270
-
1271
-
1272
- class Reaction(FunctionalAnnotationTerm):
1273
- """An individual biochemical transformation carried out by a functional unit of an organism, in which a collection of substrates are transformed into a collection of products. Can also represent transporters
1274
-
1275
- Attributes
1276
- ----------
1277
- direction : Optional[str]
1278
- One of l->r, r->l, bidirectional, neutral
1279
- is_balanced : Optional[bool]
1280
- null
1281
- is_diastereoselective : Optional[bool]
1282
- null
1283
- is_fully_characterized : Optional[bool]
1284
- False if includes R-groups
1285
- is_stereo : Optional[bool]
1286
- null
1287
- is_transport : Optional[bool]
1288
- null
1289
- left_participants : Set['ReactionParticipant']
1290
- null
1291
- right_participants : Set['ReactionParticipant']
1292
- null
1293
- smarts_string : Optional[str]
1294
- null
1295
- """
1296
-
1297
- _key = LexicalKey(["id"])
1298
- direction: Optional[str]
1299
- is_balanced: Optional[bool]
1300
- is_diastereoselective: Optional[bool]
1301
- is_fully_characterized: Optional[bool]
1302
- is_stereo: Optional[bool]
1303
- is_transport: Optional[bool]
1304
- left_participants: Set["ReactionParticipant"]
1305
- right_participants: Set["ReactionParticipant"]
1306
- smarts_string: Optional[str]
1307
-
1308
-
1309
- class WorkflowExecutionActivity(Activity):
1310
- """Represents an instance of an execution of a particular workflow
1311
-
1312
- Attributes
1313
- ----------
1314
- ended_at_time : Optional[datetime]
1315
- null
1316
- execution_resource : Optional[str]
1317
- Example: NERSC-Cori
1318
- git_url : Optional[str]
1319
- Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1
1320
- has_input : Set['NamedThing']
1321
- An input to a process.
1322
- has_output : Set['NamedThing']
1323
- An output biosample to a processing step
1324
- part_of : Set['NamedThing']
1325
- Links a resource to another resource that either logically or physically includes it.
1326
- started_at_time : Optional[datetime]
1327
- null
1328
- type : Optional[str]
1329
- An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.
1330
- was_associated_with : Optional['Agent']
1331
- null
1332
- was_informed_by : Optional['Activity']
1333
- null
1334
- """
1335
-
1336
- _key = LexicalKey(["id"])
1337
- ended_at_time: Optional[datetime]
1338
- execution_resource: Optional[str]
1339
- git_url: Optional[str]
1340
- has_input: Set["NamedThing"]
1341
- has_output: Set["NamedThing"]
1342
- part_of: Set["NamedThing"]
1343
- started_at_time: Optional[datetime]
1344
- type: Optional[str]
1345
- was_associated_with: Optional["Agent"]
1346
- was_informed_by: Optional["Activity"]
1347
-
1348
-
1349
- class MetatranscriptomeAnnotationActivity(WorkflowExecutionActivity):
1350
- """"""
1351
-
1352
- _key = LexicalKey(["id"])
1353
-
1354
-
1355
- class ReadBasedAnalysisActivity(WorkflowExecutionActivity):
1356
- """"""
1357
-
1358
- _key = LexicalKey(["id"])
1359
-
1360
-
1361
- class MAGsAnalysisActivity(WorkflowExecutionActivity):
1362
- """
1363
-
1364
- Attributes
1365
- ----------
1366
- binned_contig_num : Optional[int]
1367
- null
1368
- input_contig_num : Optional[int]
1369
- null
1370
- lowDepth_contig_num : Optional[int]
1371
- null
1372
- mags_list : Set['MAGBin']
1373
- null
1374
- too_short_contig_num : Optional[int]
1375
- null
1376
- unbinned_contig_num : Optional[int]
1377
- null
1378
- """
1379
-
1380
- _key = LexicalKey(["id"])
1381
- binned_contig_num: Optional[int]
1382
- input_contig_num: Optional[int]
1383
- lowDepth_contig_num: Optional[int]
1384
- mags_list: Set["MAGBin"]
1385
- too_short_contig_num: Optional[int]
1386
- unbinned_contig_num: Optional[int]
1387
-
1388
-
1389
- class ReadQCAnalysisActivity(WorkflowExecutionActivity):
1390
- """
1391
-
1392
- Attributes
1393
- ----------
1394
- has_input : Set['NamedThing']
1395
- An input to a process.
1396
- has_output : Set['NamedThing']
1397
- An output biosample to a processing step
1398
- input_base_count : Optional['xsd:float']
1399
- The nucleotide base count number of input reads for QC analysis.
1400
- input_read_bases : Optional['xsd:float']
1401
- TODO
1402
- input_read_count : Optional['xsd:float']
1403
- The sequence count number of input reads for QC analysis.
1404
- output_base_count : Optional['xsd:float']
1405
- After QC analysis nucleotide base count number.
1406
- output_read_bases : Optional['xsd:float']
1407
- TODO
1408
- output_read_count : Optional['xsd:float']
1409
- After QC analysis sequence count number.
1410
- """
1411
-
1412
- _key = LexicalKey(["id"])
1413
- has_input: Set["NamedThing"]
1414
- has_output: Set["NamedThing"]
1415
- input_base_count: Optional["xsd:float"]
1416
- input_read_bases: Optional["xsd:float"]
1417
- input_read_count: Optional["xsd:float"]
1418
- output_base_count: Optional["xsd:float"]
1419
- output_read_bases: Optional["xsd:float"]
1420
- output_read_count: Optional["xsd:float"]
1421
-
1422
-
1423
- class MetatranscriptomeAssembly(WorkflowExecutionActivity):
1424
- """
1425
-
1426
- Attributes
1427
- ----------
1428
- INSDC_assembly_identifiers : Optional[str]
1429
- null
1430
- asm_score : Optional['xsd:float']
1431
- A score for comparing metagenomic assembly quality from same sample.
1432
- contig_bp : Optional['xsd:float']
1433
- Total size in bp of all contigs.
1434
- contigs : Optional['xsd:float']
1435
- The sum of the (length*log(length)) of all contigs, times some constant. Increase the contiguity, the score will increase
1436
- ctg_L50 : Optional['xsd:float']
1437
- Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.
1438
- ctg_L90 : Optional['xsd:float']
1439
- The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.
1440
- ctg_N50 : Optional['xsd:float']
1441
- Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.
1442
- ctg_N90 : Optional['xsd:float']
1443
- Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.
1444
- ctg_logsum : Optional['xsd:float']
1445
- Maximum contig length.
1446
- ctg_max : Optional['xsd:float']
1447
- Maximum contig length.
1448
- ctg_powsum : Optional['xsd:float']
1449
- Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).
1450
- gap_pct : Optional['xsd:float']
1451
- The gap size percentage of all scaffolds.
1452
- gc_avg : Optional['xsd:float']
1453
- Average of GC content of all contigs.
1454
- gc_std : Optional['xsd:float']
1455
- Standard deviation of GC content of all contigs.
1456
- num_aligned_reads : Optional['xsd:float']
1457
- The sequence count number of input reads aligned to assembled contigs.
1458
- num_input_reads : Optional['xsd:float']
1459
- The sequence count number of input reads for assembly.
1460
- scaf_L50 : Optional['xsd:float']
1461
- Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.
1462
- scaf_L90 : Optional['xsd:float']
1463
- The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.
1464
- scaf_N50 : Optional['xsd:float']
1465
- Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.
1466
- scaf_N90 : Optional['xsd:float']
1467
- Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.
1468
- scaf_bp : Optional['xsd:float']
1469
- Total size in bp of all scaffolds.
1470
- scaf_l_gt50K : Optional['xsd:float']
1471
- Total size in bp of all scaffolds greater than 50 KB.
1472
- scaf_logsum : Optional['xsd:float']
1473
- The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase
1474
- scaf_max : Optional['xsd:float']
1475
- Maximum scaffold length.
1476
- scaf_n_gt50K : Optional['xsd:float']
1477
- Total sequence count of scaffolds greater than 50 KB.
1478
- scaf_pct_gt50K : Optional['xsd:float']
1479
- Total sequence size percentage of scaffolds greater than 50 KB.
1480
- scaf_powsum : Optional['xsd:float']
1481
- Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).
1482
- scaffolds : Optional['xsd:float']
1483
- Total sequence count of all scaffolds.
1484
- """
1485
-
1486
- _key = LexicalKey(["id"])
1487
- INSDC_assembly_identifiers: Optional[str]
1488
- asm_score: Optional["xsd:float"]
1489
- contig_bp: Optional["xsd:float"]
1490
- contigs: Optional["xsd:float"]
1491
- ctg_L50: Optional["xsd:float"]
1492
- ctg_L90: Optional["xsd:float"]
1493
- ctg_N50: Optional["xsd:float"]
1494
- ctg_N90: Optional["xsd:float"]
1495
- ctg_logsum: Optional["xsd:float"]
1496
- ctg_max: Optional["xsd:float"]
1497
- ctg_powsum: Optional["xsd:float"]
1498
- gap_pct: Optional["xsd:float"]
1499
- gc_avg: Optional["xsd:float"]
1500
- gc_std: Optional["xsd:float"]
1501
- num_aligned_reads: Optional["xsd:float"]
1502
- num_input_reads: Optional["xsd:float"]
1503
- scaf_L50: Optional["xsd:float"]
1504
- scaf_L90: Optional["xsd:float"]
1505
- scaf_N50: Optional["xsd:float"]
1506
- scaf_N90: Optional["xsd:float"]
1507
- scaf_bp: Optional["xsd:float"]
1508
- scaf_l_gt50K: Optional["xsd:float"]
1509
- scaf_logsum: Optional["xsd:float"]
1510
- scaf_max: Optional["xsd:float"]
1511
- scaf_n_gt50K: Optional["xsd:float"]
1512
- scaf_pct_gt50K: Optional["xsd:float"]
1513
- scaf_powsum: Optional["xsd:float"]
1514
- scaffolds: Optional["xsd:float"]
1515
-
1516
-
1517
- class MetagenomeAssembly(WorkflowExecutionActivity):
1518
- """
1519
-
1520
- Attributes
1521
- ----------
1522
- INSDC_assembly_identifiers : Optional[str]
1523
- null
1524
- asm_score : Optional['xsd:float']
1525
- A score for comparing metagenomic assembly quality from same sample.
1526
- contig_bp : Optional['xsd:float']
1527
- Total size in bp of all contigs.
1528
- contigs : Optional['xsd:float']
1529
- The sum of the (length*log(length)) of all contigs, times some constant. Increase the contiguity, the score will increase
1530
- ctg_L50 : Optional['xsd:float']
1531
- Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.
1532
- ctg_L90 : Optional['xsd:float']
1533
- The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.
1534
- ctg_N50 : Optional['xsd:float']
1535
- Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.
1536
- ctg_N90 : Optional['xsd:float']
1537
- Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.
1538
- ctg_logsum : Optional['xsd:float']
1539
- Maximum contig length.
1540
- ctg_max : Optional['xsd:float']
1541
- Maximum contig length.
1542
- ctg_powsum : Optional['xsd:float']
1543
- Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).
1544
- gap_pct : Optional['xsd:float']
1545
- The gap size percentage of all scaffolds.
1546
- gc_avg : Optional['xsd:float']
1547
- Average of GC content of all contigs.
1548
- gc_std : Optional['xsd:float']
1549
- Standard deviation of GC content of all contigs.
1550
- num_aligned_reads : Optional['xsd:float']
1551
- The sequence count number of input reads aligned to assembled contigs.
1552
- num_input_reads : Optional['xsd:float']
1553
- The sequence count number of input reads for assembly.
1554
- scaf_L50 : Optional['xsd:float']
1555
- Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.
1556
- scaf_L90 : Optional['xsd:float']
1557
- The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.
1558
- scaf_N50 : Optional['xsd:float']
1559
- Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.
1560
- scaf_N90 : Optional['xsd:float']
1561
- Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.
1562
- scaf_bp : Optional['xsd:float']
1563
- Total size in bp of all scaffolds.
1564
- scaf_l_gt50K : Optional['xsd:float']
1565
- Total size in bp of all scaffolds greater than 50 KB.
1566
- scaf_logsum : Optional['xsd:float']
1567
- The sum of the (length*log(length)) of all scaffolds, times some constant. Increase the contiguity, the score will increase
1568
- scaf_max : Optional['xsd:float']
1569
- Maximum scaffold length.
1570
- scaf_n_gt50K : Optional['xsd:float']
1571
- Total sequence count of scaffolds greater than 50 KB.
1572
- scaf_pct_gt50K : Optional['xsd:float']
1573
- Total sequence size percentage of scaffolds greater than 50 KB.
1574
- scaf_powsum : Optional['xsd:float']
1575
- Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).
1576
- scaffolds : Optional['xsd:float']
1577
- Total sequence count of all scaffolds.
1578
- """
1579
-
1580
- _key = LexicalKey(["id"])
1581
- INSDC_assembly_identifiers: Optional[str]
1582
- asm_score: Optional["xsd:float"]
1583
- contig_bp: Optional["xsd:float"]
1584
- contigs: Optional["xsd:float"]
1585
- ctg_L50: Optional["xsd:float"]
1586
- ctg_L90: Optional["xsd:float"]
1587
- ctg_N50: Optional["xsd:float"]
1588
- ctg_N90: Optional["xsd:float"]
1589
- ctg_logsum: Optional["xsd:float"]
1590
- ctg_max: Optional["xsd:float"]
1591
- ctg_powsum: Optional["xsd:float"]
1592
- gap_pct: Optional["xsd:float"]
1593
- gc_avg: Optional["xsd:float"]
1594
- gc_std: Optional["xsd:float"]
1595
- num_aligned_reads: Optional["xsd:float"]
1596
- num_input_reads: Optional["xsd:float"]
1597
- scaf_L50: Optional["xsd:float"]
1598
- scaf_L90: Optional["xsd:float"]
1599
- scaf_N50: Optional["xsd:float"]
1600
- scaf_N90: Optional["xsd:float"]
1601
- scaf_bp: Optional["xsd:float"]
1602
- scaf_l_gt50K: Optional["xsd:float"]
1603
- scaf_logsum: Optional["xsd:float"]
1604
- scaf_max: Optional["xsd:float"]
1605
- scaf_n_gt50K: Optional["xsd:float"]
1606
- scaf_pct_gt50K: Optional["xsd:float"]
1607
- scaf_powsum: Optional["xsd:float"]
1608
- scaffolds: Optional["xsd:float"]
1609
-
1610
-
1611
- class NomAnalysisActivity(WorkflowExecutionActivity):
1612
- """
1613
-
1614
- Attributes
1615
- ----------
1616
- has_calibration : Optional[str]
1617
- TODO: Yuri to fill in
1618
- used : Optional[str]
1619
- null
1620
- """
1621
-
1622
- _key = LexicalKey(["id"])
1623
- has_calibration: Optional[str]
1624
- used: Optional[str]
1625
-
1626
-
1627
- class MetabolomicsAnalysisActivity(WorkflowExecutionActivity):
1628
- """
1629
-
1630
- Attributes
1631
- ----------
1632
- has_calibration : Optional[str]
1633
- TODO: Yuri to fill in
1634
- has_metabolite_quantifications : Set['MetaboliteQuantification']
1635
- null
1636
- used : Optional[str]
1637
- null
1638
- """
1639
-
1640
- _key = LexicalKey(["id"])
1641
- has_calibration: Optional[str]
1642
- has_metabolite_quantifications: Set["MetaboliteQuantification"]
1643
- used: Optional[str]
1644
-
1645
-
1646
- class MetatranscriptomeActivity(WorkflowExecutionActivity):
1647
- """A metatranscriptome activity that e.g. pools assembly and annotation activity."""
1648
-
1649
- _key = LexicalKey(["id"])
1650
-
1651
-
1652
- class MetaproteomicsAnalysisActivity(WorkflowExecutionActivity):
1653
- """
1654
-
1655
- Attributes
1656
- ----------
1657
- has_peptide_quantifications : Set['PeptideQuantification']
1658
- null
1659
- used : Optional[str]
1660
- null
1661
- """
1662
-
1663
- _key = LexicalKey(["id"])
1664
- has_peptide_quantifications: Set["PeptideQuantification"]
1665
- used: Optional[str]
1666
-
1667
-
1668
- class MetagenomeAnnotationActivity(WorkflowExecutionActivity):
1669
- """"""
1670
-
1671
- _key = LexicalKey(["id"])