nmdc-runtime 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,548 @@
1
+ import re
2
+ import sqlite3
3
+
4
+ import pandas as pd
5
+ import requests_cache
6
+
7
+ from nmdc_schema import nmdc
8
+ from nmdc_runtime.site.translation.translator import Translator
9
+ from nmdc_runtime.site.util import get_basename
10
+ from nmdc_runtime.site.translation.neon_utils import (
11
+ _get_value_or_none,
12
+ _create_controlled_identified_term_value,
13
+ _create_controlled_term_value,
14
+ _create_geolocation_value,
15
+ _create_quantity_value,
16
+ _create_timestamp_value,
17
+ _create_text_value,
18
+ )
19
+
20
+
21
# Lookup tables from NEON aquatic field vocabulary to ENVO terms. Each entry
# carries the ENVO CURIE ("term_id") and its label ("term_name").

# Keyed by the NEON `aquaticSiteType` column.
BENTHIC_BROAD_SCALE_MAPPINGS = {
    "stream": {
        "term_id": "ENVO:01000253",
        "term_name": "freshwater river biome",
    },
}

# Keyed by the NEON `habitatType` column. Both the spaced ("step pool") and
# camel-cased ("stepPool") spellings are listed and resolve to the same term.
BENTHIC_LOCAL_SCALE_MAPPINGS = {
    "pool": {
        "term_id": "ENVO:03600094",
        "term_name": "stream pool",
    },
    "run": {
        "term_id": "ENVO:03600095",
        "term_name": "stream run",
    },
    "step pool": {
        "term_id": "ENVO:03600096",
        "term_name": "step pool",
    },
    "riffle": {
        "term_id": "ENVO:00000148",
        "term_name": "riffle",
    },
    "stepPool": {
        "term_id": "ENVO:03600096",
        "term_name": "step pool",
    },
}

# Keyed by the NEON `sampleMaterial` column.
BENTHIC_ENV_MEDIUM_MAPPINGS = {
    "plant-associated": {
        "term_id": "ENVO:01001057",
        "term_name": "environment associated with a plant part or small plant",
    },
    "sediment": {
        "term_id": "ENVO:00002007",
        "term_name": "sediment",
    },
    "biofilm": {
        "term_id": "ENVO:00002034",
        "term_name": "biofilm",
    },
}
41
+
42
+
43
class NeonBenthicDataTranslator(Translator):
    """Translate NEON aquatic benthic microbiome tables into an ``nmdc.Database``.

    The input DataFrames are staged as tables in a SQLite database so they can
    be joined with SQL. ``get_database`` then emits, per NEON sample, a
    Biosample -> Extraction -> LibraryPreparation -> OmicsProcessing chain plus
    the DataObjects for the raw sequencing files.

    NOTE(review): the SQLite connection opened in ``__init__`` is kept on
    ``self.conn`` and is never closed by this class.
    """

    # Study referenced by ``part_of`` on the records built here. The Biosample
    # builder already used this id; the OmicsProcessing builder used a
    # different id together with a "Terrestrial soil" name, which looks like a
    # leftover from another translator, so both now share this constant.
    _BENTHIC_STUDY_ID = "nmdc:sty-11-pzmd0x14"

    def __init__(
        self,
        benthic_data: dict,
        site_code_mapping: dict,
        neon_envo_mappings_file: pd.DataFrame,
        neon_raw_data_file_mappings_file: pd.DataFrame,
        *args,
        **kwargs,
    ) -> None:
        """Stage all input tables in SQLite.

        :param benthic_data: Mapping of NEON table name to DataFrame. Must contain
            ``mms_benthicMetagenomeSequencing``, ``mms_benthicMetagenomeDnaExtraction``
            and ``amb_fieldParent``.
        :param site_code_mapping: Mapping of NEON ``siteID`` to the value used for
            ``geo_loc_name``.
        :param neon_envo_mappings_file: DataFrame stored as table ``neonEnvoTerms``.
        :param neon_raw_data_file_mappings_file: DataFrame stored as table
            ``neonRawDataFile``; ``get_database`` reads its ``dnaSampleID``,
            ``rawDataFilePath`` and ``checkSum`` columns.
        :raises ValueError: If any of the three required tables is missing.
        """
        super().__init__(*args, **kwargs)

        neon_amb_data_tables = (
            "mms_benthicMetagenomeSequencing",
            "mms_benthicMetagenomeDnaExtraction",
            "amb_fieldParent",
        )

        # Validate first so that bad input neither creates the database file
        # nor installs the process-wide requests cache.
        if not all(k in benthic_data for k in neon_amb_data_tables):
            raise ValueError(
                f"You are missing one of the aquatic benthic microbiome tables: {neon_amb_data_tables}"
            )

        self.conn = sqlite3.connect("neon.db")
        requests_cache.install_cache("neon_api_cache")

        for table_name in neon_amb_data_tables:
            benthic_data[table_name].to_sql(
                table_name, self.conn, if_exists="replace", index=False
            )

        neon_envo_mappings_file.to_sql(
            "neonEnvoTerms", self.conn, if_exists="replace", index=False
        )

        self.neon_raw_data_file_mappings_df = neon_raw_data_file_mappings_file
        self.neon_raw_data_file_mappings_df.to_sql(
            "neonRawDataFile", self.conn, if_exists="replace", index=False
        )

        self.site_code_mapping = site_code_mapping

    @staticmethod
    def _resolve_processing_institution(facility_name):
        """Return the processing institution implied by a lab/facility name.

        :param facility_name: Laboratory or sequencing facility name, or None.
        :return: ``"Battelle"`` or ``"ANL"`` when the name contains "Battelle" or
            "Argonne" (case-insensitive, checked in that order), otherwise None.
        """
        if facility_name is None:
            return None
        if re.search("Battelle", facility_name, re.IGNORECASE):
            return "Battelle"
        if re.search("Argonne", facility_name, re.IGNORECASE):
            return "ANL"
        return None

    @staticmethod
    def _envo_term_for(mappings: dict, raw_value, column: str) -> tuple:
        """Look up the ENVO ``(term_id, term_name)`` pair for a NEON value.

        :param mappings: One of the module-level ``BENTHIC_*_MAPPINGS`` tables.
        :param raw_value: Value read from the NEON column.
        :param column: Column name, used only in the error message.
        :raises ValueError: If ``raw_value`` has no entry in ``mappings``.
        """
        term = mappings.get(raw_value)
        if term is None:
            raise ValueError(
                f"No ENVO mapping for {column} value {raw_value!r}; "
                f"known values: {sorted(mappings)}"
            )
        return term.get("term_id"), term.get("term_name")

    def _mint_id_map(self, typecode: str, neon_ids) -> dict:
        """Mint one NMDC id of ``typecode`` per entry of ``neon_ids`` and pair them up.

        :param typecode: NMDC type to mint ids for, e.g. ``"nmdc:Biosample"``.
        :param neon_ids: Sized iterable of NEON identifiers used as dict keys.
        :return: Dict from NEON identifier to minted NMDC id.
        """
        return dict(zip(neon_ids, self._id_minter(typecode, len(neon_ids))))

    def _translate_biosample(
        self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame
    ) -> nmdc.Biosample:
        """Create an nmdc Biosample from the first row of ``biosample_row``.

        :param neon_id: NEON sample id, used as the Biosample name.
        :param nmdc_id: Minted NMDC Biosample id.
        :param biosample_row: Rows of the joined benthic table for this sample;
            only the first row is read.
        :raises ValueError: If the site type, habitat type or sample material has
            no ENVO mapping.
        :return: Biosample object.
        """
        broad_scale = self._envo_term_for(
            BENTHIC_BROAD_SCALE_MAPPINGS,
            biosample_row["aquaticSiteType"].values[0],
            "aquaticSiteType",
        )
        local_scale = self._envo_term_for(
            BENTHIC_LOCAL_SCALE_MAPPINGS,
            biosample_row["habitatType"].values[0],
            "habitatType",
        )
        env_medium = self._envo_term_for(
            BENTHIC_ENV_MEDIUM_MAPPINGS,
            biosample_row["sampleMaterial"].values[0],
            "sampleMaterial",
        )
        site_id = biosample_row["siteID"].values[0]

        return nmdc.Biosample(
            id=nmdc_id,
            part_of=self._BENTHIC_STUDY_ID,
            env_broad_scale=_create_controlled_identified_term_value(*broad_scale),
            env_local_scale=_create_controlled_identified_term_value(*local_scale),
            env_medium=_create_controlled_identified_term_value(*env_medium),
            name=neon_id,
            lat_lon=_create_geolocation_value(
                biosample_row["decimalLatitude"].values[0],
                biosample_row["decimalLongitude"].values[0],
            ),
            elev=nmdc.Float(biosample_row["elevation"].values[0]),
            collection_date=_create_timestamp_value(
                biosample_row["collectDate"].values[0]
            ),
            samp_size=_create_quantity_value(
                biosample_row["fieldSampleVolume"].values[0], "mL"
            ),
            geo_loc_name=_create_text_value(
                self.site_code_mapping[site_id] if site_id else None
            ),
            type="nmdc:Biosample",
            analysis_type="metagenomics",
            biosample_categories="NEON",
            # Depth is recorded as a fixed 0-1 range, not read from the data.
            depth=nmdc.QuantityValue(
                has_minimum_numeric_value=nmdc.Float("0"),
                has_maximum_numeric_value=nmdc.Float("1"),
                has_unit="meters",
            ),
        )

    def _translate_extraction_process(
        self,
        extraction_id: str,
        extraction_input: str,
        processed_sample_id: str,
        extraction_row: pd.DataFrame,
    ) -> nmdc.Extraction:
        """
        Create an nmdc Extraction process, which models the DNA extraction in
        a metagenome sequencing experiment.

        :param extraction_id: Minted id for Extraction process.
        :param extraction_input: Id of the input to the Extraction process;
            ``get_database`` passes the minted Biosample id.
        :param processed_sample_id: Output of Extraction process is a ProcessedSample.
        :param extraction_row: DataFrame with Extraction process metadata.
        :return: Extraction process object.
        """
        processing_institution = self._resolve_processing_institution(
            _get_value_or_none(extraction_row, "laboratoryName")
        )

        return nmdc.Extraction(
            id=extraction_id,
            has_input=extraction_input,
            has_output=processed_sample_id,
            start_date=_get_value_or_none(extraction_row, "collectDate"),
            end_date=_get_value_or_none(extraction_row, "processedDate"),
            input_mass=_create_quantity_value(
                _get_value_or_none(extraction_row, "sampleMass"), "g"
            ),
            quality_control_report=nmdc.QualityControlReport(
                status=_get_value_or_none(extraction_row, "qaqcStatus")
            ),
            processing_institution=processing_institution,
        )

    def _translate_library_preparation(
        self,
        library_preparation_id: str,
        library_preparation_input: str,
        processed_sample_id: str,
        library_preparation_row: pd.DataFrame,
    ) -> nmdc.LibraryPreparation:
        """
        Create LibraryPreparation process object. The input to LibraryPreparation process
        is the output ProcessedSample from an Extraction process. The output of
        LibraryPreparation process is fed as input to an OmicsProcessing object.

        :param library_preparation_id: Minted id for LibraryPreparation process.
        :param library_preparation_input: Input to LibraryPreparation process is output from
            Extraction process.
        :param processed_sample_id: Minted ProcessedSample id which is the output of
            LibraryPreparation and also the input to OmicsProcessing.
        :param library_preparation_row: Metadata required to populate LibraryPreparation.
        :return: LibraryPreparation process object.
        """
        processing_institution = self._resolve_processing_institution(
            _get_value_or_none(library_preparation_row, "laboratoryName")
        )

        return nmdc.LibraryPreparation(
            id=library_preparation_id,
            has_input=library_preparation_input,
            has_output=processed_sample_id,
            start_date=_get_value_or_none(library_preparation_row, "collectDate"),
            end_date=_get_value_or_none(library_preparation_row, "processedDate"),
            processing_institution=processing_institution,
        )

    def _translate_omics_processing(
        self,
        omics_processing_id: str,
        processed_sample_id: str,
        raw_data_file_data: list,
        omics_processing_row: pd.DataFrame,
    ) -> nmdc.OmicsProcessing:
        """Create nmdc OmicsProcessing object. The input to an OmicsProcessing
        process is the output from a LibraryPreparation process, and its output is
        the DataObject(s) holding the FASTQ sequence file URLs.

        :param omics_processing_id: Minted id for an OmicsProcessing process.
        :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation.
        :param raw_data_file_data: Minted ids of the R1/R2 DataObjects produced by this
            process; ``get_database`` passes a list of ids.
        :param omics_processing_row: DataFrame with metadata for an OmicsProcessing
            process/run.
        :return: OmicsProcessing object.
        """
        processing_institution = self._resolve_processing_institution(
            _get_value_or_none(omics_processing_row, "sequencingFacilityID")
        )

        # Join only the parts that are present so a missing method or model does
        # not end up as the literal text "None" in the instrument name.
        instrument_parts = [
            str(part)
            for part in (
                _get_value_or_none(omics_processing_row, "sequencingMethod"),
                _get_value_or_none(omics_processing_row, "instrument_model"),
            )
            if part is not None
        ]
        instrument_name = " ".join(instrument_parts) or None

        return nmdc.OmicsProcessing(
            id=omics_processing_id,
            has_input=processed_sample_id,
            has_output=raw_data_file_data,
            processing_institution=processing_institution,
            ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"),
            omics_type=_create_controlled_term_value(
                omics_processing_row["investigation_type"].values[0]
            ),
            instrument_name=instrument_name,
            part_of=self._BENTHIC_STUDY_ID,
            name=f"Benthic microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}",
            type="nmdc:OmicsProcessing",
        )

    def _translate_processed_sample(
        self, processed_sample_id: str, sample_id: str
    ) -> nmdc.ProcessedSample:
        """
        Create an nmdc ProcessedSample. ProcessedSample is typically the output of a
        PlannedProcess like Extraction or LibraryPreparation. It gives the minted
        ProcessedSample ids in `processed_sample_set` a human-readable name derived from
        the `genomicsSampleID` or `dnaSampleID` column.

        :param processed_sample_id: NMDC minted ProcessedSample id.
        :param sample_id: Name for the sample, built from `genomicsSampleID` or `dnaSampleID`.
        :return: ProcessedSample object to be stored in `processed_sample_set`.
        """
        return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id)

    def _translate_data_object(
        self, do_id: str, url: str, do_type: str, checksum: str
    ) -> nmdc.DataObject:
        """Create nmdc DataObject which is the output of an OmicsProcessing process. It
        describes one sequencing file.

        :param do_id: NMDC minted DataObject id.
        :param url: URL of zipped FASTQ file on NEON file server. Retrieved from file provided
            by Hugh Cross at NEON.
        :param do_type: Indicates whether it is FASTQ for Read 1 or Read 2 (paired end
            sequencing).
        :param checksum: Checksum value for FASTQ in zip file, also provided by Hugh Cross
            at NEON.
        :return: DataObject with all the sequencing file metadata.
        """
        file_name = get_basename(url)
        # Everything before the first "." is used as the display name.
        basename = file_name.split(".", 1)[0]

        return nmdc.DataObject(
            id=do_id,
            name=file_name,
            url=url,
            description=f"sequencing results for {basename}",
            type="nmdc:DataObject",
            md5_checksum=checksum,
            data_object_type=do_type,
        )

    def get_database(self) -> nmdc.Database:
        """Build the ``nmdc.Database`` for all staged benthic samples.

        Joins the sequencing and DNA-extraction tables on ``dnaSampleID``, left-joins
        the field-parent table on ``genomicsSampleID = geneticSampleID``, stores the
        result as table ``benthicSamples``, mints NMDC ids, and appends the translated
        records to the corresponding database sets.

        :return: Populated Database object.
        """
        database = nmdc.Database()

        query = """
            SELECT
                merged.laboratoryName,
                merged.sequencingFacilityID,
                merged.processedDate,
                merged.dnaSampleID,
                merged.dnaSampleCode,
                merged.internalLabID,
                merged.instrument_model,
                merged.sequencingMethod,
                merged.investigation_type,
                merged.qaqcStatus,
                merged.ncbiProjectID,
                merged.genomicsSampleID,
                merged.sequenceAnalysisType,
                merged.sampleMass,
                merged.nucleicAcidConcentration,
                afp.aquaticSiteType,
                afp.habitatType,
                afp.sampleMaterial,
                afp.geneticSampleID,
                afp.elevation,
                afp.fieldSampleVolume,
                afp.decimalLatitude,
                afp.decimalLongitude,
                afp.siteID,
                afp.sampleID,
                afp.collectDate
            FROM
                (
                    SELECT
                        bs.collectDate,
                        bs.laboratoryName,
                        bs.sequencingFacilityID,
                        bs.processedDate,
                        bs.dnaSampleID,
                        bs.dnaSampleCode,
                        bs.internalLabID,
                        bs.instrument_model,
                        bs.sequencingMethod,
                        bs.investigation_type,
                        bs.qaqcStatus,
                        bs.ncbiProjectID,
                        bd.genomicsSampleID,
                        bd.sequenceAnalysisType,
                        bd.sampleMass,
                        bd.nucleicAcidConcentration
                    FROM
                        mms_benthicMetagenomeSequencing AS bs
                    JOIN
                        mms_benthicMetagenomeDnaExtraction AS bd
                    ON
                        bs.dnaSampleID = bd.dnaSampleID
                ) AS merged
            LEFT JOIN amb_fieldParent AS afp
            ON
                merged.genomicsSampleID = afp.geneticSampleID
        """
        benthic_samples = pd.read_sql_query(query, self.conn)
        benthic_samples.to_sql(
            "benthicSamples", self.conn, if_exists="replace", index=False
        )

        # Every per-sample id map is keyed by the same NEON sampleID column.
        # NOTE(review): the LEFT JOIN can yield rows with a null sampleID, and a
        # sampleID may repeat across rows; neither case is filtered here.
        sample_ids = benthic_samples["sampleID"]

        # Mint order is kept stable: Biosample, Extraction, ProcessedSample,
        # LibraryPreparation, ProcessedSample, OmicsProcessing, DataObject.
        neon_to_nmdc_biosample_ids = self._mint_id_map("nmdc:Biosample", sample_ids)
        neon_to_nmdc_extraction_ids = self._mint_id_map("nmdc:Extraction", sample_ids)
        neon_to_nmdc_extraction_processed_ids = self._mint_id_map(
            "nmdc:ProcessedSample", sample_ids
        )
        neon_to_nmdc_lib_prep_ids = self._mint_id_map(
            "nmdc:LibraryPreparation", sample_ids
        )
        neon_to_nmdc_lib_prep_processed_ids = self._mint_id_map(
            "nmdc:ProcessedSample", sample_ids
        )
        neon_to_nmdc_omprc_ids = self._mint_id_map("nmdc:OmicsProcessing", sample_ids)

        neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df
        neon_to_nmdc_data_object_ids = self._mint_id_map(
            "nmdc:DataObject", neon_raw_data_file_mappings_df["rawDataFilePath"]
        )

        # Biosamples.
        for neon_id, nmdc_id in neon_to_nmdc_biosample_ids.items():
            biosample_row = benthic_samples[benthic_samples["sampleID"] == neon_id]

            database.biosample_set.append(
                self._translate_biosample(neon_id, nmdc_id, biosample_row)
            )

        # Extractions and the ProcessedSamples they produce.
        for neon_id, nmdc_id in neon_to_nmdc_extraction_ids.items():
            extraction_row = benthic_samples[benthic_samples["sampleID"] == neon_id]

            extraction_input = neon_to_nmdc_biosample_ids.get(neon_id)
            processed_sample_id = neon_to_nmdc_extraction_processed_ids.get(neon_id)
            if extraction_input is None or processed_sample_id is None:
                continue

            database.extraction_set.append(
                self._translate_extraction_process(
                    nmdc_id,
                    extraction_input,
                    processed_sample_id,
                    extraction_row,
                )
            )

            genomics_sample_id = _get_value_or_none(extraction_row, "genomicsSampleID")

            database.processed_sample_set.append(
                self._translate_processed_sample(
                    processed_sample_id,
                    f"Extracted DNA from {genomics_sample_id}",
                )
            )

        # Collect the raw file paths of each DNA sample into one list.
        query = """
            SELECT dnaSampleID, GROUP_CONCAT(rawDataFilePath, '|') AS rawDataFilePaths
            FROM neonRawDataFile
            GROUP BY dnaSampleID
        """
        neon_raw_data_files = pd.read_sql_query(query, self.conn)
        neon_raw_data_files_dict = (
            neon_raw_data_files.set_index("dnaSampleID")["rawDataFilePaths"]
            .str.split("|")
            .to_dict()
        )
        # Only DNA samples with at most two raw files are kept.
        filtered_neon_raw_data_files_dict = {
            key: value
            for key, value in neon_raw_data_files_dict.items()
            if len(value) <= 2
        }

        # Library preparations, their ProcessedSamples, and the downstream
        # DataObjects / OmicsProcessing records.
        for neon_id, nmdc_id in neon_to_nmdc_lib_prep_ids.items():
            lib_prep_row = benthic_samples[benthic_samples["sampleID"] == neon_id]

            lib_prep_input = neon_to_nmdc_extraction_processed_ids.get(neon_id)
            processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(neon_id)
            if lib_prep_input is None or processed_sample_id is None:
                continue

            database.library_preparation_set.append(
                self._translate_library_preparation(
                    nmdc_id,
                    lib_prep_input,
                    processed_sample_id,
                    lib_prep_row,
                )
            )

            dna_sample_id = _get_value_or_none(lib_prep_row, "dnaSampleID")

            database.processed_sample_set.append(
                self._translate_processed_sample(
                    processed_sample_id,
                    f"Library preparation for {dna_sample_id}",
                )
            )

            # Without raw files there is no sequencing record to emit.
            if dna_sample_id not in filtered_neon_raw_data_files_dict:
                continue

            has_output_do_ids = []
            for item in filtered_neon_raw_data_files_dict[dna_sample_id]:
                if item not in neon_to_nmdc_data_object_ids:
                    continue

                data_object_id = neon_to_nmdc_data_object_ids[item]
                has_output_do_ids.append(data_object_id)

                checksum = neon_raw_data_file_mappings_df[
                    neon_raw_data_file_mappings_df["rawDataFilePath"] == item
                ]["checkSum"].values[0]

                # The read direction is inferred from the file name suffix;
                # anything else leaves the type unset.
                do_type = None
                if "_R1.fastq.gz" in item:
                    do_type = "Metagenome Raw Read 1"
                elif "_R2.fastq.gz" in item:
                    do_type = "Metagenome Raw Read 2"

                database.data_object_set.append(
                    self._translate_data_object(
                        data_object_id,
                        item,
                        do_type,
                        checksum,
                    )
                )

            database.omics_processing_set.append(
                self._translate_omics_processing(
                    neon_to_nmdc_omprc_ids.get(neon_id),
                    processed_sample_id,
                    has_output_do_ids,
                    lib_prep_row,
                )
            )

        return database