lsstdesc-dataregistry 0.5.4rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsstdesc_dataregistry-0.5.4rc1/LICENSE +29 -0
- lsstdesc_dataregistry-0.5.4rc1/PKG-INFO +42 -0
- lsstdesc_dataregistry-0.5.4rc1/README.md +22 -0
- lsstdesc_dataregistry-0.5.4rc1/pyproject.toml +45 -0
- lsstdesc_dataregistry-0.5.4rc1/setup.cfg +4 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/DataRegistry.py +127 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/__init__.py +6 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/_version.py +1 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/db_basic.py +347 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/exceptions.py +12 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/git_util.py +21 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/query.py +592 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/__init__.py +1 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/base_table_class.py +218 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset.py +649 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset_alias.py +92 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset_util.py +74 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/execution.py +97 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/registrar.py +52 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/registrar_util.py +342 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/__init__.py +1 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/keywords.yaml +4 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/load_schema.py +61 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/schema.yaml +468 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/site_config/site_rootdir.yaml +1 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/cli.py +267 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/delete.py +35 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/query.py +97 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/register.py +66 -0
- lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/show.py +55 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/PKG-INFO +42 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/SOURCES.txt +34 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/dependency_links.txt +1 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/entry_points.txt +2 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/requires.txt +7 -0
- lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/top_level.txt +2 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022, JoanneBogart
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
17
|
+
contributors may be used to endorse or promote products derived from
|
|
18
|
+
this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: lsstdesc-dataregistry
|
|
3
|
+
Version: 0.5.4rc1
|
|
4
|
+
Summary: Creation and user API for DESC data registry.
|
|
5
|
+
Author-email: Joanne Bogart <jrb@slac.stanford.edu>, Stuart McAlpine <stuart.mcalpine@fysik.su.se>
|
|
6
|
+
Keywords: desc,python,registry
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Requires-Python: <3.12,>=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: psycopg2
|
|
15
|
+
Requires-Dist: sqlalchemy
|
|
16
|
+
Requires-Dist: pyyaml
|
|
17
|
+
Requires-Dist: pandas
|
|
18
|
+
Provides-Extra: docs
|
|
19
|
+
Requires-Dist: sphinx_rtd_theme; extra == "docs"
|
|
20
|
+
|
|
21
|
+
[](https://www.python.org)
|
|
22
|
+
|
|
23
|
+
<img src="docs/source/_static/DREGS_logo_v2.png" width="300"/>
|
|
24
|
+
|
|
25
|
+
**The ``dataregistry`` is currently undergoing beta testing. If you would like to participate, please get in touch!**
|
|
26
|
+
|
|
27
|
+
### What is the data registry?
|
|
28
|
+
|
|
29
|
+
The data registry is a facility to store and share datasets from DESC related projects and pipelines.
|
|
30
|
+
|
|
31
|
+
Once registered, the data are transferred to and stored at a central location at NERSC, where they can later be accessed and queried using the ``dataregistry`` Python package or the CLI.
|
|
32
|
+
|
|
33
|
+
### Documentation
|
|
34
|
+
|
|
35
|
+
The full documentation for the data registry can be found [here](http://lsstdesc.org/dataregistry).
|
|
36
|
+
|
|
37
|
+
### Contact
|
|
38
|
+
|
|
39
|
+
For any further information, please get in touch!
|
|
40
|
+
|
|
41
|
+
- Admin: Joanne Bogart ([@JoanneBogart](https://www.github.com/JoanneBogart))
|
|
42
|
+
- Admin: Stuart McAlpine ([@stuartmcalpine](https://www.github.com/stuartmcalpine))
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[](https://www.python.org)
|
|
2
|
+
|
|
3
|
+
<img src="docs/source/_static/DREGS_logo_v2.png" width="300"/>
|
|
4
|
+
|
|
5
|
+
**The ``dataregistry`` is currently undergoing beta testing. If you would like to participate, please get in touch!**
|
|
6
|
+
|
|
7
|
+
### What is the data registry?
|
|
8
|
+
|
|
9
|
+
The data registry is a facility to store and share datasets from DESC related projects and pipelines.
|
|
10
|
+
|
|
11
|
+
Once registered, the data are transferred to and stored at a central location at NERSC, where they can later be accessed and queried using the ``dataregistry`` Python package or the CLI.
|
|
12
|
+
|
|
13
|
+
### Documentation
|
|
14
|
+
|
|
15
|
+
The full documentation for the data registry can be found [here](http://lsstdesc.org/dataregistry).
|
|
16
|
+
|
|
17
|
+
### Contact
|
|
18
|
+
|
|
19
|
+
For any further information, please get in touch!
|
|
20
|
+
|
|
21
|
+
- Admin: Joanne Bogart ([@JoanneBogart](https://www.github.com/JoanneBogart))
|
|
22
|
+
- Admin: Stuart McAlpine ([@stuartmcalpine](https://www.github.com/stuartmcalpine))
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools >= 61.0"] # PEP 621 compliant
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
dynamic = ["version"]
|
|
7
|
+
name = "lsstdesc-dataregistry"
|
|
8
|
+
description = "Creation and user API for DESC data registry."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name = "Joanne Bogart", email = "jrb@slac.stanford.edu" },
|
|
12
|
+
{ name = "Stuart McAlpine", email = "stuart.mcalpine@fysik.su.se" }
|
|
13
|
+
]
|
|
14
|
+
license = { file = "LICENSE" }
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.9",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
]
|
|
21
|
+
keywords = ["desc", "python", "registry"]
|
|
22
|
+
# NOTE: Those needing to create a database will also need to install the
|
|
23
|
+
# GitPython package. It is not needed for accessing an existing db.
|
|
24
|
+
dependencies = [
|
|
25
|
+
'psycopg2',
|
|
26
|
+
'sqlalchemy',
|
|
27
|
+
'pyyaml',
|
|
28
|
+
'pandas'
|
|
29
|
+
]
|
|
30
|
+
requires-python = ">=3.9,<3.12" # Supported versions (in CI)
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.dynamic]
|
|
33
|
+
version = {attr = "dataregistry._version.__version__"}
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
docs = ["sphinx_rtd_theme"]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.packages.find]
|
|
39
|
+
where = ["src"]
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
dregs = "dataregistry_cli.cli:main"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.package-data]
|
|
45
|
+
"dataregistry" = ["site_config/site_rootdir.yaml", "schema/*.yaml"]
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from dataregistry.db_basic import DbConnection
|
|
2
|
+
from dataregistry.query import Query
|
|
3
|
+
from dataregistry.registrar import Registrar
|
|
4
|
+
import yaml
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
_HERE = os.path.dirname(__file__)
|
|
8
|
+
_SITE_CONFIG_PATH = os.path.join(_HERE, "site_config", "site_rootdir.yaml")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DataRegistry:
    def __init__(
        self,
        owner=None,
        owner_type=None,
        config_file=None,
        schema=None,
        root_dir=None,
        verbose=False,
        site=None,
    ):
        """
        Primary data registry wrapper class.

        The DataRegistry class links to both the Registrar class, to
        register/modify/delete datasets, and the Query class, to query existing
        datasets.

        The link to the database is made automatically using:
            - the user's config file (if None, defaults are used)
            - the passed schema (if None, the default schema is used)

        The `root_dir` is the location the data is copied to. This can be
        manually passed, or alternatively a predefined `site` can be chosen.
        If neither is chosen, the NERSC site will be selected as the default.

        Parameters
        ----------
        owner : str
            To set the default owner for all registered datasets in this
            instance.
        owner_type : str
            To set the default owner_type for all registered datasets in this
            instance.
        config_file : str
            Path to config file, if None, default location is assumed.
        schema : str
            Schema to connect to, if None, default schema is assumed.
        root_dir : str
            Root directory for datasets, if None, default is assumed.
        verbose : bool
            True for more output.
        site : str
            Can be used instead of `root_dir`. Some predefined "sites" are
            built in, such as "nersc", which will set the `root_dir` to the
            data registry's default data location at NERSC.
        """

        # Establish connection to database
        self.db_connection = DbConnection(config_file, schema=schema,
                                          verbose=verbose)

        # Work out the location of the root directory (needs the connection's
        # dialect, so this must come after `db_connection` is set)
        self.root_dir = self._get_root_dir(root_dir, site)

        # Create registrar object
        self.Registrar = Registrar(self.db_connection, self.root_dir,
                                   owner, owner_type)

        # Create query object
        self.Query = Query(self.db_connection, self.root_dir)

    def _get_root_dir(self, root_dir, site):
        """
        What is the location of the root_dir we are pairing with?

        In order of priority:
            - If manually passed `root_dir` is not None, use that.
            - If manually passed `site` is not None, use that.
            - If env DATAREG_SITE is set, use that.
            - Else use `site="nersc"`.

        All `site`s are assumed to be postgres. Sqlite users must manually
        specify the `root_dir`.

        Parameters
        ----------
        root_dir : str
        site : str

        Returns
        -------
        - : str
            Path to root directory

        Raises
        ------
        ValueError
            With Sqlite, if `root_dir` is None or equals the path of a
            pre-defined site. Otherwise, if the selected site (from the `site`
            argument or the DATAREG_SITE environment variable) is not listed
            in the site config file.
        """

        # Load the site config yaml file (mapping of site name -> root path)
        with open(_SITE_CONFIG_PATH) as f:
            data = yaml.safe_load(f)

        # Sqlite case: `site`s cannot be used, a `root_dir` must be passed
        if self.db_connection._dialect == "sqlite":
            if root_dir is None:
                raise ValueError("Must pass a `root_dir` using Sqlite")

            # root_dir cannot equal a site path when using Sqlite
            if root_dir in data.values():
                raise ValueError(
                    "`root_dir` must not equal a pre-defined site with Sqlite"
                )
            return root_dir

        # Non Sqlite case: an explicit root_dir always wins
        if root_dir is not None:
            return root_dir

        # Resolve the site name: argument, then environment, then default.
        # An empty DATAREG_SITE is treated as unset.
        if site is None:
            site = os.getenv("DATAREG_SITE") or "nersc"

        # Validate every source of the site name the same way, so a bad
        # DATAREG_SITE gives the same clear error as a bad `site` argument
        # instead of a bare KeyError.
        site_key = site.lower()
        if site_key not in data:
            raise ValueError(f"{site} is not a valid site")

        return data[site_key]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.5.4-rc1"
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
from sqlalchemy import engine_from_config
|
|
2
|
+
from sqlalchemy.engine import make_url
|
|
3
|
+
from sqlalchemy import MetaData
|
|
4
|
+
from sqlalchemy import column, insert, select
|
|
5
|
+
import yaml
|
|
6
|
+
import os
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from dataregistry import __version__
|
|
9
|
+
from dataregistry.exceptions import DataRegistryException
|
|
10
|
+
|
|
11
|
+
"""
|
|
12
|
+
Low-level utility routines and classes for accessing the registry
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
SCHEMA_VERSION = "registry_beta"
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"DbConnection",
|
|
19
|
+
"add_table_row",
|
|
20
|
+
"TableMetadata",
|
|
21
|
+
"SCHEMA_VERSION",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _get_dataregistry_config(config_file=None, verbose=False):
    """
    Locate the data registry configuration file.

    The code will check three scenarios, which are, in order of priority:
        - The config_file has been manually passed
        - The DATAREG_CONFIG env variable has been set
        - The default location (the .config_reg_access file in $HOME)

    If none of these are true, an exception is raised.

    Parameters
    ----------
    config_file : str, optional
        Manually set the location of the config file
    verbose : bool, optional
        True for more output

    Returns
    -------
    config_file : str
        Path to data registry configuration file

    Raises
    ------
    ValueError
        If no config file was passed, DATAREG_CONFIG is not set (or empty) and
        the default file does not exist.
    """

    # Case where the user has manually specified the location
    if config_file is not None:
        if verbose:
            print(f"Using manually passed config file ({config_file})")
        return config_file

    # Case where the env variable is set (an empty value counts as unset)
    env_config = os.getenv("DATAREG_CONFIG")
    if env_config:
        if verbose:
            print(
                "Using DATAREG_CONFIG env var for config file",
                f"({env_config})",
            )
        return env_config

    # Finally check default location in $HOME. The path is only built here,
    # so a missing HOME variable can no longer break the two cases above;
    # if HOME is unset, fall back to the platform's notion of the home dir.
    home_dir = os.getenv("HOME") or os.path.expanduser("~")
    default_loc = os.path.join(home_dir, ".config_reg_access")
    if os.path.isfile(default_loc):
        if verbose:
            print("Using default location for config file", f"({default_loc})")
        return default_loc

    raise ValueError("Unable to located data registry config file")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def add_table_row(conn, table_meta, values, commit=True):
    """
    Insert a single row into a table and report its primary key.

    Parameters
    ----------
    conn : SQLAlchemy Connection object
        Open connection to the database (it is used to execute the statement
        and, optionally, to commit)
    table_meta : TableMetadata object
        Table the row is inserted into
    values : dict
        Column name -> value pairs for the new row
    commit : bool, optional
        True to commit changes to database (default True)

    Returns
    -------
    - : int
        Primary key for new row if successful
    """

    statement = insert(table_meta)
    outcome = conn.execute(statement, [values])

    if commit:
        conn.commit()

    # First (and only expected) component of the new row's primary key
    new_key = outcome.inserted_primary_key
    return new_key[0]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class DbConnection:
    def __init__(self, config_file=None, schema=None, verbose=False):
        """
        Small container holding the engine, dialect and schema of a connection

        Parameters
        ----------
        config_file : str, optional
            Path to config file with low-level connection information.
            If None, default location is assumed
        schema : str, optional
            Schema to connect to. If None, default schema is assumed
        verbose : bool, optional
            If True, produce additional output
        """

        # Read the connection settings from the configuration file
        config_path = _get_dataregistry_config(config_file, verbose)
        with open(config_path) as stream:
            connection_parameters = yaml.safe_load(stream)

        # Build the engine
        self._engine = engine_from_config(connection_parameters)

        # The dialect is the part of the driver name before any "+"
        # (e.g. "postgresql+psycopg2" -> "postgresql")
        url = make_url(connection_parameters["sqlalchemy.url"])
        self._dialect = url.drivername.split("+")[0]

        # Work out the schema: none for sqlite, otherwise the requested
        # schema or the module default
        if self._dialect == "sqlite":
            self._schema = None
        elif schema is None:
            self._schema = SCHEMA_VERSION
        else:
            self._schema = schema

    @property
    def engine(self):
        # SQLAlchemy engine built from the config file
        return self._engine

    @property
    def dialect(self):
        # Database dialect name, e.g. "sqlite"
        return self._dialect

    @property
    def schema(self):
        # Working schema name (None for sqlite)
        return self._schema
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class TableMetadata:
    def __init__(self, db_connection, get_db_version=True):
        """
        Keep and dispense table metadata

        Parameters
        ----------
        db_connection : DbConnection object
            Stores information about the DB connection
        get_db_version : bool, optional
            True to extract the DB version from the provenance table

        Raises
        ------
        DataRegistryException
            If the database has no provenance table, or if the DB version was
            requested but the provenance table has no rows.
        """

        self._metadata = MetaData(schema=db_connection.schema)
        self._engine = db_connection.engine
        self._schema = db_connection.schema

        # Load all existing tables
        self._metadata.reflect(self._engine, db_connection.schema)

        # Db versioning and assoc. production schema; these stay None unless
        # they are requested and read from the provenance table below
        self._prod_schema = None
        self._db_major = None
        self._db_minor = None
        self._db_patch = None

        # Reflected table names are schema-qualified except for sqlite
        if db_connection.dialect == "sqlite":
            prov_name = "provenance"
        else:
            prov_name = ".".join([self._schema, "provenance"])

        if prov_name not in self._metadata.tables:
            raise DataRegistryException("Incompatible database: no Provenance table")

        if not get_db_version:
            return

        # Fetch the most recent provenance row (highest provenance_id) in a
        # single query
        prov_table = self._metadata.tables[prov_name]
        cols = [
            "associated_production",
            "db_version_major",
            "db_version_minor",
            "db_version_patch",
        ]
        stmt = select(*[column(c) for c in cols])
        stmt = stmt.select_from(prov_table)
        stmt = stmt.order_by(prov_table.c.provenance_id.desc())
        with self._engine.connect() as conn:
            latest = conn.execute(stmt).fetchone()

        # fetchone() gives None when the table has no rows; report that
        # explicitly rather than failing on indexing None
        if latest is None:
            raise DataRegistryException(
                "Incompatible database: Provenance table is empty"
            )

        self._prod_schema = latest[0]
        self._db_major = latest[1]
        self._db_minor = latest[2]
        self._db_patch = latest[3]

    @property
    def db_version_major(self):
        return self._db_major

    @property
    def db_version_minor(self):
        return self._db_minor

    @property
    def db_version_patch(self):
        return self._db_patch

    def get(self, tbl):
        """
        Return the reflected table object for `tbl`.

        A name without a "." is qualified with the working schema (when there
        is one). If the table is not already known, a reflection of just that
        table is attempted.

        Parameters
        ----------
        tbl : str
            Table name, optionally schema-qualified

        Returns
        -------
        - : SQLAlchemy Table object

        Raises
        ------
        ValueError
            If reflecting the table fails
        """

        if "." not in tbl and self._schema:
            tbl = ".".join([self._schema, tbl])

        if tbl not in self._metadata.tables:
            try:
                self._metadata.reflect(self._engine, only=[tbl])
            except Exception as err:
                # Keep the underlying cause attached for debugging
                raise ValueError(f"No such table {tbl}") from err

        return self._metadata.tables[tbl]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _insert_provenance(
    db_connection,
    db_version_major,
    db_version_minor,
    db_version_patch,
    update_method,
    comment=None,
    associated_production="production",
):
    """
    Add a row to the provenance table recording the db schema version, the
    code version and (when run from a git checkout) the git state.

    Parameters
    ----------
    db_connection : DbConnection object
        Connection to the database
    db_version_major : int
    db_version_minor : int
    db_version_patch : int
    update_method : str
        One of "create", "migrate"
    comment : str, optional
        Briefly describe reason for new version
    associated_production : str, defaults to "production"
        Name of production schema, if any, this schema may reference

    Returns
    -------
    id : int
        Id of new row in provenance table
    """
    from dataregistry.git_util import get_git_info
    from git import InvalidGitRepositoryError

    # Code version is "major.minor.patch[-suffix]"; anything after the first
    # "-" in the third field is the suffix
    version_fields = __version__.split(".")
    patch, dash, remainder = version_fields[2].partition("-")
    suffix = remainder if dash else None

    values = {
        "code_version_major": version_fields[0],
        "code_version_minor": version_fields[1],
        "code_version_patch": patch,
    }
    if suffix:
        values["code_version_suffix"] = suffix
    values["db_version_major"] = db_version_major
    values["db_version_minor"] = db_version_minor
    values["db_version_patch"] = db_version_patch
    values["schema_enabled_date"] = datetime.now()
    values["creator_uid"] = os.getenv("USER")

    # Two levels above this module's directory
    pkg_root = os.path.join(os.path.dirname(__file__), "../..")

    # If this is a git repo, save hash and state
    try:
        git_hash, is_clean = get_git_info(pkg_root)
    except InvalidGitRepositoryError:
        # no git repo; this is an install. Code version is sufficient
        pass
    else:
        values["git_hash"] = git_hash
        values["repo_is_clean"] = is_clean

    values["update_method"] = update_method
    if comment is not None:
        values["comment"] = comment
    if associated_production is not None:  # None is normal for sqlite
        values["associated_production"] = associated_production

    table_info = TableMetadata(db_connection, get_db_version=False)
    prov_table = table_info.get("provenance")
    with db_connection.engine.connect() as conn:
        row_id = add_table_row(conn, prov_table, values)

    return row_id
|
|
307
|
+
|
|
308
|
+
def _insert_keyword(
    db_connection,
    keyword,
    system,
    creator_uid=None,
):
    """
    Add a row to the keyword table.

    Parameters
    ----------
    db_connection : DbConnection class
        Connection to the database
    keyword : str
        Keyword to add
    system : bool
        True if this is a preset system keyword (False for user custom keyword)
    creator_uid : optional
        Creator recorded for the row; if None, the USER environment variable
        is used

    Returns
    -------
    id : int
        Id of new row in keyword table
    """

    values = {
        "keyword": keyword,
        "system": system,
        "creator_uid": os.getenv("USER") if creator_uid is None else creator_uid,
        "creation_date": datetime.now(),
        "active": True,  # new keywords are always inserted as active
    }

    table_info = TableMetadata(db_connection, get_db_version=False)
    keyword_table = table_info.get("keyword")
    with db_connection.engine.connect() as conn:
        row_id = add_table_row(conn, keyword_table, values)

    return row_id
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
__all__ = ["DataRegistryException", "DataRegistryNYI"]
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DataRegistryException(Exception):
    """Base class for exceptions raised by the data registry."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DataRegistryNYI(DataRegistryException):
    """Raised when a requested feature is not yet implemented."""

    def __init__(self, feature=""):
        # Keep the message on the instance as well as passing it to the base
        self.msg = f"Feature {feature} not yet implemented"
        super().__init__(self.msg)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
__all__ = ["get_git_info"]
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_git_info(pkg_root):
    """
    Report the current commit of a git repo and whether its tree is clean.

    Parameters
    ----------
    pkg_root : str
        Root directory of a git repo

    Returns
    -------
    git_hash : str
        Current git hash
    is_clean : bool
        True only if the repo has neither uncommitted changes nor untracked
        files
    """
    # Imported here so the git package is only needed when this is called
    import git

    repo = git.Repo(pkg_root)
    dirty = repo.is_dirty()
    untracked = len(repo.untracked_files) > 0
    is_clean = not dirty and not untracked
    return repo.commit().hexsha, is_clean
|