lsstdesc-dataregistry 0.5.4rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. lsstdesc_dataregistry-0.5.4rc1/LICENSE +29 -0
  2. lsstdesc_dataregistry-0.5.4rc1/PKG-INFO +42 -0
  3. lsstdesc_dataregistry-0.5.4rc1/README.md +22 -0
  4. lsstdesc_dataregistry-0.5.4rc1/pyproject.toml +45 -0
  5. lsstdesc_dataregistry-0.5.4rc1/setup.cfg +4 -0
  6. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/DataRegistry.py +127 -0
  7. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/__init__.py +6 -0
  8. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/_version.py +1 -0
  9. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/db_basic.py +347 -0
  10. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/exceptions.py +12 -0
  11. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/git_util.py +21 -0
  12. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/query.py +592 -0
  13. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/__init__.py +1 -0
  14. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/base_table_class.py +218 -0
  15. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset.py +649 -0
  16. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset_alias.py +92 -0
  17. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/dataset_util.py +74 -0
  18. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/execution.py +97 -0
  19. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/registrar.py +52 -0
  20. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/registrar/registrar_util.py +342 -0
  21. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/__init__.py +1 -0
  22. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/keywords.yaml +4 -0
  23. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/load_schema.py +61 -0
  24. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/schema/schema.yaml +468 -0
  25. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry/site_config/site_rootdir.yaml +1 -0
  26. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/cli.py +267 -0
  27. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/delete.py +35 -0
  28. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/query.py +97 -0
  29. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/register.py +66 -0
  30. lsstdesc_dataregistry-0.5.4rc1/src/dataregistry_cli/show.py +55 -0
  31. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/PKG-INFO +42 -0
  32. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/SOURCES.txt +34 -0
  33. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/dependency_links.txt +1 -0
  34. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/entry_points.txt +2 -0
  35. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/requires.txt +7 -0
  36. lsstdesc_dataregistry-0.5.4rc1/src/lsstdesc_dataregistry.egg-info/top_level.txt +2 -0
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2022, JoanneBogart
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,42 @@
1
+ Metadata-Version: 2.1
2
+ Name: lsstdesc-dataregistry
3
+ Version: 0.5.4rc1
4
+ Summary: Creation and user API for DESC data registry.
5
+ Author-email: Joanne Bogart <jrb@slac.stanford.edu>, Stuart McAlpine <stuart.mcalpine@fysik.su.se>
6
+ Keywords: desc,python,registry
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.9
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Requires-Python: <3.12,>=3.9
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: psycopg2
15
+ Requires-Dist: sqlalchemy
16
+ Requires-Dist: pyyaml
17
+ Requires-Dist: pandas
18
+ Provides-Extra: docs
19
+ Requires-Dist: sphinx_rtd_theme; extra == "docs"
20
+
21
+ [![python](https://img.shields.io/badge/Python-3.9-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
22
+
23
+ <img src="docs/source/_static/DREGS_logo_v2.png" width="300"/>
24
+
25
+ **The ``dataregistry`` is currently undergoing beta testing. If you would like to participate, please get in touch!**
26
+
27
+ ### What is the data registry?
28
+
29
+ The data registry is a facility to store and share datasets from DESC related projects and pipelines.
30
+
31
+ The data, once registered, are transferred to and stored at a central location at NERSC, where they can later be accessed and queried using the ``dataregistry`` Python package or the CLI.
32
+
33
+ ### Documentation
34
+
35
+ The full documentation for the data registry can be found [here](http://lsstdesc.org/dataregistry).
36
+
37
+ ### Contact
38
+
39
+ For any further information, please get in touch!
40
+
41
+ - Admin: Joanne Bogart ([@JoanneBogart](https://www.github.com/JoanneBogart))
42
+ - Admin: Stuart McAlpine ([@stuartmcalpine](https://www.github.com/stuartmcalpine))
@@ -0,0 +1,22 @@
1
+ [![python](https://img.shields.io/badge/Python-3.9-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
2
+
3
+ <img src="docs/source/_static/DREGS_logo_v2.png" width="300"/>
4
+
5
+ **The ``dataregistry`` is currently undergoing beta testing. If you would like to participate, please get in touch!**
6
+
7
+ ### What is the data registry?
8
+
9
+ The data registry is a facility to store and share datasets from DESC related projects and pipelines.
10
+
11
+ The data, once registered, are transferred to and stored at a central location at NERSC, where they can later be accessed and queried using the ``dataregistry`` Python package or the CLI.
12
+
13
+ ### Documentation
14
+
15
+ The full documentation for the data registry can be found [here](http://lsstdesc.org/dataregistry).
16
+
17
+ ### Contact
18
+
19
+ For any further information, please get in touch!
20
+
21
+ - Admin: Joanne Bogart ([@JoanneBogart](https://www.github.com/JoanneBogart))
22
+ - Admin: Stuart McAlpine ([@stuartmcalpine](https://www.github.com/stuartmcalpine))
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools >= 61.0"] # PEP 621 compliant
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ dynamic = ["version"]
7
+ name = "lsstdesc-dataregistry"
8
+ description = "Creation and user API for DESC data registry."
9
+ readme = "README.md"
10
+ authors = [
11
+ { name = "Joanne Bogart", email = "jrb@slac.stanford.edu" },
12
+ { name = "Stuart McAlpine", email = "stuart.mcalpine@fysik.su.se" }
13
+ ]
14
+ license = { file = "LICENSE" }
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.9",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ ]
21
+ keywords = ["desc", "python", "registry"]
22
+ # NOTE: Those needing to create a database will also need to install the
23
+ # GitPython package. It is not needed for accessing an existing db.
24
+ dependencies = [
25
+ 'psycopg2',
26
+ 'sqlalchemy',
27
+ 'pyyaml',
28
+ 'pandas'
29
+ ]
30
+ requires-python = ">=3.9,<3.12" # Supported versions (in CI)
31
+
32
+ [tool.setuptools.dynamic]
33
+ version = {attr = "dataregistry._version.__version__"}
34
+
35
+ [project.optional-dependencies]
36
+ docs = ["sphinx_rtd_theme"]
37
+
38
+ [tool.setuptools.packages.find]
39
+ where = ["src"]
40
+
41
+ [project.scripts]
42
+ dregs = "dataregistry_cli.cli:main"
43
+
44
+ [tool.setuptools.package-data]
45
+ "dataregistry" = ["site_config/site_rootdir.yaml", "schema/*.yaml"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,127 @@
1
+ from dataregistry.db_basic import DbConnection
2
+ from dataregistry.query import Query
3
+ from dataregistry.registrar import Registrar
4
+ import yaml
5
+ import os
6
+
7
+ _HERE = os.path.dirname(__file__)
8
+ _SITE_CONFIG_PATH = os.path.join(_HERE, "site_config", "site_rootdir.yaml")
9
+
10
+
11
class DataRegistry:
    def __init__(
        self,
        owner=None,
        owner_type=None,
        config_file=None,
        schema=None,
        root_dir=None,
        verbose=False,
        site=None,
    ):
        """
        Primary data registry wrapper class.

        The DataRegistry class links to both the Registrar class, to
        register/modify/delete datasets, and the Query class, to query
        existing datasets.

        The link to the database is made automatically using:
            - the user's config file (if None, defaults are used)
            - the passed schema (if None, the default schema is used)

        The `root_dir` is the location the data is copied to. This can be
        manually passed, or alternatively a predefined `site` can be chosen.
        If neither is chosen, the DATAREG_SITE environment variable is
        consulted, and finally the "nersc" site is used as the default.

        Parameters
        ----------
        owner : str
            To set the default owner for all registered datasets in this
            instance.
        owner_type : str
            To set the default owner_type for all registered datasets in
            this instance.
        config_file : str
            Path to config file, if None, default location is assumed.
        schema : str
            Schema to connect to, if None, default schema is assumed.
        root_dir : str
            Root directory for datasets, if None, default is assumed.
        verbose : bool
            True for more output.
        site : str
            Can be used instead of `root_dir`. Some predefined "sites" are
            built in, such as "nersc", which will set the `root_dir` to the
            data registry's default data location at NERSC.
        """

        # Establish connection to database
        self.db_connection = DbConnection(
            config_file, schema=schema, verbose=verbose
        )

        # Work out the location of the root directory
        self.root_dir = self._get_root_dir(root_dir, site)

        # Create registrar object
        self.Registrar = Registrar(
            self.db_connection, self.root_dir, owner, owner_type
        )

        # Create query object
        self.Query = Query(self.db_connection, self.root_dir)

    def _get_root_dir(self, root_dir, site):
        """
        What is the location of the root_dir we are pairing with?

        In order of priority:
            - If manually passed `root_dir` is not None, use that.
            - If manually passed `site` is not None, use that.
            - If env DATAREG_SITE is set, use that.
            - Else use `site="nersc"`.

        All `site`s are assumed to be postgres. Sqlite users must manually
        specify the `root_dir`.

        Parameters
        ----------
        root_dir : str
        site : str

        Returns
        -------
        - : str
            Path to root directory

        Raises
        ------
        ValueError
            If no `root_dir` is passed with Sqlite, if the Sqlite `root_dir`
            equals a predefined site path, or if the requested site (from
            the argument or from DATAREG_SITE) is not in the site config.
        """

        # Load the site config yaml file (maps site name -> root directory)
        with open(_SITE_CONFIG_PATH) as f:
            data = yaml.safe_load(f)

        # Sqlite case: `site`s cannot be used, a `root_dir` must be passed
        if self.db_connection._dialect == "sqlite":
            if root_dir is None:
                raise ValueError("Must pass a `root_dir` using Sqlite")

            # root_dir cannot equal a site path when using Sqlite
            if root_dir in data.values():
                raise ValueError(
                    "`root_dir` must not equal a pre-defined site with Sqlite"
                )
            return root_dir

        # Non Sqlite case: an explicit root_dir always wins
        if root_dir is not None:
            return root_dir

        # Otherwise resolve a site name: argument, then env var, then default
        if site is None:
            site = os.getenv("DATAREG_SITE") or "nersc"

        # Validate every source of the site name the same way, so a bad
        # DATAREG_SITE gives the same clear error as a bad `site` argument
        site_key = site.lower()
        if site_key not in data:
            raise ValueError(f"{site} is not a valid site")

        return data[site_key]
@@ -0,0 +1,6 @@
1
+ from ._version import __version__
2
+ from .db_basic import *
3
+ from .registrar import *
4
+ from .query import *
5
+ from .git_util import *
6
+ from .DataRegistry import DataRegistry
@@ -0,0 +1 @@
1
+ __version__ = "0.5.4-rc1"
@@ -0,0 +1,347 @@
1
+ from sqlalchemy import engine_from_config
2
+ from sqlalchemy.engine import make_url
3
+ from sqlalchemy import MetaData
4
+ from sqlalchemy import column, insert, select
5
+ import yaml
6
+ import os
7
+ from datetime import datetime
8
+ from dataregistry import __version__
9
+ from dataregistry.exceptions import DataRegistryException
10
+
11
+ """
12
+ Low-level utility routines and classes for accessing the registry
13
+ """
14
+
15
+ SCHEMA_VERSION = "registry_beta"
16
+
17
+ __all__ = [
18
+ "DbConnection",
19
+ "add_table_row",
20
+ "TableMetadata",
21
+ "SCHEMA_VERSION",
22
+ ]
23
+
24
+
25
def _get_dataregistry_config(config_file=None, verbose=False):
    """
    Locate the data registry configuration file.

    The code will check three scenarios, which are, in order of priority:
        - The config_file has been manually passed
        - The DATAREG_CONFIG env variable has been set
        - The default location (the .config_reg_access file in the user's
          home directory)

    If none of these are true, an exception is raised.

    Parameters
    ----------
    config_file : str, optional
        Manually set the location of the config file
    verbose : bool, optional
        True for more output

    Returns
    -------
    config_file : str
        Path to data registry configuration file

    Raises
    ------
    ValueError
        If no configuration file could be located
    """

    # Case where the user has manually specified the location
    if config_file is not None:
        if verbose:
            print(f"Using manually passed config file ({config_file})")
        return config_file

    # Case where the env variable is set
    env_config = os.getenv("DATAREG_CONFIG")
    if env_config:
        if verbose:
            print(
                "Using DATAREG_CONFIG env var for config file",
                f"({env_config})",
            )
        return env_config

    # Finally check the default location in the home directory. This is
    # only computed when needed, and via expanduser, so that an unset HOME
    # cannot break the two explicit cases above.
    default_loc = os.path.join(os.path.expanduser("~"), ".config_reg_access")
    if os.path.isfile(default_loc):
        if verbose:
            print("Using default location for config file", f"({default_loc})")
        return default_loc

    raise ValueError("Unable to located data registry config file")
73
+
74
+
75
def add_table_row(conn, table_meta, values, commit=True):
    """
    Insert a single row into a table and report its primary key.

    Parameters
    ----------
    conn : SQLAlchemy connection object
        Open connection used to execute (and optionally commit) the insert
    table_meta : SQLAlchemy table
        Table the row is inserted into
    values : dict
        Column name -> value mapping for the new row
    commit : bool, optional
        True to commit changes to database (default True)

    Returns
    -------
    - : int
        First element of the inserted primary key of the new row
    """

    statement = insert(table_meta)
    outcome = conn.execute(statement, [values])

    if commit:
        conn.commit()

    new_key = outcome.inserted_primary_key
    return new_key[0]
103
+
104
+
105
class DbConnection:
    def __init__(self, config_file=None, schema=None, verbose=False):
        """
        Container for the database engine, its dialect and working schema.

        Parameters
        ----------
        config_file : str, optional
            Path to config file with low-level connection information.
            If None, the default lookup is used
        schema : str, optional
            Schema to connect to. If None, the default schema
            (SCHEMA_VERSION) is used. Ignored for sqlite, which has no
            schema
        verbose : bool, optional
            If True, produce additional output
        """

        # Read the connection settings from the configuration file
        config_path = _get_dataregistry_config(config_file, verbose)
        with open(config_path) as f:
            connection_parameters = yaml.safe_load(f)

        # Build the engine
        self._engine = engine_from_config(connection_parameters)

        # The dialect is the part of the driver name before any "+"
        url = make_url(connection_parameters["sqlalchemy.url"])
        self._dialect = url.drivername.split("+")[0]

        # Pick the working schema
        if self._dialect == "sqlite":
            self._schema = None
        elif schema is None:
            self._schema = SCHEMA_VERSION
        else:
            self._schema = schema

    @property
    def engine(self):
        return self._engine

    @property
    def dialect(self):
        return self._dialect

    @property
    def schema(self):
        return self._schema
151
+
152
+
153
class TableMetadata:
    def __init__(self, db_connection, get_db_version=True):
        """
        Reflect the tables of the working schema and hand them out on request.

        Parameters
        ----------
        db_connection : DbConnection object
            Stores information about the DB connection
        get_db_version : bool, optional
            True to read the associated production schema and the DB version
            from the most recent row of the provenance table

        Raises
        ------
        DataRegistryException
            If the reflected schema has no provenance table
        """

        self._metadata = MetaData(schema=db_connection.schema)
        self._engine = db_connection.engine
        self._schema = db_connection.schema

        # Load all existing tables
        self._metadata.reflect(self._engine, db_connection.schema)

        # Reflected table names are schema-qualified, except for sqlite
        self._prod_schema = None
        if db_connection.dialect == "sqlite":
            prov_name = "provenance"
        else:
            prov_name = ".".join((self._schema, "provenance"))

        if prov_name not in self._metadata.tables:
            raise DataRegistryException("Incompatible database: no Provenance table")

        if get_db_version:
            prov_table = self._metadata.tables[prov_name]

            # Production schema associated with this schema
            latest = self._latest_provenance_row(
                prov_table, ["associated_production"]
            )
            self._prod_schema = latest[0]

            # Database schema version
            latest = self._latest_provenance_row(
                prov_table,
                ["db_version_major", "db_version_minor", "db_version_patch"],
            )
            self._db_major, self._db_minor, self._db_patch = (
                latest[0],
                latest[1],
                latest[2],
            )
        else:
            self._db_major = self._db_minor = self._db_patch = None

    def _latest_provenance_row(self, prov_table, names):
        # Fetch the named columns from the row with the highest provenance_id
        query = (
            select(*[column(name) for name in names])
            .select_from(prov_table)
            .order_by(prov_table.c.provenance_id.desc())
        )
        with self._engine.connect() as conn:
            return conn.execute(query).fetchone()

    @property
    def db_version_major(self):
        return self._db_major

    @property
    def db_version_minor(self):
        return self._db_minor

    @property
    def db_version_patch(self):
        return self._db_patch

    def get(self, tbl):
        """
        Return the table object for `tbl`, reflecting it first if it is not
        already known. An unqualified name is prefixed with the working
        schema, when there is one.

        Raises
        ------
        ValueError
            If reflecting the table fails
        """

        if "." not in tbl and self._schema:
            tbl = ".".join([self._schema, tbl])

        if tbl not in self._metadata.tables:
            try:
                self._metadata.reflect(self._engine, only=[tbl])
            except Exception:
                raise ValueError(f"No such table {tbl}")

        return self._metadata.tables[tbl]
231
+
232
+
233
def _insert_provenance(
    db_connection,
    db_version_major,
    db_version_minor,
    db_version_patch,
    update_method,
    comment=None,
    associated_production="production",
):
    """
    Add a row to the provenance table recording the database schema version,
    the code version and, when run from a git checkout, the git state.

    Parameters
    ----------
    db_connection : DbConnection object
        Connection to the database
    db_version_major : int
    db_version_minor : int
    db_version_patch : int
    update_method : str
        One of "create", "migrate"
    comment : str, optional
        Briefly describe reason for new version
    associated_production : str, defaults to "production"
        Name of production schema, if any, this schema may reference

    Returns
    -------
    id : int
        Id of new row in provenance table
    """
    from dataregistry.git_util import get_git_info
    from git import InvalidGitRepositoryError

    code_fields = __version__.split(".")

    # Anything after the first "-" in the patch field is a version suffix
    patch, _, suffix = code_fields[2].partition("-")

    values = {
        "code_version_major": code_fields[0],
        "code_version_minor": code_fields[1],
        "code_version_patch": patch,
    }
    if suffix:
        values["code_version_suffix"] = suffix
    values.update(
        db_version_major=db_version_major,
        db_version_minor=db_version_minor,
        db_version_patch=db_version_patch,
        schema_enabled_date=datetime.now(),
        creator_uid=os.getenv("USER"),
    )

    # If this is a git repo, save hash and state
    pkg_root = os.path.join(os.path.dirname(__file__), "../..")
    try:
        git_hash, is_clean = get_git_info(pkg_root)
    except InvalidGitRepositoryError:
        # no git repo; this is an install. Code version is sufficient
        pass
    else:
        values["git_hash"] = git_hash
        values["repo_is_clean"] = is_clean

    values["update_method"] = update_method
    if comment is not None:
        values["comment"] = comment
    if associated_production is not None:  # None is normal for sqlite
        values["associated_production"] = associated_production

    table_lookup = TableMetadata(db_connection, get_db_version=False)
    prov_table = table_lookup.get("provenance")
    with db_connection.engine.connect() as conn:
        row_id = add_table_row(conn, prov_table, values)

    return row_id
307
+
308
def _insert_keyword(
    db_connection,
    keyword,
    system,
    creator_uid=None,
):
    """
    Add a row to the keyword table.

    Parameters
    ----------
    db_connection : DbConnection class
        Connection to the database
    keyword : str
        Keyword to add
    system : bool
        True if this is a preset system keyword (False for user custom keyword)
    creator_uid : optional
        Creator recorded for the row; if None, the USER environment variable
        is used

    Returns
    -------
    id : int
        Id of new row in keyword table
    """

    values = {
        "keyword": keyword,
        "system": system,
        "creator_uid": os.getenv("USER") if creator_uid is None else creator_uid,
        "creation_date": datetime.now(),
        "active": True,
    }

    table_lookup = TableMetadata(db_connection, get_db_version=False)
    keyword_table = table_lookup.get("keyword")
    with db_connection.engine.connect() as conn:
        row_id = add_table_row(conn, keyword_table, values)

    return row_id
@@ -0,0 +1,12 @@
1
+ __all__ = ["DataRegistryException", "DataRegistryNYI"]
2
+
3
+
4
class DataRegistryException(Exception):
    """Base class for exceptions raised by the data registry."""
6
+
7
+
8
class DataRegistryNYI(DataRegistryException):
    """Raised when a feature that is not yet implemented is requested."""

    def __init__(self, feature=""):
        # Keep the formatted message on the instance as well as in args
        self.msg = f"Feature {feature} not yet implemented"
        super().__init__(self.msg)
@@ -0,0 +1,21 @@
1
+ import os
2
+
3
+ __all__ = ["get_git_info"]
4
+
5
+
6
def get_git_info(pkg_root):
    """
    Report the current commit and cleanliness of a git repository.

    Parameters
    ----------
    pkg_root : str
        Root directory of a git repo

    Returns
    -------
    git_hash : str
        Hash of the current commit
    is_clean : bool
        True only if the repo has neither uncommitted changes nor
        untracked files
    """
    import git

    repo = git.Repo(pkg_root)
    dirty = repo.is_dirty()
    untracked = repo.untracked_files
    head_sha = repo.commit().hexsha
    return head_sha, not dirty and not untracked
+ return repo.commit().hexsha, not (has_uncommitted or has_untracked)