dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/cli/common.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
from dsgrid.dsgrid_rc import DsgridRuntimeConfig
|
|
9
|
+
from dsgrid.exceptions import DSGBaseException
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def check_output_directory(path: Path, fs_interface, force: bool):
    """Ensure that ``path`` is an empty directory, creating it if necessary.

    Parameters
    ----------
    path : Path
        Directory to validate or create.
    fs_interface : FilesystemInterface
        Only its ``rm_tree`` method is used, to delete existing content.
    force : bool
        If True and the path exists and has content, delete it and recreate it.
        If False and the path exists and has content, print an error to stderr
        and exit with status 1.

    Notes
    -----
    An existing, empty directory is accepted as-is and left untouched.
    """
    if path.exists():
        # Path.iterdir() returns an iterator, which is always truthy, so it must
        # be consumed with any() to find out whether the directory has entries.
        # The is_dir() guard keeps a plain file on the force/exit path below.
        if path.is_dir() and not any(path.iterdir()):
            return
        if force:
            fs_interface.rm_tree(path)
        else:
            print(
                f"{path} already exists. Choose a different name or pass --force to overwrite it.",
                file=sys.stderr,
            )
            sys.exit(1)

    path.mkdir()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_log_level_from_str(level):
    """Translate a lowercase log level name into the matching ``logging`` constant.

    Supported names are "debug", "info", "warning" and "error"; any other value
    raises ``Exception``.
    """
    known_levels = (
        ("debug", logging.DEBUG),
        ("info", logging.INFO),
        ("warning", logging.WARNING),
        ("error", logging.ERROR),
    )
    for name, value in known_levels:
        if level == name:
            return value
    msg = f"Unsupported level={level}"
    raise Exception(msg)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_value_from_context(ctx, field) -> Any:
    """Look up ``field`` in the parameters of the root click context of ``ctx``."""
    root_context = ctx.find_root()
    return root_context.params[field]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def handle_dsgrid_exception(ctx, func, *args, **kwargs) -> tuple[Any, int]:
    """Call ``func`` and handle any dsgrid exception as specified by the CLI parameters.

    Parameters
    ----------
    ctx
        Click context. Its root context's ``reraise_exceptions`` parameter
        decides whether a caught exception is re-raised.
    func
        Callable to invoke with ``*args`` and ``**kwargs``.

    Returns
    -------
    tuple[Any, int]
        ``(result, 0)`` on success, or ``(None, 1)`` if a ``DSGBaseException``
        was logged and not re-raised.

    Raises
    ------
    DSGBaseException
        Re-raised when ``reraise_exceptions`` is set on the root context.
    """
    try:
        return func(*args, **kwargs), 0
    except DSGBaseException as exc:
        # The first traceback entry is always this function's own frame. Walk to
        # the innermost entry so the log reports where the exception was raised.
        tb = exc.__traceback__
        while tb is not None and tb.tb_next is not None:
            tb = tb.tb_next
        filename = tb.tb_frame.f_code.co_filename if tb is not None else None
        line = tb.tb_lineno if tb is not None else None
        msg = f'{func.__name__} failed: exception={type(exc).__name__} message="{exc}" {filename=} {line=}'
        logger.error(msg)
        if ctx.find_root().params["reraise_exceptions"]:
            raise
        return None, 1
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def handle_scratch_dir(*args):
    """Handle the user input for scratch_dir. If a path is passed, ensure it exists.

    Used as a click option callback; the option value is the third positional
    argument.

    Returns
    -------
    Path | None
        None if no value was given, otherwise the value as a Path.

    Raises
    ------
    ValueError
        If a value was given and the path does not exist.
    """
    val = args[2]
    if val is None:
        return val
    path = Path(val)
    # exists must be called: the bare bound method is always truthy, which
    # would silently disable this check.
    if not path.exists():
        msg = f"scratch-dir={path} does not exist"
        raise ValueError(msg)
    return path
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def path_callback(*args) -> Path | None:
|
|
91
|
+
"""Ensure that a Path CLI option value is returned as a Path object."""
|
|
92
|
+
val = args[2]
|
|
93
|
+
if val is None:
|
|
94
|
+
return val
|
|
95
|
+
return Path(val)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Copied from
|
|
99
|
+
# https://stackoverflow.com/questions/45868549/creating-a-click-option-with-prompt-that-shows-only-if-default-value-is-empty
|
|
100
|
+
# and modified for our desired password behavior.
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class OptionPromptPassword(click.Option):
    """Password option that prompts only when the stored password cannot be reused.

    The password from the runtime config file is used when the username in the
    root context matches the configured database user; otherwise the user is
    prompted.
    """

    def get_default(self, ctx, **kwargs):
        """Return the configured password if the username matches the config, else None."""
        runtime_config = DsgridRuntimeConfig.load()
        requested_user = ctx.find_root().params.get("username")
        if requested_user == runtime_config.database_user:
            return runtime_config.database_password
        return None

    def prompt_for_value(self, ctx):
        """Prompt for the password only when no stored default applies."""
        stored_password = self.get_default(ctx)
        if stored_password is not None:
            return stored_password
        return super().prompt_for_value(ctx)
|
dsgrid/cli/config.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""CLI commands to manage the dsgrid runtime configuration"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
from dsgrid.common import BackendEngine
|
|
9
|
+
from dsgrid.cli.common import handle_scratch_dir
|
|
10
|
+
from dsgrid.dsgrid_rc import (
|
|
11
|
+
DsgridRuntimeConfig,
|
|
12
|
+
DEFAULT_THRIFT_SERVER_URL,
|
|
13
|
+
DEFAULT_BACKEND,
|
|
14
|
+
)
|
|
15
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
16
|
+
from dsgrid.registry.common import DatabaseConnection
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Command group for runtime-configuration commands; `create` is attached to it
# at the bottom of this module via config.add_command(create).
@click.group()
def config():
    """Config commands"""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
_config_epilog = """
|
|
28
|
+
Create a dsgrid configuration file to store registry connection settings and
|
|
29
|
+
other dsgrid parameters.
|
|
30
|
+
|
|
31
|
+
Examples:\n
|
|
32
|
+
$ dsgrid config create sqlite:///./registry.db\n
|
|
33
|
+
$ dsgrid config create sqlite:////projects/dsgrid/registries/standard-scenarios/registry.db\n
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@click.command(epilog=_config_epilog)
@click.argument("url")
@click.option(
    "-b",
    "--backend-engine",
    type=click.Choice([x.value for x in BackendEngine]),
    default=DEFAULT_BACKEND.value,
    help="Backend engine for SQL processing",
)
@click.option(
    "-t",
    "--thrift-server-url",
    type=str,
    default=DEFAULT_THRIFT_SERVER_URL,
    help="URL for the Apache Thrift Server to be used by chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "-m",
    "--use-hive-metastore",
    is_flag=True,
    default=False,
    help="Set this flag to use a Hive metastore when sharing data with chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "--timings/--no-timings",
    default=False,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "--use-absolute-db-path/--no-use-absolute-db-path",
    default=True,
    is_flag=True,
    show_default=True,
    help="Convert the SQLite database file path to an absolute path.",
)
# @click.option(
#     "-U",
#     "--username",
#     type=str,
#     default=getpass.getuser(),
#     help="Database username",
# )
# @click.option(
#     "-P",
#     "--password",
#     prompt=True,
#     hide_input=True,
#     type=str,
#     default=DEFAULT_DB_PASSWORD,
#     help="Database username",
# )
# @click.option(
#     "-o",
#     is_flag=True,
#     default=False,
#     show_default=True,
#     help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
#     "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
#     "commands will not be officially synced with the remote registry",
# )
@click.option(
    "--console-level",
    default="info",
    show_default=True,
    help="Console log level.",
)
@click.option(
    "--file-level",
    default="info",
    show_default=True,
    help="File log level.",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=False,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=None,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
def create(
    url,
    backend_engine,
    thrift_server_url,
    use_hive_metastore,
    timings,
    use_absolute_db_path,
    # username,
    # password,
    # offline,
    console_level,
    file_level,
    reraise_exceptions,
    scratch_dir,
):
    """Create a local dsgrid runtime configuration file."""
    connection = DatabaseConnection(url=url)
    try:
        registry_file = connection.get_filename()
        make_absolute = use_absolute_db_path and not registry_file.is_absolute()
        if make_absolute:
            # Rewrite the URL so that it no longer depends on the current directory.
            connection.url = f"sqlite:///{registry_file.resolve()}"
    except DSGInvalidParameter as exc:
        print(str(exc), file=sys.stderr)
        sys.exit(1)

    # The configuration must point at an existing registry database.
    if not registry_file.exists():
        print(f"The registry database file {registry_file} does not exist.", file=sys.stderr)
        sys.exit(1)

    # Username, password and offline options are disabled above, so offline
    # mode is always written as True.
    settings = {
        "backend_engine": backend_engine,
        "thrift_server_url": thrift_server_url,
        "use_hive_metastore": use_hive_metastore,
        "timings": timings,
        "database_url": connection.url,
        # "database_user": username,
        # "database_password": password,
        "offline": True,
        "console_level": console_level,
        "file_level": file_level,
        "reraise_exceptions": reraise_exceptions,
        "scratch_dir": scratch_dir,
    }
    DsgridRuntimeConfig(**settings).dump()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Expose `create` as a subcommand of the `config` group.
config.add_command(create)
|
dsgrid/cli/download.py
ADDED
dsgrid/cli/dsgrid.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Main CLI command for dsgrid."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
import dsgrid
|
|
9
|
+
from chronify.utils.path_utils import check_overwrite
|
|
10
|
+
from dsgrid.common import LOCAL_REGISTRY
|
|
11
|
+
from dsgrid.registry.common import DatabaseConnection, DataStoreType
|
|
12
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
13
|
+
from dsgrid.utils.timing import timer_stats_collector
|
|
14
|
+
from dsgrid.cli.common import get_log_level_from_str, handle_scratch_dir
|
|
15
|
+
from dsgrid.cli.config import config
|
|
16
|
+
from dsgrid.cli.download import download
|
|
17
|
+
from dsgrid.cli.install_notebooks import install_notebooks
|
|
18
|
+
from dsgrid.cli.query import query
|
|
19
|
+
from dsgrid.cli.registry import registry
|
|
20
|
+
from dsgrid.loggers import setup_logging, check_log_file_size, disable_console_logging
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.group()
@click.option(
    "-c",
    "--console-level",
    default=dsgrid.runtime_config.console_level,
    show_default=True,
    help="Console log level.",
)
@click.option(
    "-f",
    "--file-level",
    default=dsgrid.runtime_config.file_level,
    show_default=True,
    help="File log level.",
)
@click.option("-l", "--log-file", type=Path, default="dsgrid.log", help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--timings/--no-timings",
    default=dsgrid.runtime_config.timings,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "-u",
    "--url",
    type=str,
    default=dsgrid.runtime_config.database_url,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    help="Database URL. Ex: http://localhost:8529",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=dsgrid.runtime_config.reraise_exceptions,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=dsgrid.runtime_config.scratch_dir,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
@click.pass_context
def cli(
    ctx,
    console_level,
    file_level,
    log_file,
    no_prompts,
    timings,
    url,
    reraise_exceptions,
    scratch_dir,
):
    """dsgrid commands"""
    # Turn the global timer collector on or off to match the --timings flag.
    toggle_timings = timer_stats_collector.enable if timings else timer_stats_collector.disable
    toggle_timings()

    log_path = Path(log_file)
    check_log_file_size(log_path, no_prompts=no_prompts)

    # Replace the level names stored in the context with the values produced by
    # get_log_level_from_str, then configure logging with those values.
    params = ctx.params
    params["console_level"] = get_log_level_from_str(console_level)
    params["file_level"] = get_log_level_from_str(file_level)
    setup_logging(
        "dsgrid",
        log_path,
        console_level=params["console_level"],
        file_level=params["file_level"],
        mode="a",
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@cli.result_callback()
def callback(*args, **kwargs):
    """Log the collected timer statistics with console logging disabled."""
    console_muted = disable_console_logging(name="dsgrid")
    with console_muted:
        timer_stats_collector.log_stats()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
_create_registry_epilog = """
|
|
113
|
+
Examples:\n
|
|
114
|
+
$ dsgrid create-registry sqlite:////projects/dsgrid/my_project/registry.db -p /projects/dsgrid/my_project/registry-data\n
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@click.command(name="create-registry", epilog=_create_registry_epilog)
@click.argument("url")
@click.option(
    "-p",
    "--data-path",
    default=LOCAL_REGISTRY,
    show_default=True,
    callback=lambda *x: Path(x[2]),
    help="Local dsgrid registry data path. Must not contain the registry file listed in URL.",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    is_flag=True,
    default=False,
    help="Delete registry_path and the database if they already exist.",
)
@click.option(
    "-t",
    "--data-store-type",
    type=click.Choice([x.value for x in DataStoreType]),
    default=DataStoreType.FILESYSTEM.value,
    show_default=True,
    help="Type of store to use for the registry data.",
    callback=lambda *x: DataStoreType(x[2]),
)
def create_registry(url: str, data_path: Path, overwrite: bool, data_store_type: DataStoreType):
    """Create a new registry."""
    # Validate the data path against the overwrite flag before creating anything.
    check_overwrite(data_path, overwrite)
    RegistryManager.create(
        DatabaseConnection(url=url),
        data_path,
        overwrite=overwrite,
        data_store_type=data_store_type,
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Register the subcommands and command groups on the top-level `cli` group.
cli.add_command(config)
cli.add_command(create_registry)
cli.add_command(download)
cli.add_command(install_notebooks)
cli.add_command(query)
cli.add_command(registry)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""CLI for dsgrid admin commands (testing purposes only)."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
from dsgrid.config.simple_models import RegistrySimpleModel
|
|
9
|
+
from dsgrid.dsgrid_rc import DsgridRuntimeConfig
|
|
10
|
+
from dsgrid.loggers import setup_logging, check_log_file_size
|
|
11
|
+
from dsgrid.registry.common import DatabaseConnection
|
|
12
|
+
from dsgrid.registry.filter_registry_manager import FilterRegistryManager
|
|
13
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
14
|
+
from dsgrid.utils.files import load_data
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
_config = DsgridRuntimeConfig.load()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@click.group()
@click.option("-l", "--log-file", default="dsgrid_admin.log", type=str, help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--verbose", is_flag=True, default=False, show_default=True, help="Enable verbose log output."
)
def cli(log_file, no_prompts, verbose):
    """dsgrid-admin commands (for testing purposes only)"""
    log_path = Path(log_file)
    # --verbose selects DEBUG for both the console and the log file.
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    check_log_file_size(log_path, no_prompts=no_prompts)
    setup_logging(
        "dsgrid",
        log_path,
        console_level=log_level,
        file_level=log_level,
        mode="a",
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@click.command()
@click.option(
    "--src-database-url",
    required=True,
    help="Source dsgrid registry database URL.",
)
@click.option(
    "--dst-database-url",
    default="dsgrid",
    required=True,
    help="Destination dsgrid registry database URL.",
)
@click.argument("dst_data_path", type=click.Path(exists=False), callback=lambda *x: Path(x[2]))
@click.argument("config_file", type=click.Path(exists=True), callback=lambda *x: Path(x[2]))
@click.option(
    "-m",
    "--mode",
    default="data-symlinks",
    type=click.Choice(["copy", "data-symlinks", "rsync"]),
    show_default=True,
    help="Controls whether to copy all data, make symlinks to data files, or sync data with the "
    "rsync utility (not available on Windows).",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    is_flag=True,
    show_default=True,
    help="Overwrite dst_registry_path if it already exists. Does not apply if using rsync.",
)
def make_filtered_registry(
    src_database_url,
    dst_database_url,
    dst_data_path: Path,
    config_file: Path,
    mode,
    overwrite,
):
    """Make a filtered registry for testing purposes."""
    # Parse the filter specification first so that a bad config file fails
    # before any registry data is copied.
    filter_spec = RegistrySimpleModel(**load_data(config_file))
    source = DatabaseConnection(url=src_database_url)
    destination = DatabaseConnection(url=dst_database_url)

    # Copy the source registry, then filter the copy.
    RegistryManager.copy(source, destination, dst_data_path, mode=mode, force=overwrite)
    filtered = FilterRegistryManager.load(destination, offline_mode=True, use_remote_data=False)
    with filtered as manager:
        manager.filter(simple_model=filter_spec)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Expose `make_filtered_registry` as a subcommand of the admin `cli` group.
cli.add_command(make_filtered_registry)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rich_click as click
|
|
6
|
+
|
|
7
|
+
import dsgrid
|
|
8
|
+
|
|
9
|
+
NOTEBOOKS_DIRNAME = "dsgrid-notebooks"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
@click.option(
    "-p",
    "--path",
    default=Path.home(),
    show_default=True,
    type=click.Path(),
    help="Path to install dsgrid notebooks.",
    callback=lambda _, __, x: Path(x) / NOTEBOOKS_DIRNAME,
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    show_default=True,
    is_flag=True,
    help="If true, overwrite existing files.",
)
def install_notebooks(path, overwrite):
    """Install dsgrid notebooks to a local path."""
    notebooks_dir = Path(dsgrid.__path__[0]) / "notebooks"
    if not notebooks_dir.exists():
        print(
            f"Unexpected error: dsgrid notebooks are not stored in {notebooks_dir}",
            file=sys.stderr,
        )
        sys.exit(1)

    path.mkdir(exist_ok=True, parents=True)

    # Split the packaged notebook and script files into those that can be
    # copied and those that would clobber an existing file.
    pending = []
    conflicts = []
    for candidate in notebooks_dir.iterdir():
        if candidate.suffix not in (".ipynb", ".sh"):
            continue
        target = path / candidate.name
        if overwrite or not target.exists():
            pending.append((candidate, target))
        else:
            conflicts.append(target)

    # Refuse to copy anything if any destination file already exists.
    if conflicts:
        print(
            f"Existing files: {[str(x) for x in conflicts]}. "
            "Choose a different location or set overwrite=true to overwrite.",
            file=sys.stderr,
        )
        sys.exit(1)

    if not pending:
        print("No notebook files found", file=sys.stderr)
        sys.exit(1)

    for source_file, target in pending:
        shutil.copyfile(source_file, target)
        print(f"Installed {target}")
|