dsgrid-toolkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dsgrid-toolkit might be problematic. Click here for more details.
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +420 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +22 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +177 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +142 -0
- dsgrid/cli/dsgrid_admin.py +349 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +711 -0
- dsgrid/cli/registry.py +1773 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +35 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +187 -0
- dsgrid/config/common.py +131 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +684 -0
- dsgrid/config/dataset_schema_handler_factory.py +41 -0
- dsgrid/config/date_time_dimension_config.py +108 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +349 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +775 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/index_time_dimension_config.py +76 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1457 -0
- dsgrid/config/registration_models.py +199 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +200 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +899 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
- dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +44 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +218 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +213 -0
- dsgrid/dimension/time.py +531 -0
- dsgrid/dimension/time_utils.py +88 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +384 -0
- dsgrid/query/models.py +726 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +847 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +161 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +69 -0
- dsgrid/registry/dataset_config_generator.py +156 -0
- dsgrid/registry/dataset_registry_manager.py +734 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +185 -0
- dsgrid/registry/filesystem_data_store.py +141 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1616 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +662 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +544 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +545 -0
- dsgrid/spark/types.py +50 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +139 -0
- dsgrid/tests/make_us_data_registry.py +204 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +612 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +64 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +184 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
- dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
- dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
dsgrid/cli/config.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""CLI commands to manage the dsgrid runtime configuration"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
from dsgrid.common import BackendEngine
|
|
9
|
+
from dsgrid.cli.common import handle_scratch_dir
|
|
10
|
+
from dsgrid.dsgrid_rc import (
|
|
11
|
+
DsgridRuntimeConfig,
|
|
12
|
+
DEFAULT_THRIFT_SERVER_URL,
|
|
13
|
+
DEFAULT_BACKEND,
|
|
14
|
+
)
|
|
15
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
16
|
+
from dsgrid.registry.common import DatabaseConnection
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Parent click group for the runtime-configuration subcommands. Subcommands
# are attached with ``config.add_command(...)``; the docstring is the help text.
@click.group()
def config():
    """Config commands"""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
_config_epilog = """
|
|
28
|
+
Create a dsgrid configuration file to store registry connection settings and
|
|
29
|
+
other dsgrid parameters.
|
|
30
|
+
|
|
31
|
+
Examples:\n
|
|
32
|
+
$ dsgrid config create sqlite:///./registry.db\n
|
|
33
|
+
$ dsgrid config create sqlite:////projects/dsgrid/registries/standard-scenarios/registry.db\n
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@click.command(epilog=_config_epilog)
@click.argument("url")
@click.option(
    "-b",
    "--backend-engine",
    type=click.Choice([x.value for x in BackendEngine]),
    default=DEFAULT_BACKEND,
    help="Backend engine for SQL processing",
)
@click.option(
    "-t",
    "--thrift-server-url",
    type=str,
    default=DEFAULT_THRIFT_SERVER_URL,
    help="URL for the Apache Thrift Server to be used by chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "-m",
    "--use-hive-metastore",
    is_flag=True,
    default=False,
    help="Set this flag to use a Hive metastore when sharing data with chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "--timings/--no-timings",
    default=False,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "--use-absolute-db-path/--no-use-absolute-db-path",
    default=True,
    is_flag=True,
    show_default=True,
    help="Convert the SQLite database file path to an absolute path.",
)
# The username/password/offline options below are intentionally disabled.
# @click.option(
#     "-U",
#     "--username",
#     type=str,
#     default=getpass.getuser(),
#     help="Database username",
# )
# @click.option(
#     "-P",
#     "--password",
#     prompt=True,
#     hide_input=True,
#     type=str,
#     default=DEFAULT_DB_PASSWORD,
#     help="Database username",
# )
# @click.option(
#     "-o",
#     "--offline",
#     is_flag=True,
#     default=False,
#     show_default=True,
#     help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
#     "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
#     "commands will not be officially synced with the remote registry",
# )
@click.option(
    "--console-level",
    default="info",
    show_default=True,
    help="Console log level.",
)
@click.option(
    "--file-level",
    default="info",
    show_default=True,
    help="File log level.",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=False,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=None,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
def create(
    url,
    backend_engine,
    thrift_server_url,
    use_hive_metastore,
    timings,
    use_absolute_db_path,
    # username,
    # password,
    # offline,
    console_level,
    file_level,
    reraise_exceptions,
    scratch_dir,
):
    """Create a local dsgrid runtime configuration file."""
    # Flow: resolve the registry database file from the URL, optionally rewrite
    # the URL with an absolute path, verify that the file exists, then write
    # the runtime configuration. Either failure prints to stderr and exits 1.
    connection = DatabaseConnection(url=url)
    try:
        registry_db_path = connection.get_filename()
        needs_absolute_path = use_absolute_db_path and not registry_db_path.is_absolute()
        if needs_absolute_path:
            connection.url = f"sqlite:///{registry_db_path.resolve()}"
    except DSGInvalidParameter as exc:
        print(str(exc), file=sys.stderr)
        sys.exit(1)

    if not registry_db_path.exists():
        missing_msg = f"The registry database file {registry_db_path} does not exist."
        print(missing_msg, file=sys.stderr)
        sys.exit(1)

    # ``offline`` is always stored as True; the CLI option for it is disabled.
    runtime_config = DsgridRuntimeConfig(
        backend_engine=backend_engine,
        thrift_server_url=thrift_server_url,
        use_hive_metastore=use_hive_metastore,
        timings=timings,
        database_url=connection.url,
        # database_user=username,
        # database_password=password,
        offline=True,
        console_level=console_level,
        file_level=file_level,
        reraise_exceptions=reraise_exceptions,
        scratch_dir=scratch_dir,
    )
    runtime_config.dump()


config.add_command(create)
|
dsgrid/cli/download.py
ADDED
dsgrid/cli/dsgrid.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Main CLI command for dsgrid."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
import dsgrid
|
|
9
|
+
from dsgrid.utils.timing import timer_stats_collector
|
|
10
|
+
from dsgrid.cli.common import get_log_level_from_str, handle_scratch_dir
|
|
11
|
+
from dsgrid.cli.config import config
|
|
12
|
+
from dsgrid.cli.download import download
|
|
13
|
+
from dsgrid.cli.install_notebooks import install_notebooks
|
|
14
|
+
from dsgrid.cli.query import query
|
|
15
|
+
from dsgrid.cli.registry import registry
|
|
16
|
+
from dsgrid.loggers import setup_logging, check_log_file_size, disable_console_logging
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Root group of the ``dsgrid`` CLI. Option defaults come from the loaded
# runtime configuration (``dsgrid.runtime_config``). The group body enables or
# disables timing collection and configures logging before any subcommand runs.
@click.group()
@click.option(
    "-c",
    "--console-level",
    default=dsgrid.runtime_config.console_level,
    show_default=True,
    help="Console log level.",
)
@click.option(
    "-f",
    "--file-level",
    default=dsgrid.runtime_config.file_level,
    show_default=True,
    help="File log level.",
)
@click.option("-l", "--log-file", type=Path, default="dsgrid.log", help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--offline/--online",
    is_flag=True,
    default=dsgrid.runtime_config.offline,
    show_default=True,
    help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
    "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
    "commands will not be officially synced with the remote registry",
)
@click.option(
    "--timings/--no-timings",
    default=dsgrid.runtime_config.timings,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
# Server-related options are commented-out because the registry is currently only
# supported in SQLite. If/when we add postgres support, these can be added back.
# @click.option(
#     "-U",
#     "--username",
#     type=str,
#     default=dsgrid.runtime_config.database_user,
#     help="Database username",
# )
# @click.option(
#     "-P",
#     "--password",
#     prompt=True,
#     hide_input=True,
#     cls=OptionPromptPassword,
#     help="dsgrid registry password. Will prompt unless it is passed or the username matches the "
#     "runtime config file.",
# )
@click.option(
    "-u",
    "--url",
    type=str,
    default=dsgrid.runtime_config.database_url,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    # The example is a SQLite URL because the registry is currently only
    # supported in SQLite; the previous HTTP example was stale.
    help="Database URL. Ex: sqlite:///./registry.db",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=dsgrid.runtime_config.reraise_exceptions,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=dsgrid.runtime_config.scratch_dir,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
@click.pass_context
def cli(
    ctx,
    console_level,
    file_level,
    log_file,
    no_prompts,
    offline,
    timings,
    # username,
    # password,
    url,
    reraise_exceptions,
    scratch_dir,
):
    """dsgrid commands"""
    # ``offline``, ``url``, ``reraise_exceptions`` and ``scratch_dir`` are not
    # used here; they remain available to subcommands through ``ctx.params``.
    if timings:
        timer_stats_collector.enable()
    else:
        timer_stats_collector.disable()

    path = Path(log_file)
    check_log_file_size(path, no_prompts=no_prompts)

    # Replace the string levels in the context with the converted values so
    # that code reading ``ctx.params`` sees the same levels passed to
    # ``setup_logging``.
    ctx.params["console_level"] = get_log_level_from_str(console_level)
    ctx.params["file_level"] = get_log_level_from_str(file_level)
    setup_logging(
        "dsgrid",
        path,
        console_level=ctx.params["console_level"],
        file_level=ctx.params["file_level"],
        mode="a",
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@cli.result_callback()
def callback(*_args, **_kwargs):
    """Log the collected timer statistics after the invoked command finishes.

    Console logging for the "dsgrid" logger is disabled while the statistics
    are logged. All arguments passed by click are ignored.
    """
    with disable_console_logging(name="dsgrid"):
        timer_stats_collector.log_stats()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Attach the top-level subcommands to the root ``dsgrid`` group.
for _command in (config, download, install_notebooks, query, registry):
    cli.add_command(_command)
|
dsgrid/cli/dsgrid_admin.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
"""Main CLI command for dsgrid."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import rich_click as click
|
|
8
|
+
from chronify.utils.path_utils import check_overwrite
|
|
9
|
+
|
|
10
|
+
from dsgrid.cli.common import get_value_from_context
|
|
11
|
+
from dsgrid.common import LOCAL_REGISTRY, REMOTE_REGISTRY
|
|
12
|
+
from dsgrid.config.simple_models import RegistrySimpleModel
|
|
13
|
+
from dsgrid.dsgrid_rc import DsgridRuntimeConfig
|
|
14
|
+
from dsgrid.loggers import setup_logging, check_log_file_size
|
|
15
|
+
from dsgrid.registry.common import (
|
|
16
|
+
DataStoreType,
|
|
17
|
+
DatabaseConnection,
|
|
18
|
+
DatasetRegistryStatus,
|
|
19
|
+
VersionUpdateType,
|
|
20
|
+
)
|
|
21
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
22
|
+
from dsgrid.registry.filter_registry_manager import FilterRegistryManager
|
|
23
|
+
from dsgrid.utils.files import load_data
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
_config = DsgridRuntimeConfig.load()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
Click Group Definitions
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Root group of the ``dsgrid-admin`` CLI. Option defaults come from the runtime
# configuration loaded at import time (``_config``). The group body only
# configures logging.
@click.group()
# Server-related options are commented-out because the registry is currently only
# supported in SQLite. If/when we add postgres support, these can be added back.
# @click.option(
#     "-U",
#     "--username",
#     default=_config.database_user,
#     show_default=True,
#     help="dsgrid registry user name",
# )
# @click.option(
#     "-P",
#     "--password",
#     prompt=True,
#     hide_input=True,
#     cls=OptionPromptPassword,
#     help="dsgrid registry password. Will prompt unless it is passed or the username matches the "
#     "runtime config file.",
# )
@click.option(
    "--url",
    default=_config.database_url,
    show_default=True,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    help="dsgrid registry database URL. Override with the environment variable "
    "DSGRID_REGISTRY_DATABASE_URL",
)
@click.option("-l", "--log-file", default="dsgrid_admin.log", type=str, help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--offline/--online",
    is_flag=True,
    default=_config.offline,
    show_default=True,
    # Wording fixed (was "run in registry commands ...").
    help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
    "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
    "commands will not be officially synced with the remote registry",
)
@click.option(
    "--verbose", is_flag=True, default=False, show_default=True, help="Enable verbose log output."
)
# def cli(url, username, password, log_file, no_prompts, offline, verbose):
def cli(url, log_file, no_prompts, offline, verbose):
    """dsgrid-admin commands"""
    # ``url`` and ``offline`` are not used here; subcommands read them from
    # the click context.
    path = Path(log_file)
    # --verbose switches both the console and the file handler to DEBUG.
    level = logging.DEBUG if verbose else logging.INFO
    check_log_file_size(path, no_prompts=no_prompts)
    setup_logging("dsgrid", path, console_level=level, file_level=level, mode="a")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@click.group()
@click.option(
    "--remote-path",
    default=REMOTE_REGISTRY,
    show_default=True,
    help="path to dsgrid remote registry",
)
@click.pass_context
def registry(ctx, remote_path):
    """Manage a registry."""
    no_prompts = ctx.parent.params["no_prompts"]

    # When help was requested, no registry manager is loaded.
    # NOTE(review): presumably so help can be shown without a reachable
    # registry database - confirm.
    if "--help" in sys.argv:
        ctx.obj = None
        return

    connection = DatabaseConnection(
        url=get_value_from_context(ctx, "url"),
        # username=get_value_from_context(ctx, "username"),
        # password=get_value_from_context(ctx, "password"),
    )
    offline_mode = get_value_from_context(ctx, "offline")
    # The loaded manager becomes ``ctx.obj``, which the subcommands receive
    # through ``click.pass_obj``.
    ctx.obj = RegistryManager.load(
        connection,
        remote_path,
        offline_mode=offline_mode,
        no_prompts=no_prompts,
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# The four groups below only namespace their subcommands. Each receives the
# context object set by the parent ``registry`` group and does nothing with it.


@click.group()
@click.pass_obj
def dimensions(registry_manager: RegistryManager):
    """Dimension subcommands"""


@click.group()
@click.pass_obj
def dimension_mappings(registry_manager: RegistryManager):
    """Dimension mapping subcommands"""


@click.group()
@click.pass_obj
def projects(registry_manager: RegistryManager):
    """Project subcommands"""


@click.group()
@click.pass_obj
def datasets(registry_manager: RegistryManager):
    """Dataset subcommands"""
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
"""
|
|
139
|
+
Registry Commands
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
_create_epilog = """
|
|
143
|
+
Examples:\n
|
|
144
|
+
$ dsgrid-admin create-registry sqlite:////projects/dsgrid/my_project/registry.db -p /projects/dsgrid/my_project/registry-data\n
|
|
145
|
+
"""
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@click.command(epilog=_create_epilog)
@click.argument("url")
@click.option(
    "-p",
    "--data-path",
    default=LOCAL_REGISTRY,
    show_default=True,
    # Convert the raw option value to a Path before it reaches the command.
    callback=lambda _ctx, _param, value: Path(value),
    help="Local dsgrid registry data path. Must not contain the registry file listed in URL.",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    is_flag=True,
    default=False,
    help="Delete registry_path and the database if they already exist.",
)
@click.option(
    "-t",
    "--data-store-type",
    type=click.Choice([x.value for x in DataStoreType]),
    default=DataStoreType.FILESYSTEM.value,
    show_default=True,
    help="Type of store to use for the registry data.",
    # Convert the selected string to the corresponding DataStoreType member.
    callback=lambda _ctx, _param, value: DataStoreType(value),
)
@click.pass_context
def create_registry(
    ctx, url: str, data_path: Path, overwrite: bool, data_store_type: DataStoreType
):
    """Create a new registry."""
    # ``ctx`` is currently unused; it is only needed by the disabled
    # context-based connection settings below.
    check_overwrite(data_path, overwrite)
    connection = DatabaseConnection(
        url=url,  # This may change if/when we support a server database.
        # url=get_value_from_context(ctx, "url"),
        # username=get_value_from_context(ctx, "username"),
        # password=get_value_from_context(ctx, "password"),
    )
    RegistryManager.create(
        connection,
        data_path,
        overwrite=overwrite,
        data_store_type=data_store_type,
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
"""
|
|
191
|
+
Dimension Commands
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@click.command(name="remove")
@click.argument("dimension-id")
@click.pass_obj
def remove_dimension(registry_manager: RegistryManager, dimension_id: str):
    """Remove a dimension from the dsgrid repository."""
    # Delegates to the registry's dimension manager.
    dimension_mgr = registry_manager.dimension_manager
    dimension_mgr.remove(dimension_id)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
"""
|
|
204
|
+
Dimension Mapping Commands
|
|
205
|
+
"""
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@click.command(name="remove")
@click.argument("dimension-mapping-id")
@click.pass_obj
def remove_dimension_mapping(registry_manager: RegistryManager, dimension_mapping_id: str):
    """Remove a dimension mapping from the dsgrid repository."""
    # Delegates to the registry's dimension mapping manager.
    mapping_mgr = registry_manager.dimension_mapping_manager
    mapping_mgr.remove(dimension_mapping_id)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
"""
|
|
217
|
+
Project Commands
|
|
218
|
+
"""
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
@click.command(name="remove")
@click.argument("project-id")
@click.pass_obj
def remove_project(registry_manager: RegistryManager, project_id: str):
    """Remove a project from the dsgrid repository."""
    # Delegates to the registry's project manager.
    project_mgr = registry_manager.project_manager
    project_mgr.remove(project_id)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
"""
|
|
230
|
+
Dataset Commands
|
|
231
|
+
"""
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@click.command(name="remove")
@click.argument("dataset-ids", nargs=-1)
@click.pass_obj
def remove_datasets(registry_manager: RegistryManager, dataset_ids: tuple[str, ...]):
    """Remove one or more datasets from the dsgrid repository."""
    # Steps:
    #   1. Look up every requested dataset ID before anything is modified.
    #   2. Remove each dataset through the dataset manager.
    #   3. For every project, mark the removed datasets that were registered
    #      as unregistered, clear their mapping references, and save the
    #      project with a major version update.
    dataset_mgr = registry_manager.dataset_manager
    project_mgr = registry_manager.project_manager

    # Drop repeated IDs (keeping first-seen order) so that an ID given twice on
    # the command line is not removed twice after validation has passed.
    unique_ids = list(dict.fromkeys(dataset_ids))

    # Ensure that all dataset IDs are valid before removing any of them.
    for dataset_id in unique_ids:
        dataset_mgr.get_by_id(dataset_id)

    for dataset_id in unique_ids:
        dataset_mgr.remove(dataset_id)

    dataset_ids_set = set(unique_ids)
    for project_id in project_mgr.list_ids():
        project_config = project_mgr.get_by_id(project_id)
        removed_dataset_ids = []
        for dataset in project_config.iter_datasets():
            if (
                dataset.dataset_id in dataset_ids_set
                and dataset.status == DatasetRegistryStatus.REGISTERED
            ):
                dataset.status = DatasetRegistryStatus.UNREGISTERED
                dataset.mapping_references.clear()
                removed_dataset_ids.append(dataset.dataset_id)
        # Only projects that referenced a removed dataset get a new version.
        if removed_dataset_ids:
            ids = ", ".join(removed_dataset_ids)
            msg = (
                f"Set status for datasets {ids} to unregistered in project {project_id} "
                "after removal."
            )
            project_mgr.update(project_config, VersionUpdateType.MAJOR, msg)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@click.command()
@click.option(
    "--src-database-url",
    required=True,
    help="Source dsgrid registry database URL.",
)
# NOTE(review): ``default="dsgrid"`` combined with ``required=True`` means the
# "required" check can never trigger, and "dsgrid" does not look like a SQLite
# URL - confirm whether the default should be removed.
@click.option(
    "--dst-database-url",
    default="dsgrid",
    required=True,
    help="Destination dsgrid registry database URL.",
)
@click.argument(
    "dst_data_path",
    type=click.Path(exists=False),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.argument(
    "config_file",
    type=click.Path(exists=True),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.option(
    "-m",
    "--mode",
    default="data-symlinks",
    type=click.Choice(["copy", "data-symlinks", "rsync"]),
    show_default=True,
    help="Controls whether to copy all data, make symlinks to data files, or sync data with the "
    "rsync utility (not available on Windows).",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    is_flag=True,
    show_default=True,
    help="Overwrite dst_registry_path if it already exists. Does not apply if using rsync.",
)
@click.pass_context
def make_filtered_registry(
    ctx,
    src_database_url,
    dst_database_url,
    dst_data_path: Path,
    config_file: Path,
    mode,
    overwrite,
):
    """Make a filtered registry for testing purposes."""
    # The filter definition is loaded first, so a bad config file fails before
    # any data is copied.
    filter_model = RegistrySimpleModel(**load_data(config_file))

    # username = get_value_from_context(ctx, "username")
    # password = get_value_from_context(ctx, "password")
    # (username/password would be passed to both connections if re-enabled.)
    source_connection = DatabaseConnection(url=src_database_url)
    destination_connection = DatabaseConnection(url=dst_database_url)

    # Copy the source registry to the destination, then filter the copy.
    RegistryManager.copy(
        source_connection,
        destination_connection,
        dst_data_path,
        mode=mode,
        force=overwrite,
    )
    filter_mgr = FilterRegistryManager.load(
        destination_connection, offline_mode=True, use_remote_data=False
    )
    filter_mgr.filter(simple_model=filter_model)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
# Command tree of the ``dsgrid-admin`` CLI, wired in the same order as before:
# top-level commands, then the registry sub-groups, then each group's ``remove``.
for _command in (registry, create_registry, make_filtered_registry):
    cli.add_command(_command)

for _group in (dimensions, dimension_mappings, projects, datasets):
    registry.add_command(_group)

for _group, _remove_command in (
    (dimensions, remove_dimension),
    (dimension_mappings, remove_dimension_mapping),
    (projects, remove_project),
    (datasets, remove_datasets),
):
    _group.add_command(_remove_command)
|