dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/cli/common.py ADDED
@@ -0,0 +1,120 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import rich_click as click
7
+
8
+ from dsgrid.dsgrid_rc import DsgridRuntimeConfig
9
+ from dsgrid.exceptions import DSGBaseException
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def check_output_directory(path: Path, fs_interface, force: bool):
    """Ensure that ``path`` is an empty directory, creating or clearing it as needed.

    Parameters
    ----------
    path : Path
        Directory that is about to receive output.
    fs_interface : FilesystemInterface
        Object whose ``rm_tree`` method is used to delete an existing path.
    force : bool
        If False and the path exists and has content, print an error and exit
        with status 1. If True, delete the existing path and recreate it.
    """
    if path.exists():
        # Path.iterdir() returns an iterator object, which is always truthy, so
        # the emptiness test must consume it with any(). A path that exists but
        # is not a directory is handled like a directory with content.
        if path.is_dir() and not any(path.iterdir()):
            return
        if force:
            fs_interface.rm_tree(path)
        else:
            print(
                f"{path} already exists. Choose a different name or pass --force to overwrite it.",
                file=sys.stderr,
            )
            sys.exit(1)

    path.mkdir()
38
+
39
+
40
def get_log_level_from_str(level):
    """Translate a log level name into the matching ``logging`` constant.

    The recognized names are "debug", "info", "warning", and "error". Any other
    value raises an Exception that names the rejected value.
    """
    known_levels = (
        ("debug", logging.DEBUG),
        ("info", logging.INFO),
        ("warning", logging.WARNING),
        ("error", logging.ERROR),
    )
    for name, value in known_levels:
        if level == name:
            return value
    msg = f"Unsupported level={level}"
    raise Exception(msg)
54
+
55
+
56
def get_value_from_context(ctx, field) -> Any:
    """Look up ``field`` in the parameters of the root of a click context."""
    root_params = ctx.find_root().params
    return root_params[field]
59
+
60
+
61
def handle_dsgrid_exception(ctx, func, *args, **kwargs) -> tuple[Any, int]:
    """Call ``func`` and handle any dsgrid exception as specified by the CLI parameters.

    Parameters
    ----------
    ctx
        Click context. The "reraise_exceptions" parameter of its root context
        decides whether a caught exception is re-raised.
    func
        Callable to invoke.
    *args, **kwargs
        Forwarded to ``func`` unchanged.

    Returns
    -------
    tuple[Any, int]
        ``(result, 0)`` on success, or ``(None, 1)`` when a DSGBaseException was
        caught, logged, and not re-raised.

    Raises
    ------
    DSGBaseException
        Re-raised when "reraise_exceptions" is set on the root context.
    """
    try:
        return func(*args, **kwargs), 0
    except DSGBaseException as exc:
        # The first traceback entry is this function's own frame. Walk to the
        # last entry so the log reports where the exception was raised.
        tb = exc.__traceback__
        while tb.tb_next is not None:
            tb = tb.tb_next
        filename = tb.tb_frame.f_code.co_filename
        line = tb.tb_lineno
        msg = f'{func.__name__} failed: exception={type(exc).__name__} message="{exc}" {filename=} {line=}'
        logger.error(msg)
        if ctx.find_root().params["reraise_exceptions"]:
            raise
        return None, 1
76
+
77
+
78
def handle_scratch_dir(*args):
    """Handle the user input for scratch_dir. If a path is passed, ensure it exists.

    Written to be used as a click option callback: the option value is read from
    the third positional argument.

    Returns
    -------
    Path | None
        None when no value was given, otherwise the value as a Path.

    Raises
    ------
    ValueError
        If a value was given but the path does not exist.
    """
    val = args[2]
    if val is None:
        return val
    path = Path(val)
    # exists must be called: the bare bound method is always truthy, which would
    # let a missing directory through.
    if not path.exists():
        msg = f"scratch-dir={path} does not exist"
        raise ValueError(msg)
    return path
88
+
89
+
90
+ def path_callback(*args) -> Path | None:
91
+ """Ensure that a Path CLI option value is returned as a Path object."""
92
+ val = args[2]
93
+ if val is None:
94
+ return val
95
+ return Path(val)
96
+
97
+
98
+ # Copied from
99
+ # https://stackoverflow.com/questions/45868549/creating-a-click-option-with-prompt-that-shows-only-if-default-value-is-empty
100
+ # and modified for our desired password behavior.
101
+
102
+
103
class OptionPromptPassword(click.Option):
    """Password option that prompts only when the username on the root context
    differs from the one stored in the runtime config file."""

    def get_default(self, ctx, **kwargs):
        """Return the stored password when the usernames match, else None."""
        runtime_config = DsgridRuntimeConfig.load()
        requested_user = ctx.find_root().params.get("username")
        return (
            None
            if requested_user != runtime_config.database_user
            else runtime_config.database_password
        )

    def prompt_for_value(self, ctx):
        """Prompt the user only when no stored password applies."""
        stored_password = self.get_default(ctx)
        if stored_password is not None:
            return stored_password
        return super().prompt_for_value(ctx)
dsgrid/cli/config.py ADDED
@@ -0,0 +1,176 @@
1
+ """CLI commands to manage the dsgrid runtime configuration"""
2
+
3
+ import logging
4
+ import sys
5
+
6
+ import rich_click as click
7
+
8
+ from dsgrid.common import BackendEngine
9
+ from dsgrid.cli.common import handle_scratch_dir
10
+ from dsgrid.dsgrid_rc import (
11
+ DsgridRuntimeConfig,
12
+ DEFAULT_THRIFT_SERVER_URL,
13
+ DEFAULT_BACKEND,
14
+ )
15
+ from dsgrid.exceptions import DSGInvalidParameter
16
+ from dsgrid.registry.common import DatabaseConnection
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
# Click group for the "config" subcommands. The ``create`` command defined later
# in this module is attached to it with ``config.add_command``.
@click.group()
def config():
    """Config commands"""
25
+
26
+
27
+ _config_epilog = """
28
+ Create a dsgrid configuration file to store registry connection settings and
29
+ other dsgrid parameters.
30
+
31
+ Examples:\n
32
+ $ dsgrid config create sqlite:///./registry.db\n
33
+ $ dsgrid config create sqlite:////projects/dsgrid/registries/standard-scenarios/registry.db\n
34
+ """
35
+
36
+
37
@click.command(epilog=_config_epilog)
@click.argument("url")
@click.option(
    "-b",
    "--backend-engine",
    type=click.Choice([engine.value for engine in BackendEngine]),
    default=DEFAULT_BACKEND.value,
    help="Backend engine for SQL processing",
)
@click.option(
    "-t",
    "--thrift-server-url",
    type=str,
    default=DEFAULT_THRIFT_SERVER_URL,
    help="URL for the Apache Thrift Server to be used by chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "-m",
    "--use-hive-metastore",
    is_flag=True,
    default=False,
    help="Set this flag to use a Hive metastore when sharing data with chronify. "
    "Only applies if Spark is the backend engine.",
)
@click.option(
    "--timings/--no-timings",
    default=False,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "--use-absolute-db-path/--no-use-absolute-db-path",
    default=True,
    is_flag=True,
    show_default=True,
    help="Convert the SQLite database file path to an absolute path.",
)
# @click.option(
#     "-U",
#     "--username",
#     type=str,
#     default=getpass.getuser(),
#     help="Database username",
# )
# @click.option(
#     "-P",
#     "--password",
#     prompt=True,
#     hide_input=True,
#     type=str,
#     default=DEFAULT_DB_PASSWORD,
#     help="Database username",
# )
# @click.option(
#     "-o",
#     is_flag=True,
#     default=False,
#     show_default=True,
#     help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
#     "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
#     "commands will not be officially synced with the remote registry",
# )
@click.option(
    "--console-level",
    default="info",
    show_default=True,
    help="Console log level.",
)
@click.option(
    "--file-level",
    default="info",
    show_default=True,
    help="File log level.",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=False,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=None,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
def create(
    url,
    backend_engine,
    thrift_server_url,
    use_hive_metastore,
    timings,
    use_absolute_db_path,
    # username,
    # password,
    # offline,
    console_level,
    file_level,
    reraise_exceptions,
    scratch_dir,
):
    """Create a local dsgrid runtime configuration file."""
    connection = DatabaseConnection(url=url)
    try:
        db_file = connection.get_filename()
        make_absolute = use_absolute_db_path and not db_file.is_absolute()
        if make_absolute:
            # Rewrite the URL so that it points at the resolved database file.
            connection.url = f"sqlite:///{db_file.resolve()}"

    except DSGInvalidParameter as err:
        print(str(err), file=sys.stderr)
        sys.exit(1)

    # The config is only written for a registry database file that exists.
    if not db_file.exists():
        print(f"The registry database file {db_file} does not exist.", file=sys.stderr)
        sys.exit(1)

    settings = dict(
        backend_engine=backend_engine,
        thrift_server_url=thrift_server_url,
        use_hive_metastore=use_hive_metastore,
        timings=timings,
        database_url=connection.url,
        # database_user=username,
        # database_password=password,
        offline=True,
        console_level=console_level,
        file_level=file_level,
        reraise_exceptions=reraise_exceptions,
        scratch_dir=scratch_dir,
    )
    DsgridRuntimeConfig(**settings).dump()


config.add_command(create)
dsgrid/cli/download.py ADDED
@@ -0,0 +1,13 @@
1
+ """Download a dataset."""
2
+
3
+ import sys
4
+
5
+ import rich_click as click
6
+
7
+
8
@click.command()
@click.argument("dataset")
def download(dataset):
    """Download a dataset."""
    # This command is a placeholder: it always fails. The notice goes to stderr
    # because the process exits with a failure status.
    print("not currently functional", file=sys.stderr)
    sys.exit(1)
dsgrid/cli/dsgrid.py ADDED
@@ -0,0 +1,157 @@
1
+ """Main CLI command for dsgrid."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import rich_click as click
7
+
8
+ import dsgrid
9
+ from chronify.utils.path_utils import check_overwrite
10
+ from dsgrid.common import LOCAL_REGISTRY
11
+ from dsgrid.registry.common import DatabaseConnection, DataStoreType
12
+ from dsgrid.registry.registry_manager import RegistryManager
13
+ from dsgrid.utils.timing import timer_stats_collector
14
+ from dsgrid.cli.common import get_log_level_from_str, handle_scratch_dir
15
+ from dsgrid.cli.config import config
16
+ from dsgrid.cli.download import download
17
+ from dsgrid.cli.install_notebooks import install_notebooks
18
+ from dsgrid.cli.query import query
19
+ from dsgrid.cli.registry import registry
20
+ from dsgrid.loggers import setup_logging, check_log_file_size, disable_console_logging
21
+
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@click.group()
@click.option(
    "-c",
    "--console-level",
    default=dsgrid.runtime_config.console_level,
    show_default=True,
    help="Console log level.",
)
@click.option(
    "-f",
    "--file-level",
    default=dsgrid.runtime_config.file_level,
    show_default=True,
    help="File log level.",
)
@click.option("-l", "--log-file", type=Path, default="dsgrid.log", help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--timings/--no-timings",
    default=dsgrid.runtime_config.timings,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "-u",
    "--url",
    type=str,
    default=dsgrid.runtime_config.database_url,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    # The example uses the sqlite:/// form that the other commands in this CLI use.
    help="Database URL. Ex: sqlite:///./registry.db",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=dsgrid.runtime_config.reraise_exceptions,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=dsgrid.runtime_config.scratch_dir,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
@click.pass_context
def cli(
    ctx,
    console_level,
    file_level,
    log_file,
    no_prompts,
    timings,
    url,
    reraise_exceptions,
    scratch_dir,
):
    """dsgrid commands"""
    # Option defaults come from dsgrid.runtime_config; url, reraise_exceptions and
    # scratch_dir are not used in this body but remain available on the context.
    if timings:
        timer_stats_collector.enable()
    else:
        timer_stats_collector.disable()
    path = Path(log_file)
    check_log_file_size(path, no_prompts=no_prompts)
    # Replace the level names stored on the context with the values returned by
    # get_log_level_from_str.
    ctx.params["console_level"] = get_log_level_from_str(console_level)
    ctx.params["file_level"] = get_log_level_from_str(file_level)
    setup_logging(
        "dsgrid",
        path,
        console_level=ctx.params["console_level"],
        file_level=ctx.params["file_level"],
        mode="a",
    )
104
+
105
+
106
# Registered as the result callback of the ``cli`` group. Logs the collected
# timer statistics while console logging for the "dsgrid" logger is disabled
# (presumably so the stats only reach the log file; confirm in dsgrid.loggers).
@cli.result_callback()
def callback(*args, **kwargs):
    with disable_console_logging(name="dsgrid"):
        timer_stats_collector.log_stats()
110
+
111
+
112
+ _create_registry_epilog = """
113
+ Examples:\n
114
+ $ dsgrid create-registry sqlite:////projects/dsgrid/my_project/registry.db -p /projects/dsgrid/my_project/registry-data\n
115
+ """
116
+
117
+
118
@click.command(name="create-registry", epilog=_create_registry_epilog)
@click.argument("url")
@click.option(
    "-p",
    "--data-path",
    default=LOCAL_REGISTRY,
    show_default=True,
    callback=lambda _ctx, _param, value: Path(value),
    help="Local dsgrid registry data path. Must not contain the registry file listed in URL.",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    is_flag=True,
    default=False,
    help="Delete registry_path and the database if they already exist.",
)
@click.option(
    "-t",
    "--data-store-type",
    type=click.Choice([kind.value for kind in DataStoreType]),
    default=DataStoreType.FILESYSTEM.value,
    show_default=True,
    help="Type of store to use for the registry data.",
    callback=lambda _ctx, _param, value: DataStoreType(value),
)
def create_registry(url: str, data_path: Path, overwrite: bool, data_store_type: DataStoreType):
    """Create a new registry."""
    # The option callbacks above have already converted data_path to a Path and
    # data_store_type to a DataStoreType.
    check_overwrite(data_path, overwrite)
    RegistryManager.create(
        DatabaseConnection(url=url),
        data_path,
        overwrite=overwrite,
        data_store_type=data_store_type,
    )
150
+
151
+
152
+ cli.add_command(config)
153
+ cli.add_command(create_registry)
154
+ cli.add_command(download)
155
+ cli.add_command(install_notebooks)
156
+ cli.add_command(query)
157
+ cli.add_command(registry)
dsgrid/cli/dsgrid_admin.py ADDED
@@ -0,0 +1,92 @@
1
+ """CLI for dsgrid admin commands (testing purposes only)."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import rich_click as click
7
+
8
+ from dsgrid.config.simple_models import RegistrySimpleModel
9
+ from dsgrid.dsgrid_rc import DsgridRuntimeConfig
10
+ from dsgrid.loggers import setup_logging, check_log_file_size
11
+ from dsgrid.registry.common import DatabaseConnection
12
+ from dsgrid.registry.filter_registry_manager import FilterRegistryManager
13
+ from dsgrid.registry.registry_manager import RegistryManager
14
+ from dsgrid.utils.files import load_data
15
+
16
+
17
+ logger = logging.getLogger(__name__)
18
+ _config = DsgridRuntimeConfig.load()
19
+
20
+
21
@click.group()
@click.option("-l", "--log-file", default="dsgrid_admin.log", type=str, help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--verbose", is_flag=True, default=False, show_default=True, help="Enable verbose log output."
)
def cli(log_file, no_prompts, verbose):
    """dsgrid-admin commands (for testing purposes only)"""
    # One level is applied to both the console and the file handlers.
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    log_path = Path(log_file)
    check_log_file_size(log_path, no_prompts=no_prompts)
    setup_logging(
        "dsgrid",
        log_path,
        console_level=log_level,
        file_level=log_level,
        mode="a",
    )
35
+
36
+
37
@click.command()
@click.option(
    "--src-database-url",
    required=True,
    help="Source dsgrid registry database URL.",
)
@click.option(
    "--dst-database-url",
    default="dsgrid",
    required=True,
    help="Destination dsgrid registry database URL.",
)
@click.argument(
    "dst_data_path",
    type=click.Path(exists=False),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.argument(
    "config_file",
    type=click.Path(exists=True),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.option(
    "-m",
    "--mode",
    default="data-symlinks",
    type=click.Choice(["copy", "data-symlinks", "rsync"]),
    show_default=True,
    help="Controls whether to copy all data, make symlinks to data files, or sync data with the "
    "rsync utility (not available on Windows).",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    is_flag=True,
    show_default=True,
    help="Overwrite dst_registry_path if it already exists. Does not apply if using rsync.",
)
def make_filtered_registry(
    src_database_url,
    dst_database_url,
    dst_data_path: Path,
    config_file: Path,
    mode,
    overwrite,
):
    """Make a filtered registry for testing purposes."""
    # Load the filter specification before anything is copied.
    filter_spec = load_data(config_file)
    simple_model = RegistrySimpleModel(**filter_spec)
    source = DatabaseConnection(url=src_database_url)
    destination = DatabaseConnection(url=dst_database_url)
    # Copy the source registry first, then filter the copy.
    RegistryManager.copy(source, destination, dst_data_path, mode=mode, force=overwrite)
    with FilterRegistryManager.load(
        destination, offline_mode=True, use_remote_data=False
    ) as filter_manager:
        filter_manager.filter(simple_model=simple_model)


cli.add_command(make_filtered_registry)
dsgrid/cli/install_notebooks.py ADDED
@@ -0,0 +1,62 @@
1
+ import shutil
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import rich_click as click
6
+
7
+ import dsgrid
8
+
9
+ NOTEBOOKS_DIRNAME = "dsgrid-notebooks"
10
+
11
+
12
@click.command()
@click.option(
    "-p",
    "--path",
    default=Path.home(),
    show_default=True,
    type=click.Path(),
    help="Path to install dsgrid notebooks.",
    callback=lambda _ctx, _param, value: Path(value) / NOTEBOOKS_DIRNAME,
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    show_default=True,
    is_flag=True,
    help="If true, overwrite existing files.",
)
def install_notebooks(path, overwrite):
    """Install dsgrid notebooks to a local path."""
    source_dir = Path(dsgrid.__path__[0]) / "notebooks"
    if not source_dir.exists():
        print(f"Unexpected error: dsgrid notebooks are not stored in {source_dir}", file=sys.stderr)
        sys.exit(1)

    path.mkdir(exist_ok=True, parents=True)

    # Pair each notebook or shell script with its destination.
    candidates = [
        (entry, path / entry.name)
        for entry in source_dir.iterdir()
        if entry.suffix in (".ipynb", ".sh")
    ]

    # Nothing is copied if any destination already exists and overwrite is off.
    conflicts = [target for _, target in candidates if target.exists() and not overwrite]
    if conflicts:
        print(
            f"Existing files: {[str(x) for x in conflicts]}. "
            "Choose a different location or set overwrite=true to overwrite.",
            file=sys.stderr,
        )
        sys.exit(1)

    if not candidates:
        print("No notebook files found", file=sys.stderr)
        sys.exit(1)

    for source_file, target in candidates:
        shutil.copyfile(source_file, target)
        print(f"Installed {target}")