dsgrid-toolkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dsgrid-toolkit might be problematic. Click here for more details.

Files changed (152)
  1. dsgrid/__init__.py +22 -0
  2. dsgrid/api/__init__.py +0 -0
  3. dsgrid/api/api_manager.py +179 -0
  4. dsgrid/api/app.py +420 -0
  5. dsgrid/api/models.py +60 -0
  6. dsgrid/api/response_models.py +116 -0
  7. dsgrid/apps/__init__.py +0 -0
  8. dsgrid/apps/project_viewer/app.py +216 -0
  9. dsgrid/apps/registration_gui.py +444 -0
  10. dsgrid/chronify.py +22 -0
  11. dsgrid/cli/__init__.py +0 -0
  12. dsgrid/cli/common.py +120 -0
  13. dsgrid/cli/config.py +177 -0
  14. dsgrid/cli/download.py +13 -0
  15. dsgrid/cli/dsgrid.py +142 -0
  16. dsgrid/cli/dsgrid_admin.py +349 -0
  17. dsgrid/cli/install_notebooks.py +62 -0
  18. dsgrid/cli/query.py +711 -0
  19. dsgrid/cli/registry.py +1773 -0
  20. dsgrid/cloud/__init__.py +0 -0
  21. dsgrid/cloud/cloud_storage_interface.py +140 -0
  22. dsgrid/cloud/factory.py +31 -0
  23. dsgrid/cloud/fake_storage_interface.py +37 -0
  24. dsgrid/cloud/s3_storage_interface.py +156 -0
  25. dsgrid/common.py +35 -0
  26. dsgrid/config/__init__.py +0 -0
  27. dsgrid/config/annual_time_dimension_config.py +187 -0
  28. dsgrid/config/common.py +131 -0
  29. dsgrid/config/config_base.py +148 -0
  30. dsgrid/config/dataset_config.py +684 -0
  31. dsgrid/config/dataset_schema_handler_factory.py +41 -0
  32. dsgrid/config/date_time_dimension_config.py +108 -0
  33. dsgrid/config/dimension_config.py +54 -0
  34. dsgrid/config/dimension_config_factory.py +65 -0
  35. dsgrid/config/dimension_mapping_base.py +349 -0
  36. dsgrid/config/dimension_mappings_config.py +48 -0
  37. dsgrid/config/dimensions.py +775 -0
  38. dsgrid/config/dimensions_config.py +71 -0
  39. dsgrid/config/index_time_dimension_config.py +76 -0
  40. dsgrid/config/input_dataset_requirements.py +31 -0
  41. dsgrid/config/mapping_tables.py +209 -0
  42. dsgrid/config/noop_time_dimension_config.py +42 -0
  43. dsgrid/config/project_config.py +1457 -0
  44. dsgrid/config/registration_models.py +199 -0
  45. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  46. dsgrid/config/simple_models.py +49 -0
  47. dsgrid/config/supplemental_dimension.py +29 -0
  48. dsgrid/config/time_dimension_base_config.py +200 -0
  49. dsgrid/data_models.py +155 -0
  50. dsgrid/dataset/__init__.py +0 -0
  51. dsgrid/dataset/dataset.py +123 -0
  52. dsgrid/dataset/dataset_expression_handler.py +86 -0
  53. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  54. dsgrid/dataset/dataset_schema_handler_base.py +899 -0
  55. dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
  56. dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
  57. dsgrid/dataset/growth_rates.py +162 -0
  58. dsgrid/dataset/models.py +44 -0
  59. dsgrid/dataset/table_format_handler_base.py +257 -0
  60. dsgrid/dataset/table_format_handler_factory.py +17 -0
  61. dsgrid/dataset/unpivoted_table.py +121 -0
  62. dsgrid/dimension/__init__.py +0 -0
  63. dsgrid/dimension/base_models.py +218 -0
  64. dsgrid/dimension/dimension_filters.py +308 -0
  65. dsgrid/dimension/standard.py +213 -0
  66. dsgrid/dimension/time.py +531 -0
  67. dsgrid/dimension/time_utils.py +88 -0
  68. dsgrid/dsgrid_rc.py +88 -0
  69. dsgrid/exceptions.py +105 -0
  70. dsgrid/filesystem/__init__.py +0 -0
  71. dsgrid/filesystem/cloud_filesystem.py +32 -0
  72. dsgrid/filesystem/factory.py +32 -0
  73. dsgrid/filesystem/filesystem_interface.py +136 -0
  74. dsgrid/filesystem/local_filesystem.py +74 -0
  75. dsgrid/filesystem/s3_filesystem.py +118 -0
  76. dsgrid/loggers.py +132 -0
  77. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
  78. dsgrid/notebooks/registration.ipynb +48 -0
  79. dsgrid/notebooks/start_notebook.sh +11 -0
  80. dsgrid/project.py +451 -0
  81. dsgrid/query/__init__.py +0 -0
  82. dsgrid/query/dataset_mapping_plan.py +142 -0
  83. dsgrid/query/derived_dataset.py +384 -0
  84. dsgrid/query/models.py +726 -0
  85. dsgrid/query/query_context.py +287 -0
  86. dsgrid/query/query_submitter.py +847 -0
  87. dsgrid/query/report_factory.py +19 -0
  88. dsgrid/query/report_peak_load.py +70 -0
  89. dsgrid/query/reports_base.py +20 -0
  90. dsgrid/registry/__init__.py +0 -0
  91. dsgrid/registry/bulk_register.py +161 -0
  92. dsgrid/registry/common.py +287 -0
  93. dsgrid/registry/config_update_checker_base.py +63 -0
  94. dsgrid/registry/data_store_factory.py +34 -0
  95. dsgrid/registry/data_store_interface.py +69 -0
  96. dsgrid/registry/dataset_config_generator.py +156 -0
  97. dsgrid/registry/dataset_registry_manager.py +734 -0
  98. dsgrid/registry/dataset_update_checker.py +16 -0
  99. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  100. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  101. dsgrid/registry/dimension_registry_manager.py +413 -0
  102. dsgrid/registry/dimension_update_checker.py +16 -0
  103. dsgrid/registry/duckdb_data_store.py +185 -0
  104. dsgrid/registry/filesystem_data_store.py +141 -0
  105. dsgrid/registry/filter_registry_manager.py +123 -0
  106. dsgrid/registry/project_config_generator.py +57 -0
  107. dsgrid/registry/project_registry_manager.py +1616 -0
  108. dsgrid/registry/project_update_checker.py +48 -0
  109. dsgrid/registry/registration_context.py +223 -0
  110. dsgrid/registry/registry_auto_updater.py +316 -0
  111. dsgrid/registry/registry_database.py +662 -0
  112. dsgrid/registry/registry_interface.py +446 -0
  113. dsgrid/registry/registry_manager.py +544 -0
  114. dsgrid/registry/registry_manager_base.py +367 -0
  115. dsgrid/registry/versioning.py +92 -0
  116. dsgrid/spark/__init__.py +0 -0
  117. dsgrid/spark/functions.py +545 -0
  118. dsgrid/spark/types.py +50 -0
  119. dsgrid/tests/__init__.py +0 -0
  120. dsgrid/tests/common.py +139 -0
  121. dsgrid/tests/make_us_data_registry.py +204 -0
  122. dsgrid/tests/register_derived_datasets.py +103 -0
  123. dsgrid/tests/utils.py +25 -0
  124. dsgrid/time/__init__.py +0 -0
  125. dsgrid/time/time_conversions.py +80 -0
  126. dsgrid/time/types.py +67 -0
  127. dsgrid/units/__init__.py +0 -0
  128. dsgrid/units/constants.py +113 -0
  129. dsgrid/units/convert.py +71 -0
  130. dsgrid/units/energy.py +145 -0
  131. dsgrid/units/power.py +87 -0
  132. dsgrid/utils/__init__.py +0 -0
  133. dsgrid/utils/dataset.py +612 -0
  134. dsgrid/utils/files.py +179 -0
  135. dsgrid/utils/filters.py +125 -0
  136. dsgrid/utils/id_remappings.py +100 -0
  137. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  138. dsgrid/utils/py_expression_eval/README.md +8 -0
  139. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  140. dsgrid/utils/py_expression_eval/tests.py +283 -0
  141. dsgrid/utils/run_command.py +70 -0
  142. dsgrid/utils/scratch_dir_context.py +64 -0
  143. dsgrid/utils/spark.py +918 -0
  144. dsgrid/utils/spark_partition.py +98 -0
  145. dsgrid/utils/timing.py +239 -0
  146. dsgrid/utils/utilities.py +184 -0
  147. dsgrid/utils/versioning.py +36 -0
  148. dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
  149. dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
  150. dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
  151. dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
  152. dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
dsgrid/cli/config.py ADDED
@@ -0,0 +1,177 @@
1
+ """CLI commands to manage the dsgrid runtime configuration"""
2
+
3
+ import logging
4
+ import sys
5
+
6
+ import rich_click as click
7
+
8
+ from dsgrid.common import BackendEngine
9
+ from dsgrid.cli.common import handle_scratch_dir
10
+ from dsgrid.dsgrid_rc import (
11
+ DsgridRuntimeConfig,
12
+ DEFAULT_THRIFT_SERVER_URL,
13
+ DEFAULT_BACKEND,
14
+ )
15
+ from dsgrid.exceptions import DSGInvalidParameter
16
+ from dsgrid.registry.common import DatabaseConnection
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
# Click group that hosts the runtime-configuration subcommands. The `create`
# command is attached to it at the bottom of this module. The docstring below
# is what Click shows as the group's help text.
@click.group()
def config():
    """Config commands"""
25
+
26
+
27
# Text appended to the `--help` output of `dsgrid config create`.
_config_epilog = """
Create a dsgrid configuration file to store registry connection settings and
other dsgrid parameters.

Examples:\n
$ dsgrid config create sqlite:///./registry.db\n
$ dsgrid config create sqlite:////projects/dsgrid/registries/standard-scenarios/registry.db\n
"""


@click.command(epilog=_config_epilog)
@click.argument("url")
@click.option(
    "-b",
    "--backend-engine",
    type=click.Choice([engine.value for engine in BackendEngine]),
    default=DEFAULT_BACKEND,
    help="Backend engine for SQL processing",
)
@click.option(
    "-t",
    "--thrift-server-url",
    type=str,
    default=DEFAULT_THRIFT_SERVER_URL,
    help=(
        "URL for the Apache Thrift Server to be used by chronify. "
        "Only applies if Spark is the backend engine."
    ),
)
@click.option(
    "-m",
    "--use-hive-metastore",
    is_flag=True,
    default=False,
    help=(
        "Set this flag to use a Hive metastore when sharing data with chronify. "
        "Only applies if Spark is the backend engine."
    ),
)
@click.option(
    "--timings/--no-timings",
    default=False,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
@click.option(
    "--use-absolute-db-path/--no-use-absolute-db-path",
    default=True,
    is_flag=True,
    show_default=True,
    help="Convert the SQLite database file path to an absolute path.",
)
# NOTE: the -U/--username, -P/--password and -o/--offline options are not exposed
# by this command; the configuration is always written with offline=True below.
@click.option("--console-level", default="info", show_default=True, help="Console log level.")
@click.option("--file-level", default="info", show_default=True, help="File log level.")
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=False,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=None,
    callback=handle_scratch_dir,
    help=(
        "Base directory for dsgrid temporary directories. Must be accessible on all compute "
        "nodes. Defaults to the current directory."
    ),
)
def create(
    url,
    backend_engine,
    thrift_server_url,
    use_hive_metastore,
    timings,
    use_absolute_db_path,
    console_level,
    file_level,
    reraise_exceptions,
    scratch_dir,
):
    """Create a local dsgrid runtime configuration file."""
    # The docstring above doubles as the Click help text, so it is kept short.
    # Flow: resolve the database file behind `url`, optionally rewrite the URL
    # with an absolute path, refuse to continue if the file is missing, then
    # write the runtime configuration via DsgridRuntimeConfig.dump().
    connection = DatabaseConnection(url=url)
    try:
        db_path = connection.get_filename()
        if use_absolute_db_path and not db_path.is_absolute():
            connection.url = f"sqlite:///{db_path.resolve()}"
    except DSGInvalidParameter as exc:
        # Report the invalid URL on stderr and exit with a failure status.
        print(exc, file=sys.stderr)
        sys.exit(1)

    if not db_path.exists():
        print(f"The registry database file {db_path} does not exist.", file=sys.stderr)
        sys.exit(1)

    settings = {
        "backend_engine": backend_engine,
        "thrift_server_url": thrift_server_url,
        "use_hive_metastore": use_hive_metastore,
        "timings": timings,
        "database_url": connection.url,
        "offline": True,
        "console_level": console_level,
        "file_level": file_level,
        "reraise_exceptions": reraise_exceptions,
        "scratch_dir": scratch_dir,
    }
    DsgridRuntimeConfig(**settings).dump()
175
+
176
+
177
+ config.add_command(create)
dsgrid/cli/download.py ADDED
@@ -0,0 +1,13 @@
1
+ """Download a dataset."""
2
+
3
+ import sys
4
+
5
+ import rich_click as click
6
+
7
+
8
@click.command()
@click.argument("dataset")
def download(dataset):
    """Download a dataset."""
    # Placeholder command: the DATASET argument is accepted but not used.
    # The notice goes to stderr because the command exits with a failure
    # status; this keeps stdout clean for callers that capture it.
    print("not currently functional", file=sys.stderr)
    sys.exit(1)
dsgrid/cli/dsgrid.py ADDED
@@ -0,0 +1,142 @@
1
+ """Main CLI command for dsgrid."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import rich_click as click
7
+
8
+ import dsgrid
9
+ from dsgrid.utils.timing import timer_stats_collector
10
+ from dsgrid.cli.common import get_log_level_from_str, handle_scratch_dir
11
+ from dsgrid.cli.config import config
12
+ from dsgrid.cli.download import download
13
+ from dsgrid.cli.install_notebooks import install_notebooks
14
+ from dsgrid.cli.query import query
15
+ from dsgrid.cli.registry import registry
16
+ from dsgrid.loggers import setup_logging, check_log_file_size, disable_console_logging
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@click.group()
@click.option(
    "-c",
    "--console-level",
    default=dsgrid.runtime_config.console_level,
    show_default=True,
    help="Console log level.",
)
@click.option(
    "-f",
    "--file-level",
    default=dsgrid.runtime_config.file_level,
    show_default=True,
    help="File log level.",
)
@click.option("-l", "--log-file", type=Path, default="dsgrid.log", help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--offline/--online",
    is_flag=True,
    default=dsgrid.runtime_config.offline,
    show_default=True,
    help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
    "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
    "commands will not be officially synced with the remote registry",
)
@click.option(
    "--timings/--no-timings",
    default=dsgrid.runtime_config.timings,
    is_flag=True,
    show_default=True,
    help="Enable tracking of function timings.",
)
# Server-related options (-U/--username, -P/--password) are not exposed because the
# registry is currently only supported in SQLite. If/when postgres support is added,
# they can be added back.
@click.option(
    "-u",
    "--url",
    type=str,
    default=dsgrid.runtime_config.database_url,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    # The example is a SQLite URL because that is the only supported registry backend.
    help="Database URL. Ex: sqlite:///./registry.db",
)
@click.option(
    "-r",
    "--reraise-exceptions",
    is_flag=True,
    default=dsgrid.runtime_config.reraise_exceptions,
    show_default=True,
    help="Re-raise any dsgrid exception. Default is to log the exception and exit.",
)
@click.option(
    "-s",
    "--scratch-dir",
    default=dsgrid.runtime_config.scratch_dir,
    callback=handle_scratch_dir,
    help="Base directory for dsgrid temporary directories. Must be accessible on all compute "
    "nodes. Defaults to the current directory.",
)
@click.pass_context
def cli(
    ctx,
    console_level,
    file_level,
    log_file,
    no_prompts,
    offline,
    timings,
    url,
    reraise_exceptions,
    scratch_dir,
):
    """dsgrid commands"""
    # The docstring above is the Click help text for the top-level command.
    #
    # This callback switches timing collection on or off, checks the log file
    # size, and configures logging for the "dsgrid" logger. `offline`, `url`,
    # `reraise_exceptions` and `scratch_dir` are not used here; they stay
    # available in ctx.params (presumably read by subcommands; confirm there).
    if timings:
        timer_stats_collector.enable()
    else:
        timer_stats_collector.disable()

    path = Path(log_file)
    check_log_file_size(path, no_prompts=no_prompts)

    # Replace the textual levels stored on the context with the converted values
    # so that later readers of ctx.params see the same levels used for logging.
    ctx.params["console_level"] = get_log_level_from_str(console_level)
    ctx.params["file_level"] = get_log_level_from_str(file_level)
    setup_logging(
        "dsgrid",
        path,
        console_level=ctx.params["console_level"],
        file_level=ctx.params["file_level"],
        mode="a",
    )
130
+
131
+
132
@cli.result_callback()
def callback(*_args, **_kwargs):
    """Log the collected timer statistics once the invoked command has returned.

    Registered as the result callback of ``cli``; the arguments that Click
    passes in are ignored.
    """
    # The stats are logged inside the disable_console_logging context for the
    # "dsgrid" logger.
    console_muted = disable_console_logging(name="dsgrid")
    with console_muted:
        timer_stats_collector.log_stats()
136
+
137
+
138
+ cli.add_command(config)
139
+ cli.add_command(download)
140
+ cli.add_command(install_notebooks)
141
+ cli.add_command(query)
142
+ cli.add_command(registry)
dsgrid/cli/dsgrid_admin.py ADDED
@@ -0,0 +1,349 @@
1
+ """Main CLI command for dsgrid-admin."""
2
+
3
+ import logging
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import rich_click as click
8
+ from chronify.utils.path_utils import check_overwrite
9
+
10
+ from dsgrid.cli.common import get_value_from_context
11
+ from dsgrid.common import LOCAL_REGISTRY, REMOTE_REGISTRY
12
+ from dsgrid.config.simple_models import RegistrySimpleModel
13
+ from dsgrid.dsgrid_rc import DsgridRuntimeConfig
14
+ from dsgrid.loggers import setup_logging, check_log_file_size
15
+ from dsgrid.registry.common import (
16
+ DataStoreType,
17
+ DatabaseConnection,
18
+ DatasetRegistryStatus,
19
+ VersionUpdateType,
20
+ )
21
+ from dsgrid.registry.registry_manager import RegistryManager
22
+ from dsgrid.registry.filter_registry_manager import FilterRegistryManager
23
+ from dsgrid.utils.files import load_data
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+ _config = DsgridRuntimeConfig.load()
28
+
29
+
30
+ """
31
+ Click Group Definitions
32
+ """
33
+
34
+
35
@click.group()
# Server-related options (-U/--username, -P/--password) are not exposed because the
# registry is currently only supported in SQLite. If/when postgres support is added,
# they can be added back.
@click.option(
    "--url",
    default=_config.database_url,
    show_default=True,
    envvar="DSGRID_REGISTRY_DATABASE_URL",
    help="dsgrid registry database URL. Override with the environment variable "
    "DSGRID_REGISTRY_DATABASE_URL",
)
@click.option("-l", "--log-file", default="dsgrid_admin.log", type=str, help="Log to this file.")
@click.option(
    "-n", "--no-prompts", default=False, is_flag=True, show_default=True, help="Do not prompt."
)
@click.option(
    "--offline/--online",
    is_flag=True,
    default=_config.offline,
    show_default=True,
    # Fixed wording: previously read "run in registry commands in offline mode".
    help="Run registry commands in offline mode. WARNING: any commands you perform in offline "
    "mode run the risk of being out-of-sync with the latest dsgrid registry, and any write "
    "commands will not be officially synced with the remote registry",
)
@click.option(
    "--verbose", is_flag=True, default=False, show_default=True, help="Enable verbose log output."
)
def cli(url, log_file, no_prompts, offline, verbose):
    """dsgrid-admin commands"""
    # The docstring above is the Click help text for the top-level command.
    #
    # `url` and `offline` are not used in this body; the `registry` subgroup
    # fetches them with get_value_from_context(). This callback only sets up
    # logging: DEBUG for both console and file when --verbose is given,
    # otherwise INFO.
    path = Path(log_file)
    level = logging.DEBUG if verbose else logging.INFO
    check_log_file_size(path, no_prompts=no_prompts)
    setup_logging("dsgrid", path, console_level=level, file_level=level, mode="a")
85
+
86
+
87
@click.group()
@click.option(
    "--remote-path",
    default=REMOTE_REGISTRY,
    show_default=True,
    help="path to dsgrid remote registry",
)
@click.pass_context
def registry(ctx, remote_path):
    """Manage a registry."""
    # Sets ctx.obj to a loaded RegistryManager for the subcommands, or to None
    # when the user only asked for help, so that no registry is loaded.
    no_prompts = ctx.parent.params["no_prompts"]
    if "--help" in sys.argv:
        ctx.obj = None
        return

    # Server credentials (username/password) are not passed; only the URL is used.
    connection = DatabaseConnection(url=get_value_from_context(ctx, "url"))
    offline_mode = get_value_from_context(ctx, "offline")
    ctx.obj = RegistryManager.load(
        connection,
        remote_path,
        offline_mode=offline_mode,
        no_prompts=no_prompts,
    )
112
+
113
+
114
# Sub-group of `registry`; `remove_dimension` is attached to it at the bottom of
# this module. The function receives the object stored in ctx.obj by the parent
# group via @click.pass_obj and does no work of its own.
@click.group()
@click.pass_obj
def dimensions(registry_manager: RegistryManager):
    """Dimension subcommands"""
118
+
119
+
120
# Sub-group of `registry`; `remove_dimension_mapping` is attached to it at the
# bottom of this module. The function receives the object stored in ctx.obj by
# the parent group via @click.pass_obj and does no work of its own.
@click.group()
@click.pass_obj
def dimension_mappings(registry_manager: RegistryManager):
    """Dimension mapping subcommands"""
124
+
125
+
126
# Sub-group of `registry`; `remove_project` is attached to it at the bottom of
# this module. The function receives the object stored in ctx.obj by the parent
# group via @click.pass_obj and does no work of its own.
@click.group()
@click.pass_obj
def projects(registry_manager: RegistryManager):
    """Project subcommands"""
130
+
131
+
132
# Sub-group of `registry`; `remove_datasets` is attached to it at the bottom of
# this module. The function receives the object stored in ctx.obj by the parent
# group via @click.pass_obj and does no work of its own.
@click.group()
@click.pass_obj
def datasets(registry_manager: RegistryManager):
    """Dataset subcommands"""
136
+
137
+
138
+ """
139
+ Registry Commands
140
+ """
141
+
142
# Text appended to the `--help` output of `dsgrid-admin create-registry`.
_create_epilog = """
Examples:\n
$ dsgrid-admin create-registry sqlite:////projects/dsgrid/my_project/registry.db -p /projects/dsgrid/my_project/registry-data\n
"""


@click.command(epilog=_create_epilog)
@click.argument("url")
@click.option(
    "-p",
    "--data-path",
    default=LOCAL_REGISTRY,
    show_default=True,
    # Click invokes callbacks as (ctx, param, value); only the value is needed.
    callback=lambda _ctx, _param, value: Path(value),
    help="Local dsgrid registry data path. Must not contain the registry file listed in URL.",
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    is_flag=True,
    default=False,
    help="Delete registry_path and the database if they already exist.",
)
@click.option(
    "-t",
    "--data-store-type",
    type=click.Choice([store.value for store in DataStoreType]),
    default=DataStoreType.FILESYSTEM.value,
    show_default=True,
    help="Type of store to use for the registry data.",
    # Convert the chosen string back into the enum member.
    callback=lambda _ctx, _param, value: DataStoreType(value),
)
@click.pass_context
def create_registry(
    ctx, url: str, data_path: Path, overwrite: bool, data_store_type: DataStoreType
):
    """Create a new registry."""
    # `ctx` is accepted but unused. check_overwrite() runs on the data path
    # before anything is created.
    check_overwrite(data_path, overwrite)

    # The URL comes straight from the argument; this may change if/when a server
    # database is supported (credentials would then come from the context).
    connection = DatabaseConnection(url=url)
    RegistryManager.create(
        connection,
        data_path,
        overwrite=overwrite,
        data_store_type=data_store_type,
    )
188
+
189
+
190
+ """
191
+ Dimension Commands
192
+ """
193
+
194
+
195
@click.command(name="remove")
@click.argument("dimension-id")
@click.pass_obj
def remove_dimension(registry_manager: RegistryManager, dimension_id: str):
    """Remove a dimension from the dsgrid repository."""
    # Delegates to the registry's dimension manager.
    dimension_mgr = registry_manager.dimension_manager
    dimension_mgr.remove(dimension_id)
201
+
202
+
203
+ """
204
+ Dimension Mapping Commands
205
+ """
206
+
207
+
208
@click.command(name="remove")
@click.argument("dimension-mapping-id")
@click.pass_obj
def remove_dimension_mapping(registry_manager: RegistryManager, dimension_mapping_id: str):
    """Remove a dimension mapping from the dsgrid repository."""
    # Delegates to the registry's dimension-mapping manager.
    mapping_mgr = registry_manager.dimension_mapping_manager
    mapping_mgr.remove(dimension_mapping_id)
214
+
215
+
216
+ """
217
+ Project Commands
218
+ """
219
+
220
+
221
@click.command(name="remove")
@click.argument("project-id")
@click.pass_obj
def remove_project(registry_manager: RegistryManager, project_id: str):
    """Remove a project from the dsgrid repository."""
    # Delegates to the registry's project manager.
    project_mgr = registry_manager.project_manager
    project_mgr.remove(project_id)
227
+
228
+
229
+ """
230
+ Dataset Commands
231
+ """
232
+
233
+
234
@click.command(name="remove")
@click.argument("dataset-ids", nargs=-1)
@click.pass_obj
def remove_datasets(registry_manager: RegistryManager, dataset_ids: tuple[str, ...]):
    """Remove one or more datasets from the dsgrid repository."""
    # The docstring above is the Click help text.
    #
    # Steps:
    #   1. Look up every requested dataset ID before removing anything.
    #   2. Remove the datasets.
    #   3. In every project, mark matching datasets that were REGISTERED as
    #      UNREGISTERED, clear their mapping references, and save the project
    #      with a MAJOR version update.
    #
    # Click passes a tuple for nargs=-1. Repeated IDs are dropped (order kept)
    # so that no dataset is removed twice; a second removal would presumably
    # fail after step 2 had started and before the project statuses in step 3
    # were updated.
    unique_ids = list(dict.fromkeys(dataset_ids))
    if not unique_ids:
        # Nothing requested: skip loading every project configuration.
        return

    dataset_mgr = registry_manager.dataset_manager
    project_mgr = registry_manager.project_manager

    # Ensure that all dataset IDs are valid before removing any of them.
    for dataset_id in unique_ids:
        dataset_mgr.get_by_id(dataset_id)

    for dataset_id in unique_ids:
        dataset_mgr.remove(dataset_id)

    dataset_ids_set = set(unique_ids)
    for project_id in project_mgr.list_ids():
        project_config = project_mgr.get_by_id(project_id)
        removed_dataset_ids = []
        for dataset in project_config.iter_datasets():
            if (
                dataset.dataset_id in dataset_ids_set
                and dataset.status == DatasetRegistryStatus.REGISTERED
            ):
                dataset.status = DatasetRegistryStatus.UNREGISTERED
                dataset.mapping_references.clear()
                removed_dataset_ids.append(dataset.dataset_id)
        if removed_dataset_ids:
            ids = ", ".join(removed_dataset_ids)
            msg = (
                f"Set status for datasets {ids} to unregistered in project {project_id} "
                "after removal."
            )
            project_mgr.update(project_config, VersionUpdateType.MAJOR, msg)
268
+
269
+
270
@click.command()
@click.option(
    "--src-database-url",
    required=True,
    help="Source dsgrid registry database URL.",
)
# NOTE(review): this option combines default="dsgrid" with required=True. Because a
# default is present, Click will not report the option as missing, and "dsgrid" does
# not look like a database URL. Confirm the intended behavior before changing it.
@click.option(
    "--dst-database-url",
    default="dsgrid",
    required=True,
    help="Destination dsgrid registry database URL.",
)
# Click invokes callbacks as (ctx, param, value); both arguments become Path objects.
@click.argument(
    "dst_data_path",
    type=click.Path(exists=False),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.argument(
    "config_file",
    type=click.Path(exists=True),
    callback=lambda _ctx, _param, value: Path(value),
)
@click.option(
    "-m",
    "--mode",
    default="data-symlinks",
    type=click.Choice(["copy", "data-symlinks", "rsync"]),
    show_default=True,
    help=(
        "Controls whether to copy all data, make symlinks to data files, or sync data with the "
        "rsync utility (not available on Windows)."
    ),
)
@click.option(
    "-f",
    "--overwrite",
    "--force",
    default=False,
    is_flag=True,
    show_default=True,
    help="Overwrite dst_registry_path if it already exists. Does not apply if using rsync.",
)
@click.pass_context
def make_filtered_registry(
    ctx,
    src_database_url,
    dst_database_url,
    dst_data_path: Path,
    config_file: Path,
    mode,
    overwrite,
):
    """Make a filtered registry for testing purposes."""
    # `ctx` is accepted but unused. The filter specification is parsed first, so
    # a bad config file fails before any registry data is copied.
    filter_spec = RegistrySimpleModel(**load_data(config_file))

    # Only URLs are passed to the connections; no credentials are involved.
    source = DatabaseConnection(url=src_database_url)
    destination = DatabaseConnection(url=dst_database_url)

    RegistryManager.copy(source, destination, dst_data_path, mode=mode, force=overwrite)

    # Load the copy in offline mode without remote data, then apply the filter.
    filter_mgr = FilterRegistryManager.load(
        destination, offline_mode=True, use_remote_data=False
    )
    filter_mgr.filter(simple_model=filter_spec)
335
+
336
+
337
# Top-level dsgrid-admin commands.
cli.add_command(registry)
cli.add_command(create_registry)
cli.add_command(make_filtered_registry)

# Sub-groups of `registry`.
registry.add_command(dimensions)
registry.add_command(dimension_mappings)
registry.add_command(projects)
registry.add_command(datasets)

# Each sub-group exposes one command, registered under the name "remove".
dimensions.add_command(remove_dimension)
dimension_mappings.add_command(remove_dimension_mapping)
projects.add_command(remove_project)
datasets.add_command(remove_datasets)