dsgrid-toolkit 0.3.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/PKG-INFO +8 -5
  2. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/build_backend.py +17 -5
  3. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/__init__.py +4 -4
  4. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/api/app.py +6 -2
  5. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/config.py +2 -2
  6. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/dsgrid.py +2 -3
  7. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/registry.py +63 -30
  8. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/common.py +4 -0
  9. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dataset_config.py +70 -26
  10. dsgrid_toolkit-0.4.0/dsgrid/config/date_time_dimension_config.py +173 -0
  11. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimensions.py +174 -88
  12. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/index_time_dimension_config.py +2 -4
  13. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/project_config.py +13 -7
  14. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset_schema_handler_base.py +109 -34
  15. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset_schema_handler_one_table.py +5 -1
  16. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset_schema_handler_two_table.py +26 -1
  17. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/time.py +13 -3
  18. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/query_submitter.py +39 -51
  19. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/report_peak_load.py +1 -1
  20. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/bulk_register.py +4 -4
  21. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/common.py +13 -0
  22. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/data_store_interface.py +26 -6
  23. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dataset_registry_manager.py +341 -78
  24. dsgrid_toolkit-0.4.0/dsgrid/registry/duckdb_data_store.py +258 -0
  25. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/filesystem_data_store.py +34 -0
  26. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/project_registry_manager.py +3 -3
  27. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registry_database.py +16 -6
  28. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/rust_ext/find_minimal_patterns.py +11 -3
  29. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/spark/functions.py +4 -4
  30. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/tests/common.py +5 -5
  31. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/tests/make_us_data_registry.py +40 -8
  32. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/tests/utils.py +3 -3
  33. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/dataset.py +476 -55
  34. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/files.py +37 -6
  35. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/run_command.py +12 -2
  36. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/scratch_dir_context.py +13 -2
  37. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/spark.py +14 -12
  38. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/pyproject.toml +13 -10
  39. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/rust/Cargo.lock +69 -76
  40. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/rust/Cargo.toml +1 -1
  41. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/rust/src/lib.rs +22 -4
  42. dsgrid_toolkit-0.3.2/dsgrid/cli/install_notebooks.py +0 -62
  43. dsgrid_toolkit-0.3.2/dsgrid/config/date_time_dimension_config.py +0 -136
  44. dsgrid_toolkit-0.3.2/dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +0 -949
  45. dsgrid_toolkit-0.3.2/dsgrid/notebooks/registration.ipynb +0 -48
  46. dsgrid_toolkit-0.3.2/dsgrid/notebooks/start_notebook.sh +0 -11
  47. dsgrid_toolkit-0.3.2/dsgrid/registry/duckdb_data_store.py +0 -207
  48. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/LICENSE +0 -0
  49. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/README.md +0 -0
  50. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/api/__init__.py +0 -0
  51. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/api/api_manager.py +0 -0
  52. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/api/models.py +0 -0
  53. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/api/response_models.py +0 -0
  54. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/apps/__init__.py +0 -0
  55. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/apps/project_viewer/app.py +0 -0
  56. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/apps/registration_gui.py +0 -0
  57. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/chronify.py +0 -0
  58. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/__init__.py +0 -0
  59. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/common.py +0 -0
  60. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/download.py +0 -0
  61. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/dsgrid_admin.py +0 -0
  62. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cli/query.py +0 -0
  63. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cloud/__init__.py +0 -0
  64. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cloud/cloud_storage_interface.py +0 -0
  65. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cloud/factory.py +0 -0
  66. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cloud/fake_storage_interface.py +0 -0
  67. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/cloud/s3_storage_interface.py +0 -0
  68. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/__init__.py +0 -0
  69. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/annual_time_dimension_config.py +0 -0
  70. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/common.py +0 -0
  71. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/config_base.py +0 -0
  72. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dataset_schema_handler_factory.py +0 -0
  73. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimension_config.py +0 -0
  74. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimension_config_factory.py +0 -0
  75. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimension_mapping_base.py +0 -0
  76. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimension_mappings_config.py +0 -0
  77. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/dimensions_config.py +0 -0
  78. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/file_schema.py +0 -0
  79. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/input_dataset_requirements.py +0 -0
  80. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/mapping_tables.py +0 -0
  81. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/noop_time_dimension_config.py +0 -0
  82. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/registration_models.py +0 -0
  83. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/representative_period_time_dimension_config.py +0 -0
  84. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/simple_models.py +0 -0
  85. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/supplemental_dimension.py +0 -0
  86. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/config/time_dimension_base_config.py +0 -0
  87. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/data_models.py +0 -0
  88. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/__init__.py +0 -0
  89. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset.py +0 -0
  90. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset_expression_handler.py +0 -0
  91. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/dataset_mapping_manager.py +0 -0
  92. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/growth_rates.py +0 -0
  93. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/models.py +0 -0
  94. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/table_format_handler_base.py +0 -0
  95. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/table_format_handler_factory.py +0 -0
  96. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dataset/unpivoted_table.py +0 -0
  97. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/__init__.py +0 -0
  98. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/base_models.py +0 -0
  99. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/dimension_filters.py +0 -0
  100. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/standard.py +0 -0
  101. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dimension/time_utils.py +0 -0
  102. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/dsgrid_rc.py +0 -0
  103. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/exceptions.py +0 -0
  104. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/__init__.py +0 -0
  105. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/cloud_filesystem.py +0 -0
  106. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/factory.py +0 -0
  107. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/filesystem_interface.py +0 -0
  108. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/local_filesystem.py +0 -0
  109. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/filesystem/s3_filesystem.py +0 -0
  110. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/loggers.py +0 -0
  111. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/project.py +0 -0
  112. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/__init__.py +0 -0
  113. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/dataset_mapping_plan.py +0 -0
  114. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/derived_dataset.py +0 -0
  115. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/models.py +0 -0
  116. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/query_context.py +0 -0
  117. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/report_factory.py +0 -0
  118. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/query/reports_base.py +0 -0
  119. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/__init__.py +0 -0
  120. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/config_update_checker_base.py +0 -0
  121. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/data_store_factory.py +0 -0
  122. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dataset_config_generator.py +0 -0
  123. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dataset_update_checker.py +0 -0
  124. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dimension_mapping_registry_manager.py +0 -0
  125. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dimension_mapping_update_checker.py +0 -0
  126. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dimension_registry_manager.py +0 -0
  127. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/dimension_update_checker.py +0 -0
  128. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/filter_registry_manager.py +0 -0
  129. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/project_config_generator.py +0 -0
  130. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/project_update_checker.py +0 -0
  131. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registration_context.py +0 -0
  132. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registry_auto_updater.py +0 -0
  133. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registry_interface.py +0 -0
  134. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registry_manager.py +0 -0
  135. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/registry_manager_base.py +0 -0
  136. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/registry/versioning.py +0 -0
  137. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/rust_ext/__init__.py +0 -0
  138. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/spark/__init__.py +0 -0
  139. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/spark/types.py +0 -0
  140. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/tests/__init__.py +0 -0
  141. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/tests/register_derived_datasets.py +0 -0
  142. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/time/__init__.py +0 -0
  143. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/time/time_conversions.py +0 -0
  144. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/time/types.py +0 -0
  145. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/units/__init__.py +0 -0
  146. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/units/constants.py +0 -0
  147. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/units/convert.py +0 -0
  148. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/units/energy.py +0 -0
  149. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/units/power.py +0 -0
  150. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/__init__.py +0 -0
  151. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/filters.py +0 -0
  152. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/id_remappings.py +0 -0
  153. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/py_expression_eval/LICENSE +0 -0
  154. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/py_expression_eval/README.md +0 -0
  155. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/py_expression_eval/__init__.py +0 -0
  156. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/py_expression_eval/tests.py +0 -0
  157. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/spark_partition.py +0 -0
  158. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/timing.py +0 -0
  159. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/utilities.py +0 -0
  160. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/dsgrid/utils/versioning.py +0 -0
  161. {dsgrid_toolkit-0.3.2 → dsgrid_toolkit-0.4.0}/rust/README.md +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dsgrid-toolkit
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: License :: OSI Approved :: BSD License
7
7
  Classifier: Natural Language :: English
8
8
  Classifier: Programming Language :: Python :: 3.11
9
- Requires-Dist: chronify~=0.6.0
9
+ Requires-Dist: chronify~=0.7.0
10
10
  Requires-Dist: click>=8.2,<9
11
11
  Requires-Dist: dash
12
12
  Requires-Dist: dash-bootstrap-components
@@ -35,16 +35,19 @@ Requires-Dist: pyarrow ; extra == 'dev'
35
35
  Requires-Dist: maturin ; extra == 'dev'
36
36
  Requires-Dist: furo ; extra == 'doc'
37
37
  Requires-Dist: ghp-import ; extra == 'doc'
38
+ Requires-Dist: linkify-it-py~=2.0 ; extra == 'doc'
39
+ Requires-Dist: myst-parser~=2.0 ; extra == 'doc'
38
40
  Requires-Dist: numpydoc ; extra == 'doc'
39
41
  Requires-Dist: pandas-stubs ; extra == 'doc'
40
42
  Requires-Dist: ruff ; extra == 'doc'
41
43
  Requires-Dist: sphinx~=7.2 ; extra == 'doc'
42
44
  Requires-Dist: sphinx-click~=5.0 ; extra == 'doc'
43
45
  Requires-Dist: sphinx-copybutton~=0.5.2 ; extra == 'doc'
46
+ Requires-Dist: sphinx-design~=0.5.0 ; extra == 'doc'
44
47
  Requires-Dist: sphinx-tabs~=3.4 ; extra == 'doc'
45
48
  Requires-Dist: sphinx-argparse~=0.4.0 ; extra == 'doc'
46
49
  Requires-Dist: sphinxcontrib-programoutput ; extra == 'doc'
47
- Requires-Dist: autodoc-pydantic[erdantic]~=2.0 ; extra == 'doc'
50
+ Requires-Dist: autodoc-pydantic~=2.0 ; extra == 'doc'
48
51
  Requires-Dist: twine ; extra == 'release'
49
52
  Requires-Dist: setuptools ; extra == 'release'
50
53
  Requires-Dist: wheel ; extra == 'release'
@@ -59,8 +62,8 @@ Provides-Extra: spark
59
62
  License-File: LICENSE
60
63
  Summary: Python API for accessing demand-side grid model (dsgrid) datasets
61
64
  Keywords: dsgrid
62
- Author-email: Elaine Hale <elaine.hale@nrel.gov>, Lixi Liu <lixi.liu@nrel.gov>, Meghan Mooney <meghan.mooney@nrel.gov>, Daniel Thom <daniel.thom@nrel.gov>
63
- Maintainer-email: Elaine Hale <elaine.hale@nrel.gov>
65
+ Author-email: Elaine Hale <elaine.hale@nlr.gov>, Daniel Thom <daniel.thom@nlr.gov>, Lixi Liu <lixi.liu@nlr.gov>, Meghan Mooney <meghan.mooney@nlr.gov>
66
+ Maintainer-email: Elaine Hale <elaine.hale@nlr.gov>
64
67
  Requires-Python: >=3.11
65
68
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
66
69
  Project-URL: Documentation, https://dsgrid.github.io/dsgrid/
@@ -31,12 +31,21 @@ def _warn_no_rust():
31
31
  sys.stderr.flush()
32
32
 
33
33
 
34
- # For wheel builds (CI), always use maturin
34
+ # For wheel builds, use maturin if Rust is available, otherwise fall back to setuptools
35
35
  def build_wheel(wheel_directory, config_settings=None, metadata_directory=None):
36
- """Build wheel - requires Rust for distribution builds."""
37
- import maturin
36
+ """Build wheel - uses Rust if available, otherwise falls back to setuptools."""
37
+ if RUST_AVAILABLE:
38
+ import maturin
39
+
40
+ return maturin.build_wheel(wheel_directory, config_settings, metadata_directory)
41
+ else:
42
+ _warn_no_rust()
43
+ # Fall back to setuptools for wheel build without Rust extension
44
+ import setuptools.build_meta
38
45
 
39
- return maturin.build_wheel(wheel_directory, config_settings, metadata_directory)
46
+ return setuptools.build_meta.build_wheel(
47
+ wheel_directory, config_settings, metadata_directory
48
+ )
40
49
 
41
50
 
42
51
  def build_sdist(sdist_directory, config_settings=None):
@@ -48,7 +57,10 @@ def build_sdist(sdist_directory, config_settings=None):
48
57
 
49
58
  def get_requires_for_build_wheel(config_settings=None):
50
59
  """Get requirements for building wheel."""
51
- return ["maturin>=1.0,<2.0"]
60
+ if RUST_AVAILABLE:
61
+ return ["maturin>=1.0,<2.0"]
62
+ else:
63
+ return ["setuptools>=61.0"]
52
64
 
53
65
 
54
66
  def get_requires_for_build_sdist(config_settings=None):
@@ -9,11 +9,11 @@ __description__ = (
9
9
  "Python API for registring and accessing demand-side grid model (dsgrid) datasets"
10
10
  )
11
11
  __url__ = "https://github.com/dsgrid/dsgrid"
12
- __version__ = "0.3.2"
13
- __author__ = "NREL"
14
- __maintainer_email__ = "elaine.hale@nrel.gov"
12
+ __version__ = "0.4.0"
13
+ __author__ = "National Laboratory of the Rockies (formerly NREL)"
14
+ __maintainer_email__ = "elaine.hale@nlr.gov"
15
15
  __license__ = "BSD-3"
16
- __copyright__ = "Copyright {}, The Alliance for Sustainable Energy, LLC".format(
16
+ __copyright__ = "Copyright {}, Alliance for Energy Innovation, LLC".format(
17
17
  dt.date.today().year
18
18
  )
19
19
 
@@ -360,13 +360,15 @@ def download_async_task_archive_file(async_task_id: int):
360
360
 
361
361
 
362
362
  def _submit_project_query(spark_query: SparkSubmitProjectQueryRequest, async_task_id):
363
- with NamedTemporaryFile(mode="w", suffix=".json") as fp:
363
+ fp = NamedTemporaryFile(mode="w", suffix=".json", delete=False)
364
+ try:
364
365
  query = spark_query.query
365
366
  fp.write(query.model_dump_json())
366
367
  fp.write("\n")
367
368
  fp.flush()
369
+ fp.close()
368
370
  output_dir = Path(QUERY_OUTPUT_DIR)
369
- dsgrid_exec = "dsgrid-cli.py"
371
+ dsgrid_exec = "dsgrid-cli.exe" if sys.platform == "win32" else "dsgrid-cli.py"
370
372
  base_cmd = (
371
373
  f"--url={DSGRID_REGISTRY_DATABASE_URL} "
372
374
  f"query project run "
@@ -402,6 +404,8 @@ def _submit_project_query(spark_query: SparkSubmitProjectQueryRequest, async_tas
402
404
  archive_file="",
403
405
  archive_file_size_mb=0,
404
406
  )
407
+ finally:
408
+ Path(fp.name).unlink(missing_ok=True)
405
409
 
406
410
  api_mgr.complete_async_task(async_task_id, ret, result=result)
407
411
 
@@ -13,7 +13,7 @@ from dsgrid.dsgrid_rc import (
13
13
  DEFAULT_BACKEND,
14
14
  )
15
15
  from dsgrid.exceptions import DSGInvalidParameter
16
- from dsgrid.registry.common import DatabaseConnection
16
+ from dsgrid.registry.common import DatabaseConnection, make_sqlite_url
17
17
 
18
18
 
19
19
  logger = logging.getLogger(__name__)
@@ -146,7 +146,7 @@ def create(
146
146
  try:
147
147
  db_filename = conn.get_filename()
148
148
  if use_absolute_db_path and not db_filename.is_absolute():
149
- conn.url = f"sqlite:///{db_filename.resolve()}"
149
+ conn.url = make_sqlite_url(db_filename.resolve())
150
150
 
151
151
  except DSGInvalidParameter as exc:
152
152
  print(str(exc), file=sys.stderr)
@@ -14,7 +14,6 @@ from dsgrid.utils.timing import timer_stats_collector
14
14
  from dsgrid.cli.common import get_log_level_from_str, handle_scratch_dir
15
15
  from dsgrid.cli.config import config
16
16
  from dsgrid.cli.download import download
17
- from dsgrid.cli.install_notebooks import install_notebooks
18
17
  from dsgrid.cli.query import query
19
18
  from dsgrid.cli.registry import registry
20
19
  from dsgrid.loggers import setup_logging, check_log_file_size, disable_console_logging
@@ -146,12 +145,12 @@ def create_registry(url: str, data_path: Path, overwrite: bool, data_store_type:
146
145
  """Create a new registry."""
147
146
  check_overwrite(data_path, overwrite)
148
147
  conn = DatabaseConnection(url=url)
149
- RegistryManager.create(conn, data_path, overwrite=overwrite, data_store_type=data_store_type)
148
+ mgr = RegistryManager.create(conn, data_path, overwrite=overwrite, data_store_type=data_store_type)
149
+ mgr.dispose()
150
150
 
151
151
 
152
152
  cli.add_command(config)
153
153
  cli.add_command(create_registry)
154
154
  cli.add_command(download)
155
- cli.add_command(install_notebooks)
156
155
  cli.add_command(query)
157
156
  cli.add_command(registry)
@@ -35,6 +35,53 @@ from dsgrid.utils.filters import ACCEPTED_OPS
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
 
38
+ class _LazyRegistryManager:
39
+ """Defers RegistryManager creation until first attribute access.
40
+
41
+ This allows Click to process --help on subcommands without requiring a
42
+ registry URL, while still failing with a clear error message when a
43
+ command actually needs the registry manager.
44
+ """
45
+
46
+ def __init__(self, ctx):
47
+ self._ctx = ctx
48
+ self._manager = None
49
+ self._initialized = False
50
+
51
+ def _init_manager(self):
52
+ if self._initialized:
53
+ return
54
+ self._initialized = True
55
+ url = get_value_from_context(self._ctx, "url")
56
+ if url is None:
57
+ msg = (
58
+ "A registry URL is required. Pass it as 'dsgrid --url <URL> registry <command>' "
59
+ "or set it via the DSGRID_REGISTRY_DATABASE_URL environment variable "
60
+ "or in ~/.dsgrid.json5."
61
+ )
62
+ raise click.UsageError(msg)
63
+ conn = DatabaseConnection(url=url)
64
+ scratch_dir = get_value_from_context(self._ctx, "scratch_dir")
65
+ no_prompts = self._ctx.parent.params["no_prompts"]
66
+ self._manager = RegistryManager.load(
67
+ conn,
68
+ REMOTE_REGISTRY,
69
+ offline_mode=True,
70
+ no_prompts=no_prompts,
71
+ scratch_dir=scratch_dir,
72
+ )
73
+
74
+ def __getattr__(self, name):
75
+ if name.startswith("_"):
76
+ raise AttributeError(name)
77
+ self._init_manager()
78
+ return getattr(self._manager, name)
79
+
80
+ def dispose(self):
81
+ if self._manager is not None:
82
+ self._manager.dispose()
83
+
84
+
38
85
  def _version_info_callback(*args) -> VersionInfo | None:
39
86
  val = args[2]
40
87
  if val is None:
@@ -61,21 +108,7 @@ Click Group Definitions
61
108
  @click.pass_context
62
109
  def registry(ctx):
63
110
  """Manage a registry."""
64
- conn = DatabaseConnection(
65
- url=get_value_from_context(ctx, "url"),
66
- )
67
- scratch_dir = get_value_from_context(ctx, "scratch_dir")
68
- no_prompts = ctx.parent.params["no_prompts"]
69
- if "--help" in sys.argv:
70
- ctx.obj = None
71
- else:
72
- ctx.obj = RegistryManager.load(
73
- conn,
74
- REMOTE_REGISTRY,
75
- offline_mode=True,
76
- no_prompts=no_prompts,
77
- scratch_dir=scratch_dir,
78
- )
111
+ ctx.obj = _LazyRegistryManager(ctx)
79
112
 
80
113
 
81
114
  @registry.result_callback()
@@ -834,11 +867,11 @@ $ dsgrid registry projects register-and-submit-dataset \\ \n
834
867
  "prepend them with this path.",
835
868
  )
836
869
  @click.option(
837
- "-M",
838
- "--missing-associations-base-dir",
870
+ "-A",
871
+ "--associations-base-dir",
839
872
  type=click.Path(exists=True),
840
873
  callback=path_callback,
841
- help="Base directory for missing associations files. If set and missing associations "
874
+ help="Base directory for missing/expected associations files. If set and associations "
842
875
  "paths are relative, prepend them with this path.",
843
876
  )
844
877
  @click.pass_obj
@@ -853,7 +886,7 @@ def register_and_submit_dataset(
853
886
  project_id,
854
887
  log_message,
855
888
  data_base_dir,
856
- missing_associations_base_dir,
889
+ associations_base_dir,
857
890
  ):
858
891
  """Register a dataset and then submit it to a dsgrid project.
859
892
 
@@ -873,7 +906,7 @@ def register_and_submit_dataset(
873
906
  dimension_mapping_references_file=dimension_mapping_references_file,
874
907
  autogen_reverse_supplemental_mappings=autogen_reverse_supplemental_mappings,
875
908
  data_base_dir=data_base_dir,
876
- missing_associations_base_dir=missing_associations_base_dir,
909
+ associations_base_dir=associations_base_dir,
877
910
  )
878
911
  if res[1] != 0:
879
912
  ctx.exit(res[1])
@@ -1430,11 +1463,11 @@ $ dsgrid registry datasets register dataset.json5 --data-base-dir /path/to/data
1430
1463
  "prepend them with this path.",
1431
1464
  )
1432
1465
  @click.option(
1433
- "-M",
1434
- "--missing-associations-base-dir",
1466
+ "-A",
1467
+ "--associations-base-dir",
1435
1468
  type=click.Path(exists=True),
1436
1469
  callback=path_callback,
1437
- help="Base directory for missing associations files. If set and missing associations "
1470
+ help="Base directory for missing/expected associations files. If set and associations "
1438
1471
  "paths are relative, prepend them with this path.",
1439
1472
  )
1440
1473
  @click.pass_obj
@@ -1445,7 +1478,7 @@ def register_dataset(
1445
1478
  dataset_config_file: Path,
1446
1479
  log_message: str,
1447
1480
  data_base_dir: Path | None,
1448
- missing_associations_base_dir: Path | None,
1481
+ associations_base_dir: Path | None,
1449
1482
  ):
1450
1483
  """Register a new dataset with the registry. The contents of the JSON/JSON5 file
1451
1484
  must match the data model defined by this documentation:
@@ -1463,7 +1496,7 @@ def register_dataset(
1463
1496
  submitter,
1464
1497
  log_message,
1465
1498
  data_base_dir=data_base_dir,
1466
- missing_associations_base_dir=missing_associations_base_dir,
1499
+ associations_base_dir=associations_base_dir,
1467
1500
  )
1468
1501
  if res[1] != 0:
1469
1502
  ctx.exit(res[1])
@@ -1761,11 +1794,11 @@ $ dsgrid registry bulk-register registration.json5 -j journal__11f733f6-ac9b-4f7
1761
1794
  "prepend them with this path.",
1762
1795
  )
1763
1796
  @click.option(
1764
- "-M",
1765
- "--missing-associations-base-dir",
1797
+ "-A",
1798
+ "--associations-base-dir",
1766
1799
  type=click.Path(exists=True),
1767
1800
  callback=path_callback,
1768
- help="Base directory for missing associations files. If set and missing associations "
1801
+ help="Base directory for missing/expected associations files. If set and associations "
1769
1802
  "paths are relative, prepend them with this path.",
1770
1803
  )
1771
1804
  @click.option(
@@ -1792,7 +1825,7 @@ def bulk_register_cli(
1792
1825
  registry_manager: RegistryManager,
1793
1826
  registration_file: Path,
1794
1827
  data_base_dir: Path | None,
1795
- missing_associations_base_dir: Path | None,
1828
+ associations_base_dir: Path | None,
1796
1829
  repo_base_dir: Path | None,
1797
1830
  journal_file: Path | None,
1798
1831
  ):
@@ -1811,7 +1844,7 @@ def bulk_register_cli(
1811
1844
  registry_manager,
1812
1845
  registration_file,
1813
1846
  data_base_dir=data_base_dir,
1814
- missing_associations_base_dir=missing_associations_base_dir,
1847
+ associations_base_dir=associations_base_dir,
1815
1848
  repo_base_dir=repo_base_dir,
1816
1849
  journal_file=journal_file,
1817
1850
  )
@@ -1,7 +1,10 @@
1
1
  import enum
2
2
  import os
3
+ import sys
3
4
  from pathlib import Path
4
5
 
6
+ IS_WINDOWS = sys.platform == "win32"
7
+
5
8
  # AWS_PROFILE_NAME = "nrel-aws-dsgrid"
6
9
  REMOTE_REGISTRY = "s3://nrel-dsgrid-registry"
7
10
 
@@ -27,6 +30,7 @@ SCALING_FACTOR_COLUMN = "scaling_factor"
27
30
  SYNC_EXCLUDE_LIST = ["*.DS_Store", "**/*.lock"]
28
31
  TIME_ZONE_COLUMN = "time_zone"
29
32
  VALUE_COLUMN = "value"
33
+ TIME_COLUMN = "timestamp"
30
34
 
31
35
 
32
36
  class BackendEngine(enum.StrEnum):
@@ -48,6 +48,11 @@ ALLOWED_LOAD_DATA_LOOKUP_FILENAMES = (
48
48
  "load_data_lookup.json",
49
49
  )
50
50
  ALLOWED_DATA_FILES = ALLOWED_LOAD_DATA_FILENAMES + ALLOWED_LOAD_DATA_LOOKUP_FILENAMES
51
+ ALLOWED_EXPECTED_DIMENSION_ASSOCIATIONS_FILENAMES = (
52
+ "expected_associations.csv",
53
+ "expected_associations.parquet",
54
+ )
55
+ EXPECTED_ASSOCIATIONS_DIR_NAME = "expected_associations"
51
56
  ALLOWED_MISSING_DIMENSION_ASSOCATIONS_FILENAMES = (
52
57
  "missing_associations.csv",
53
58
  "missing_associations.parquet",
@@ -209,6 +214,15 @@ class UserDataLayout(DSGBaseModel):
209
214
  title="lookup_data_file",
210
215
  description="Defines the lookup data file. Required if the table format is 'two_table'.",
211
216
  )
217
+ expected_associations: list[str] = Field(
218
+ default=[],
219
+ title="expected_associations",
220
+ description="List of paths to expected associations files (e.g., "
221
+ "expected_associations.parquet) or directories of files containing expected combinations "
222
+ "by dimension type (e.g., geography__subsector.csv, subsector__metric.csv). "
223
+ "When provided, only these dimension combinations are required to be present in the "
224
+ "data, rather than the full cross-join of all dimension records.",
225
+ )
212
226
  missing_associations: list[str] = Field(
213
227
  default=[],
214
228
  title="missing_associations",
@@ -631,6 +645,34 @@ def make_unvalidated_dataset_config(
631
645
  class DatasetConfig(ConfigBase):
632
646
  """Provides an interface to a DatasetConfigModel."""
633
647
 
648
+ @staticmethod
649
+ def _resolve_path(
650
+ raw_path: str,
651
+ base_dir: Path | None,
652
+ config_file_parent: Path,
653
+ ) -> Path:
654
+ """Resolve a possibly-relative path against a base directory.
655
+
656
+ Parameters
657
+ ----------
658
+ raw_path
659
+ Path string from the config (may be relative or absolute).
660
+ base_dir
661
+ If set and the path is relative, resolve against this directory.
662
+ config_file_parent
663
+ Fallback directory when *base_dir* is None.
664
+
665
+ Returns
666
+ -------
667
+ Path
668
+ Resolved absolute path.
669
+ """
670
+ path = Path(raw_path)
671
+ if not path.is_absolute():
672
+ parent = base_dir if base_dir is not None else config_file_parent
673
+ path = (parent / path).resolve()
674
+ return path
675
+
634
676
  def __init__(self, model):
635
677
  super().__init__(model)
636
678
  self._dimensions = {} # ConfigKey to DimensionConfig
@@ -652,7 +694,7 @@ class DatasetConfig(ConfigBase):
652
694
  cls,
653
695
  config_file: Path,
654
696
  data_base_dir: Path | None = None,
655
- missing_associations_base_dir: Path | None = None,
697
+ associations_base_dir: Path | None = None,
656
698
  ) -> "DatasetConfig":
657
699
  """Load a dataset config from a user-provided config file.
658
700
 
@@ -666,9 +708,10 @@ class DatasetConfig(ConfigBase):
666
708
  data_base_dir : Path | None, optional
667
709
  Base directory for data files. If set and data file paths are relative,
668
710
  prepend them with this path instead of using the config file's parent directory.
669
- missing_associations_base_dir : Path | None, optional
670
- Base directory for missing associations files. If set and paths are relative,
671
- prepend them with this path instead of using the config file's parent directory.
711
+ associations_base_dir : Path | None, optional
712
+ Base directory for missing/expected associations files. If set and paths are
713
+ relative, prepend them with this path instead of using the config file's
714
+ parent directory.
672
715
 
673
716
  Returns
674
717
  -------
@@ -694,12 +737,9 @@ class DatasetConfig(ConfigBase):
694
737
  raise DSGInvalidParameter(msg)
695
738
 
696
739
  # Resolve data file path
697
- data_path = Path(user_layout.data_file.path)
698
- if not data_path.is_absolute():
699
- if data_base_dir is not None:
700
- data_path = (data_base_dir / data_path).resolve()
701
- else:
702
- data_path = (config_file.parent / data_path).resolve()
740
+ data_path = cls._resolve_path(
741
+ user_layout.data_file.path, data_base_dir, config_file.parent
742
+ )
703
743
  if str(data_path).startswith("s3://"):
704
744
  msg = "Registering a dataset from an S3 path is not supported."
705
745
  raise DSGInvalidParameter(msg)
@@ -714,28 +754,25 @@ class DatasetConfig(ConfigBase):
714
754
  if user_layout.lookup_data_file is None:
715
755
  msg = "Two-table format requires lookup_data_file in data_layout"
716
756
  raise DSGInvalidParameter(msg)
717
- lookup_path = Path(user_layout.lookup_data_file.path)
718
- if not lookup_path.is_absolute():
719
- if data_base_dir is not None:
720
- lookup_path = (data_base_dir / lookup_path).resolve()
721
- else:
722
- lookup_path = (config_file.parent / lookup_path).resolve()
757
+ lookup_path = cls._resolve_path(
758
+ user_layout.lookup_data_file.path, data_base_dir, config_file.parent
759
+ )
723
760
  if not lookup_path.exists():
724
761
  msg = f"Lookup data file does not exist: {lookup_path}"
725
762
  raise DSGInvalidParameter(msg)
726
763
  user_layout.lookup_data_file.path = str(lookup_path)
727
764
 
765
+ # Resolve expected associations paths
766
+ user_layout.expected_associations = [
767
+ str(cls._resolve_path(p, associations_base_dir, config_file.parent))
768
+ for p in user_layout.expected_associations
769
+ ]
770
+
728
771
  # Resolve missing associations paths
729
- resolved_missing_paths: list[str] = []
730
- for missing_assoc in user_layout.missing_associations:
731
- missing_path = Path(missing_assoc)
732
- if not missing_path.is_absolute():
733
- if missing_associations_base_dir is not None:
734
- missing_path = (missing_associations_base_dir / missing_path).resolve()
735
- else:
736
- missing_path = (config_file.parent / missing_path).resolve()
737
- resolved_missing_paths.append(str(missing_path))
738
- user_layout.missing_associations = resolved_missing_paths
772
+ user_layout.missing_associations = [
773
+ str(cls._resolve_path(p, associations_base_dir, config_file.parent))
774
+ for p in user_layout.missing_associations
775
+ ]
739
776
 
740
777
  return config
741
778
 
@@ -758,6 +795,13 @@ class DatasetConfig(ConfigBase):
758
795
  return self.model.data_layout.lookup_data_file
759
796
  return None
760
797
 
798
@property
def expected_associations_paths(self) -> list[Path]:
    """Return the expected associations paths as ``Path`` objects.

    The list is empty when the model has no data layout.
    """
    layout = self.model.data_layout
    if layout is None:
        return []
    return [Path(entry) for entry in layout.expected_associations]
804
+
761
805
  @property
762
806
  def missing_associations_paths(self) -> list[Path]:
763
807
  """Return the list of missing associations paths if available."""