dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from .local_filesystem import LocalFilesystem
|
|
4
|
+
|
|
5
|
+
# from .s3_filesystem import S3Filesystem
|
|
6
|
+
|
|
7
|
+
# from dsgrid.common import AWS_PROFILE_NAME
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_filesystem_interface(path):
    """Make an instance of FilesystemInterface appropriate for the path.

    Parameters
    ----------
    path : str | Path
        Local path, or an S3 URI such as ``s3://bucket/prefix``.

    Returns
    -------
    FilesystemInterface

    Raises
    ------
    NotImplementedError
        Raised if the path is an S3 URI. Support for S3 is currently disabled.

    """
    if isinstance(path, Path):
        path = str(path)

    # Match the URI scheme ("s3:") instead of the bare letters "s3" so that local paths
    # such as "s3_data/registry" are not mistaken for S3 locations. The slashes are
    # deliberately not part of the check: pathlib collapses the double slash when a URI
    # is wrapped in a Path (e.g., "s3://bucket" becomes "s3:/bucket" on POSIX).
    if path.lower().startswith("s3:"):
        msg = f"Support for S3 is currently disabled: {path=}"
        raise NotImplementedError(msg)
        # When S3 support is re-enabled:
        # path = "s3" + path[2:]
        # fs_intf = S3Filesystem(path, AWS_PROFILE_NAME)

    return LocalFilesystem()
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Defines interface to access all filesystems"""
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FilesystemInterface(abc.ABC):
    """Interface to access and edit directories and files on a local or remote filesystem.

    Every method is abstract; concrete subclasses (e.g., LocalFilesystem) supply the behavior.
    """

    @abc.abstractmethod
    def copy_file(self, src, dst):
        """Copy a file to a destination.

        Parameters
        ----------
        src : str
            Path to source file
        dst : str
            Path to destination file

        """

    @abc.abstractmethod
    def copy_tree(self, src, dst):
        """Copy src to dst recursively.

        Parameters
        ----------
        src : str
            Source directory
        dst : str
            Destination directory

        """

    @abc.abstractmethod
    def exists(self, path):
        """Return True if path exists.

        Parameters
        ----------
        path : str
            Path to check

        Returns
        -------
        bool

        """

    @abc.abstractmethod
    def listdir(self, directory, files_only=False, directories_only=False, exclude_hidden=False):
        """List the contents of a directory.

        Parameters
        ----------
        directory : str
            Directory to list
        files_only : bool
            only return files
        directories_only : bool
            only return directories
        exclude_hidden : bool
            exclude names starting with "."

        Returns
        -------
        list
            list of str

        """

    @abc.abstractmethod
    def mkdir(self, directory):
        """Make a directory. Do nothing if the directory exists.

        Parameters
        ----------
        directory : str
            Directory to create

        """

    @abc.abstractmethod
    def path(self, path) -> Path:
        """Return an object that meets the interface of pathlib.Path.

        Parameters
        ----------
        path : str
            Path to wrap

        Returns
        -------
        Path

        """

    @abc.abstractmethod
    def rglob(
        self,
        directory,
        files_only=False,
        directories_only=False,
        exclude_hidden=False,
        pattern="*",
    ):
        """Recursively search a path and return a list of relative paths that match criteria.

        Parameters
        ----------
        directory : str
            Directory at which the recursive search starts
        files_only : bool, optional
            Return files only, by default False
        directories_only : bool, optional
            Return directories only, by default False
        exclude_hidden : bool, optional
            Exclude hidden files, by default False
        pattern : str, optional
            Search for files with a specific pattern, by default "*"

        Returns
        -------
        list
            Matching paths. NOTE(review): the element type (str vs. path object) and
            whether entries are relative to ``directory`` is decided by each
            implementation; confirm against the subclass in use.

        """

    @abc.abstractmethod
    def rm_tree(self, directory):
        """Remove all files and directories, recursively.

        Parameters
        ----------
        directory : str
            Directory to remove

        """

    @abc.abstractmethod
    def touch(self, path):
        """Touch the file at the given path.

        Parameters
        ----------
        path : str
            Path of the file to touch

        """
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Implementation for local filesystem"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from dsgrid.filesystem.filesystem_interface import FilesystemInterface
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LocalFilesystem(FilesystemInterface):
    """Provides access to the local filesystem."""

    def copy_file(self, src, dst):
        """Copy the file src to dst and return the result of shutil.copyfile."""
        return shutil.copyfile(src, dst)

    def copy_tree(self, src, dst):
        """Copy the directory src to dst recursively and return the result of shutil.copytree."""
        return shutil.copytree(src, dst)

    def exists(self, path):
        """Return True if path exists."""
        return os.path.exists(path)

    def listdir(self, directory, files_only=False, directories_only=False, exclude_hidden=False):
        """Return the names of the entries in directory.

        Parameters
        ----------
        directory : str
        files_only : bool
            Only return files. Takes precedence over directories_only.
        directories_only : bool
            Only return directories.
        exclude_hidden : bool
            Exclude names starting with "."

        Returns
        -------
        list
            list of str (entry names, not full paths)

        """
        contents = os.listdir(directory)
        if exclude_hidden:
            contents = [x for x in contents if not x.startswith(".")]
        if files_only:
            return [x for x in contents if os.path.isfile(os.path.join(directory, x))]
        if directories_only:
            return [x for x in contents if os.path.isdir(os.path.join(directory, x))]
        return contents

    def path(self, path) -> Path:
        """Return path as a pathlib.Path."""
        return Path(path)

    def rglob(
        self,
        directory,
        files_only=False,
        directories_only=False,
        exclude_hidden=False,
        pattern="*",
    ):
        """Recursively search directory and return the paths that match the criteria.

        Parameters
        ----------
        directory : str
        files_only : bool, optional
            Return files only, by default False. Takes precedence over directories_only.
        directories_only : bool, optional
            Return directories only, by default False
        exclude_hidden : bool, optional
            Exclude entries whose own name starts with ".", by default False
        pattern : str, optional
            Glob pattern passed to Path.rglob, by default "*"

        Returns
        -------
        list
            list of Path, as yielded by ``Path(directory).rglob(pattern)``

        """
        contents = list(Path(directory).rglob(pattern))
        if exclude_hidden:
            # NOTE: this does not currently ignore hidden directories in the path.
            contents = [x for x in contents if not x.name.startswith(".")]
        if files_only:
            return [x for x in contents if os.path.isfile(x)]
        if directories_only:
            return [x for x in contents if os.path.isdir(x)]
        return contents

    def mkdir(self, directory):
        """Make a directory, including parents. Do nothing if the directory exists."""
        os.makedirs(directory, exist_ok=True)

    def rm_tree(self, directory):
        """Remove directory and everything below it."""
        return shutil.rmtree(directory)

    def rm(self, path):
        """Remove a file or directory. Log a warning if the path does not exist.

        Parameters
        ----------
        path : str

        """
        # Warn only when there is nothing to remove. Previously the warning was emitted
        # unconditionally, including after a successful removal.
        if not os.path.exists(path):
            logger.warning("path %s does not exist", path)
            return

        if os.path.isdir(path):
            if os.listdir(path):
                self.rm_tree(path)
            else:
                # NOTE(review): os.removedirs also prunes parent directories that become
                # empty. Kept for backward compatibility; os.rmdir would remove only
                # this directory.
                os.removedirs(path)
        elif os.path.isfile(path):
            os.remove(path)

    def touch(self, path):
        """Touch the file at path via pathlib.Path.touch."""
        Path(path).touch()
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Implementation for AWS S3 bucket filesystem"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
import boto3
|
|
7
|
+
from s3path import S3Path, register_configuration_parameter
|
|
8
|
+
|
|
9
|
+
from .cloud_filesystem import CloudFilesystemInterface
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class S3Filesystem(CloudFilesystemInterface):
    """Provides access to an AWS S3 bucket."""

    # Bucket names may contain dots in addition to letters, digits, and hyphens.
    REGEX_S3_PATH = re.compile(r"s3:\/\/(?P<bucket>[\w.-]+)\/?(?P<prefix>.*)?")

    def __init__(self, path, profile):
        """Construct an S3Filesystem.

        Parameters
        ----------
        path : str
            S3 URI in the form ``s3://bucket/prefix``
        profile : str
            AWS profile name used to create the boto3 session

        Raises
        ------
        ValueError
            Raised if the bucket cannot be parsed from path.

        """
        match = self.REGEX_S3_PATH.search(str(path))
        if match is None:
            # Explicit exception instead of assert so the check survives ``python -O``.
            msg = f"Failed to parse AWS S3 bucket: {path}"
            raise ValueError(msg)
        self._bucket = match["bucket"]
        self._relpath = match["prefix"]
        self._uri = str(path)
        self._profile = profile
        self._session = boto3.session.Session(profile_name=self._profile)
        self._client = self._session.client("s3")

        register_configuration_parameter(S3Path("/"), resource=self._session.resource("s3"))

    @property
    def profile(self):
        """Return the AWS profile."""
        return self._profile

    @property
    def bucket(self):
        """Return the AWS bucket."""
        return self._bucket

    def _Key(self, path):
        """Get formatted S3 key from provided path for S3Path module.

        Strips a leading ``/<bucket>/`` or ``<bucket>/``, the URI this instance was
        constructed with, or a single leading slash, in that order of precedence.

        Parameters
        ----------
        path : str | S3Path | None

        Returns
        -------
        str

        """
        if not path:
            return ""
        path = str(path)

        # Only strip the bucket when it is a whole path component. A plain startswith()
        # would mangle keys that merely share a prefix with the bucket name
        # (bucket "data", key "database/x").
        for root in (f"/{self._bucket}", self._bucket):
            if path == root or path.startswith(f"{root}/"):
                return path[len(root) + 1 :]

        if path.startswith(self._uri):
            return path.replace(self._uri + "/", "")
        if path.startswith("/"):
            return path[1:]
        return path

    def check_versions(self, directory):
        """Not supported yet; always raises NotImplementedError."""
        raise NotImplementedError("not supported yet")

    def copy_file(self, src, dst):
        """Not supported yet; always raises NotImplementedError."""
        raise NotImplementedError("not supported yet")

    def copy_tree(self, src, dst):
        """Not supported yet; always raises NotImplementedError."""
        raise NotImplementedError("not supported yet")

    def exists(self, path):
        """Return True if path exists in the bucket."""
        return self.path(path).exists()

    def listdir(
        self, directory="", files_only=False, directories_only=False, exclude_hidden=False
    ):
        """Return the names of the entries directly under directory.

        Parameters
        ----------
        directory : str
        files_only : bool
            Only return files. Takes precedence over directories_only.
        directories_only : bool
            Only return directories.
        exclude_hidden : bool
            Exclude names starting with "."

        Returns
        -------
        list
            list of str

        """
        contents = [x for x in self.path(directory).glob("*") if x.name != ""]
        if exclude_hidden:
            # NOTE: this does not currently ignore hidden directories in the path.
            contents = [x for x in contents if not x.name.startswith(".")]
        if files_only:
            return [x.name for x in contents if x.is_file()]
        if directories_only:
            return [x.name for x in contents if x.is_dir()]
        return [x.name for x in contents]

    def list_versions(self, path):
        """Not supported yet; always raises NotImplementedError."""
        raise NotImplementedError("not supported yet")
        # self._s3.list_object_versions(Bucket=self._bucket, Prefix=prefix)

    def mkdir(self, directory):
        """Create an empty object whose key is the directory key followed by "/"."""
        key = self._Key(directory)
        self._client.put_object(Bucket=self._bucket, Body="", Key=f"{key}/")

    def rglob(
        self,
        directory=None,
        files_only=False,
        directories_only=False,
        exclude_hidden=True,
        pattern="*",
    ):
        """Recursively search directory and return the paths that match the criteria.

        Parameters
        ----------
        directory : str | None
            Directory key to search. None searches from the bucket root.
        files_only : bool, optional
            Return files only, by default False. Takes precedence over directories_only.
        directories_only : bool, optional
            Return directories only, by default False
        exclude_hidden : bool, optional
            Exclude entries whose own name starts with ".", by default True
        pattern : str, optional
            Glob pattern, by default "*"

        Returns
        -------
        list
            list of str

        """
        contents = list(self.path(directory).rglob(pattern))
        if exclude_hidden:
            # NOTE: this does not currently ignore hidden directories in the path.
            # Entries stay path objects here; converting to str at this point broke the
            # is_file()/is_dir() filters below.
            contents = [x for x in contents if not x.name.startswith(".")]
        if files_only:
            contents = [x for x in contents if x.is_file()]
        elif directories_only:
            contents = [x for x in contents if x.is_dir()]
        return [str(x) for x in contents]

    def rm_tree(self, directory):
        """Not supported yet; always raises NotImplementedError."""
        raise NotImplementedError("not supported yet")

    def path(self, path):
        """Returns S3Path"""
        return S3Path(f"/{self._bucket}/{self._Key(path)}")

    def touch(self, path):
        """Touch the object at path via S3Path.touch."""
        self.path(path).touch()
|
dsgrid/loggers.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Contains logging configuration data."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import logging.config
|
|
5
|
+
import os
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
|
|
8
|
+
import chronify.loggers
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ETH@20210325 - What if you want to set up logging for all loggers, or for all
|
|
12
|
+
# dsgrid loggers? What name should be provided? Should that be the default?
|
|
13
|
+
# Should filename default to None?
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ETH@20210325 - name and packages seems like two different functions? That is,
|
|
17
|
+
# you're either setting up logger name, or you want to set up a bunch of loggers
|
|
18
|
+
# for the different packages?
|
|
19
|
+
def setup_logging(
    name, filename, console_level=logging.INFO, file_level=logging.INFO, packages=None, mode="w"
):
    """Configures logging to file and console.

    Parameters
    ----------
    name : str
        logger name
    filename : str | None
        log filename. If None, only console logging is configured.
    console_level : int, optional
        console log level. defaults to logging.INFO
    file_level : int, optional
        file log level. defaults to logging.INFO
    packages : list, optional
        enable logging for these package names. Always adds dsgrid.
    mode : str, optional
        mode used to open the log file. defaults to "w"

    Returns
    -------
    logging.Logger
        The logger registered under ``name``.

    """
    handlers = {
        "console": {
            "level": console_level,
            "formatter": "short",
            "class": "logging.StreamHandler",
        },
    }
    handler_names = ["console"]
    # Define and reference the file handler only when a log file was requested.
    # dictConfig raises ValueError if a logger names a handler that is not defined,
    # which used to happen for `name` whenever filename was None.
    if filename is not None:
        handlers["file"] = {
            "class": "logging.FileHandler",
            "level": file_level,
            "filename": filename,
            "mode": mode,
            "formatter": "detailed",
        }
        handler_names.append("file")

    logger_names = {name, "dsgrid", *(packages or [])}
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "basic": {"format": "%(message)s"},
            "short": {
                "format": "%(asctime)s - %(levelname)s [%(name)s "
                "%(filename)s:%(lineno)d] : %(message)s",
            },
            "detailed": {
                "format": "%(asctime)s - %(levelname)s [%(name)s "
                "%(filename)s:%(lineno)d] : %(message)s",
            },
        },
        "handlers": handlers,
        "loggers": {
            logger_name: {"handlers": list(handler_names), "level": "DEBUG", "propagate": True}
            for logger_name in logger_names
        },
    }

    logging.config.dictConfig(log_config)
    logger = logging.getLogger(name)

    # TODO: more consideration is warranted, but this is usually what we want.
    # If we migrate dsgrid to use loguru, it will be easier. We could use the TRACE level
    # in dsgrid.
    chronify.loggers.setup_logging(
        console_level="WARNING",
        file_level="DEBUG",
        filename=filename,
        mode=mode,
    )
    return logger
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def check_log_file_size(filename, limit_mb=10, no_prompts=False):
    """Offer to delete a log file that has grown beyond a size limit.

    Parameters
    ----------
    filename : str | Path
        log filename. Nothing happens if the file does not exist.
    limit_mb : int | float, optional
        size limit in MiB. defaults to 10
    no_prompts : bool, optional
        If True, never prompt and never delete the file. defaults to False

    """
    # os.path accepts both str and Path, matching what setup_logging accepts.
    if not os.path.exists(filename):
        return

    size_mb = os.path.getsize(filename) / (1024 * 1024)
    if size_mb > limit_mb and not no_prompts:
        msg = f"The log file {filename} has exceeded {limit_mb} MiB. Delete it? [Y] >>> "
        val = input(msg)
        # An empty response accepts the default (yes).
        if val.strip().lower() in ("", "y"):
            os.remove(filename)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@contextmanager
def disable_console_logging(name="dsgrid"):
    """Temporarily silence the console handler of a logger.

    The first handler named "console" attached to the logger has its level raised to
    logging.FATAL for the duration of the context and restored afterwards. If the logger
    has no such handler, the context does nothing.

    Parameters
    ----------
    name : str, optional
        logger name. defaults to "dsgrid"

    """
    logger = logging.getLogger(name)
    console_handler = next((h for h in logger.handlers if h.name == "console"), None)
    if console_handler is None:
        yield
        return

    # Remember the handler itself, not just its level, so that exit restores exactly the
    # handler that was silenced even if the logger is reconfigured inside the context.
    saved_level = console_handler.level
    console_handler.setLevel(logging.FATAL)
    try:
        yield
    finally:
        console_handler.setLevel(saved_level)
|
|
Binary file
|