plexus-python-common 1.0.74__py3-none-any.whl → 1.1.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plexus/common/utils/argutils.py +221 -0
- plexus/common/utils/config.py +120 -59
- plexus/common/utils/csvutils.py +241 -0
- plexus/common/utils/dbutils.py +431 -0
- plexus/common/utils/dtutils.py +395 -0
- plexus/common/utils/funcutils.py +306 -0
- plexus/common/utils/iterutils.py +912 -0
- plexus/common/utils/jsonutils.py +459 -73
- plexus/common/utils/logger.py +117 -0
- plexus/common/utils/numutils.py +142 -0
- plexus/common/utils/pathutils.py +235 -201
- plexus/common/utils/randutils.py +350 -0
- plexus/common/utils/retry.py +252 -0
- plexus/common/utils/span.py +256 -0
- plexus/common/utils/strutils.py +188 -391
- plexus/common/utils/testutils.py +137 -145
- plexus/common/utils/typeutils.py +212 -0
- plexus_python_common-1.1.82.dist-info/METADATA +23 -0
- plexus_python_common-1.1.82.dist-info/RECORD +23 -0
- plexus/common/carto/OSMFile.py +0 -259
- plexus/common/carto/OSMNode.py +0 -25
- plexus/common/carto/OSMTags.py +0 -101
- plexus/common/carto/OSMWay.py +0 -24
- plexus/common/carto/__init__.py +0 -11
- plexus/common/resources/__init__.py +0 -0
- plexus/common/resources/tags/__init__.py +0 -35
- plexus/common/resources/tags/unittest-1.0.0.tagset.yaml +0 -98
- plexus/common/resources/tags/universal-1.0.0.tagset.yaml +0 -1390
- plexus/common/utils/apiutils.py +0 -31
- plexus/common/utils/bagutils.py +0 -331
- plexus/common/utils/datautils.py +0 -233
- plexus/common/utils/dockerutils.py +0 -181
- plexus/common/utils/gisutils.py +0 -406
- plexus/common/utils/ormutils.py +0 -1638
- plexus/common/utils/s3utils.py +0 -939
- plexus/common/utils/sqlutils.py +0 -9
- plexus/common/utils/tagutils.py +0 -1476
- plexus_python_common-1.0.74.dist-info/METADATA +0 -42
- plexus_python_common-1.0.74.dist-info/RECORD +0 -29
- {plexus_python_common-1.0.74.dist-info → plexus_python_common-1.1.82.dist-info}/WHEEL +0 -0
- {plexus_python_common-1.0.74.dist-info → plexus_python_common-1.1.82.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import dataclasses
|
|
3
|
+
import inspect
|
|
4
|
+
import typing
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from plexus.common.utils.typeutils import is_identical_type
|
|
9
|
+
|
|
10
|
+
# Names exported by a star-import of this module.
# NOTE(review): ``construct_parser_tree`` is defined in this module but is not listed here - confirm that is intended.
__all__ = [
    "ParserTreeNode",
    "ParserTree",
    "ArgParseSpec",
    "argparse_spec",
    "make_argparse"
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ParserTreeNode:
    """
    One node of the parser tree: a command string paired with the ``ArgumentParser`` that handles it.

    A fresh node has ``subparsers`` set to ``None`` and an empty ``child_nodes`` list; both are plain attributes
    that are filled in from outside the class.

    :param command: The command string this node stands for.
    :param parser: The ``ArgumentParser`` bound to this node.
    """

    def __init__(self, command: str, parser: argparse.ArgumentParser):
        # Nothing is known about sub-commands at construction time.
        self.child_nodes: list["ParserTreeNode"] = []
        self.subparsers = None
        self.parser, self.command = parser, command
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def construct_parser_tree(
    root_node: ParserTreeNode,
    command_chain: list[str],
    command_key_prefix: str,
    **kwargs,
) -> list[ParserTreeNode]:
    """
    Walks ``command_chain`` downwards from ``root_node``, reusing child nodes whose command matches and creating
    the missing ones, and returns every node visited on the way.

    The subparsers action of a node is created lazily with ``dest`` set to ``"<command_key_prefix>:<depth>"``.
    ``kwargs`` are forwarded to ``add_parser`` only when the last command of the chain has to be created;
    intermediate parsers are created without extra arguments.

    :param root_node: The node the walk starts from.
    :param command_chain: The command strings, outermost first.
    :param command_key_prefix: Prefix of the ``dest`` keys used for the subparsers actions.
    :param kwargs: Extra keyword arguments for the parser of the final command.
    :return: The visited nodes, starting with ``root_node`` and ending with the node of the last command.
    """
    visited = [root_node]
    current = root_node
    final_depth = len(command_chain) - 1

    # An empty chain simply skips the loop and yields ``[root_node]``.
    for depth, command in enumerate(command_chain):
        if current.subparsers is None:
            current.subparsers = current.parser.add_subparsers(dest=f"{command_key_prefix}:{depth}")

        match = next((child for child in current.child_nodes if child.command == command), None)
        if match is None:
            # Only the parser of the final command receives the caller's keyword arguments.
            parser_kwargs = kwargs if depth == final_depth else {}
            match = ParserTreeNode(command, current.subparsers.add_parser(command, **parser_kwargs))
            current.child_nodes.append(match)

        current = match
        visited.append(current)

    return visited
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ParserTree(object):
|
|
77
|
+
"""
|
|
78
|
+
Represents a tree structure for managing ``argparse`` parsers and subcommands. Provides methods to add subcommand
|
|
79
|
+
parsers and parse arguments, returning the command chain and parsed namespace.
|
|
80
|
+
|
|
81
|
+
:param root_parser: The root ``ArgumentParser``.
|
|
82
|
+
:param command_key_prefix: Prefix for command keys in the parser tree.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
def __init__(self, root_parser: argparse.ArgumentParser, command_key_prefix: str = "command"):
|
|
86
|
+
self.root_node = ParserTreeNode("", root_parser)
|
|
87
|
+
self.command_key_prefix = command_key_prefix
|
|
88
|
+
|
|
89
|
+
def add_subcommand_parser(self, command_chain: list[str], **kwargs) -> argparse.ArgumentParser:
|
|
90
|
+
"""
|
|
91
|
+
Adds a subcommand parser for the specified command chain, creating intermediate nodes as needed.
|
|
92
|
+
|
|
93
|
+
:param command_chain: A list of command strings representing the subcommand path.
|
|
94
|
+
:param kwargs: Additional keyword arguments for parser creation.
|
|
95
|
+
:return: The ``ArgumentParser`` for the last command in the chain.
|
|
96
|
+
"""
|
|
97
|
+
*_, last_node = construct_parser_tree(self.root_node, command_chain, self.command_key_prefix, **kwargs)
|
|
98
|
+
return last_node.parser
|
|
99
|
+
|
|
100
|
+
def parse_args(self, args: list[str] | None = None) -> tuple[list[str], argparse.Namespace]:
|
|
101
|
+
"""
|
|
102
|
+
Parses the provided argument list, returning the command chain and the parsed namespace.
|
|
103
|
+
|
|
104
|
+
:param args: The list of arguments to parse. If ``None``, parses ``sys.argv``.
|
|
105
|
+
:return: A tuple containing the list of command strings and the parsed ``Namespace``.
|
|
106
|
+
"""
|
|
107
|
+
known_args_namespace = self.root_node.parser.parse_args(args)
|
|
108
|
+
|
|
109
|
+
command_pairs = []
|
|
110
|
+
namespace = argparse.Namespace()
|
|
111
|
+
for key, value in dict(vars(known_args_namespace)).items():
|
|
112
|
+
if key.startswith(self.command_key_prefix) and value is not None:
|
|
113
|
+
command_pairs.append((key, value))
|
|
114
|
+
else:
|
|
115
|
+
setattr(namespace, key, value)
|
|
116
|
+
|
|
117
|
+
return list(command for _, command in sorted(command_pairs)), namespace
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclasses.dataclass(frozen=True)
|
|
121
|
+
class ArgParseSpec(object):
|
|
122
|
+
"""
|
|
123
|
+
Specification for an argument to be added to an ``ArgumentParser``. Allows detailed configuration of argument
|
|
124
|
+
properties such as ``flag``, ``name``, ``type``, ``action``, ``default``, ``choices``, and help text.
|
|
125
|
+
|
|
126
|
+
:param flag: The optional flag for the argument (e.g., '-f').
|
|
127
|
+
:param name: The name of the argument (e.g., '--file').
|
|
128
|
+
:param action: The action to be taken by the argument parser.
|
|
129
|
+
:param default: The default value for the argument.
|
|
130
|
+
:param type: The type of the argument value.
|
|
131
|
+
:param choices: A list of valid choices for the argument.
|
|
132
|
+
:param required: Whether the argument is required.
|
|
133
|
+
:param help: The help text for the argument.
|
|
134
|
+
"""
|
|
135
|
+
flag: str | None = None
|
|
136
|
+
name: str | None = None
|
|
137
|
+
action: str | None = None
|
|
138
|
+
default: Any = None
|
|
139
|
+
type: typing.Type | None = None
|
|
140
|
+
choices: list[Any] | None = None
|
|
141
|
+
required: bool | None = None
|
|
142
|
+
help: str | None = None
|
|
143
|
+
|
|
144
|
+
def make_kwargs(self) -> dict[str, Any]:
|
|
145
|
+
"""
|
|
146
|
+
Constructs a dictionary of keyword arguments for ``ArgumentParser.add_argument``, omitting any that are
|
|
147
|
+
``None``.
|
|
148
|
+
|
|
149
|
+
:return: A dictionary of argument properties suitable for ``ArgumentParser.add_argument``.
|
|
150
|
+
"""
|
|
151
|
+
kwargs = dict(
|
|
152
|
+
action=self.action,
|
|
153
|
+
default=self.default,
|
|
154
|
+
type=self.type,
|
|
155
|
+
choices=self.choices,
|
|
156
|
+
required=self.required,
|
|
157
|
+
help=self.help,
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
return {key: value for key, value in kwargs.items() if value is not None}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
argparse_spec = ArgParseSpec
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def make_argparse(func, parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
    """
    Automatically generates an ``ArgumentParser`` for the given function by inspecting its signature and parameter
    annotations. Supports ``ArgParseSpec`` for detailed argument configuration.

    Every parameter ``some_name`` becomes an option ``--some-name``. The value type is ``str``, ``int``, ``float``
    or ``bool`` when the annotation matches that type or a sequence of it, and ``str`` otherwise (including
    parameters without an annotation). Annotations whose origin is ``list`` or ``Sequence`` use the ``append``
    action. A parameter without a default is required; a parameter with any default, ``None`` included, is
    optional. A default that is an ``ArgParseSpec`` configures the argument directly, with the derived name, type
    and action filling in whatever the spec leaves unset.

    :param func: The function whose parameters will be used to generate arguments.
    :param parser: An optional ``ArgumentParser`` to add arguments to. If ``None``, a new parser is created.
    :return: The ``ArgumentParser`` with arguments added based on the function signature.
    """
    if parser is None:
        parser = argparse.ArgumentParser()

    def is_type_of(a: Any, *bs) -> bool:
        return any(is_identical_type(a, b, strict_optional=False, covariant=True) for b in bs)

    for name, param in inspect.signature(func).parameters.items():
        arg_name = f"--{name.replace('_', '-')}"
        annotation = param.annotation

        # ``inspect`` marks a missing annotation with ``Parameter.empty`` rather than ``None``
        arg_type = str
        if annotation is not None and annotation is not inspect.Parameter.empty:
            # Checked in this order; the first matching scalar type wins
            # NOTE(review): ``type=bool`` makes argparse call ``bool()`` on the raw text, so ``"False"`` parses as
            # ``True`` - consider an explicit ``ArgParseSpec`` for boolean options
            for scalar_type in (str, int, float, bool):
                if is_type_of(annotation, scalar_type, Sequence[scalar_type]):
                    arg_type = scalar_type
                    break

        arg_action = "append" if typing.get_origin(annotation) in {list, Sequence} else None
        has_default = param.default is not inspect.Parameter.empty

        if isinstance(param.default, ArgParseSpec):
            spec = dataclasses.replace(param.default,
                                       name=param.default.name or arg_name,
                                       type=param.default.type if param.default.type is not None else arg_type,
                                       action=param.default.action if param.default.action is not None else arg_action)

            names = (spec.name,) if spec.flag is None else (spec.flag, spec.name)
            parser.add_argument(*names, **spec.make_kwargs())

        else:
            # Required only when the signature gives no default at all; an explicit ``None`` default is optional
            parser.add_argument(arg_name,
                                type=arg_type,
                                action=arg_action,
                                required=not has_default,
                                default=param.default if has_default else None)

    return parser
|
plexus/common/utils/config.py
CHANGED
|
@@ -1,63 +1,124 @@
|
|
|
1
|
+
import configparser
|
|
1
2
|
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
from typing import Self
|
|
2
5
|
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from iker.common.utils.funcutils import memorized
|
|
6
|
-
from iker.common.utils.pathutils import make_path
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@memorized()
|
|
10
|
-
def config(config_path: str | os.PathLike[str] = "") -> Config:
|
|
11
|
-
default_items: list[tuple[str, str, str]] = [
|
|
12
|
-
("plexus", "logging.level", "INFO"),
|
|
13
|
-
("plexus", "logging.format", "%(asctime)s [%(levelname)s] %(name)s: %(message)s"),
|
|
14
|
-
]
|
|
15
|
-
|
|
16
|
-
instance = Config(config_path or make_path("~/.plexus.cfg", expand=True, normalize=True, absolute=True))
|
|
17
|
-
instance.restore()
|
|
18
|
-
instance.update(default_items, overwrite=False)
|
|
19
|
-
|
|
20
|
-
return instance
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def config_print_or_set(config: Config, section: str, key: str, value: str):
|
|
24
|
-
if value is not None:
|
|
25
|
-
if section is None or key is None:
|
|
26
|
-
raise ValueError("cannot specify value without section and key")
|
|
27
|
-
|
|
28
|
-
old_value = config.get(section, key)
|
|
29
|
-
config.set(section, key, value)
|
|
30
|
-
config.persist()
|
|
31
|
-
|
|
32
|
-
print(f"Configuration file '{config.config_path}'", )
|
|
33
|
-
print(f"Section <{section}>")
|
|
34
|
-
print(f" {key} = {old_value} -> {value}")
|
|
35
|
-
|
|
36
|
-
else:
|
|
37
|
-
if section is None and key is None:
|
|
38
|
-
print(f"Configuration file '{config.config_path}'", )
|
|
39
|
-
for section in config.config_parser.sections():
|
|
40
|
-
print(f"Section <{section}>")
|
|
41
|
-
for key, value in config.config_parser.items(section):
|
|
42
|
-
print(f" {key} = {value}")
|
|
43
|
-
|
|
44
|
-
elif section is not None and key is None:
|
|
45
|
-
if not config.has_section(section):
|
|
46
|
-
logger.warning("Configuration section '%s' not found", section)
|
|
47
|
-
return
|
|
48
|
-
print(f"Configuration file '{config.config_path}'", )
|
|
49
|
-
print(f"Section <{section}>")
|
|
50
|
-
for key, value in config.config_parser.items(section):
|
|
51
|
-
print(f" {key} = {value}")
|
|
52
|
-
|
|
53
|
-
elif section is not None and key is not None:
|
|
54
|
-
value = config.get(section, key)
|
|
55
|
-
if value is None:
|
|
56
|
-
logger.warning("Configuration section '%s' key '%s' not found", section, key)
|
|
57
|
-
return
|
|
58
|
-
print(f"Configuration file '{config.config_path}'", )
|
|
59
|
-
print(f"Section <{section}>")
|
|
60
|
-
print(f" {key} = {value}")
|
|
6
|
+
from plexus.common.utils import logger
|
|
7
|
+
from plexus.common.utils.strutils import is_blank, trim_to_empty
|
|
61
8
|
|
|
9
|
+
|
|
10
|
+
class Config(object):
|
|
11
|
+
def __init__(self, config_path: str | os.PathLike[str] | None = None):
|
|
12
|
+
if not is_blank(config_path := trim_to_empty(None if config_path is None else str(config_path))):
|
|
13
|
+
self.config_path = pathlib.Path(config_path)
|
|
62
14
|
else:
|
|
63
|
-
|
|
15
|
+
self.config_path = None
|
|
16
|
+
self.config_parser: configparser.RawConfigParser = configparser.RawConfigParser(strict=False)
|
|
17
|
+
|
|
18
|
+
def __len__(self):
|
|
19
|
+
return sum(len(self.config_parser.options(section)) for section in self.config_parser.sections())
|
|
20
|
+
|
|
21
|
+
def update(self, tuples: list[tuple[str, str, str]], *, overwrite: bool = False):
|
|
22
|
+
for section, option, value in tuples:
|
|
23
|
+
if not self.config_parser.has_section(section):
|
|
24
|
+
self.config_parser.add_section(section)
|
|
25
|
+
if overwrite or not self.config_parser.has_option(section, option):
|
|
26
|
+
self.config_parser.set(section, option, value)
|
|
27
|
+
|
|
28
|
+
def restore(self) -> bool:
|
|
29
|
+
self.config_parser = configparser.RawConfigParser(strict=False)
|
|
30
|
+
if self.config_path is None:
|
|
31
|
+
return False
|
|
32
|
+
try:
|
|
33
|
+
if not self.config_path.exists():
|
|
34
|
+
raise IOError("file not found")
|
|
35
|
+
self.config_parser.read(self.config_path, encoding="utf-8")
|
|
36
|
+
return True
|
|
37
|
+
except IOError as e:
|
|
38
|
+
logger.exception("Failed to restore config from file <'%s'>", self.config_path)
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
def persist(self) -> bool:
|
|
42
|
+
if self.config_path is None:
|
|
43
|
+
return False
|
|
44
|
+
try:
|
|
45
|
+
with open(self.config_path, "w") as fh:
|
|
46
|
+
self.config_parser.write(fh)
|
|
47
|
+
return True
|
|
48
|
+
except IOError as e:
|
|
49
|
+
logger.exception("Failed to persist config to file <'%s'>", self.config_path)
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
def has_section(self, section: str) -> bool:
|
|
53
|
+
return self.config_parser.has_section(section)
|
|
54
|
+
|
|
55
|
+
def has(self, section: str, option: str) -> bool:
|
|
56
|
+
return self.config_parser.has_option(section, option)
|
|
57
|
+
|
|
58
|
+
def get(self, section: str, option: str, default_value: str = None) -> str:
|
|
59
|
+
if self.config_parser.has_option(section, option):
|
|
60
|
+
return self.config_parser.get(section, option)
|
|
61
|
+
return default_value
|
|
62
|
+
|
|
63
|
+
def getint(self, section: str, option: str, default_value: int = None) -> int:
|
|
64
|
+
if self.config_parser.has_option(section, option):
|
|
65
|
+
return self.config_parser.getint(section, option)
|
|
66
|
+
return default_value
|
|
67
|
+
|
|
68
|
+
def getfloat(self, section: str, option: str, default_value: float = None) -> float:
|
|
69
|
+
if self.config_parser.has_option(section, option):
|
|
70
|
+
return self.config_parser.getfloat(section, option)
|
|
71
|
+
return default_value
|
|
72
|
+
|
|
73
|
+
def getboolean(self, section: str, option: str, default_value: bool = None) -> bool:
|
|
74
|
+
if self.config_parser.has_option(section, option):
|
|
75
|
+
return self.config_parser.getboolean(section, option)
|
|
76
|
+
return default_value
|
|
77
|
+
|
|
78
|
+
def set(self, section: str, option: str, value: str):
|
|
79
|
+
if not self.config_parser.has_section(section):
|
|
80
|
+
self.config_parser.add_section(section)
|
|
81
|
+
self.config_parser.set(section, option, value)
|
|
82
|
+
|
|
83
|
+
def sections(self) -> list[str]:
|
|
84
|
+
return self.config_parser.sections()
|
|
85
|
+
|
|
86
|
+
def options(self, section: str) -> list[str]:
|
|
87
|
+
if not self.config_parser.has_section(section):
|
|
88
|
+
return []
|
|
89
|
+
return self.config_parser.options(section)
|
|
90
|
+
|
|
91
|
+
def tuples(self) -> list[tuple[str, str, str]]:
|
|
92
|
+
result = []
|
|
93
|
+
for section in self.config_parser.sections():
|
|
94
|
+
for option in self.config_parser.options(section):
|
|
95
|
+
value = self.config_parser.get(section, option)
|
|
96
|
+
result.append((section, option, value))
|
|
97
|
+
return result
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class ConfigVisitor(object):
|
|
101
|
+
def __init__(self, config: Config, section: str, prefix: str = "", separator: str = "."):
|
|
102
|
+
self.config = config
|
|
103
|
+
self.section = section
|
|
104
|
+
self.prefix = prefix
|
|
105
|
+
self.separator = separator
|
|
106
|
+
|
|
107
|
+
def __str__(self):
|
|
108
|
+
return self.config.get(self.section, self.prefix)
|
|
109
|
+
|
|
110
|
+
def __int__(self):
|
|
111
|
+
return self.config.getint(self.section, self.prefix)
|
|
112
|
+
|
|
113
|
+
def __float__(self):
|
|
114
|
+
return self.config.getfloat(self.section, self.prefix)
|
|
115
|
+
|
|
116
|
+
def __bool__(self):
|
|
117
|
+
return self.config.getboolean(self.section, self.prefix)
|
|
118
|
+
|
|
119
|
+
def __getattr__(self, suffix: str) -> Self:
|
|
120
|
+
return self[suffix]
|
|
121
|
+
|
|
122
|
+
def __getitem__(self, suffix: str) -> Self:
|
|
123
|
+
new_prefix = suffix if is_blank(self.prefix) else self.separator.join([self.prefix, suffix])
|
|
124
|
+
return ConfigVisitor(self.config, self.section, new_prefix, self.separator)
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import os
|
|
3
|
+
from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
|
|
4
|
+
from typing import Any
|
|
5
|
+
from typing import overload
|
|
6
|
+
|
|
7
|
+
# Names exported by a star-import of this module: the two classes and their lowercase aliases.
__all__ = [
    "CSVColumn",
    "CSVView",
    "column",
    "view",
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CSVColumn:
    """
    Describes one column of a CSV schema: its name, the conversions between text and value, and the text that
    stands for a null value.

    :param name: The name of the column.
    :param loader: Converts the text of a cell into the column's value; defaults to ``str``.
    :param dumper: Converts a value of the column into text; defaults to ``str``.
    :param null_str: The text representing a null value in this column; defaults to the empty string.
    """

    def __init__(
        self,
        name: str,
        loader: Callable[[str], Any] = str,
        dumper: Callable[[Any], str] = str,
        null_str: str = "",
    ):
        # Stored as given; nothing is validated here.
        self.name, self.null_str = name, null_str
        self.loader, self.dumper = loader, dumper
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CSVView(object):
|
|
40
|
+
"""
|
|
41
|
+
Represents a view for reading and writing CSV data according to a schema. Supports loading and dumping lines or
|
|
42
|
+
files, with options for headers and dictionary or list output.
|
|
43
|
+
|
|
44
|
+
:param schema: The sequence of ``CSVColumn`` objects defining the schema.
|
|
45
|
+
:param column_delimiter: The delimiter used to separate columns in the CSV (default: ",").
|
|
46
|
+
:param line_terminator: The string used to terminate lines in the CSV (default: "\n").
|
|
47
|
+
:param quote_char: The character used to quote fields in the CSV (default: '"').
|
|
48
|
+
:param strict: Whether to raise an error on malformed CSV (default: ``True``).
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
schema: Sequence[CSVColumn],
|
|
54
|
+
*,
|
|
55
|
+
column_delimiter: str = ",",
|
|
56
|
+
line_terminator: str = "\n",
|
|
57
|
+
quote_char: str = '"',
|
|
58
|
+
strict: bool = True,
|
|
59
|
+
):
|
|
60
|
+
self.schema = schema
|
|
61
|
+
self.column_delimiter = column_delimiter
|
|
62
|
+
self.line_terminator = line_terminator
|
|
63
|
+
self.quote_char = quote_char
|
|
64
|
+
self.strict = strict
|
|
65
|
+
|
|
66
|
+
@overload
|
|
67
|
+
def load_lines(
|
|
68
|
+
self,
|
|
69
|
+
lines: Iterable[Sequence[str]],
|
|
70
|
+
has_header: bool,
|
|
71
|
+
ret_dict: False = False,
|
|
72
|
+
) -> Generator[list[Any], None, None]:
|
|
73
|
+
...
|
|
74
|
+
|
|
75
|
+
@overload
|
|
76
|
+
def load_lines(
|
|
77
|
+
self,
|
|
78
|
+
lines: Iterable[Sequence[str]],
|
|
79
|
+
ret_dict: False = False,
|
|
80
|
+
) -> Generator[list[Any], None, None]:
|
|
81
|
+
...
|
|
82
|
+
|
|
83
|
+
@overload
|
|
84
|
+
def load_lines(
|
|
85
|
+
self,
|
|
86
|
+
lines: Iterable[Sequence[str]],
|
|
87
|
+
has_header: bool,
|
|
88
|
+
ret_dict: True = True,
|
|
89
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
90
|
+
...
|
|
91
|
+
|
|
92
|
+
@overload
|
|
93
|
+
def load_lines(
|
|
94
|
+
self,
|
|
95
|
+
lines: Iterable[Sequence[str]],
|
|
96
|
+
ret_dict: True = True,
|
|
97
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
98
|
+
...
|
|
99
|
+
|
|
100
|
+
def load_lines(
|
|
101
|
+
self,
|
|
102
|
+
lines: Iterable[Sequence[str]],
|
|
103
|
+
has_header: bool = True,
|
|
104
|
+
ret_dict: bool = False,
|
|
105
|
+
) -> Generator[list[Any] | dict[str, Any], None, None]:
|
|
106
|
+
"""
|
|
107
|
+
Loads CSV data from an iterable of lines, optionally using the first line as a header. Returns each row as a
|
|
108
|
+
list or dictionary, depending on ``ret_dict``.
|
|
109
|
+
|
|
110
|
+
:param lines: An iterable of CSV lines.
|
|
111
|
+
:param has_header: Whether the first line is a header.
|
|
112
|
+
:param ret_dict: Whether to return rows as dictionaries (``True``) or lists (``False``).
|
|
113
|
+
:return: A generator yielding each row as a list or dictionary.
|
|
114
|
+
"""
|
|
115
|
+
rows_iter = iter(lines)
|
|
116
|
+
if has_header:
|
|
117
|
+
header_cols = next(rows_iter)
|
|
118
|
+
if len(self.schema) != len(header_cols):
|
|
119
|
+
raise ValueError("size of the schema is not identical to size of the columns")
|
|
120
|
+
for c, header_col in zip(self.schema, header_cols):
|
|
121
|
+
if c.name != header_col:
|
|
122
|
+
raise ValueError("name of the schema is not equal to the name of the columns")
|
|
123
|
+
for cols in rows_iter:
|
|
124
|
+
if len(self.schema) != len(cols):
|
|
125
|
+
continue
|
|
126
|
+
if ret_dict:
|
|
127
|
+
yield {c.name: None if col == c.null_str else c.loader(col) for c, col in zip(self.schema, cols)}
|
|
128
|
+
else:
|
|
129
|
+
yield [None if col == c.null_str else c.loader(col) for c, col in zip(self.schema, cols)]
|
|
130
|
+
|
|
131
|
+
def dump_lines(
|
|
132
|
+
self,
|
|
133
|
+
data: Iterable[Sequence[Any] | Mapping[str, Any]],
|
|
134
|
+
has_header: bool = True,
|
|
135
|
+
) -> Generator[list[str], None, None]:
|
|
136
|
+
"""
|
|
137
|
+
Dumps data to CSV lines according to the schema, optionally including a header row.
|
|
138
|
+
|
|
139
|
+
:param data: An iterable of rows, each as a sequence or mapping.
|
|
140
|
+
:param has_header: Whether to include a header row.
|
|
141
|
+
:return: A generator yielding CSV lines as strings.
|
|
142
|
+
"""
|
|
143
|
+
if has_header:
|
|
144
|
+
yield list(c.name for c in self.schema)
|
|
145
|
+
for cols in data:
|
|
146
|
+
if isinstance(cols, Sequence):
|
|
147
|
+
if len(self.schema) != len(cols):
|
|
148
|
+
raise ValueError("size of the schema is not identical to size of the columns")
|
|
149
|
+
yield list(c.null_str if col is None else c.dumper(col) for c, col in zip(self.schema, cols))
|
|
150
|
+
if isinstance(cols, Mapping):
|
|
151
|
+
yield list(c.null_str if cols.get(c.name) is None else c.dumper(cols.get(c.name)) for c in self.schema)
|
|
152
|
+
|
|
153
|
+
@overload
|
|
154
|
+
def load_file(
|
|
155
|
+
self,
|
|
156
|
+
file_path: str | os.PathLike[str],
|
|
157
|
+
has_header: bool,
|
|
158
|
+
ret_dict: False = False,
|
|
159
|
+
**kwargs,
|
|
160
|
+
) -> Generator[list[Any], None, None]:
|
|
161
|
+
...
|
|
162
|
+
|
|
163
|
+
@overload
|
|
164
|
+
def load_file(
|
|
165
|
+
self,
|
|
166
|
+
file_path: str | os.PathLike[str],
|
|
167
|
+
ret_dict: False = False,
|
|
168
|
+
**kwargs,
|
|
169
|
+
) -> Generator[list[Any], None, None]:
|
|
170
|
+
...
|
|
171
|
+
|
|
172
|
+
@overload
|
|
173
|
+
def load_file(
|
|
174
|
+
self,
|
|
175
|
+
file_path: str | os.PathLike[str],
|
|
176
|
+
has_header: bool,
|
|
177
|
+
ret_dict: True = True,
|
|
178
|
+
**kwargs,
|
|
179
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
180
|
+
...
|
|
181
|
+
|
|
182
|
+
@overload
|
|
183
|
+
def load_file(
|
|
184
|
+
self,
|
|
185
|
+
file_path: str | os.PathLike[str],
|
|
186
|
+
ret_dict: True = True,
|
|
187
|
+
**kwargs,
|
|
188
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
189
|
+
...
|
|
190
|
+
|
|
191
|
+
def load_file(
|
|
192
|
+
self,
|
|
193
|
+
file_path: str | os.PathLike[str],
|
|
194
|
+
has_header: bool = True,
|
|
195
|
+
ret_dict: bool = False,
|
|
196
|
+
**kwargs,
|
|
197
|
+
) -> Generator[list[Any] | dict[str, Any], None, None]:
|
|
198
|
+
"""
|
|
199
|
+
Loads CSV data from a file, splitting by row delimiter and using the ``schema`` for parsing.
|
|
200
|
+
|
|
201
|
+
:param file_path: The path to the CSV file.
|
|
202
|
+
:param has_header: Whether the first line is a header.
|
|
203
|
+
:param ret_dict: Whether to return rows as dictionaries (``True``) or lists (``False``).
|
|
204
|
+
:param kwargs: Additional keyword arguments for file opening.
|
|
205
|
+
:return: A generator yielding each row as a list or dictionary.
|
|
206
|
+
"""
|
|
207
|
+
with open(file_path, mode="r", **kwargs) as fh:
|
|
208
|
+
reader = csv.reader(fh,
|
|
209
|
+
delimiter=self.column_delimiter,
|
|
210
|
+
lineterminator=self.line_terminator,
|
|
211
|
+
quotechar=self.quote_char,
|
|
212
|
+
strict=self.strict)
|
|
213
|
+
yield from self.load_lines(reader, has_header, ret_dict)
|
|
214
|
+
|
|
215
|
+
def dump_file(
|
|
216
|
+
self,
|
|
217
|
+
data: Iterable[Sequence[Any] | Mapping[str, Any]],
|
|
218
|
+
file_path: str | os.PathLike[str],
|
|
219
|
+
has_header: bool = True,
|
|
220
|
+
**kwargs,
|
|
221
|
+
) -> None:
|
|
222
|
+
"""
|
|
223
|
+
Dumps data to a CSV file according to the ``schema``, optionally including a header row.
|
|
224
|
+
|
|
225
|
+
:param data: An iterable of rows, each as a sequence or mapping.
|
|
226
|
+
:param file_path: The path to the output CSV file.
|
|
227
|
+
:param has_header: Whether to include a header row.
|
|
228
|
+
:param kwargs: Additional keyword arguments for file opening.
|
|
229
|
+
"""
|
|
230
|
+
with open(file_path, mode="w", **kwargs) as fh:
|
|
231
|
+
writer = csv.writer(fh,
|
|
232
|
+
delimiter=self.column_delimiter,
|
|
233
|
+
lineterminator=self.line_terminator,
|
|
234
|
+
quotechar=self.quote_char,
|
|
235
|
+
strict=self.strict)
|
|
236
|
+
for line in self.dump_lines(data, has_header):
|
|
237
|
+
writer.writerow(line)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Lowercase aliases of the two classes; both names are also listed in ``__all__``.
column = CSVColumn
view = CSVView
|