confarg 0.0.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
confarg/_files.py ADDED
@@ -0,0 +1,426 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+
5
+ """Config file loading for confarg."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import json
11
+ import tomllib
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from confarg._errors import ConfargError, InvalidConfigFileError
16
+
17
# Reserved mapping key: its value (a path string, or a dict with a "path"
# entry plus loader options) names another file to load in its place.
INCLUDE_KEY = "__include__"
18
+
19
+
20
+ def _load_toml(path: Path) -> dict[str, Any]:
21
+ """Load and parse a TOML config file.
22
+
23
+ Args:
24
+ path: Path to the TOML file.
25
+
26
+ Returns:
27
+ A dict of the parsed TOML contents.
28
+
29
+ Raises:
30
+ InvalidConfigFileError: If the file is not found or contains invalid TOML.
31
+ """
32
+ try:
33
+ with Path(path).open("rb") as f:
34
+ return tomllib.load(f)
35
+ except FileNotFoundError:
36
+ raise InvalidConfigFileError.not_found(path) from None
37
+ except tomllib.TOMLDecodeError as e:
38
+ raise InvalidConfigFileError.malformed("TOML", path, e) from e
39
+
40
+
41
def _load_yaml(path: Path) -> dict[str, Any]:
    """Read a YAML document from disk and return its top-level mapping.

    Args:
        path: Location of the YAML file.

    Returns:
        The parsed mapping. When the document's root is anything other than
        a mapping (a list, a scalar, or an empty document) an empty dict is
        returned instead.

    Raises:
        InvalidConfigFileError: If PyYAML cannot be imported, the file does
            not exist, or the file is not valid YAML.
    """
    # PyYAML is imported lazily so the module loads without it installed.
    try:
        import yaml
    except ImportError:
        raise InvalidConfigFileError.missing_library("PyYAML", "pyyaml", "YAML support") from None

    try:
        with Path(path).open(encoding="utf-8") as stream:
            parsed = yaml.safe_load(stream)
    except FileNotFoundError:
        raise InvalidConfigFileError.not_found(path) from None
    except yaml.YAMLError as exc:
        raise InvalidConfigFileError.malformed("YAML", path, exc) from exc

    if isinstance(parsed, dict):
        return parsed
    # Non-mapping roots are deliberately collapsed to an empty config.
    return {}
67
+
68
+
69
def _load_json(path: Path) -> dict[str, Any]:
    """Read a JSON document from disk and return its top-level object.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed JSON object as a dict.

    Raises:
        InvalidConfigFileError: If the file does not exist, is not valid
            JSON, or its top-level value is not a JSON object.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
        parsed = json.loads(text)
    except FileNotFoundError:
        raise InvalidConfigFileError.not_found(path) from None
    except json.JSONDecodeError as exc:
        raise InvalidConfigFileError.malformed("JSON", path, exc) from exc

    if isinstance(parsed, dict):
        return parsed
    # Lists and scalars are valid JSON but cannot serve as a root config.
    raise InvalidConfigFileError(f"JSON config must be an object, got {type(parsed).__name__}: {path}")
92
+
93
+
94
# Root-config loaders keyed by lowercase file suffix; each is annotated to
# return a dict.
_LOADERS = {
    ".toml": _load_toml,
    ".yaml": _load_yaml,
    ".yml": _load_yaml,
    ".json": _load_json,
}
95
+
96
+
97
def _load_yaml_item(path: Path) -> Any:
    """Parse a YAML file and return its root value unchanged.

    Unlike _load_yaml(), the root is not required to be a mapping: a list
    or scalar root is returned as-is.

    Args:
        path: Location of the YAML file.

    Returns:
        Whatever yaml.safe_load() produced for the document.

    Raises:
        InvalidConfigFileError: If PyYAML cannot be imported, the file does
            not exist, or the file is not valid YAML.
    """
    # PyYAML is imported lazily so the module loads without it installed.
    try:
        import yaml
    except ImportError:
        raise InvalidConfigFileError.missing_library("PyYAML", "pyyaml", "YAML support") from None

    try:
        with Path(path).open(encoding="utf-8") as stream:
            parsed = yaml.safe_load(stream)
    except FileNotFoundError:
        raise InvalidConfigFileError.not_found(path) from None
    except yaml.YAMLError as exc:
        raise InvalidConfigFileError.malformed("YAML", path, exc) from exc
    return parsed
110
+
111
+
112
def _load_json_item(path: Path) -> Any:
    """Parse a JSON file and return its root value unchanged.

    Unlike _load_json(), the root is not required to be an object: an
    array or scalar root is returned as-is.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded top-level JSON value.

    Raises:
        InvalidConfigFileError: If the file does not exist or is not valid JSON.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
        return json.loads(text)
    except FileNotFoundError:
        raise InvalidConfigFileError.not_found(path) from None
    except json.JSONDecodeError as exc:
        raise InvalidConfigFileError.malformed("JSON", path, exc) from exc
121
+
122
+
123
def _load_csv(path: Path, *, orient: str = "rows", delimiter: str = ",", header: bool = True) -> Any:
    """Load a CSV/TSV file, returning a Python structure determined by orient and header.

    orient='rows' (default):
        header=True:  single-column -> list[str]; multi-column -> list[dict[str, str]].
        header=False: single-column -> list[str]; multi-column -> list[list[str]].
    orient='columns':
        header=True:  dict[str, list[str]] keyed by column names from the header row.
        header=False: dict[str, list[str]] keyed by positional index ("0", "1", ...).
    orient='raw':
        list[list[str]] - every row returned as-is; header option is ignored.

    Ragged and blank rows:
        * Blank lines are skipped for orient 'rows'/'columns' regardless of
          ``header`` (csv.DictReader already skips them when header=True);
          orient='raw' keeps them as empty lists.
        * header=False, orient='columns': the widest row decides how many
          positional columns exist, so a row longer than the first one no
          longer fails; shorter rows simply contribute fewer values.
        * header=True, orient='columns': a row with more fields than the
          header has no column name for the surplus and is rejected; a
          shorter row contributes None for its missing columns
          (csv.DictReader's default fill value).

    Args:
        path: Location of the CSV/TSV file.
        orient: One of 'rows', 'columns' or 'raw'.
        delimiter: Field separator handed to the csv module.
        header: Whether the first row holds column names.

    Returns:
        A list or dict as described above.

    Raises:
        ConfargError: If ``orient`` is not one of the accepted values.
        InvalidConfigFileError: If the file is not found, the csv module
            reports a parse error, or (header=True, orient='columns') a row
            has more fields than the header.
    """
    if orient not in ("rows", "columns", "raw"):
        raise ConfargError(f"Invalid CSV orient {orient!r}. Must be 'rows', 'columns', or 'raw'.")
    try:
        # newline="" lets the csv module handle line endings itself;
        # utf-8-sig transparently drops a leading byte-order mark.
        with Path(path).open(newline="", encoding="utf-8-sig") as f:
            if orient == "raw":
                return [list(row) for row in csv.reader(f, delimiter=delimiter)]

            if not header:
                # csv.reader yields [] for blank lines; drop them so a stray
                # empty line cannot flip a single-column file to list-of-lists.
                all_rows = [list(row) for row in csv.reader(f, delimiter=delimiter) if row]
                if orient == "rows":
                    if all(len(row) == 1 for row in all_rows):
                        return [row[0] for row in all_rows]
                    return all_rows
                # orient == "columns": size by the widest row so every
                # positional key exists before values are appended.
                ncols = max((len(row) for row in all_rows), default=0)
                col_data: dict[str, list[str]] = {str(i): [] for i in range(ncols)}
                for row in all_rows:
                    for i, v in enumerate(row):
                        col_data[str(i)].append(v)
                return col_data

            # header=True, orient rows or columns
            reader = csv.DictReader(f, delimiter=delimiter)
            fieldnames = list(reader.fieldnames or [])
            if orient == "columns":
                result: dict[str, list[str]] = {name: [] for name in fieldnames}
                for row in reader:
                    # DictReader files surplus values under the key None.
                    if None in row:
                        raise InvalidConfigFileError(
                            f"CSV row ending at line {reader.line_num} has more fields than the header: {path}"
                        )
                    for k, v in row.items():
                        result[k].append(v)
                return result

            # orient == "rows"
            rows = [dict(row) for row in reader]
            if len(fieldnames) == 1:
                return [row[fieldnames[0]] for row in rows]
            return rows
    except FileNotFoundError:
        raise InvalidConfigFileError.not_found(path) from None
    except csv.Error as e:
        # Report parse failures the same way the other format loaders do.
        raise InvalidConfigFileError.malformed("CSV", path, e) from e
173
+
174
+
175
# Loaders used when a file's root may be any value (append mode and
# includes), keyed by lowercase file suffix. Each is called with the path
# as its only positional argument.
_ITEM_LOADERS: dict[str, Any] = {
    # TOML root is always a dict, so the root-config loader is reused.
    ".toml": _load_toml,
    ".yaml": _load_yaml_item,
    ".yml": _load_yaml_item,
    ".json": _load_json_item,
    # Delimited files use row orientation; only the separator differs.
    ".csv": lambda path: _load_csv(path, orient="rows"),
    ".tsv": lambda path: _load_csv(path, orient="rows", delimiter="\t"),
}
183
+
184
+
185
def _load_file_item(path: Path) -> Any:
    """Load a file for append mode and return its root value as-is.

    In contrast to _load_file(), the root does not have to be a dict: a
    YAML/JSON file whose root is a list is returned as that list, so the
    caller can treat it as the single element to append. No include
    resolution is performed here.

    Args:
        path: Path to the config file; its suffix selects the loader.

    Returns:
        The raw top-level value: a dict, list, or scalar.

    Raises:
        InvalidConfigFileError: If the suffix has no registered loader, the
            file is not found, or its contents are invalid.
    """
    path = Path(path)
    suffix = path.suffix.lower()
    if suffix not in _ITEM_LOADERS:
        raise InvalidConfigFileError.unsupported_format(suffix)
    return _ITEM_LOADERS[suffix](path)
206
+
207
+
208
def _parse_include_val(val: Any) -> tuple[str, dict[str, Any]]:
    """Split an INCLUDE_KEY value into the target path and loader options.

    Two shapes are accepted: a bare path string (no options), or a dict
    carrying a string 'path' entry, whose remaining entries become the
    options (e.g. 'orient' for CSV).

    Args:
        val: The value found under INCLUDE_KEY.

    Returns:
        A ``(path_str, options)`` tuple; options is empty for the string form.

    Raises:
        ConfargError: If ``val`` matches neither accepted shape.
    """
    if isinstance(val, str):
        return val, {}
    if not isinstance(val, dict) or not isinstance(val.get("path"), str):
        raise ConfargError(f"{INCLUDE_KEY} must be a path string or a dict with a string 'path' key, got {val!r}")
    # Copy first so the caller's mapping is left untouched.
    extras = dict(val)
    target = extras.pop("path")
    return target, extras
220
+
221
+
222
+ def _resolve_node(data: Any, base_dir: Path, seen: frozenset[Path]) -> Any:
223
+ """Dispatch include resolution by node type."""
224
+ if isinstance(data, dict):
225
+ return _resolve_dict(data, base_dir, seen)
226
+ if isinstance(data, list):
227
+ return _resolve_list(data, base_dir, seen)
228
+ return data
229
+
230
+
231
def _resolve_dict(data: dict[str, Any], base_dir: Path, seen: frozenset[Path]) -> Any:
    """Resolve INCLUDE_KEY in a dict node, then recurse into its values.

    A pure include (no sibling keys) is replaced by the included file's
    content, which may be of any type. An include with sibling keys requires
    the included file to be a dict; the siblings are deep-merged into it.

    The include is detected by key presence, matching _resolve_list(): an
    INCLUDE_KEY whose value is null (or any other unsupported value) is
    rejected by _parse_include_val() instead of being silently kept as an
    ordinary key.

    Args:
        data: The mapping to resolve.
        base_dir: Directory against which a relative include path is resolved.
        seen: Resolved paths of the files on the current include chain.

    Returns:
        The resolved dict, or the included content for a pure include.

    Raises:
        ConfargError: If the include value is malformed, the include is
            circular, or sibling keys accompany a non-dict include.
    """
    # Imported at call time, as in the rest of this module's resolvers.
    from confarg._merge import _deep_merge

    if INCLUDE_KEY in data:
        path_str, options = _parse_include_val(data[INCLUDE_KEY])
        inc_path = (base_dir / path_str).resolve()
        if inc_path in seen:
            raise ConfargError(f"Circular include detected: {inc_path}")
        # The included file is loaded with itself added to the chain.
        included = _load_any(inc_path, seen | {inc_path}, options=options)
        siblings = {k: v for k, v in data.items() if k != INCLUDE_KEY}
        if not siblings:
            return included
        if not isinstance(included, dict):
            raise ConfargError(
                f"{INCLUDE_KEY} produced {type(included).__name__} but sibling keys are"
                f" also present; can only merge sibling keys into a dict include"
            )
        result: dict[str, Any] = _deep_merge(included, siblings)
    else:
        # Shallow copy so the caller's mapping is not modified below.
        result = dict(data)

    # Only values are reassigned, so iterating items() here is safe.
    for k, v in result.items():
        result[k] = _resolve_node(v, base_dir, seen)

    return result
262
+
263
+
264
def _resolve_list(data: list[Any], base_dir: Path, seen: frozenset[Path]) -> list[Any]:
    """Resolve INCLUDE_KEY entries among the items of a list.

    An item that is a dict containing only INCLUDE_KEY is replaced by the
    included file's content; when that content is a list, its elements are
    spliced into the parent list rather than nested. An include item that
    also has sibling keys follows the dict rule: the included file must be
    a dict and the siblings are deep-merged into it. Every other item is
    resolved recursively.

    Args:
        data: The list to resolve.
        base_dir: Directory against which relative include paths are resolved.
        seen: Resolved paths of the files on the current include chain.

    Returns:
        A new list with all includes resolved.

    Raises:
        ConfargError: If an include value is malformed, an include is
            circular, or sibling keys accompany a non-dict include.
    """
    from confarg._merge import _deep_merge

    resolved: list[Any] = []
    for entry in data:
        if not (isinstance(entry, dict) and INCLUDE_KEY in entry):
            resolved.append(_resolve_node(entry, base_dir, seen))
            continue

        target, options = _parse_include_val(entry[INCLUDE_KEY])
        inc_path = (base_dir / target).resolve()
        if inc_path in seen:
            raise ConfargError(f"Circular include detected: {inc_path}")
        included = _load_any(inc_path, seen | {inc_path}, options=options)
        extras = {key: value for key, value in entry.items() if key != INCLUDE_KEY}

        if extras:
            if not isinstance(included, dict):
                raise ConfargError(
                    f"{INCLUDE_KEY} produced {type(included).__name__} but sibling keys are"
                    f" also present; can only merge sibling keys into a dict include"
                )
            combined = _deep_merge(included, extras)
            resolved.append(_resolve_node(combined, base_dir, seen))
        elif isinstance(included, list):
            # Pure include of a list: flatten into the surrounding list.
            resolved.extend(included)
        else:
            resolved.append(included)
    return resolved
299
+
300
+
301
def _load_any(path: Path, seen: frozenset[Path], *, options: dict[str, Any] | None = None) -> Any:
    """Load an included file of any root type and resolve its own includes.

    CSV/TSV files are read with _load_csv(), honouring the 'orient' and
    'header' entries of ``options``; every other suffix goes through
    _ITEM_LOADERS and ignores ``options``.

    Args:
        path: Path of the file to load.
        seen: Resolved paths of the files on the current include chain.
        options: Per-include loader options, or None for none.

    Returns:
        The loaded value (dict, list, or scalar) with includes resolved
        relative to the file's own directory.

    Raises:
        ConfargError: If the 'header' option is not a boolean.
        InvalidConfigFileError: If the suffix has no registered loader.
    """
    suffix = path.suffix.lower()
    if suffix in (".csv", ".tsv"):
        settings = options or {}
        header = settings.get("header", True)
        if not isinstance(header, bool):
            raise ConfargError(f"'header' option must be a boolean (true/false), got {header!r}")
        loaded = _load_csv(
            path,
            orient=settings.get("orient", "rows"),
            delimiter="\t" if suffix == ".tsv" else ",",
            header=header,
        )
    elif suffix in _ITEM_LOADERS:
        loaded = _ITEM_LOADERS[suffix](path)
    else:
        raise InvalidConfigFileError.unsupported_format(suffix)
    # Nested includes are relative to the included file, not its includer.
    return _resolve_node(loaded, path.parent, seen)
325
+
326
+
327
def _load_raw(path: Path, seen: frozenset[Path]) -> dict[str, Any]:
    """Load a root config file and resolve its includes; the result must be a dict.

    Args:
        path: Path of the root config file; its suffix selects the loader.
        seen: Resolved paths of the files on the current include chain.

    Returns:
        The fully resolved config dict.

    Raises:
        InvalidConfigFileError: If the suffix has no registered loader.
        ConfargError: If include resolution yields something other than a dict.
    """
    suffix = path.suffix.lower()
    if suffix not in _LOADERS:
        raise InvalidConfigFileError.unsupported_format(suffix)
    resolved = _resolve_node(_LOADERS[suffix](path), path.parent, seen)
    if isinstance(resolved, dict):
        return resolved
    # A pure top-level include may have pulled in a list or scalar.
    raise ConfargError(f"Top-level config file must resolve to a dict, got {type(resolved).__name__}: {path}")
337
+
338
+
339
def _load_file(path: Path) -> dict[str, Any]:
    """Load a root config file, choosing the parser from its extension.

    Handles .toml, .yaml, .yml, and .json files. INCLUDE_KEY entries are
    resolved recursively once the file has been parsed.

    Args:
        path: Path to the config file.

    Returns:
        The parsed contents as a dict with every include resolved.

    Raises:
        InvalidConfigFileError: If the file format is unsupported, the file
            is not found, or the file contents are invalid.
        ConfargError: If an include value is neither a path string nor a
            dict with a string 'path' key, or a circular include is detected.
    """
    root = Path(path)
    # Seed the include chain with the root file so it cannot include itself.
    chain = frozenset({root.resolve()})
    return _load_raw(root, chain)
359
+
360
+
361
def _dump_toml(data: dict[str, Any], path: Path) -> None:
    """Serialize ``data`` as TOML into the file at ``path``.

    Args:
        data: The dict to write.
        path: Destination file.

    Raises:
        InvalidConfigFileError: If the tomli_w package cannot be imported.
    """
    # tomli_w is imported lazily so it is only needed when writing TOML.
    try:
        import tomli_w
    except ImportError:
        raise InvalidConfigFileError.missing_library("tomli_w", "tomli_w", "writing TOML files") from None

    target = Path(path)
    with target.open("wb") as sink:
        tomli_w.dump(data, sink)
377
+
378
+
379
def _dump_yaml(data: dict[str, Any], path: Path) -> None:
    """Serialize ``data`` as YAML into the file at ``path``.

    Args:
        data: The dict to write.
        path: Destination file.

    Raises:
        InvalidConfigFileError: If PyYAML cannot be imported.
    """
    # PyYAML is imported lazily so it is only needed when writing YAML.
    try:
        import yaml
    except ImportError:
        raise InvalidConfigFileError.missing_library("PyYAML", "pyyaml", "writing YAML files") from None

    target = Path(path)
    with target.open("w", encoding="utf-8") as sink:
        yaml.dump(data, sink, Dumper=yaml.SafeDumper)
395
+
396
+
397
def _dump_json(data: dict[str, Any], path: Path) -> None:
    """Serialize ``data`` as JSON, indented by two spaces, into ``path``.

    Args:
        data: The dict to write.
        path: Destination file.
    """
    target = Path(path)
    with target.open("w", encoding="utf-8") as sink:
        json.dump(data, sink, indent=2)
406
+
407
+
408
# Writers keyed by lowercase file suffix; each takes (data, path).
_DUMPERS = {
    ".toml": _dump_toml,
    ".yaml": _dump_yaml,
    ".yml": _dump_yaml,
    ".json": _dump_json,
}
409
+
410
+
411
def _dump_file(data: dict[str, Any], path: Path) -> None:
    """Write ``data`` to a config file, choosing the writer from the extension.

    Args:
        data: The dict to write.
        path: Destination file; its suffix selects the output format.

    Raises:
        InvalidConfigFileError: If the suffix has no registered writer or
            the library needed for that format is not installed.
    """
    path = Path(path)
    suffix = path.suffix.lower()
    if suffix not in _DUMPERS:
        raise InvalidConfigFileError.unsupported_format(suffix)
    _DUMPERS[suffix](data, path)