etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. etlplus/README.md +1 -1
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/__init__.py +10 -0
  4. etlplus/api/config.py +36 -20
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/enums.py +51 -0
  7. etlplus/api/pagination/client.py +1 -1
  8. etlplus/api/rate_limiting/config.py +13 -1
  9. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  10. etlplus/api/request_manager.py +11 -6
  11. etlplus/api/transport.py +14 -2
  12. etlplus/api/types.py +7 -6
  13. etlplus/{run_helpers.py → api/utils.py} +205 -153
  14. etlplus/cli/handlers.py +17 -7
  15. etlplus/config/jobs.py +14 -4
  16. etlplus/dag.py +103 -0
  17. etlplus/enums.py +0 -32
  18. etlplus/file/cfg.py +2 -2
  19. etlplus/file/conf.py +2 -2
  20. etlplus/file/dta.py +77 -0
  21. etlplus/file/enums.py +10 -4
  22. etlplus/file/hbs.py +78 -0
  23. etlplus/file/hdf5.py +78 -0
  24. etlplus/file/jinja2.py +78 -0
  25. etlplus/file/mat.py +78 -0
  26. etlplus/file/mustache.py +78 -0
  27. etlplus/file/nc.py +78 -0
  28. etlplus/file/numbers.py +75 -0
  29. etlplus/file/ods.py +79 -0
  30. etlplus/file/properties.py +13 -13
  31. etlplus/file/rda.py +78 -0
  32. etlplus/file/rds.py +78 -0
  33. etlplus/file/sas7bdat.py +78 -0
  34. etlplus/file/sav.py +77 -0
  35. etlplus/file/sylk.py +77 -0
  36. etlplus/file/toml.py +1 -1
  37. etlplus/file/vm.py +78 -0
  38. etlplus/file/wks.py +77 -0
  39. etlplus/file/xlsm.py +79 -0
  40. etlplus/file/xpt.py +78 -0
  41. etlplus/file/zsav.py +77 -0
  42. etlplus/{validation → ops}/README.md +2 -2
  43. etlplus/ops/__init__.py +61 -0
  44. etlplus/{extract.py → ops/extract.py} +78 -94
  45. etlplus/{load.py → ops/load.py} +73 -93
  46. etlplus/{run.py → ops/run.py} +140 -110
  47. etlplus/{transform.py → ops/transform.py} +75 -68
  48. etlplus/{validation → ops}/utils.py +80 -15
  49. etlplus/{validate.py → ops/validate.py} +19 -9
  50. etlplus/types.py +2 -2
  51. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
  52. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/RECORD +56 -35
  53. etlplus/validation/__init__.py +0 -44
  54. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
  55. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
  56. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
  57. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0
etlplus/{transform.py → ops/transform.py} CHANGED
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.transform` module.
+ :mod:`etlplus.ops.transform` module.

  Helpers to filter, map/rename, select, sort, aggregate, and otherwise
  transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::

  Using enums for keys and functions::

- from .enums import PipelineStep, OperatorName, AggregateName
+ from etlplus.enums import PipelineStep, OperatorName, AggregateName
  ops = {
  PipelineStep.FILTER: {
  'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
  from typing import Any
  from typing import cast

- from .enums import AggregateName
- from .enums import OperatorName
- from .enums import PipelineStep
+ from ..enums import AggregateName
+ from ..enums import OperatorName
+ from ..enums import PipelineStep
+ from ..types import AggregateFunc
+ from ..types import AggregateSpec
+ from ..types import FieldName
+ from ..types import Fields
+ from ..types import FilterSpec
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+ from ..types import MapSpec
+ from ..types import OperatorFunc
+ from ..types import PipelineConfig
+ from ..types import PipelineStepName
+ from ..types import SortKey
+ from ..types import StepApplier
+ from ..types import StepOrSteps
+ from ..types import StepSpec
+ from ..types import StrPath
+ from ..utils import to_number
  from .load import load_data
- from .types import AggregateFunc
- from .types import AggregateSpec
- from .types import FieldName
- from .types import Fields
- from .types import FilterSpec
- from .types import JSONData
- from .types import JSONDict
- from .types import JSONList
- from .types import MapSpec
- from .types import OperatorFunc
- from .types import PipelineConfig
- from .types import PipelineStepName
- from .types import SortKey
- from .types import StepApplier
- from .types import StepOrSteps
- from .types import StepSpec
- from .types import StrPath
- from .utils import to_number

  # SECTION: EXPORTS ========================================================== #

@@ -730,15 +730,16 @@ def _is_plain_fields_list(obj: Any) -> bool:


  _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
+ 'aggregate',
  'filter',
  'map',
  'select',
  'sort',
- 'aggregate',
  )


  _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
+ 'aggregate': _apply_aggregate_step,
  'filter': _apply_filter_step,
  'map': _apply_map_step,
  'select': _apply_select_step,
@@ -746,7 +747,54 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
  }


- # SECTION: EXPORTS ========================================================== #
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ # -- Helpers -- #
+
+
+ def apply_aggregate(
+ records: JSONList,
+ operation: AggregateSpec,
+ ) -> JSONDict:
+ """
+ Aggregate a numeric field or count presence.
+
+ Parameters
+ ----------
+ records : JSONList
+ Records to aggregate.
+ operation : AggregateSpec
+ Dict with keys ``field`` and ``func``. ``func`` is one of
+ ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+ A callable may also be supplied for ``func``. Optionally, set
+ ``alias`` to control the output key name.
+
+ Returns
+ -------
+ JSONDict
+ A single-row result like ``{"sum_age": 42}``.
+
+ Notes
+ -----
+ Numeric operations ignore non-numeric values but count their presence
+ for ``'count'``.
+ """
+ field = operation.get('field')
+ func = operation.get('func')
+ alias = operation.get('alias')
+
+ if not field or func is None:
+ return {'error': 'Invalid aggregation operation'}
+
+ try:
+ aggregator = _resolve_aggregator(func)
+ except TypeError:
+ return {'error': f'Unknown aggregation function: {func}'}
+
+ nums, present = _collect_numeric_and_presence(records, field)
+ key_name = _derive_agg_key(func, field, alias)
+ return {key_name: aggregator(nums, present)}


  def apply_filter(
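For orientation, a usage sketch of `apply_aggregate` as relocated into `etlplus/ops/transform.py`. Expected outputs are inferred from the docstring above (the derived key follows its `{"sum_age": 42}` example) and are not verified against the released wheel:

```python
# Usage sketch based on the apply_aggregate docstring shown in the diff above.
from etlplus.ops.transform import apply_aggregate

records = [{'age': 30}, {'age': 12}, {'age': 'n/a'}]

# Numeric functions ignore non-numeric values such as 'n/a'.
apply_aggregate(records, {'field': 'age', 'func': 'sum'})
# expected: {'sum_age': 42}

# 'count' counts presence, including the non-numeric value.
apply_aggregate(records, {'field': 'age', 'func': 'count'})
# expected count: 3

# 'alias' overrides the derived output key.
apply_aggregate(records, {'field': 'age', 'func': 'avg', 'alias': 'mean_age'})
# expected: {'mean_age': 21.0}
```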
@@ -894,48 +942,7 @@ def apply_sort(
  )


- def apply_aggregate(
- records: JSONList,
- operation: AggregateSpec,
- ) -> JSONDict:
- """
- Aggregate a numeric field or count presence.
-
- Parameters
- ----------
- records : JSONList
- Records to aggregate.
- operation : AggregateSpec
- Dict with keys ``field`` and ``func``. ``func`` is one of
- ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
- A callable may also be supplied for ``func``. Optionally, set
- ``alias`` to control the output key name.
-
- Returns
- -------
- JSONDict
- A single-row result like ``{"sum_age": 42}``.
-
- Notes
- -----
- Numeric operations ignore non-numeric values but count their presence
- for ``'count'``.
- """
- field = operation.get('field')
- func = operation.get('func')
- alias = operation.get('alias')
-
- if not field or func is None:
- return {'error': 'Invalid aggregation operation'}
-
- try:
- aggregator = _resolve_aggregator(func)
- except TypeError:
- return {'error': f'Unknown aggregation function: {func}'}
-
- nums, present = _collect_numeric_and_presence(records, field)
- key_name = _derive_agg_key(func, field, alias)
- return {key_name: aggregator(nums, present)}
+ # -- Orchestration -- #


  def transform(
@@ -982,7 +989,7 @@ def transform(

  Using enums for keys and functions::

- from .enums import PipelineStep, OperatorName, AggregateName
+ from etlplus.enums import PipelineStep, OperatorName, AggregateName
  ops = {
  PipelineStep.FILTER: {
  'field': 'age', 'op': OperatorName.GTE, 'value': 18
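Combined with the README change later in this diff (`from etlplus.ops import extract, transform, validate, load`), the docstring's enum-keyed pipeline can be sketched as below. The sample records, the `PipelineStep.SELECT` member, and the `transform(records, ops)` call shape are assumptions, not taken verbatim from the package:

```python
# Sketch of an enum-keyed pipeline per the docstring above; the records,
# PipelineStep.SELECT, and the transform(records, ops) call shape are assumed.
from etlplus.enums import OperatorName, PipelineStep
from etlplus.ops import transform

records = [
    {'name': 'Ada', 'email': 'ada@example.com', 'age': 36},
    {'name': 'Lin', 'email': 'lin@example.com', 'age': 12},
]
ops = {
    PipelineStep.FILTER: {'field': 'age', 'op': OperatorName.GTE, 'value': 18},
    PipelineStep.SELECT: ['name', 'email'],
}
transform(records, ops)
# expected: [{'name': 'Ada', 'email': 'ada@example.com'}]
```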
etlplus/{validation → ops}/utils.py CHANGED
@@ -1,7 +1,7 @@
  """
- :mod:`etlplus.validation.utils` module.
+ :mod:`etlplus.ops.utils` module.

- Utility helpers for conditional validation orchestration.
+ Utility helpers for conditional data ops orchestration.

  The helpers defined here embrace a "high cohesion, low coupling" design by
  isolating normalization, configuration, and logging responsibilities. The
@@ -13,11 +13,14 @@ offloading ancillary concerns to composable helpers.
  from __future__ import annotations

  from collections.abc import Callable
+ from collections.abc import Mapping
  from dataclasses import dataclass
+ from types import MappingProxyType
  from typing import Any
  from typing import Literal
  from typing import Self
  from typing import TypedDict
+ from typing import cast

  from ..types import StrAnyMap
  from ..utils import normalized_str
@@ -47,6 +50,30 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
  type PrintFn = Callable[[Any], None]


+ # SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+ _PHASE_CHOICES = MappingProxyType(
+ {
+ 'before_transform': 'before_transform',
+ 'after_transform': 'after_transform',
+ },
+ )
+ _SEVERITY_CHOICES = MappingProxyType(
+ {
+ 'warn': 'warn',
+ 'error': 'error',
+ },
+ )
+ _WINDOW_CHOICES = MappingProxyType(
+ {
+ 'before_transform': 'before_transform',
+ 'after_transform': 'after_transform',
+ 'both': 'both',
+ },
+ )
+
+
  # SECTION: DATA CLASSES ===================================================== #

@@ -291,11 +318,14 @@ def _normalize_phase(
  Normalized validation phase. Defaults to ``"before_transform"`` when
  unspecified.
  """
- match normalized_str(value):
- case 'after_transform':
- return 'after_transform'
- case _:
- return 'before_transform'
+ return cast(
+ ValidationPhase,
+ _normalize_choice(
+ value,
+ mapping=_PHASE_CHOICES,
+ default='before_transform',
+ ),
+ )


  def _normalize_severity(
@@ -314,7 +344,14 @@
  ValidationSeverity
  Normalized severity. Defaults to ``"error"`` when unspecified.
  """
- return 'warn' if normalized_str(value) == 'warn' else 'error'
+ return cast(
+ ValidationSeverity,
+ _normalize_choice(
+ value,
+ mapping=_SEVERITY_CHOICES,
+ default='error',
+ ),
+ )


  def _normalize_window(
@@ -333,13 +370,41 @@
  ValidationWindow
  Normalized validation window. Defaults to ``"both"`` when unspecified.
  """
- match normalized_str(value):
- case 'before_transform':
- return 'before_transform'
- case 'after_transform':
- return 'after_transform'
- case _:
- return 'both'
+ return cast(
+ ValidationWindow,
+ _normalize_choice(
+ value,
+ mapping=_WINDOW_CHOICES,
+ default='both',
+ ),
+ )
+
+
+ def _normalize_choice(
+ value: str | None,
+ *,
+ mapping: Mapping[str, str],
+ default: str,
+ ) -> str:
+ """
+ Normalize a text value against a mapping with a default fallback.
+
+ Parameters
+ ----------
+ value : str | None
+ Input text to normalize.
+ mapping : Mapping[str, str]
+ Mapping of accepted values to normalized outputs.
+ default : str
+ Default to return when input is missing or unrecognized.
+
+ Returns
+ -------
+ str
+ Normalized value.
+ """
+ normalized = normalized_str(value)
+ return mapping.get(normalized, default)


  def _rule_name(
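The three `_normalize_*` wrappers above now delegate to one table-driven helper. A self-contained sketch of the pattern, assuming `normalized_str` lowercases and strips its input (that behavior is not shown in this diff):

```python
# Self-contained sketch of the table-driven normalization pattern above.
# normalized_str()'s exact behavior is assumed (lowercase + strip).
from collections.abc import Mapping
from types import MappingProxyType

_SEVERITY_CHOICES = MappingProxyType({'warn': 'warn', 'error': 'error'})

def normalized_str(value: str | None) -> str:
    return (value or '').strip().lower()

def _normalize_choice(value: str | None, *, mapping: Mapping[str, str], default: str) -> str:
    return mapping.get(normalized_str(value), default)

print(_normalize_choice(' WARN ', mapping=_SEVERITY_CHOICES, default='error'))  # 'warn'
print(_normalize_choice(None, mapping=_SEVERITY_CHOICES, default='error'))      # 'error'
print(_normalize_choice('fatal', mapping=_SEVERITY_CHOICES, default='error'))   # 'error'
```

Using `MappingProxyType` keeps the choice tables read-only, so the accepted values cannot drift at runtime.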
etlplus/{validate.py → ops/validate.py} CHANGED
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.validation` module.
+ :mod:`etlplus.ops.validate` module.

  Validate dicts and lists of dicts using simple, schema-like rules.

@@ -34,11 +34,11 @@ from typing import Final
  from typing import Literal
  from typing import TypedDict

+ from ..types import JSONData
+ from ..types import Record
+ from ..types import StrAnyMap
+ from ..types import StrPath
  from .load import load_data
- from .types import JSONData
- from .types import Record
- from .types import StrAnyMap
- from .types import StrPath

  # SECTION: EXPORTS ========================================================== #

@@ -279,11 +279,15 @@ def _type_matches(
  bool
  ``True`` if the value matches the expected type; ``False`` if not.
  """
- py_type = TYPE_MAP.get(expected)
- if py_type:
- return isinstance(value, py_type)
+ if expected == 'number':
+ return _is_number(value)
+ if expected == 'integer':
+ return isinstance(value, int) and not isinstance(value, bool)
+ if expected == 'boolean':
+ return isinstance(value, bool)

- return False
+ py_type = TYPE_MAP.get(expected)
+ return isinstance(value, py_type) if py_type else False


  def _validate_record(
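The behavioral change above: `bool` no longer satisfies `'integer'` (in Python, `bool` subclasses `int`), and `'number'` and `'boolean'` get dedicated checks before the `TYPE_MAP` fallback. A self-contained sketch mirroring that logic (the `TYPE_MAP` entries and `_is_number` semantics here are assumptions, not imports from etlplus):

```python
# Mirror of the new _type_matches branching; TYPE_MAP and _is_number
# are stand-ins for internals not fully shown in this diff.
TYPE_MAP = {'string': str, 'array': list, 'object': dict}

def _is_number(value) -> bool:
    # Assumed semantics: int or float, excluding bool.
    return isinstance(value, (int, float)) and not isinstance(value, bool)

def _type_matches(value, expected: str) -> bool:
    if expected == 'number':
        return _is_number(value)
    if expected == 'integer':
        return isinstance(value, int) and not isinstance(value, bool)
    if expected == 'boolean':
        return isinstance(value, bool)
    py_type = TYPE_MAP.get(expected)
    return isinstance(value, py_type) if py_type else False

print(_type_matches(True, 'integer'))  # False: bool no longer counts as integer
print(_type_matches(3, 'number'))      # True
print(_type_matches(True, 'boolean'))  # True
```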
@@ -330,6 +334,9 @@ def _validate_record(
  # SECTION: FUNCTIONS ======================================================== #


+ # -- Helpers -- #
+
+
  def validate_field(
  value: Any,
  rules: StrAnyMap | FieldRules,
@@ -425,6 +432,9 @@ def validate_field(
  return {'valid': len(errors) == 0, 'errors': errors}


+ # -- Orchestration -- #
+
+
  def validate(
  source: StrPath | JSONData,
  rules: RulesMap | None = None,
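Per the hunks above, `validate_field` returns a `{'valid': ..., 'errors': [...]}` result. A hypothetical call sketch follows; the `'type'` rule key is an assumption suggested by the type-checking helpers, not confirmed API:

```python
# Hypothetical validate_field calls; the 'type' rule key is an assumption.
from etlplus.ops.validate import validate_field

print(validate_field(42, {'type': 'integer'}))
# expected shape: {'valid': True, 'errors': []}

print(validate_field(True, {'type': 'integer'})['valid'])
# expected: False, given the new bool-vs-integer distinction
```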
etlplus/types.py CHANGED
@@ -193,8 +193,8 @@ type AggregateSpec = StrAnyMap

  # -- Pipelines-- #

- # Unified pipeline step spec consumed by :mod:`etlplus.transform`.
- type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec | AggregateSpec
+ # Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
+ type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec

  # Collections of steps

{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: etlplus
- Version: 0.12.10
+ Version: 0.14.3
  Summary: A Swiss Army knife for simple ETL operations
  Home-page: https://github.com/Dagitali/ETLPlus
  Author: ETLPlus Team
@@ -79,8 +79,10 @@ package and command-line interface for data extraction, validation, transformati
  - [Binary Serialization and Interchange](#binary-serialization-and-interchange)
  - [Databases and Embedded Storage](#databases-and-embedded-storage)
  - [Spreadsheets](#spreadsheets)
- - [Data Archives](#data-archives)
+ - [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
  - [Logs and Event Streams](#logs-and-event-streams)
+ - [Data Archives](#data-archives)
+ - [Templates](#templates)
  - [Usage](#usage)
  - [Command Line Interface](#command-line-interface)
  - [Argument Order and Required Options](#argument-order-and-required-options)
@@ -194,7 +196,7 @@ etlplus extract file examples/data/sample.csv \
  [Python API](#python-api):

  ```python
- from etlplus import extract, transform, validate, load
+ from etlplus.ops import extract, transform, validate, load

  data = extract("file", "input.csv")
  ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
@@ -221,93 +223,122 @@ DDL can be rendered from table specs for migrations or schema checks.

  ### Files (`file`)

- File formats are grouped as in `FileFormat`. Support is marked as:
+ Recognized file formats are listed in the tables below. Support for reading to or writing from a recognized file format is marked as:

  - **Y**: implemented (may require optional dependencies)
  - **N**: stubbed or not yet implemented

  #### Stubbed / Placeholder

- | Format | Supported | Description |
- | --- | --- | --- |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
  | `stub` | N | Placeholder format for tests and future connectors. |

  #### Tabular & Delimited Text

- | Format | Supported | Description |
- | --- | --- | --- |
- | `csv` | Y | Comma-Separated Values |
- | `fwf` | N | Fixed-Width Fields |
- | `dat` | N | Generic data file, often delimited or fixed-width |
- | `psv` | N | Pipe-Separated Values |
- | `tab` | N | Often synonymous with TSV |
- | `tsv` | Y | Tab-Separated Values |
- | `txt` | Y | Plain text, often delimited or fixed-width |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `csv` | Y | Y | Comma-Separated Values |
+ | `dat` | N | N | Generic data file, often delimited or fixed-width |
+ | `fwf` | N | N | Fixed-Width Fields |
+ | `psv` | N | N | Pipe-Separated Values |
+ | `tab` | N | N | Often synonymous with TSV |
+ | `tsv` | Y | Y | Tab-Separated Values |
+ | `txt` | Y | Y | Plain text, often delimited or fixed-width |

  #### Semi-Structured Text

- | Format | Supported | Description |
- | --- | --- | --- |
- | `cfg` | N | Config-style key-value pairs |
- | `conf` | N | Config-style key-value pairs |
- | `ini` | N | Config-style key-value pairs |
- | `json` | Y | JavaScript Object Notation |
- | `ndjson` | Y | Newline-Delimited JSON |
- | `properties` | N | Java-style key-value pairs |
- | `toml` | N | Tom's Obvious Minimal Language |
- | `xml` | Y | Extensible Markup Language |
- | `yaml` | Y | YAML Ain't Markup Language |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `cfg` | N | N | Config-style key-value pairs |
+ | `conf` | N | N | Config-style key-value pairs |
+ | `ini` | N | N | Config-style key-value pairs |
+ | `json` | Y | Y | JavaScript Object Notation |
+ | `ndjson` | Y | Y | Newline-Delimited JSON |
+ | `properties` | N | N | Java-style key-value pairs |
+ | `toml` | N | N | Tom's Obvious Minimal Language |
+ | `xml` | Y | Y | Extensible Markup Language |
+ | `yaml` | Y | Y | YAML Ain't Markup Language |

  #### Columnar / Analytics-Friendly

- | Format | Supported | Description |
- | --- | --- | --- |
- | `arrow` | N | Apache Arrow IPC |
- | `feather` | Y | Apache Arrow Feather |
- | `orc` | Y | Optimized Row Columnar; common in Hadoop |
- | `parquet` | Y | Apache Parquet; common in Big Data |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `arrow` | N | N | Apache Arrow IPC |
+ | `feather` | Y | Y | Apache Arrow Feather |
+ | `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
+ | `parquet` | Y | Y | Apache Parquet; common in Big Data |

  #### Binary Serialization and Interchange

- | Format | Supported | Description |
- | --- | --- | --- |
- | `avro` | Y | Apache Avro |
- | `bson` | N | Binary JSON; common with MongoDB exports/dumps |
- | `cbor` | N | Concise Binary Object Representation |
- | `ion` | N | Amazon Ion |
- | `msgpack` | N | MessagePack |
- | `pb` | N | Protocol Buffers (Google Protobuf) |
- | `pbf` | N | Protocolbuffer Binary Format; often for GIS data |
- | `proto` | N | Protocol Buffers schema; often in .pb / .bin |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `avro` | Y | Y | Apache Avro |
+ | `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
+ | `cbor` | N | N | Concise Binary Object Representation |
+ | `ion` | N | N | Amazon Ion |
+ | `msgpack` | N | N | MessagePack |
+ | `pb` | N | N | Protocol Buffers (Google Protobuf) |
+ | `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
+ | `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |

  #### Databases and Embedded Storage

- | Format | Supported | Description |
- | --- | --- | --- |
- | `accdb` | N | Microsoft Access database file (newer format) |
- | `duckdb` | N | DuckDB database file |
- | `mdb` | N | Microsoft Access database file (older format) |
- | `sqlite` | N | SQLite database file |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `accdb` | N | N | Microsoft Access (newer format) |
+ | `duckdb` | N | N | DuckDB |
+ | `mdb` | N | N | Microsoft Access (older format) |
+ | `sqlite` | N | N | SQLite |

  #### Spreadsheets
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `numbers` | N | N | Apple Numbers |
+ | `ods` | N | N | OpenDocument |
+ | `wks` | N | N | Lotus 1-2-3 |
+ | `xls` | Y | Y | Microsoft Excel (BIFF) |
+ | `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
+ | `xlsx` | Y | Y | Microsoft Excel (Open XML) |
+
+ #### Statistical / Scientific / Numeric Computing
+
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `dta` | N | N | Stata |
+ | `hdf5` | N | N | Hierarchical Data Format |
+ | `mat` | N | N | MATLAB |
+ | `nc` | N | N | NetCDF |
+ | `rda` | N | N | RData workspace/object |
+ | `rds` | N | N | R data |
+ | `sas7bdat` | N | N | SAS data |
+ | `sav` | N | N | SPSS data |
+ | `sylk` | N | N | Symbolic Link |
+ | `xpt` | N | N | SAS Transport |
+ | `zsav` | N | N | Compressed SPSS data |
+
+ #### Logs and Event Streams
+
  | Format | Supported | Description |
  | --- | --- | --- |
- | `xls` | Y | Microsoft Excel (BIFF); read-only |
- | `xlsx` | Y | Microsoft Excel (Open XML) |
+ | `log` | N | N | Generic log file |

  #### Data Archives

- | Format | Supported | Description |
- | --- | --- | --- |
- | `gz` | Y | Gzip-compressed file |
- | `zip` | Y | ZIP archive |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `gz` | Y | Y | Gzip-compressed file |
+ | `zip` | Y | Y | ZIP archive |

- #### Logs and Event Streams
+ #### Templates

- | Format | Supported | Description |
- | --- | --- | --- |
- | `log` | N | Generic log file |
+ | Format | Read | Write | Description |
+ | --- | --- | --- | --- |
+ | `hbs` | N | N | Handlebars |
+ | `jinja2` | N | N | Jinja2 |
+ | `mustache` | N | N | Mustache |
+ | `vm` | N | N | Apache Velocity |

  ## Usage

@@ -500,7 +531,7 @@ cat examples/data/sample.json \
  Use ETLPlus as a Python library:

  ```python
- from etlplus import extract, validate, transform, load
+ from etlplus.ops import extract, validate, transform, load

  # Extract data
  data = extract("file", "data.json")
@@ -695,7 +726,7 @@ We split tests into two layers:
  pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
  fake clients.

- If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default. Full
+ If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
  criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).

  ### Code Coverage