etlplus 0.16.10 → 0.17.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/properties.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.properties` module.
 
-Stub helpers for reading/writing properties (PROPERTIES) files (not implemented
-yet).
+Helpers for reading/writing properties (PROPERTIES) files.
 
 Notes
 -----
@@ -20,10 +19,10 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,12 +34,22 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _stringify(value: Any) -> str:
+    """Normalize properties values into strings."""
+    if value is None:
+        return ''
+    return str(value)
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PROPERTIES content from *path*.
 
@@ -51,10 +60,28 @@ def read(
 
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PROPERTIES file.
+    JSONData
+        The structured data read from the PROPERTIES file.
     """
-    return stub.read(path, format_name='PROPERTIES')
+    payload: JSONDict = {}
+    for line in path.read_text(encoding='utf-8').splitlines():
+        stripped = line.strip()
+        if not stripped or stripped.startswith(('#', '!')):
+            continue
+        separator_index = -1
+        for sep in ('=', ':'):
+            if sep in stripped:
+                separator_index = stripped.find(sep)
+                break
+        if separator_index == -1:
+            key = stripped
+            value = ''
+        else:
+            key = stripped[:separator_index].strip()
+            value = stripped[separator_index + 1:].strip()
+        if key:
+            payload[key] = value
+    return payload
 
 
 def write(
@@ -69,12 +96,25 @@ def write(
     path : Path
         Path to the PROPERTIES file on disk.
     data : JSONData
-        Data to write as PROPERTIES. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PROPERTIES. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of rows written to the PROPERTIES file.
+        The number of records written to the PROPERTIES file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-    return stub.write(path, data, format_name='PROPERTIES')
+    if isinstance(data, list):
+        raise TypeError('PROPERTIES payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PROPERTIES payloads must be a dict')
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        for key in sorted(data.keys()):
+            handle.write(f'{key}={_stringify(data[key])}\n')
+    return 1
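
For orientation, a minimal round trip with the new properties helpers might look like the sketch below (hypothetical path; assumes the package layout shown above):

from pathlib import Path

from etlplus.file import properties

path = Path('/tmp/app.properties')  # hypothetical example path

# write() accepts only a dict; keys are sorted and emitted as key=value.
properties.write(path, {'db.host': 'localhost', 'db.port': 5432})

# read() skips blanks and '#'/'!' comments and splits each line on the
# first '=' (or ':'), so non-string values round-trip as strings.
print(properties.read(path))  # {'db.host': 'localhost', 'db.port': '5432'}
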
etlplus/file/proto.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.proto` module.
 
-Stub helpers for reading/writing Protocol Buffers schema (PROTO) files (not
-implemented yet).
+Helpers for reading/writing Protocol Buffers schema (PROTO) files.
 
 Notes
 -----
@@ -21,8 +20,6 @@ from __future__ import annotations
 from pathlib import Path
 
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,7 +36,7 @@ __all__ = [
 
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PROTO content from *path*.
 
@@ -50,10 +47,10 @@ def read(
 
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PROTO file.
+    JSONData
+        The structured data read from the PROTO file.
     """
-    return stub.read(path, format_name='PROTO')
+    return {'schema': path.read_text(encoding='utf-8')}
 
 
 def write(
@@ -68,12 +65,27 @@ def write(
     path : Path
         Path to the PROTO file on disk.
     data : JSONData
-        Data to write as PROTO. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PROTO. Should be a dictionary with ``schema``.
 
     Returns
     -------
     int
-        The number of rows written to the PROTO file.
+        The number of records written to the PROTO file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary or is missing a ``schema`` string.
     """
-    return stub.write(path, data, format_name='PROTO')
+    if isinstance(data, list):
+        raise TypeError('PROTO payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PROTO payloads must be a dict')
+
+    schema = data.get('schema')
+    if not isinstance(schema, str):
+        raise TypeError('PROTO payloads must include a "schema" string')
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(schema, encoding='utf-8')
+    return 1
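
A similar sketch for the proto helpers, which treat the payload as a dict carrying the schema text verbatim (hypothetical path):

from pathlib import Path

from etlplus.file import proto

schema = 'syntax = "proto3";\n\nmessage Ping {\n  string id = 1;\n}\n'

# write() requires a dict with a 'schema' string and persists it as-is.
proto.write(Path('/tmp/ping.proto'), {'schema': schema})

# read() wraps the raw file text back into {'schema': ...}.
assert proto.read(Path('/tmp/ping.proto')) == {'schema': schema}
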
etlplus/file/psv.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.psv` module.
 
-Stub helpers for reading/writing Pipe-Separated Values (PSV) files (not
-implemented yet).
+Helpers for reading/writing Pipe-Separated Values (PSV) files.
 
 Notes
 -----
@@ -24,7 +23,8 @@ from pathlib import Path
 
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._io import read_delimited
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -55,7 +55,7 @@ def read(
     JSONList
         The list of dictionaries read from the PSV file.
     """
-    return stub.read(path, format_name='PSV')
+    return read_delimited(path, delimiter='|')
 
 
 def write(
@@ -78,4 +78,4 @@ def write(
     int
         The number of rows written to the PSV file.
     """
-    return stub.write(path, data, format_name='PSV')
+    return write_delimited(path, data, delimiter='|')
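
Assuming the shared _io helpers mirror the package's CSV behavior with a custom delimiter (header row, string values), PSV usage might look like:

from pathlib import Path

from etlplus.file import psv

rows = [
    {'name': 'ada', 'lang': 'py'},
    {'name': 'lin', 'lang': 'rs'},
]

# Rows are written pipe-delimited, e.g. a 'name|lang' header then data.
assert psv.write(Path('/tmp/people.psv'), rows) == 2

# Each data row reads back as a dict of strings.
print(psv.read(Path('/tmp/people.psv')))
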
etlplus/file/rda.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.rda` module.
 
-Stub helpers for reading/writing RData workspace/object bundle (RDA) files (not
-implemented yet).
+Helpers for reading/writing RData workspace/object bundle (RDA) files.
 
 Notes
 -----
@@ -20,10 +19,13 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,12 +37,38 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_pyreadr() -> Any:
+    """Return the pyreadr module, importing it on first use."""
+    return get_optional_module(
+        'pyreadr',
+        error_message=(
+            'RDA support requires optional dependency "pyreadr".\n'
+            'Install with: pip install pyreadr'
+        ),
+    )
+
+
+def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
+    if isinstance(value, pandas.DataFrame):
+        return value.to_dict(orient='records')
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, list) and all(
+        isinstance(item, dict) for item in value
+    ):
+        return value
+    return {'value': value}
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read RDA content from *path*.
 
@@ -51,10 +79,21 @@ def read(
 
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the RDA file.
+    JSONData
+        The structured data read from the RDA file.
     """
-    return stub.read(path, format_name='RDA')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDA')
+    result = pyreadr.read_r(str(path))
+    if not result:
+        return []
+    if len(result) == 1:
+        value = next(iter(result.values()))
+        return _coerce_r_object(value, pandas)
+    payload: JSONDict = {}
+    for key, value in result.items():
+        payload[str(key)] = _coerce_r_object(value, pandas)
+    return payload
 
 
 def write(
@@ -76,5 +115,40 @@ def write(
     -------
     int
         The number of rows written to the RDA file.
+
+    Raises
+    ------
+    ImportError
+        If "pyreadr" is not installed with write support.
+    TypeError
+        If *data* is not a dictionary or list of dictionaries.
     """
-    return stub.write(path, data, format_name='RDA')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDA')
+
+    if isinstance(data, list):
+        records = normalize_records(data, 'RDA')
+        frame = pandas.DataFrame.from_records(records)
+        count = len(records)
+    elif isinstance(data, dict):
+        frame = pandas.DataFrame.from_records([data])
+        count = 1
+    else:
+        raise TypeError('RDA payloads must be a dict or list of dicts')
+
+    writer = getattr(pyreadr, 'write_rdata', None) or getattr(
+        pyreadr,
+        'write_rda',
+        None,
+    )
+    if writer is None:
+        raise ImportError(
+            'RDA write support requires "pyreadr" with write_rdata().',
+        )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        writer(str(path), frame, df_name='data')
+    except TypeError:
+        writer(str(path), frame)
+    return count
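
A sketch of the RDA round trip, assuming the optional pyreadr dependency is installed (hypothetical path):

from pathlib import Path

from etlplus.file import rda

rows = [{'x': 1, 'y': 2.5}, {'x': 2, 'y': 3.5}]

# The list is normalized to a pandas DataFrame and stored as a single
# R object (named 'data' when the writer accepts df_name).
rda.write(Path('/tmp/points.rda'), rows)

# A one-object bundle reads back coerced to records; multi-object
# bundles read back as {object_name: coerced_value}.
print(rda.read(Path('/tmp/points.rda')))
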
etlplus/file/rds.py CHANGED
@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.rds` module.
 
-Stub helpers for reading/writing R (RDS) data files (not implemented yet).
+Helpers for reading/writing R (RDS) data files.
 
 Notes
 -----
@@ -19,10 +19,13 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -34,12 +37,38 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL HELPERS ================================================ #
+
+
+def _get_pyreadr() -> Any:
+    """Return the pyreadr module, importing it on first use."""
+    return get_optional_module(
+        'pyreadr',
+        error_message=(
+            'RDS support requires optional dependency "pyreadr".\n'
+            'Install with: pip install pyreadr'
+        ),
+    )
+
+
+def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
+    if isinstance(value, pandas.DataFrame):
+        return value.to_dict(orient='records')
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, list) and all(
+        isinstance(item, dict) for item in value
+    ):
+        return value
+    return {'value': value}
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read RDS content from *path*.
 
@@ -50,10 +79,21 @@ def read(
 
     Returns
    -------
-    JSONList
-        The list of dictionaries read from the RDS file.
+    JSONData
+        The structured data read from the RDS file.
     """
-    return stub.read(path, format_name='RDS')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDS')
+    result = pyreadr.read_r(str(path))
+    if not result:
+        return []
+    if len(result) == 1:
+        value = next(iter(result.values()))
+        return _coerce_r_object(value, pandas)
+    payload: JSONDict = {}
+    for key, value in result.items():
+        payload[str(key)] = _coerce_r_object(value, pandas)
+    return payload
 
 
 def write(
@@ -75,5 +115,33 @@ def write(
     -------
     int
         The number of rows written to the RDS file.
+
+    Raises
+    ------
+    ImportError
+        If "pyreadr" is not installed with write support.
+    TypeError
+        If *data* is not a dictionary or list of dictionaries.
     """
-    return stub.write(path, data, format_name='RDS')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDS')
+
+    if isinstance(data, list):
+        records = normalize_records(data, 'RDS')
+        frame = pandas.DataFrame.from_records(records)
+        count = len(records)
+    elif isinstance(data, dict):
+        frame = pandas.DataFrame.from_records([data])
+        count = 1
+    else:
+        raise TypeError('RDS payloads must be a dict or list of dicts')
+
+    writer = getattr(pyreadr, 'write_rds', None)
+    if writer is None:
+        raise ImportError(
+            'RDS write support requires "pyreadr" with write_rds().',
+        )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    writer(str(path), frame)
+    return count
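
The RDS path is nearly identical but always writes a single object via pyreadr.write_rds(); a dict payload becomes a one-row data frame (hypothetical path):

from pathlib import Path

from etlplus.file import rds

# write() returns 1 for a single dict payload.
rds.write(Path('/tmp/config.rds'), {'retries': 3, 'timeout': 30})

# read() coerces the lone stored data frame back to record dicts
# (numeric types may widen to floats on the way through R).
print(rds.read(Path('/tmp/config.rds')))
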
etlplus/file/sas7bdat.py CHANGED
@@ -1,29 +1,31 @@
 """
 :mod:`etlplus.file.sas7bdat` module.
 
-Stub helpers for reading/writing SAS (SAS7BDAT) data files (not implemented
-yet).
+Helpers for reading/writing SAS (SAS7BDAT) data files.
 
 Notes
 -----
-- A SAS7BDAT file is a binary file format used by SAS to store datasets,
-  including variables, labels, and data types.
+- A SAS7BDAT file is a proprietary binary file format created by SAS to store
+  datasets, including variables, labels, and data types.
 - Common cases:
-    - Delimited text files (e.g., CSV, TSV).
-    - Fixed-width formatted files.
-    - Custom formats specific to certain applications.
+    - Statistical analysis pipelines.
+    - Data exchange with SAS tooling.
 - Rule of thumb:
-    - If the file does not follow a specific standard format, use this module
-      for reading and writing.
+    - If the file follows the SAS7BDAT specification, use this module for
+      reading and writing.
 """
 
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
 from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +37,27 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL HELPERS ================================================ #
+
+
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'SAS7BDAT support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
+
+
+def _raise_readstat_error(err: ImportError) -> None:
+    raise ImportError(
+        'SAS7BDAT support requires optional dependency "pyreadstat".\n'
+        'Install with: pip install pyreadstat',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -42,7 +65,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read DAT content from *path*.
+    Read SAS7BDAT content from *path*.
 
     Parameters
     ----------
@@ -54,7 +77,14 @@ def read(
     JSONList
         The list of dictionaries read from the SAS7BDAT file.
     """
-    return stub.read(path, format_name='SAS7BDAT')
+    pandas = get_pandas('SAS7BDAT')
+    try:
+        frame = pandas.read_sas(path, format='sas7bdat')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_readstat_error(err)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
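
The hunks shown above stop at write()'s signature, so only the read side is visible here. A read-side sketch, assuming pandas is installed (hypothetical path):

from pathlib import Path

from etlplus.file import sas7bdat

# pandas.read_sas() parses the binary SAS7BDAT layout directly; the
# format keyword is retried without it on pandas builds that reject it.
records = sas7bdat.read(Path('/tmp/study.sas7bdat'))
for row in records[:3]:
    print(row)
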
etlplus/file/sav.py CHANGED
@@ -1,27 +1,31 @@
 """
 :mod:`etlplus.file.sav` module.
 
-Stub helpers for reading/writing SPSS (SAV) data files (not implemented yet).
+Helpers for reading/writing SPSS (SAV) files.
 
 Notes
 -----
-- A SAV file is a binary file format used by SPSS to store datasets, including
-  variables, labels, and data types.
+- A SAV file is a dataset created by SPSS.
 - Common cases:
-    - Reading data for analysis in Python.
-    - Writing processed data back to SPSS format.
+    - Survey and market research datasets.
+    - Statistical analysis workflows.
+    - Exchange with SPSS and compatible tools.
 - Rule of thumb:
-    - If you need to work with SPSS data files, use this module for reading
+    - If the file follows the SAV specification, use this module for reading
       and writing.
 """
 
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -33,6 +37,20 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL FUNCTION ================================================ #
+
+
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'SAV support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -52,7 +70,9 @@ def read(
     JSONList
         The list of dictionaries read from the SAV file.
     """
-    return stub.read(path, format_name='SAV')
+    pyreadstat = _get_pyreadstat()
+    frame, _meta = pyreadstat.read_sav(str(path))
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -60,14 +80,14 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write *data* to SAV file at *path* and return record count.
+    Write *data* to SAV at *path* and return record count.
 
     Parameters
     ----------
     path : Path
         Path to the SAV file on disk.
     data : JSONData
-        Data to write as SAV file. Should be a list of dictionaries or a
+        Data to write as SAV. Should be a list of dictionaries or a
         single dictionary.
 
     Returns
@@ -75,4 +95,13 @@ def write(
     int
         The number of rows written to the SAV file.
     """
-    return stub.write(path, data, format_name='SAV')
+    records = normalize_records(data, 'SAV')
+    if not records:
+        return 0
+
+    pyreadstat = _get_pyreadstat()
+    pandas = get_pandas('SAV')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    pyreadstat.write_sav(frame, str(path))
+    return len(records)
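
Finally, a SAV sketch, assuming the optional pyreadstat dependency is installed (hypothetical path):

from pathlib import Path

from etlplus.file import sav

rows = [{'resp_id': 1, 'score': 4.0}, {'resp_id': 2, 'score': 5.0}]

# write() normalizes the records into a DataFrame and delegates to
# pyreadstat.write_sav(); an empty payload short-circuits to 0.
assert sav.write(Path('/tmp/survey.sav'), rows) == 2

# read() returns pyreadstat's DataFrame as a list of record dicts.
print(sav.read(Path('/tmp/survey.sav')))
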