csvnorm 0.3.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csvnorm-0.3.12/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 aborruso@gmail.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,279 @@
1
+ Metadata-Version: 2.4
2
+ Name: csvnorm
3
+ Version: 0.3.12
4
+ Summary: A command-line utility to validate and normalize CSV files
5
+ Author-email: aborruso <aborruso@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 aborruso@gmail.com
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/aborruso/csvnorm
29
+ Project-URL: Issues, https://github.com/aborruso/csvnorm/issues
30
+ Keywords: csv,data,normalization,validation,etl
31
+ Classifier: Development Status :: 5 - Production/Stable
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Intended Audience :: Science/Research
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.9
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
42
+ Classifier: Topic :: Software Development :: Libraries
43
+ Classifier: Topic :: Utilities
44
+ Requires-Python: >=3.9
45
+ Description-Content-Type: text/markdown
46
+ License-File: LICENSE
47
+ Requires-Dist: charset-normalizer>=3.0.0
48
+ Requires-Dist: duckdb>=0.9.0
49
+ Requires-Dist: rich>=13.0.0
50
+ Requires-Dist: rich-argparse>=1.0.0
51
+ Provides-Extra: dev
52
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
53
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
54
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
55
+ Dynamic: license-file
56
+
57
+ [![PyPI version](https://badge.fury.io/py/csvnorm.svg)](https://pypi.org/project/csvnorm/)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
59
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
60
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aborruso/csvnorm)
61
+
62
+ # csvnorm
63
+
64
+ A command-line utility to validate and normalize CSV files for initial exploration.
65
+
66
+ ## Installation
67
+
68
+ Recommended (uv):
69
+
70
+ ```bash
71
+ uv tool install csvnorm
72
+ ```
73
+
74
+ Or with pip:
75
+
76
+ ```bash
77
+ pip install csvnorm
78
+ ```
79
+
80
+ ## Purpose
81
+
82
+ This tool prepares CSV files for **basic exploratory data analysis (EDA)**, not for complex transformations. It focuses on achieving a clean, standardized baseline format that allows you to quickly assess data quality and structure before designing more sophisticated ETL pipelines.
83
+
84
+ **What it does:**
85
+ - Validates CSV structure and reports errors
86
+ - Normalizes encoding to UTF-8 when needed
87
+ - Normalizes delimiters and field names
88
+ - Creates a consistent starting point for data exploration
89
+
90
+ **What it doesn't do:**
91
+ - Complex data transformations or business logic
92
+ - Type inference or data validation beyond structure
93
+ - Heavy processing or aggregations
94
+
95
+ ## Features
96
+
97
+ - **CSV Validation**: Checks for common CSV errors and inconsistencies using DuckDB
98
+ - **Delimiter Normalization**: Converts all field separators to standard commas (`,`)
99
+ - **Field Name Normalization**: Converts column headers to snake_case format
100
+ - **Encoding Normalization**: Auto-detects encoding and converts to UTF-8 when needed (ASCII is already UTF-8 compatible)
101
+ - **Processing Summary**: Displays comprehensive statistics (rows, columns, file sizes) and error details
102
+ - **Error Reporting**: Exports detailed error file for invalid rows with summary panel
103
+ - **Remote URL Support**: Process CSV files directly from HTTP/HTTPS URLs without downloading
104
+
105
+ ## Usage
106
+
107
+ ```bash
108
+ csvnorm input.csv [options]
109
+ ```
110
+
111
+ ### Options
112
+
113
+ | Option | Description |
114
+ |--------|-------------|
115
+ | `-f, --force` | Force overwrite of existing output files |
116
+ | `-k, --keep-names` | Keep original column names (disable snake_case) |
117
+ | `-d, --delimiter CHAR` | Set custom output delimiter (default: `,`) |
118
+ | `-o, --output-file PATH` | Set output file path (absolute or relative) |
119
+ | `-V, --verbose` | Enable verbose output for debugging |
120
+ | `-v, --version` | Show version number |
121
+ | `-h, --help` | Show help message |
122
+
123
+ ### Examples
124
+
125
+ ```bash
126
+ # Basic usage (output: data.csv in current directory)
127
+ csvnorm data.csv
128
+
129
+ # Specify output file path
130
+ csvnorm data.csv -o output/processed.csv
131
+
132
+ # Use absolute path
133
+ csvnorm data.csv -o /tmp/data_normalized.csv
134
+
135
+ # Process remote CSV from URL
136
+ csvnorm "https://raw.githubusercontent.com/aborruso/csvnorm/refs/heads/main/test/Trasporto%20Pubblico%20Locale%20Settore%20Pubblico%20Allargato%20-%20Indicatore%202000-2020%20Trasferimenti%20Correnti%20su%20Entrate%20Correnti.csv" -o output.csv
137
+
138
+ # With semicolon delimiter
139
+ csvnorm data.csv -d ';' -o data_semicolon.csv
140
+
141
+ # Keep original headers
142
+ csvnorm data.csv --keep-names -o output.csv
143
+
144
+ # Force overwrite with verbose output
145
+ csvnorm data.csv -f -V -o processed.csv
146
+
147
+ # Custom output name and extension
148
+ csvnorm data.csv -o results.txt
149
+ ```
150
+
151
+ ### Output
152
+
153
+ Creates a normalized CSV file at the specified path with:
154
+ - UTF-8 encoding
155
+ - Consistent field delimiters
156
+ - Normalized column names (unless `--keep-names` is specified)
157
+ - Error report if any invalid rows are found (saved as `{output_name}_reject_errors.csv` in the same directory)
158
+ - Temporary encoding conversion files stored in system temp directory with auto-cleanup
159
+
160
+ Output file path behavior:
161
+ - If `-o` is specified: uses the exact path provided (supports absolute and relative paths)
162
+ - If `-o` is omitted: uses input filename in current working directory
163
+ - Any file extension is allowed (not limited to `.csv`)
164
+
165
+ For remote URLs:
166
+ - You must specify `-o` to set the output filename
167
+ - Encoding is handled automatically by DuckDB
168
+ - HTTP timeout is set to 30 seconds
169
+ - Only public URLs are supported (no authentication)
170
+
171
+ The tool provides modern terminal output with:
172
+ - Progress indicators for multi-step processing
173
+ - Color-coded error messages with panels
174
+ - Success summary table with statistics (rows, columns, file sizes)
175
+ - Encoding conversion status (converted/no conversion/remote; ASCII is already UTF-8 compatible)
176
+ - Error summary panel with reject count and error types when validation fails
177
+ - ASCII art banner with `--version` and `-V` verbose mode
178
+
179
+ **Success Example:**
180
+ ```
181
+ ✓ Success
182
+ Input: test/utf8_basic.csv
183
+ Output: output/utf8_basic.csv
184
+ Encoding: ascii (ASCII is UTF-8 compatible; no conversion needed)
185
+ Rows: 2
186
+ Columns: 3
187
+ Input size: 42 B
188
+ Output size: 43 B
189
+ Headers: normalized to snake_case
190
+ ```
191
+
192
+ **Error Example:**
193
+ ```
194
+ ✓ Success
195
+ Input: test/malformed_rows.csv
196
+ Output: output/malformed_rows.csv
197
+ Encoding: ascii (ASCII is UTF-8 compatible; no conversion needed)
198
+ Rows: 1
199
+ Columns: 4
200
+ Input size: 24 B
201
+ Output size: 40 B
202
+ Headers: normalized to snake_case
203
+
204
+ ╭──────────────────────────── ! Validation Failed ─────────────────────────────╮
205
+ │ Validation Errors: │
206
+ │ │
207
+ │ Rejected rows: 2 │
208
+ │ │
209
+ │ Error types: │
210
+ │ • Expected Number of Columns: 3 Found: 2 │
211
+ │ • Expected Number of Columns: 3 Found: 4 │
212
+ │ │
213
+ │ Details: output/malformed_rows_reject_errors.csv │
214
+ ╰──────────────────────────────────────────────────────────────────────────────╯
215
+ ```
216
+
217
+ ### Exit Codes
218
+
219
+ | Code | Meaning |
220
+ |------|---------|
221
+ | 0 | Success |
222
+ | 1 | Error (validation failed, file not found, etc.) |
223
+
224
+ ## Requirements
225
+
226
+ - Python 3.9+
227
+ - Dependencies (automatically installed):
228
+ - `charset-normalizer>=3.0.0` - Encoding detection
229
+ - `duckdb>=0.9.0` - CSV validation and normalization
230
+ - `rich>=13.0.0` - Modern terminal output formatting
231
+ - `rich-argparse>=1.0.0` - Enhanced CLI help formatting
232
+ - `pyfiglet>=0.8.post1,<1.0.0` - ASCII art banner (note: not listed in the package's declared dependencies, so it is not installed automatically)
233
+
234
+ Optional extras:
235
+ - `[dev]` - Development dependencies (`pytest>=7.0.0`, `pytest-cov>=4.0.0`, `ruff>=0.1.0`)
236
+
237
+ ## Development
238
+
239
+ ### Setup
240
+
241
+ ```bash
242
+ git clone https://github.com/aborruso/csvnorm
243
+ cd csvnorm
244
+
245
+ # Create and activate venv with uv (recommended)
246
+ uv venv
247
+ source .venv/bin/activate
248
+ uv pip install -e ".[dev]"
249
+
250
+ # Or with pip
251
+ pip install -e ".[dev]"
252
+ ```
253
+
254
+ ### Testing
255
+
256
+ ```bash
257
+ pytest tests/ -v
258
+ ```
259
+
260
+ ### Project Structure
261
+
262
+ ```
263
+ csvnorm/
264
+ ├── src/csvnorm/
265
+ │ ├── __init__.py # Package version
266
+ │ ├── __main__.py # python -m support
267
+ │ ├── cli.py # CLI argument parsing
268
+ │ ├── core.py # Main processing pipeline
269
+ │ ├── encoding.py # Encoding detection/conversion
270
+ │ ├── validation.py # DuckDB validation
271
+ │ └── utils.py # Helper functions
272
+ ├── tests/ # Test suite
273
+ ├── test/ # CSV fixtures
274
+ └── pyproject.toml # Package configuration
275
+ ```
276
+
277
+ ## License
278
+
279
+ MIT License (c) 2026 aborruso@gmail.com - See LICENSE file for details
@@ -0,0 +1,223 @@
1
+ [![PyPI version](https://badge.fury.io/py/csvnorm.svg)](https://pypi.org/project/csvnorm/)
2
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
3
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
4
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aborruso/csvnorm)
5
+
6
+ # csvnorm
7
+
8
+ A command-line utility to validate and normalize CSV files for initial exploration.
9
+
10
+ ## Installation
11
+
12
+ Recommended (uv):
13
+
14
+ ```bash
15
+ uv tool install csvnorm
16
+ ```
17
+
18
+ Or with pip:
19
+
20
+ ```bash
21
+ pip install csvnorm
22
+ ```
23
+
24
+ ## Purpose
25
+
26
+ This tool prepares CSV files for **basic exploratory data analysis (EDA)**, not for complex transformations. It focuses on achieving a clean, standardized baseline format that allows you to quickly assess data quality and structure before designing more sophisticated ETL pipelines.
27
+
28
+ **What it does:**
29
+ - Validates CSV structure and reports errors
30
+ - Normalizes encoding to UTF-8 when needed
31
+ - Normalizes delimiters and field names
32
+ - Creates a consistent starting point for data exploration
33
+
34
+ **What it doesn't do:**
35
+ - Complex data transformations or business logic
36
+ - Type inference or data validation beyond structure
37
+ - Heavy processing or aggregations
38
+
39
+ ## Features
40
+
41
+ - **CSV Validation**: Checks for common CSV errors and inconsistencies using DuckDB
42
+ - **Delimiter Normalization**: Converts all field separators to standard commas (`,`)
43
+ - **Field Name Normalization**: Converts column headers to snake_case format
44
+ - **Encoding Normalization**: Auto-detects encoding and converts to UTF-8 when needed (ASCII is already UTF-8 compatible)
45
+ - **Processing Summary**: Displays comprehensive statistics (rows, columns, file sizes) and error details
46
+ - **Error Reporting**: Exports detailed error file for invalid rows with summary panel
47
+ - **Remote URL Support**: Process CSV files directly from HTTP/HTTPS URLs without downloading
48
+
49
+ ## Usage
50
+
51
+ ```bash
52
+ csvnorm input.csv [options]
53
+ ```
54
+
55
+ ### Options
56
+
57
+ | Option | Description |
58
+ |--------|-------------|
59
+ | `-f, --force` | Force overwrite of existing output files |
60
+ | `-k, --keep-names` | Keep original column names (disable snake_case) |
61
+ | `-d, --delimiter CHAR` | Set custom output delimiter (default: `,`) |
62
+ | `-o, --output-file PATH` | Set output file path (absolute or relative) |
63
+ | `-V, --verbose` | Enable verbose output for debugging |
64
+ | `-v, --version` | Show version number |
65
+ | `-h, --help` | Show help message |
66
+
67
+ ### Examples
68
+
69
+ ```bash
70
+ # Basic usage (output: data.csv in current directory)
71
+ csvnorm data.csv
72
+
73
+ # Specify output file path
74
+ csvnorm data.csv -o output/processed.csv
75
+
76
+ # Use absolute path
77
+ csvnorm data.csv -o /tmp/data_normalized.csv
78
+
79
+ # Process remote CSV from URL
80
+ csvnorm "https://raw.githubusercontent.com/aborruso/csvnorm/refs/heads/main/test/Trasporto%20Pubblico%20Locale%20Settore%20Pubblico%20Allargato%20-%20Indicatore%202000-2020%20Trasferimenti%20Correnti%20su%20Entrate%20Correnti.csv" -o output.csv
81
+
82
+ # With semicolon delimiter
83
+ csvnorm data.csv -d ';' -o data_semicolon.csv
84
+
85
+ # Keep original headers
86
+ csvnorm data.csv --keep-names -o output.csv
87
+
88
+ # Force overwrite with verbose output
89
+ csvnorm data.csv -f -V -o processed.csv
90
+
91
+ # Custom output name and extension
92
+ csvnorm data.csv -o results.txt
93
+ ```
94
+
95
+ ### Output
96
+
97
+ Creates a normalized CSV file at the specified path with:
98
+ - UTF-8 encoding
99
+ - Consistent field delimiters
100
+ - Normalized column names (unless `--keep-names` is specified)
101
+ - Error report if any invalid rows are found (saved as `{output_name}_reject_errors.csv` in the same directory)
102
+ - Temporary encoding conversion files stored in system temp directory with auto-cleanup
103
+
104
+ Output file path behavior:
105
+ - If `-o` is specified: uses the exact path provided (supports absolute and relative paths)
106
+ - If `-o` is omitted: uses input filename in current working directory
107
+ - Any file extension is allowed (not limited to `.csv`)
108
+
109
+ For remote URLs:
110
+ - You must specify `-o` to set the output filename
111
+ - Encoding is handled automatically by DuckDB
112
+ - HTTP timeout is set to 30 seconds
113
+ - Only public URLs are supported (no authentication)
114
+
115
+ The tool provides modern terminal output with:
116
+ - Progress indicators for multi-step processing
117
+ - Color-coded error messages with panels
118
+ - Success summary table with statistics (rows, columns, file sizes)
119
+ - Encoding conversion status (converted/no conversion/remote; ASCII is already UTF-8 compatible)
120
+ - Error summary panel with reject count and error types when validation fails
121
+ - ASCII art banner with `--version` and `-V` verbose mode
122
+
123
+ **Success Example:**
124
+ ```
125
+ ✓ Success
126
+ Input: test/utf8_basic.csv
127
+ Output: output/utf8_basic.csv
128
+ Encoding: ascii (ASCII is UTF-8 compatible; no conversion needed)
129
+ Rows: 2
130
+ Columns: 3
131
+ Input size: 42 B
132
+ Output size: 43 B
133
+ Headers: normalized to snake_case
134
+ ```
135
+
136
+ **Error Example:**
137
+ ```
138
+ ✓ Success
139
+ Input: test/malformed_rows.csv
140
+ Output: output/malformed_rows.csv
141
+ Encoding: ascii (ASCII is UTF-8 compatible; no conversion needed)
142
+ Rows: 1
143
+ Columns: 4
144
+ Input size: 24 B
145
+ Output size: 40 B
146
+ Headers: normalized to snake_case
147
+
148
+ ╭──────────────────────────── ! Validation Failed ─────────────────────────────╮
149
+ │ Validation Errors: │
150
+ │ │
151
+ │ Rejected rows: 2 │
152
+ │ │
153
+ │ Error types: │
154
+ │ • Expected Number of Columns: 3 Found: 2 │
155
+ │ • Expected Number of Columns: 3 Found: 4 │
156
+ │ │
157
+ │ Details: output/malformed_rows_reject_errors.csv │
158
+ ╰──────────────────────────────────────────────────────────────────────────────╯
159
+ ```
160
+
161
+ ### Exit Codes
162
+
163
+ | Code | Meaning |
164
+ |------|---------|
165
+ | 0 | Success |
166
+ | 1 | Error (validation failed, file not found, etc.) |
167
+
168
+ ## Requirements
169
+
170
+ - Python 3.9+
171
+ - Dependencies (automatically installed):
172
+ - `charset-normalizer>=3.0.0` - Encoding detection
173
+ - `duckdb>=0.9.0` - CSV validation and normalization
174
+ - `rich>=13.0.0` - Modern terminal output formatting
175
+ - `rich-argparse>=1.0.0` - Enhanced CLI help formatting
176
+ - `pyfiglet>=0.8.post1,<1.0.0` - ASCII art banner (note: not listed in the package's declared dependencies, so it is not installed automatically)
177
+
178
+ Optional extras:
179
+ - `[dev]` - Development dependencies (`pytest>=7.0.0`, `pytest-cov>=4.0.0`, `ruff>=0.1.0`)
180
+
181
+ ## Development
182
+
183
+ ### Setup
184
+
185
+ ```bash
186
+ git clone https://github.com/aborruso/csvnorm
187
+ cd csvnorm
188
+
189
+ # Create and activate venv with uv (recommended)
190
+ uv venv
191
+ source .venv/bin/activate
192
+ uv pip install -e ".[dev]"
193
+
194
+ # Or with pip
195
+ pip install -e ".[dev]"
196
+ ```
197
+
198
+ ### Testing
199
+
200
+ ```bash
201
+ pytest tests/ -v
202
+ ```
203
+
204
+ ### Project Structure
205
+
206
+ ```
207
+ csvnorm/
208
+ ├── src/csvnorm/
209
+ │ ├── __init__.py # Package version
210
+ │ ├── __main__.py # python -m support
211
+ │ ├── cli.py # CLI argument parsing
212
+ │ ├── core.py # Main processing pipeline
213
+ │ ├── encoding.py # Encoding detection/conversion
214
+ │ ├── validation.py # DuckDB validation
215
+ │ └── utils.py # Helper functions
216
+ ├── tests/ # Test suite
217
+ ├── test/ # CSV fixtures
218
+ └── pyproject.toml # Package configuration
219
+ ```
220
+
221
+ ## License
222
+
223
+ MIT License (c) 2026 aborruso@gmail.com - See LICENSE file for details
@@ -0,0 +1,63 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "csvnorm"
7
+ version = "0.3.12"
8
+ description = "A command-line utility to validate and normalize CSV files"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {file = "LICENSE"}
12
+ authors = [
13
+ {name = "aborruso", email = "aborruso@gmail.com"}
14
+ ]
15
+ keywords = ["csv", "data", "normalization", "validation", "etl"]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Intended Audience :: Developers",
19
+ "Intended Audience :: Science/Research",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Topic :: Scientific/Engineering :: Information Analysis",
28
+ "Topic :: Software Development :: Libraries",
29
+ "Topic :: Utilities",
30
+ ]
31
+
32
+ dependencies = [
33
+ "charset-normalizer>=3.0.0",
34
+ "duckdb>=0.9.0",
35
+ "rich>=13.0.0",
36
+ "rich-argparse>=1.0.0",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = [
41
+ "pytest>=7.0.0",
42
+ "pytest-cov>=4.0.0",
43
+ "ruff>=0.1.0",
44
+ ]
45
+
46
+ [project.urls]
47
+ Homepage = "https://github.com/aborruso/csvnorm"
48
+ Issues = "https://github.com/aborruso/csvnorm/issues"
49
+
50
+ [project.scripts]
51
+ csvnorm = "csvnorm.cli:main"
52
+
53
+ [tool.setuptools.packages.find]
54
+ where = ["src"]
55
+
56
+ [tool.ruff]
57
+ line-length = 88
58
+ target-version = "py39"  # lowest interpreter allowed by requires-python (>=3.9)
59
+
60
+ [tool.pytest.ini_options]
61
+ markers = [
62
+ "network: tests that require network access",
63
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,8 @@
1
+ """csvnorm - Validate and normalize CSV files."""
2
+
3
+ __version__ = "0.3.12"
4
+ __all__ = ["normalize_csv", "detect_encoding", "process_csv"]
5
+
6
+ from csvnorm.core import process_csv
7
+ from csvnorm.encoding import detect_encoding
8
+ from csvnorm.validation import normalize_csv
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m csvnorm."""
2
+
3
+ from csvnorm.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()