sas2parquet 0.1.1__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/PKG-INFO +1 -32
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/README.md +0 -31
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/pyproject.toml +1 -1
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/src/sas2parquet/convert.py +20 -0
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/LICENSE +0 -0
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/src/sas2parquet/__init__.py +0 -0
- {sas2parquet-0.1.1 → sas2parquet-0.1.5}/src/sas2parquet/cli.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sas2parquet
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: SAS → Parquet Hybrid Converter & Validator
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Author: Zaman Ziabakhshganji
|
|
@@ -128,37 +128,6 @@ Each file undergoes 4-stage validation:
|
|
|
128
128
|
3. Column order
|
|
129
129
|
4. Value comparison
|
|
130
130
|
|
|
131
|
-
## 💻 Development
|
|
132
|
-
```bash
|
|
133
|
-
git clone <repo>
|
|
134
|
-
cd sas2parquet
|
|
135
|
-
pip install -e .
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
## 🧪 Testing
|
|
140
|
-
|
|
141
|
-
This project includes a comprehensive **pytest** test suite covering:
|
|
142
|
-
|
|
143
|
-
- Schema inference and overrides
|
|
144
|
-
- SAS ↔ Parquet validation logic
|
|
145
|
-
- Error handling and retry behavior
|
|
146
|
-
- Directory mirroring (`sasdata/ → parquetdata/`)
|
|
147
|
-
- Logging and CLI execution paths
|
|
148
|
-
|
|
149
|
-
### Run tests
|
|
150
|
-
|
|
151
|
-
```bash
|
|
152
|
-
pip install -e .[dev]
|
|
153
|
-
pytest -q
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
## 📦 Dependencies
|
|
157
|
-
- pandas>=2.0
|
|
158
|
-
- polars>=0.20
|
|
159
|
-
- pyarrow>=15.0
|
|
160
|
-
- pyreadstat>=1.4
|
|
161
|
-
|
|
162
131
|
## 📄 License
|
|
163
132
|
MIT License
|
|
164
133
|
|
|
@@ -98,36 +98,5 @@ Each file undergoes 4-stage validation:
|
|
|
98
98
|
3. Column order
|
|
99
99
|
4. Value comparison
|
|
100
100
|
|
|
101
|
-
## 💻 Development
|
|
102
|
-
```bash
|
|
103
|
-
git clone <repo>
|
|
104
|
-
cd sas2parquet
|
|
105
|
-
pip install -e .
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
## 🧪 Testing
|
|
110
|
-
|
|
111
|
-
This project includes a comprehensive **pytest** test suite covering:
|
|
112
|
-
|
|
113
|
-
- Schema inference and overrides
|
|
114
|
-
- SAS ↔ Parquet validation logic
|
|
115
|
-
- Error handling and retry behavior
|
|
116
|
-
- Directory mirroring (`sasdata/ → parquetdata/`)
|
|
117
|
-
- Logging and CLI execution paths
|
|
118
|
-
|
|
119
|
-
### Run tests
|
|
120
|
-
|
|
121
|
-
```bash
|
|
122
|
-
pip install -e .[dev]
|
|
123
|
-
pytest -q
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
## 📦 Dependencies
|
|
127
|
-
- pandas>=2.0
|
|
128
|
-
- polars>=0.20
|
|
129
|
-
- pyarrow>=15.0
|
|
130
|
-
- pyreadstat>=1.4
|
|
131
|
-
|
|
132
101
|
## 📄 License
|
|
133
102
|
MIT License
|
|
@@ -317,7 +317,27 @@ def reconvert_file_ultimate(sas_path: Path, parquet_path: Path) -> bool:
|
|
|
317
317
|
writer.close()
|
|
318
318
|
print(" ✅ Conversion succeeded")
|
|
319
319
|
|
|
320
|
+
# ===== FULL PARQUET VALIDATION (WORKING) =====
|
|
321
|
+
print(" 🔍 Full Parquet validation...")
|
|
322
|
+
try:
|
|
323
|
+
pf = pq.ParquetFile(parquet_path)
|
|
324
|
+
total_rows = 0
|
|
325
|
+
num_groups = pf.metadata.num_row_groups
|
|
326
|
+
batch_count = 0
|
|
327
|
+
|
|
328
|
+
for batch in pf.iter_batches():
|
|
329
|
+
total_rows += batch.num_rows
|
|
330
|
+
batch_count += 1
|
|
331
|
+
|
|
332
|
+
print(f" ✅ Parquet fully validated: {total_rows:,} rows across {num_groups} groups ({batch_count} batches)")
|
|
333
|
+
pf.close()
|
|
334
|
+
except Exception as e:
|
|
335
|
+
print(f" ❌ Parquet validation failed: {e}")
|
|
336
|
+
return False
|
|
337
|
+
# ===== END =====
|
|
338
|
+
|
|
320
339
|
st, dt = compare_and_report_diffs(sas_path, parquet_path)
|
|
340
|
+
|
|
321
341
|
print(f" 🔍 Validation: {st}")
|
|
322
342
|
for d in dt:
|
|
323
343
|
print(" -", d.replace("\n", "\n "))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|