sas2parquet 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sas2parquet/convert.py CHANGED
@@ -317,7 +317,27 @@ def reconvert_file_ultimate(sas_path: Path, parquet_path: Path) -> bool:
317
317
  writer.close()
318
318
  print(" ✅ Conversion succeeded")
319
319
 
320
+ # ===== FULL PARQUET VALIDATION (WORKING) =====
321
+ print(" 🔍 Full Parquet validation...")
322
+ try:
323
+ pf = pq.ParquetFile(parquet_path)
324
+ total_rows = 0
325
+ num_groups = pf.metadata.num_row_groups
326
+ batch_count = 0
327
+
328
+ for batch in pf.iter_batches():
329
+ total_rows += batch.num_rows
330
+ batch_count += 1
331
+
332
+ print(f" ✅ Parquet fully validated: {total_rows:,} rows across {num_groups} groups ({batch_count} batches)")
333
+ pf.close()
334
+ except Exception as e:
335
+ print(f" ❌ Parquet validation failed: {e}")
336
+ return False
337
+ # ===== END =====
338
+
320
339
  st, dt = compare_and_report_diffs(sas_path, parquet_path)
340
+
321
341
  print(f" 🔍 Validation: {st}")
322
342
  for d in dt:
323
343
  print(" -", d.replace("\n", "\n "))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sas2parquet
3
- Version: 0.1.1
3
+ Version: 0.1.5
4
4
  Summary: SAS → Parquet Hybrid Converter & Validator
5
5
  License-File: LICENSE
6
6
  Author: Zaman Ziabakhshganji
@@ -128,37 +128,6 @@ Each file undergoes 4-stage validation:
128
128
  3. Column order
129
129
  4. Value comparison
130
130
 
131
- ## 💻 Development
132
- ```bash
133
- git clone <repo>
134
- cd sas2parquet
135
- pip install -e .
136
- ```
137
-
138
-
139
- ## 🧪 Testing
140
-
141
- This project includes a comprehensive **pytest** test suite covering:
142
-
143
- - Schema inference and overrides
144
- - SAS ↔ Parquet validation logic
145
- - Error handling and retry behavior
146
- - Directory mirroring (`sasdata/ → parquetdata/`)
147
- - Logging and CLI execution paths
148
-
149
- ### Run tests
150
-
151
- ```bash
152
- pip install -e .[dev]
153
- pytest -q
154
- ```
155
-
156
- ## 📦 Dependencies
157
- - pandas>=2.0
158
- - polars>=0.20
159
- - pyarrow>=15.0
160
- - pyreadstat>=1.4
161
-
162
131
  ## 📄 License
163
132
  MIT License
164
133
 
@@ -0,0 +1,8 @@
1
+ sas2parquet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ sas2parquet/cli.py,sha256=Gap4-lrHE3a-tOTRljqmRP6uX-epLFHnOYN0pdffU-g,879
3
+ sas2parquet/convert.py,sha256=bHfdtaI1sSeCSHKqARuuvbwgrNfnH1G_odKSEmVbNrI,14736
4
+ sas2parquet-0.1.5.dist-info/METADATA,sha256=nJNYKUGDINBS4M0Et6r2NRruczmd_v1NepPi5xdTHnU,3786
5
+ sas2parquet-0.1.5.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
6
+ sas2parquet-0.1.5.dist-info/entry_points.txt,sha256=pg57h0xD_3R9ZC_YfxLLfu_2p1JNhF8xDNS6v7kiSBY,52
7
+ sas2parquet-0.1.5.dist-info/licenses/LICENSE,sha256=ouRycIMUGF1zCj49-ijn1wIlTNknZEoLwAHUp0ifH-g,1066
8
+ sas2parquet-0.1.5.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- sas2parquet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- sas2parquet/cli.py,sha256=Gap4-lrHE3a-tOTRljqmRP6uX-epLFHnOYN0pdffU-g,879
3
- sas2parquet/convert.py,sha256=36hsDLM0uQF2tuwd9U8k3U5zEZtwQcEJWta3v4SkoXc,13950
4
- sas2parquet-0.1.1.dist-info/METADATA,sha256=LSc1S3ehB21qDV_s9cMdga27FUd3bQ8DUAUvWnmvIfY,4293
5
- sas2parquet-0.1.1.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
6
- sas2parquet-0.1.1.dist-info/entry_points.txt,sha256=pg57h0xD_3R9ZC_YfxLLfu_2p1JNhF8xDNS6v7kiSBY,52
7
- sas2parquet-0.1.1.dist-info/licenses/LICENSE,sha256=ouRycIMUGF1zCj49-ijn1wIlTNknZEoLwAHUp0ifH-g,1066
8
- sas2parquet-0.1.1.dist-info/RECORD,,