sas2parquet 0.1.2__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/PKG-INFO +1 -1
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/pyproject.toml +1 -1
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/src/sas2parquet/convert.py +27 -1
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/LICENSE +0 -0
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/README.md +0 -0
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/src/sas2parquet/__init__.py +0 -0
- {sas2parquet-0.1.2 → sas2parquet-0.1.6}/src/sas2parquet/cli.py +0 -0
|
@@ -317,7 +317,27 @@ def reconvert_file_ultimate(sas_path: Path, parquet_path: Path) -> bool:
|
|
|
317
317
|
writer.close()
|
|
318
318
|
print(" ✅ Conversion succeeded")
|
|
319
319
|
|
|
320
|
+
# ===== FULL PARQUET VALIDATION (WORKING) =====
|
|
321
|
+
print(" 🔍 Full Parquet validation...")
|
|
322
|
+
try:
|
|
323
|
+
pf = pq.ParquetFile(parquet_path)
|
|
324
|
+
total_rows = 0
|
|
325
|
+
num_groups = pf.metadata.num_row_groups
|
|
326
|
+
batch_count = 0
|
|
327
|
+
|
|
328
|
+
for batch in pf.iter_batches():
|
|
329
|
+
total_rows += batch.num_rows
|
|
330
|
+
batch_count += 1
|
|
331
|
+
|
|
332
|
+
print(f" ✅ Parquet fully validated: {total_rows:,} rows across {num_groups} groups ({batch_count} batches)")
|
|
333
|
+
pf.close()
|
|
334
|
+
except Exception as e:
|
|
335
|
+
print(f" ❌ Parquet validation failed: {e}")
|
|
336
|
+
return False
|
|
337
|
+
# ===== END =====
|
|
338
|
+
|
|
320
339
|
st, dt = compare_and_report_diffs(sas_path, parquet_path)
|
|
340
|
+
|
|
321
341
|
print(f" 🔍 Validation: {st}")
|
|
322
342
|
for d in dt:
|
|
323
343
|
print(" -", d.replace("\n", "\n "))
|
|
@@ -366,8 +386,14 @@ def main():
|
|
|
366
386
|
print(f"\n🗂 Processing: {rel}")
|
|
367
387
|
|
|
368
388
|
# Mirror structure under parquetdata/ (which lives next to sasdata/)
|
|
369
|
-
pqf = (PARQUET_INPUT_DIR / rel).with_suffix('.parquet')
|
|
370
389
|
|
|
390
|
+
if rel.parent == Path("."):
|
|
391
|
+
pqf = (PARQUET_INPUT_DIR / rel.name).with_suffix(".parquet")
|
|
392
|
+
else:
|
|
393
|
+
parquet_dirs = [f"{p}_parquet" for p in rel.parent.parts]
|
|
394
|
+
pqf = (PARQUET_INPUT_DIR.joinpath(*parquet_dirs) / rel.name).with_suffix(".parquet")
|
|
395
|
+
|
|
396
|
+
|
|
371
397
|
reconvert_file_ultimate(sas, pqf)
|
|
372
398
|
print("-"*60)
|
|
373
399
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|