sas2parquet 0.1.1__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sas2parquet
3
- Version: 0.1.1
3
+ Version: 0.1.5
4
4
  Summary: SAS → Parquet Hybrid Converter & Validator
5
5
  License-File: LICENSE
6
6
  Author: Zaman Ziabakhshganji
@@ -128,37 +128,6 @@ Each file undergoes 4-stage validation:
128
128
  3. Column order
129
129
  4. Value comparison
130
130
 
131
- ## 💻 Development
132
- ```bash
133
- git clone <repo>
134
- cd sas2parquet
135
- pip install -e .
136
- ```
137
-
138
-
139
- ## 🧪 Testing
140
-
141
- This project includes a comprehensive **pytest** test suite covering:
142
-
143
- - Schema inference and overrides
144
- - SAS ↔ Parquet validation logic
145
- - Error handling and retry behavior
146
- - Directory mirroring (`sasdata/ → parquetdata/`)
147
- - Logging and CLI execution paths
148
-
149
- ### Run tests
150
-
151
- ```bash
152
- pip install -e .[dev]
153
- pytest -q
154
- ```
155
-
156
- ## 📦 Dependencies
157
- - pandas>=2.0
158
- - polars>=0.20
159
- - pyarrow>=15.0
160
- - pyreadstat>=1.4
161
-
162
131
  ## 📄 License
163
132
  MIT License
164
133
 
@@ -98,36 +98,5 @@ Each file undergoes 4-stage validation:
98
98
  3. Column order
99
99
  4. Value comparison
100
100
 
101
- ## 💻 Development
102
- ```bash
103
- git clone <repo>
104
- cd sas2parquet
105
- pip install -e .
106
- ```
107
-
108
-
109
- ## 🧪 Testing
110
-
111
- This project includes a comprehensive **pytest** test suite covering:
112
-
113
- - Schema inference and overrides
114
- - SAS ↔ Parquet validation logic
115
- - Error handling and retry behavior
116
- - Directory mirroring (`sasdata/ → parquetdata/`)
117
- - Logging and CLI execution paths
118
-
119
- ### Run tests
120
-
121
- ```bash
122
- pip install -e .[dev]
123
- pytest -q
124
- ```
125
-
126
- ## 📦 Dependencies
127
- - pandas>=2.0
128
- - polars>=0.20
129
- - pyarrow>=15.0
130
- - pyreadstat>=1.4
131
-
132
101
  ## 📄 License
133
102
  MIT License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sas2parquet"
3
- version = "0.1.1"
3
+ version = "0.1.5"
4
4
  description = "SAS → Parquet Hybrid Converter & Validator"
5
5
  authors = [
6
6
  {name = "Zaman Ziabakhshganji",email = "zaman.ganji@gmail.com"}
@@ -317,7 +317,27 @@ def reconvert_file_ultimate(sas_path: Path, parquet_path: Path) -> bool:
317
317
  writer.close()
318
318
  print(" ✅ Conversion succeeded")
319
319
 
320
+ # ===== FULL PARQUET VALIDATION (WORKING) =====
321
+ print(" 🔍 Full Parquet validation...")
322
+ try:
323
+ pf = pq.ParquetFile(parquet_path)
324
+ total_rows = 0
325
+ num_groups = pf.metadata.num_row_groups
326
+ batch_count = 0
327
+
328
+ for batch in pf.iter_batches():
329
+ total_rows += batch.num_rows
330
+ batch_count += 1
331
+
332
+ print(f" ✅ Parquet fully validated: {total_rows:,} rows across {num_groups} groups ({batch_count} batches)")
333
+ pf.close()
334
+ except Exception as e:
335
+ print(f" ❌ Parquet validation failed: {e}")
336
+ return False
337
+ # ===== END =====
338
+
320
339
  st, dt = compare_and_report_diffs(sas_path, parquet_path)
340
+
321
341
  print(f" 🔍 Validation: {st}")
322
342
  for d in dt:
323
343
  print(" -", d.replace("\n", "\n "))
File without changes