parquet 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +124 -72
  3. data/Gemfile +1 -0
  4. data/README.md +10 -3
  5. data/ext/parquet/Cargo.toml +6 -6
  6. data/ext/parquet/src/adapter_ffi.rs +169 -34
  7. data/ext/parquet-core/Cargo.toml +6 -5
  8. data/ext/parquet-core/src/arrow_conversion.rs +234 -290
  9. data/ext/parquet-core/src/error.rs +28 -2
  10. data/ext/parquet-core/src/lib.rs +1 -1
  11. data/ext/parquet-core/src/reader.rs +104 -52
  12. data/ext/parquet-core/src/schema.rs +171 -4
  13. data/ext/parquet-core/src/test_utils.rs +1 -1
  14. data/ext/parquet-core/src/traits/schema.rs +46 -7
  15. data/ext/parquet-core/src/value.rs +7 -3
  16. data/ext/parquet-core/src/writer.rs +410 -52
  17. data/ext/parquet-core/tests/arrow_conversion_tests.rs +54 -35
  18. data/ext/parquet-core/tests/binary_data.rs +1 -1
  19. data/ext/parquet-core/tests/column_projection.rs +1 -1
  20. data/ext/parquet-core/tests/complex_types.rs +1 -1
  21. data/ext/parquet-core/tests/compression_tests.rs +1 -1
  22. data/ext/parquet-core/tests/concurrent_access.rs +10 -9
  23. data/ext/parquet-core/tests/decimal_tests.rs +7 -7
  24. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +1 -1
  25. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +18 -25
  26. data/ext/parquet-core/tests/null_handling_tests.rs +1 -1
  27. data/ext/parquet-core/tests/primitive_types.rs +1 -1
  28. data/ext/parquet-core/tests/real_world_patterns.rs +2 -2
  29. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  30. data/ext/parquet-core/tests/roundtrip_correctness.rs +1 -1
  31. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +8 -0
  32. data/ext/parquet-core/tests/temporal_tests.rs +1 -1
  33. data/ext/parquet-core/tests/test_helpers.rs +1 -1
  34. data/ext/parquet-core/tests/writer_tests.rs +1 -1
  35. data/ext/parquet-ruby-adapter/Cargo.toml +6 -5
  36. data/ext/parquet-ruby-adapter/src/converter.rs +20 -23
  37. data/ext/parquet-ruby-adapter/src/error.rs +14 -21
  38. data/ext/parquet-ruby-adapter/src/lib.rs +6 -5
  39. data/ext/parquet-ruby-adapter/src/logger.rs +5 -2
  40. data/ext/parquet-ruby-adapter/src/metadata.rs +15 -15
  41. data/ext/parquet-ruby-adapter/src/reader.rs +67 -52
  42. data/ext/parquet-ruby-adapter/src/schema.rs +132 -87
  43. data/ext/parquet-ruby-adapter/src/string_cache.rs +72 -62
  44. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  45. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  46. data/ext/parquet-ruby-adapter/src/types.rs +5 -1
  47. data/ext/parquet-ruby-adapter/src/utils.rs +144 -74
  48. data/ext/parquet-ruby-adapter/src/writer.rs +82 -95
  49. data/lib/parquet/schema.rb +6 -2
  50. data/lib/parquet/version.rb +1 -1
  51. data/lib/parquet.rbi +54 -12
  52. metadata +19 -3
  53. data/ext/parquet-ruby-adapter/src/batch_manager.rs +0 -116
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd1247418c45b1f64165c2fddbb835adb487a2d938e31b80eb015431caa167ed
4
- data.tar.gz: 07ef3cb8663a956736200121b4c99046079a52b7cb2136f2755f6fdafba2102c
3
+ metadata.gz: 4b40af9f0e3f469b64b66a59e050c801ab97ee7aad4927179c9923deeab278d7
4
+ data.tar.gz: 18cf939257c201b9485ce009194098260cfffd5b5db8f3e92d59e243b3e54b58
5
5
  SHA512:
6
- metadata.gz: 4b5c65cbae6d67c499e778835ba68a4bc0dd223006028e1c5ad832beedbf0c296b569af6362d0554464b512f25f3c1874e3cd18d8ada83668c92b79de4926079
7
- data.tar.gz: 4d1a8e2ee202b9fc810d25307bfe3315a9ba38fd50f9d9f7a796e69bcfe7cd95904820d509a86e5792974143eacc4cfd87a7ecf66cfe421db24fbf5b8f6afe82
6
+ metadata.gz: 0aad32ede402031bb3a6cb9ad617a76c0fc16e03305ad8bf0eea60db31b256cd47a0875d36f690c6cd030735646b880c70490eee461b712e1ad9c20e9cf2f847
7
+ data.tar.gz: 83d099424fe612c95d6f16f544594804aed5e5f8e987aa2cac8eff686852fc67c90dec747b8ea8d6137e4833faa02651f1a4f9a0f964e3c1c1d1c08064d70b4d
data/Cargo.lock CHANGED
@@ -63,8 +63,9 @@ dependencies = [
63
63
 
64
64
  [[package]]
65
65
  name = "arrow"
66
- version = "55.2.0"
67
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
66
+ version = "58.3.0"
67
+ source = "registry+https://github.com/rust-lang/crates.io-index"
68
+ checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471"
68
69
  dependencies = [
69
70
  "arrow-arith",
70
71
  "arrow-array",
@@ -83,21 +84,23 @@ dependencies = [
83
84
 
84
85
  [[package]]
85
86
  name = "arrow-arith"
86
- version = "55.2.0"
87
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
87
+ version = "58.3.0"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738"
88
90
  dependencies = [
89
91
  "arrow-array",
90
92
  "arrow-buffer",
91
93
  "arrow-data",
92
94
  "arrow-schema",
93
95
  "chrono",
94
- "num",
96
+ "num-traits",
95
97
  ]
96
98
 
97
99
  [[package]]
98
100
  name = "arrow-array"
99
- version = "55.2.0"
100
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
101
+ version = "58.3.0"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e"
101
104
  dependencies = [
102
105
  "ahash",
103
106
  "arrow-buffer",
@@ -105,28 +108,34 @@ dependencies = [
105
108
  "arrow-schema",
106
109
  "chrono",
107
110
  "half",
108
- "hashbrown",
109
- "num",
111
+ "hashbrown 0.17.1",
112
+ "num-complex",
113
+ "num-integer",
114
+ "num-traits",
110
115
  ]
111
116
 
112
117
  [[package]]
113
118
  name = "arrow-buffer"
114
- version = "55.2.0"
115
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
119
+ version = "58.3.0"
120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
121
+ checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0"
116
122
  dependencies = [
117
123
  "bytes",
118
124
  "half",
119
- "num",
125
+ "num-bigint",
126
+ "num-traits",
120
127
  ]
121
128
 
122
129
  [[package]]
123
130
  name = "arrow-cast"
124
- version = "55.2.0"
125
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
131
+ version = "58.3.0"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f"
126
134
  dependencies = [
127
135
  "arrow-array",
128
136
  "arrow-buffer",
129
137
  "arrow-data",
138
+ "arrow-ord",
130
139
  "arrow-schema",
131
140
  "arrow-select",
132
141
  "atoi",
@@ -134,14 +143,15 @@ dependencies = [
134
143
  "chrono",
135
144
  "half",
136
145
  "lexical-core",
137
- "num",
146
+ "num-traits",
138
147
  "ryu",
139
148
  ]
140
149
 
141
150
  [[package]]
142
151
  name = "arrow-csv"
143
- version = "55.2.0"
144
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
152
+ version = "58.3.0"
153
+ source = "registry+https://github.com/rust-lang/crates.io-index"
154
+ checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de"
145
155
  dependencies = [
146
156
  "arrow-array",
147
157
  "arrow-cast",
@@ -154,53 +164,62 @@ dependencies = [
154
164
 
155
165
  [[package]]
156
166
  name = "arrow-data"
157
- version = "55.2.0"
158
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
167
+ version = "58.3.0"
168
+ source = "registry+https://github.com/rust-lang/crates.io-index"
169
+ checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0"
159
170
  dependencies = [
160
171
  "arrow-buffer",
161
172
  "arrow-schema",
162
173
  "half",
163
- "num",
174
+ "num-integer",
175
+ "num-traits",
164
176
  ]
165
177
 
166
178
  [[package]]
167
179
  name = "arrow-ipc"
168
- version = "55.2.0"
169
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
180
+ version = "58.3.0"
181
+ source = "registry+https://github.com/rust-lang/crates.io-index"
182
+ checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f"
170
183
  dependencies = [
171
184
  "arrow-array",
172
185
  "arrow-buffer",
173
186
  "arrow-data",
174
187
  "arrow-schema",
188
+ "arrow-select",
175
189
  "flatbuffers",
176
190
  "lz4_flex",
177
191
  ]
178
192
 
179
193
  [[package]]
180
194
  name = "arrow-json"
181
- version = "55.2.0"
182
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
195
+ version = "58.3.0"
196
+ source = "registry+https://github.com/rust-lang/crates.io-index"
197
+ checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b"
183
198
  dependencies = [
184
199
  "arrow-array",
185
200
  "arrow-buffer",
186
201
  "arrow-cast",
187
- "arrow-data",
202
+ "arrow-ord",
188
203
  "arrow-schema",
204
+ "arrow-select",
189
205
  "chrono",
190
206
  "half",
191
207
  "indexmap",
208
+ "itoa",
192
209
  "lexical-core",
193
210
  "memchr",
194
- "num",
195
- "serde",
211
+ "num-traits",
212
+ "ryu",
213
+ "serde_core",
196
214
  "serde_json",
197
215
  "simdutf8",
198
216
  ]
199
217
 
200
218
  [[package]]
201
219
  name = "arrow-ord"
202
- version = "55.2.0"
203
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
220
+ version = "58.3.0"
221
+ source = "registry+https://github.com/rust-lang/crates.io-index"
222
+ checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0"
204
223
  dependencies = [
205
224
  "arrow-array",
206
225
  "arrow-buffer",
@@ -211,8 +230,9 @@ dependencies = [
211
230
 
212
231
  [[package]]
213
232
  name = "arrow-row"
214
- version = "55.2.0"
215
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
233
+ version = "58.3.0"
234
+ source = "registry+https://github.com/rust-lang/crates.io-index"
235
+ checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c"
216
236
  dependencies = [
217
237
  "arrow-array",
218
238
  "arrow-buffer",
@@ -223,30 +243,33 @@ dependencies = [
223
243
 
224
244
  [[package]]
225
245
  name = "arrow-schema"
226
- version = "55.2.0"
227
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
246
+ version = "58.3.0"
247
+ source = "registry+https://github.com/rust-lang/crates.io-index"
248
+ checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed"
228
249
  dependencies = [
229
- "serde",
250
+ "serde_core",
230
251
  "serde_json",
231
252
  ]
232
253
 
233
254
  [[package]]
234
255
  name = "arrow-select"
235
- version = "55.2.0"
236
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
256
+ version = "58.3.0"
257
+ source = "registry+https://github.com/rust-lang/crates.io-index"
258
+ checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222"
237
259
  dependencies = [
238
260
  "ahash",
239
261
  "arrow-array",
240
262
  "arrow-buffer",
241
263
  "arrow-data",
242
264
  "arrow-schema",
243
- "num",
265
+ "num-traits",
244
266
  ]
245
267
 
246
268
  [[package]]
247
269
  name = "arrow-string"
248
- version = "55.2.0"
249
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
270
+ version = "58.3.0"
271
+ source = "registry+https://github.com/rust-lang/crates.io-index"
272
+ checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42"
250
273
  dependencies = [
251
274
  "arrow-array",
252
275
  "arrow-buffer",
@@ -254,7 +277,7 @@ dependencies = [
254
277
  "arrow-schema",
255
278
  "arrow-select",
256
279
  "memchr",
257
- "num",
280
+ "num-traits",
258
281
  "regex",
259
282
  "regex-syntax",
260
283
  ]
@@ -551,6 +574,12 @@ version = "0.15.4"
551
574
  source = "registry+https://github.com/rust-lang/crates.io-index"
552
575
  checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
553
576
 
577
+ [[package]]
578
+ name = "hashbrown"
579
+ version = "0.17.1"
580
+ source = "registry+https://github.com/rust-lang/crates.io-index"
581
+ checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
582
+
554
583
  [[package]]
555
584
  name = "iana-time-zone"
556
585
  version = "0.1.63"
@@ -582,7 +611,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
582
611
  checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
583
612
  dependencies = [
584
613
  "equivalent",
585
- "hashbrown",
614
+ "hashbrown 0.15.4",
586
615
  ]
587
616
 
588
617
  [[package]]
@@ -827,30 +856,30 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
827
856
 
828
857
  [[package]]
829
858
  name = "lz4_flex"
830
- version = "0.11.5"
859
+ version = "0.13.1"
831
860
  source = "registry+https://github.com/rust-lang/crates.io-index"
832
- checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
861
+ checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e"
833
862
  dependencies = [
834
863
  "twox-hash",
835
864
  ]
836
865
 
837
866
  [[package]]
838
867
  name = "magnus"
839
- version = "0.7.1"
868
+ version = "0.8.2"
840
869
  source = "registry+https://github.com/rust-lang/crates.io-index"
841
- checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
870
+ checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
842
871
  dependencies = [
843
872
  "magnus-macros",
844
873
  "rb-sys",
845
- "rb-sys-env 0.1.2",
874
+ "rb-sys-env",
846
875
  "seq-macro",
847
876
  ]
848
877
 
849
878
  [[package]]
850
879
  name = "magnus-macros"
851
- version = "0.6.0"
880
+ version = "0.8.0"
852
881
  source = "registry+https://github.com/rust-lang/crates.io-index"
853
- checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
882
+ checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
854
883
  dependencies = [
855
884
  "proc-macro2",
856
885
  "quote",
@@ -1013,11 +1042,11 @@ dependencies = [
1013
1042
  "mimalloc",
1014
1043
  "num",
1015
1044
  "ordered-float 5.0.0",
1016
- "parquet 55.2.0",
1045
+ "parquet 58.3.0",
1017
1046
  "parquet-ruby-adapter",
1018
1047
  "rand",
1019
1048
  "rb-sys",
1020
- "rb-sys-env 0.2.2",
1049
+ "rb-sys-env",
1021
1050
  "simdutf8",
1022
1051
  "tempfile",
1023
1052
  "thiserror",
@@ -1026,13 +1055,13 @@ dependencies = [
1026
1055
 
1027
1056
  [[package]]
1028
1057
  name = "parquet"
1029
- version = "55.2.0"
1030
- source = "git+https://github.com/njaremko/arrow-rs?branch=nathan_06-24-remove_primitive_map_key_assertion_on_record_reader#54858bf019ff3faeb8f5b562da8c01012162aef0"
1058
+ version = "58.3.0"
1059
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1060
+ checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908"
1031
1061
  dependencies = [
1032
1062
  "ahash",
1033
1063
  "arrow-array",
1034
1064
  "arrow-buffer",
1035
- "arrow-cast",
1036
1065
  "arrow-data",
1037
1066
  "arrow-ipc",
1038
1067
  "arrow-schema",
@@ -1043,10 +1072,11 @@ dependencies = [
1043
1072
  "chrono",
1044
1073
  "flate2",
1045
1074
  "half",
1046
- "hashbrown",
1075
+ "hashbrown 0.17.1",
1047
1076
  "lz4_flex",
1048
- "num",
1049
1077
  "num-bigint",
1078
+ "num-integer",
1079
+ "num-traits",
1050
1080
  "paste",
1051
1081
  "seq-macro",
1052
1082
  "serde_json",
@@ -1070,11 +1100,12 @@ dependencies = [
1070
1100
  "jiff",
1071
1101
  "num",
1072
1102
  "ordered-float 5.0.0",
1073
- "parquet 55.2.0",
1103
+ "parquet 58.3.0",
1074
1104
  "rand",
1075
1105
  "serde",
1076
1106
  "tempfile",
1077
1107
  "thiserror",
1108
+ "triomphe",
1078
1109
  "uuid",
1079
1110
  ]
1080
1111
 
@@ -1090,12 +1121,13 @@ dependencies = [
1090
1121
  "magnus",
1091
1122
  "num",
1092
1123
  "ordered-float 5.0.0",
1093
- "parquet 55.2.0",
1124
+ "parquet 58.3.0",
1094
1125
  "parquet-core",
1095
1126
  "rb-sys",
1096
- "rb-sys-env 0.2.2",
1127
+ "rb-sys-env",
1097
1128
  "tempfile",
1098
1129
  "thiserror",
1130
+ "triomphe",
1099
1131
  "uuid",
1100
1132
  ]
1101
1133
 
@@ -1190,18 +1222,18 @@ dependencies = [
1190
1222
 
1191
1223
  [[package]]
1192
1224
  name = "rb-sys"
1193
- version = "0.9.116"
1225
+ version = "0.9.124"
1194
1226
  source = "registry+https://github.com/rust-lang/crates.io-index"
1195
- checksum = "7059846f68396df83155779c75336ca24567741cb95256e6308c9fcc370e8dad"
1227
+ checksum = "c85c4188462601e2aa1469def389c17228566f82ea72f137ed096f21591bc489"
1196
1228
  dependencies = [
1197
1229
  "rb-sys-build",
1198
1230
  ]
1199
1231
 
1200
1232
  [[package]]
1201
1233
  name = "rb-sys-build"
1202
- version = "0.9.116"
1234
+ version = "0.9.124"
1203
1235
  source = "registry+https://github.com/rust-lang/crates.io-index"
1204
- checksum = "ac217510df41b9ffc041573e68d7a02aaff770c49943c7494441c4b224b0ecd0"
1236
+ checksum = "568068db4102230882e6d4ae8de6632e224ca75fe5970f6e026a04e91ed635d3"
1205
1237
  dependencies = [
1206
1238
  "bindgen",
1207
1239
  "lazy_static",
@@ -1212,12 +1244,6 @@ dependencies = [
1212
1244
  "syn",
1213
1245
  ]
1214
1246
 
1215
- [[package]]
1216
- name = "rb-sys-env"
1217
- version = "0.1.2"
1218
- source = "registry+https://github.com/rust-lang/crates.io-index"
1219
- checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
1220
-
1221
1247
  [[package]]
1222
1248
  name = "rb-sys-env"
1223
1249
  version = "0.2.2"
@@ -1307,18 +1333,28 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
1307
1333
 
1308
1334
  [[package]]
1309
1335
  name = "serde"
1310
- version = "1.0.219"
1336
+ version = "1.0.228"
1337
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1338
+ checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
1339
+ dependencies = [
1340
+ "serde_core",
1341
+ "serde_derive",
1342
+ ]
1343
+
1344
+ [[package]]
1345
+ name = "serde_core"
1346
+ version = "1.0.228"
1311
1347
  source = "registry+https://github.com/rust-lang/crates.io-index"
1312
- checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
1348
+ checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
1313
1349
  dependencies = [
1314
1350
  "serde_derive",
1315
1351
  ]
1316
1352
 
1317
1353
  [[package]]
1318
1354
  name = "serde_derive"
1319
- version = "1.0.219"
1355
+ version = "1.0.228"
1320
1356
  source = "registry+https://github.com/rust-lang/crates.io-index"
1321
- checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
1357
+ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
1322
1358
  dependencies = [
1323
1359
  "proc-macro2",
1324
1360
  "quote",
@@ -1361,6 +1397,12 @@ version = "1.1.1"
1361
1397
  source = "registry+https://github.com/rust-lang/crates.io-index"
1362
1398
  checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
1363
1399
 
1400
+ [[package]]
1401
+ name = "stable_deref_trait"
1402
+ version = "1.2.1"
1403
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1404
+ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
1405
+
1364
1406
  [[package]]
1365
1407
  name = "static_assertions"
1366
1408
  version = "1.1.0"
@@ -1431,6 +1473,16 @@ dependencies = [
1431
1473
  "crunchy",
1432
1474
  ]
1433
1475
 
1476
+ [[package]]
1477
+ name = "triomphe"
1478
+ version = "0.1.15"
1479
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1480
+ checksum = "dd69c5aa8f924c7519d6372789a74eac5b94fb0f8fcf0d4a97eb0bfc3e785f39"
1481
+ dependencies = [
1482
+ "serde",
1483
+ "stable_deref_trait",
1484
+ ]
1485
+
1434
1486
  [[package]]
1435
1487
  name = "twox-hash"
1436
1488
  version = "2.1.1"
data/Gemfile CHANGED
@@ -16,5 +16,6 @@ end
16
16
 
17
17
  group :test do
18
18
  gem "csv"
19
+ gem "logger"
19
20
  gem "minitest", "~> 5.0"
20
21
  end
data/README.md CHANGED
@@ -166,7 +166,7 @@ end
166
166
  Parquet.write_rows(rows,
167
167
  schema: schema,
168
168
  write_to: "output.parquet",
169
- batch_size: 5000 # Rows per batch (default: 1000)
169
+ batch_size: 5000 # Positive rows per batch (default: 1000)
170
170
  )
171
171
  ```
172
172
 
@@ -200,6 +200,9 @@ Parquet.write_columns(batches.each,
200
200
  )
201
201
  ```
202
202
 
203
+ `write_columns` also accepts `logger:` with the same Ruby logger interface as
204
+ row writes.
205
+
203
206
  ## Data Types
204
207
 
205
208
  ### Basic Types
@@ -340,7 +343,7 @@ Parquet.write_rows(data.each, schema: schema, write_to: "complex.parquet")
340
343
 
341
344
  ### Timezone Handling in Parquet
342
345
 
343
- **Critical**: The Parquet specification has a fundamental limitation with timezone storage:
346
+ The Parquet specification has a fundamental limitation with timezone storage:
344
347
 
345
348
  1. **UTC-normalized**: Any timestamp with timezone info (including "+09:00" or "America/New_York") is converted to UTC
346
349
  2. **Local/unzoned**: Timestamps without timezone info are stored as-is
@@ -382,11 +385,15 @@ Control memory usage with flush thresholds:
382
385
  Parquet.write_rows(huge_dataset.each,
383
386
  schema: schema,
384
387
  write_to: "output.parquet",
385
- batch_size: 1000, # Rows before considering flush
388
+ batch_size: 1000, # Positive rows before considering flush
386
389
  flush_threshold: 32 * 1024**2 # Flush if batch exceeds 32MB
387
390
  )
388
391
  ```
389
392
 
393
+ Write batch and sample sizes are bounded before buffer allocation. Very large
394
+ batch sizes are rejected, and wide schemas have a lower effective batch cap so
395
+ the writer cannot reserve unbounded per-column value slots.
396
+
390
397
  ## Architecture
391
398
 
392
399
  This gem uses a modular architecture:
@@ -11,16 +11,16 @@ rb-sys-env = "^0.2"
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
- arrow-array = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
15
- arrow-buffer = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
16
- arrow-ipc = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader", features = ["lz4"] }
17
- arrow-schema = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
14
+ arrow-array = "58.3.0"
15
+ arrow-buffer = "58.3.0"
16
+ arrow-ipc = { version = "58.3.0", features = ["lz4"] }
17
+ arrow-schema = "58.3.0"
18
18
  bytes = "^1.9"
19
19
  either = "1.9"
20
20
  itertools = "^0.14"
21
21
  jiff = "0.2"
22
- magnus = { version = "0.7", features = ["rb-sys"] }
23
- parquet = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader", features = ["json"] }
22
+ magnus = { version = "0.8", features = ["rb-sys"] }
23
+ parquet = { version = "58.3.0", features = ["json"] }
24
24
  parquet-ruby-adapter = { path = "../parquet-ruby-adapter" }
25
25
  rand = "0.9"
26
26
  rb-sys = "^0.9"