parquet 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2b5b56cca903ed731d6981d3113e3833a6e6a6a0ffcd301040b32ab0c72bf9c1
4
- data.tar.gz: 0e4486e2a67a051852166ac81754c9bfb2807c2ffa51eeda5edb41050432e930
3
+ metadata.gz: 252c96a0dc96337e1d64514ee24ec0a470621fd25b5e1497de75f666e060e32e
4
+ data.tar.gz: 38749e71de16f448404bedf5f316a7b1cebbf51121bff05b76e83b50e7081968
5
5
  SHA512:
6
- metadata.gz: affe353c972f130973b309ca1ee928928278254830fa39bee8e4bed5452b5b381e2d27f6986067a0f9b27769a78f195887d371dc4825f61096414af92e9edb94
7
- data.tar.gz: 9c823757be4b81d3ccafb57571c1d98b530a0d10696712083a0447b4871bf90720ba588d8c48777926b3f7a7f2ffede0c2ed7c9a076420b4dea183b7290ba47e
6
+ metadata.gz: 5b4cc0e162f2f823e127167d93963b778c56f0ebaa1deda6bee87f5ff453b36cad1c7a77e6e53f35fde6c24492fcb6d002da0dab8749afcafd4ee7e084e81042
7
+ data.tar.gz: 39112b58e3c859b589a0b89d9ff59b18294c336c0d43357c5859a10153e5345b4fdc79cb1e1b09e5649441bb957c8c45506c3818bd1b13aaec8c5fa63a1415a8
data/Cargo.lock CHANGED
@@ -63,9 +63,9 @@ dependencies = [
63
63
 
64
64
  [[package]]
65
65
  name = "arrow-array"
66
- version = "54.1.0"
66
+ version = "54.2.0"
67
67
  source = "registry+https://github.com/rust-lang/crates.io-index"
68
- checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223"
68
+ checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a"
69
69
  dependencies = [
70
70
  "ahash",
71
71
  "arrow-buffer",
@@ -79,9 +79,9 @@ dependencies = [
79
79
 
80
80
  [[package]]
81
81
  name = "arrow-buffer"
82
- version = "54.1.0"
82
+ version = "54.2.0"
83
83
  source = "registry+https://github.com/rust-lang/crates.io-index"
84
- checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89"
84
+ checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a"
85
85
  dependencies = [
86
86
  "bytes",
87
87
  "half",
@@ -90,9 +90,9 @@ dependencies = [
90
90
 
91
91
  [[package]]
92
92
  name = "arrow-cast"
93
- version = "54.1.0"
93
+ version = "54.2.0"
94
94
  source = "registry+https://github.com/rust-lang/crates.io-index"
95
- checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870"
95
+ checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee"
96
96
  dependencies = [
97
97
  "arrow-array",
98
98
  "arrow-buffer",
@@ -110,9 +110,9 @@ dependencies = [
110
110
 
111
111
  [[package]]
112
112
  name = "arrow-data"
113
- version = "54.1.0"
113
+ version = "54.2.0"
114
114
  source = "registry+https://github.com/rust-lang/crates.io-index"
115
- checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754"
115
+ checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83"
116
116
  dependencies = [
117
117
  "arrow-buffer",
118
118
  "arrow-schema",
@@ -122,9 +122,9 @@ dependencies = [
122
122
 
123
123
  [[package]]
124
124
  name = "arrow-ipc"
125
- version = "54.1.0"
125
+ version = "54.2.0"
126
126
  source = "registry+https://github.com/rust-lang/crates.io-index"
127
- checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e"
127
+ checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6"
128
128
  dependencies = [
129
129
  "arrow-array",
130
130
  "arrow-buffer",
@@ -135,15 +135,15 @@ dependencies = [
135
135
 
136
136
  [[package]]
137
137
  name = "arrow-schema"
138
- version = "54.1.0"
138
+ version = "54.2.0"
139
139
  source = "registry+https://github.com/rust-lang/crates.io-index"
140
- checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6"
140
+ checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735"
141
141
 
142
142
  [[package]]
143
143
  name = "arrow-select"
144
- version = "54.1.0"
144
+ version = "54.2.0"
145
145
  source = "registry+https://github.com/rust-lang/crates.io-index"
146
- checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807"
146
+ checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539"
147
147
  dependencies = [
148
148
  "ahash",
149
149
  "arrow-array",
@@ -247,9 +247,9 @@ checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
247
247
 
248
248
  [[package]]
249
249
  name = "cc"
250
- version = "1.2.11"
250
+ version = "1.2.15"
251
251
  source = "registry+https://github.com/rust-lang/crates.io-index"
252
- checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf"
252
+ checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af"
253
253
  dependencies = [
254
254
  "jobserver",
255
255
  "libc",
@@ -273,14 +273,14 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
273
273
 
274
274
  [[package]]
275
275
  name = "chrono"
276
- version = "0.4.39"
276
+ version = "0.4.40"
277
277
  source = "registry+https://github.com/rust-lang/crates.io-index"
278
- checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
278
+ checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
279
279
  dependencies = [
280
280
  "android-tzdata",
281
281
  "iana-time-zone",
282
282
  "num-traits",
283
- "windows-targets",
283
+ "windows-link",
284
284
  ]
285
285
 
286
286
  [[package]]
@@ -337,9 +337,9 @@ checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
337
337
 
338
338
  [[package]]
339
339
  name = "either"
340
- version = "1.13.0"
340
+ version = "1.14.0"
341
341
  source = "registry+https://github.com/rust-lang/crates.io-index"
342
- checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
342
+ checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
343
343
 
344
344
  [[package]]
345
345
  name = "errno"
@@ -369,9 +369,9 @@ dependencies = [
369
369
 
370
370
  [[package]]
371
371
  name = "flate2"
372
- version = "1.0.35"
372
+ version = "1.1.0"
373
373
  source = "registry+https://github.com/rust-lang/crates.io-index"
374
- checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c"
374
+ checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
375
375
  dependencies = [
376
376
  "crc32fast",
377
377
  "miniz_oxide",
@@ -498,9 +498,9 @@ dependencies = [
498
498
 
499
499
  [[package]]
500
500
  name = "jiff"
501
- version = "0.1.29"
501
+ version = "0.2.1"
502
502
  source = "registry+https://github.com/rust-lang/crates.io-index"
503
- checksum = "c04ef77ae73f3cf50510712722f0c4e8b46f5aaa1bf5ffad2ae213e6495e78e5"
503
+ checksum = "3590fea8e9e22d449600c9bbd481a8163bef223e4ff938e5f55899f8cf1adb93"
504
504
  dependencies = [
505
505
  "jiff-tzdb-platform",
506
506
  "log",
@@ -622,9 +622,9 @@ dependencies = [
622
622
 
623
623
  [[package]]
624
624
  name = "libc"
625
- version = "0.2.169"
625
+ version = "0.2.170"
626
626
  source = "registry+https://github.com/rust-lang/crates.io-index"
627
- checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
627
+ checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
628
628
 
629
629
  [[package]]
630
630
  name = "libloading"
@@ -660,9 +660,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
660
660
 
661
661
  [[package]]
662
662
  name = "log"
663
- version = "0.4.25"
663
+ version = "0.4.26"
664
664
  source = "registry+https://github.com/rust-lang/crates.io-index"
665
- checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
665
+ checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
666
666
 
667
667
  [[package]]
668
668
  name = "lz4_flex"
@@ -719,9 +719,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
719
719
 
720
720
  [[package]]
721
721
  name = "miniz_oxide"
722
- version = "0.8.3"
722
+ version = "0.8.5"
723
723
  source = "registry+https://github.com/rust-lang/crates.io-index"
724
- checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
724
+ checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
725
725
  dependencies = [
726
726
  "adler2",
727
727
  ]
@@ -812,9 +812,9 @@ dependencies = [
812
812
 
813
813
  [[package]]
814
814
  name = "once_cell"
815
- version = "1.20.2"
815
+ version = "1.20.3"
816
816
  source = "registry+https://github.com/rust-lang/crates.io-index"
817
- checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
817
+ checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
818
818
 
819
819
  [[package]]
820
820
  name = "ordered-float"
@@ -833,12 +833,13 @@ dependencies = [
833
833
  "arrow-array",
834
834
  "arrow-schema",
835
835
  "bytes",
836
+ "either",
836
837
  "itertools 0.14.0",
837
838
  "jemallocator",
838
839
  "jiff",
839
840
  "magnus",
840
841
  "mimalloc",
841
- "parquet 54.1.0",
842
+ "parquet 54.2.0",
842
843
  "rand",
843
844
  "rb-sys",
844
845
  "simdutf8",
@@ -848,9 +849,9 @@ dependencies = [
848
849
 
849
850
  [[package]]
850
851
  name = "parquet"
851
- version = "54.1.0"
852
+ version = "54.2.0"
852
853
  source = "registry+https://github.com/rust-lang/crates.io-index"
853
- checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235"
854
+ checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb"
854
855
  dependencies = [
855
856
  "ahash",
856
857
  "arrow-array",
@@ -895,9 +896,9 @@ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
895
896
 
896
897
  [[package]]
897
898
  name = "portable-atomic"
898
- version = "1.10.0"
899
+ version = "1.11.0"
899
900
  source = "registry+https://github.com/rust-lang/crates.io-index"
900
- checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
901
+ checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
901
902
 
902
903
  [[package]]
903
904
  name = "portable-atomic-util"
@@ -943,7 +944,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
943
944
  dependencies = [
944
945
  "rand_chacha",
945
946
  "rand_core",
946
- "zerocopy 0.8.15",
947
+ "zerocopy 0.8.20",
947
948
  ]
948
949
 
949
950
  [[package]]
@@ -958,12 +959,12 @@ dependencies = [
958
959
 
959
960
  [[package]]
960
961
  name = "rand_core"
961
- version = "0.9.0"
962
+ version = "0.9.2"
962
963
  source = "registry+https://github.com/rust-lang/crates.io-index"
963
- checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff"
964
+ checksum = "7a509b1a2ffbe92afab0e55c8fd99dea1c280e8171bd2d88682bb20bc41cbc2c"
964
965
  dependencies = [
965
966
  "getrandom 0.3.1",
966
- "zerocopy 0.8.15",
967
+ "zerocopy 0.8.20",
967
968
  ]
968
969
 
969
970
  [[package]]
@@ -1079,18 +1080,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
1079
1080
 
1080
1081
  [[package]]
1081
1082
  name = "serde"
1082
- version = "1.0.217"
1083
+ version = "1.0.218"
1083
1084
  source = "registry+https://github.com/rust-lang/crates.io-index"
1084
- checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
1085
+ checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
1085
1086
  dependencies = [
1086
1087
  "serde_derive",
1087
1088
  ]
1088
1089
 
1089
1090
  [[package]]
1090
1091
  name = "serde_derive"
1091
- version = "1.0.217"
1092
+ version = "1.0.218"
1092
1093
  source = "registry+https://github.com/rust-lang/crates.io-index"
1093
- checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
1094
+ checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
1094
1095
  dependencies = [
1095
1096
  "proc-macro2",
1096
1097
  "quote",
@@ -1099,9 +1100,9 @@ dependencies = [
1099
1100
 
1100
1101
  [[package]]
1101
1102
  name = "serde_json"
1102
- version = "1.0.138"
1103
+ version = "1.0.139"
1103
1104
  source = "registry+https://github.com/rust-lang/crates.io-index"
1104
- checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
1105
+ checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
1105
1106
  dependencies = [
1106
1107
  "itoa",
1107
1108
  "memchr",
@@ -1152,9 +1153,9 @@ dependencies = [
1152
1153
 
1153
1154
  [[package]]
1154
1155
  name = "tempfile"
1155
- version = "3.16.0"
1156
+ version = "3.17.1"
1156
1157
  source = "registry+https://github.com/rust-lang/crates.io-index"
1157
- checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
1158
+ checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
1158
1159
  dependencies = [
1159
1160
  "cfg-if",
1160
1161
  "fastrand",
@@ -1216,9 +1217,9 @@ dependencies = [
1216
1217
 
1217
1218
  [[package]]
1218
1219
  name = "unicode-ident"
1219
- version = "1.0.16"
1220
+ version = "1.0.17"
1220
1221
  source = "registry+https://github.com/rust-lang/crates.io-index"
1221
- checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
1222
+ checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
1222
1223
 
1223
1224
  [[package]]
1224
1225
  name = "version_check"
@@ -1308,6 +1309,12 @@ dependencies = [
1308
1309
  "windows-targets",
1309
1310
  ]
1310
1311
 
1312
+ [[package]]
1313
+ name = "windows-link"
1314
+ version = "0.1.0"
1315
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1316
+ checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3"
1317
+
1311
1318
  [[package]]
1312
1319
  name = "windows-sys"
1313
1320
  version = "0.59.0"
@@ -1402,11 +1409,11 @@ dependencies = [
1402
1409
 
1403
1410
  [[package]]
1404
1411
  name = "zerocopy"
1405
- version = "0.8.15"
1412
+ version = "0.8.20"
1406
1413
  source = "registry+https://github.com/rust-lang/crates.io-index"
1407
- checksum = "a1e101d4bc320b6f9abb68846837b70e25e380ca2f467ab494bf29fcc435fcc3"
1414
+ checksum = "dde3bb8c68a8f3f1ed4ac9221aad6b10cece3e60a8e2ea54a6a2dec806d0084c"
1408
1415
  dependencies = [
1409
- "zerocopy-derive 0.8.15",
1416
+ "zerocopy-derive 0.8.20",
1410
1417
  ]
1411
1418
 
1412
1419
  [[package]]
@@ -1422,9 +1429,9 @@ dependencies = [
1422
1429
 
1423
1430
  [[package]]
1424
1431
  name = "zerocopy-derive"
1425
- version = "0.8.15"
1432
+ version = "0.8.20"
1426
1433
  source = "registry+https://github.com/rust-lang/crates.io-index"
1427
- checksum = "03a73df1008145cd135b3c780d275c57c3e6ba8324a41bd5e0008fe167c3bc7c"
1434
+ checksum = "eea57037071898bf96a6da35fd626f4f27e9cee3ead2a6c703cf09d472b2e700"
1428
1435
  dependencies = [
1429
1436
  "proc-macro2",
1430
1437
  "quote",
@@ -1433,9 +1440,9 @@ dependencies = [
1433
1440
 
1434
1441
  [[package]]
1435
1442
  name = "zstd"
1436
- version = "0.13.2"
1443
+ version = "0.13.3"
1437
1444
  source = "registry+https://github.com/rust-lang/crates.io-index"
1438
- checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
1445
+ checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
1439
1446
  dependencies = [
1440
1447
  "zstd-safe",
1441
1448
  ]
data/README.md CHANGED
@@ -194,4 +194,108 @@ The following data types are supported in the schema:
194
194
  - `date32`
195
195
  - `timestamp_millis`, `timestamp_micros`
196
196
 
197
- Note: Writing of List and Map types is not currently supported.
197
+ ### Schema DSL for Complex Data Types
198
+
199
+ In addition to the hash-based schema definition shown above, this library provides a more expressive DSL for defining complex schemas with nested structures:
200
+
201
+ ```ruby
202
+ require "parquet"
203
+
204
+ # Define a complex schema using the Schema DSL
205
+ schema = Parquet::Schema.define do
206
+ field :id, :int64, nullable: false # Required field
207
+ field :name, :string # Optional field (nullable: true is default)
208
+
209
+ # Nested struct
210
+ field :address, :struct do
211
+ field :street, :string
212
+ field :city, :string
213
+ field :zip, :string
214
+ field :coordinates, :struct do
215
+ field :latitude, :double
216
+ field :longitude, :double
217
+ end
218
+ end
219
+
220
+ # List of primitives
221
+ field :scores, :list, item: :float
222
+
223
+ # List of structs
224
+ field :contacts, :list, item: :struct do
225
+ field :name, :string
226
+ field :phone, :string
227
+ field :primary, :boolean
228
+ end
229
+
230
+ # Map with string values
231
+ field :metadata, :map, key: :string, value: :string
232
+
233
+ # Map with struct values
234
+ field :properties, :map, key: :string, value: :struct do
235
+ field :count, :int32
236
+ field :description, :string
237
+ end
238
+
239
+ # Nested lists
240
+ field :nested_lists, :list, item: :list do
241
+ field :item, :string # For nested lists, inner item must be named 'item'
242
+ end
243
+
244
+ # Map of lists
245
+ field :map_of_lists, :map, key: :string, value: :list do
246
+ field :item, :int32 # For list items in maps, item must be named 'item'
247
+ end
248
+ end
249
+
250
+ # Sample data with nested structures
251
+ data = [
252
+ [
253
+ 1, # id
254
+ "John Doe", # name
255
+ { # address (struct)
256
+ "street" => "123 Main St",
257
+ "city" => "Springfield",
258
+ "zip" => "12345",
259
+ "coordinates" => {
260
+ "latitude" => 37.7749,
261
+ "longitude" => -122.4194
262
+ }
263
+ },
264
+ [85.5, 92.0, 78.5], # scores (list of floats)
265
+ [ # contacts (list of structs)
266
+ { "name" => "Contact 1", "phone" => "555-1234", "primary" => true },
267
+ { "name" => "Contact 2", "phone" => "555-5678", "primary" => false }
268
+ ],
269
+ { "created" => "2023-01-01", "status" => "active" }, # metadata (map)
270
+ { # properties (map of structs)
271
+ "feature1" => { "count" => 5, "description" => "Main feature" },
272
+ "feature2" => { "count" => 3, "description" => "Secondary feature" }
273
+ },
274
+ [["a", "b"], ["c", "d", "e"]], # nested_lists
275
+ { # map_of_lists
276
+ "group1" => [1, 2, 3],
277
+ "group2" => [4, 5, 6]
278
+ }
279
+ ]
280
+ ]
281
+
282
+ # Write to a parquet file using the schema
283
+ Parquet.write_rows(data.each, schema: schema, write_to: "complex_data.parquet")
284
+
285
+ # Read back the data
286
+ Parquet.each_row("complex_data.parquet") do |row|
287
+ puts row.inspect
288
+ end
289
+ ```
290
+
291
+ The Schema DSL supports:
292
+
293
+ - **Primitive types**: All standard Parquet types (`int32`, `string`, etc.)
294
+ - **Complex types**: Structs, lists, and maps with arbitrary nesting
295
+ - **Nullability control**: Specify which fields can contain null values with `nullable: false/true`
296
+ - **List item nullability**: Control whether list items can be null with `item_nullable: false/true`
297
+ - **Map key/value nullability**: Control whether map keys or values can be null with `key_nullable: false/true` and `value_nullable: false/true`
298
+
299
+ Note: When using List and Map types, you need to provide at least:
300
+ - For lists: The `item:` parameter specifying the item type
301
+ - For maps: Both `key:` and `value:` parameters specifying key and value types
@@ -11,15 +11,16 @@ ahash = "0.8"
11
11
  arrow-array = "54.0.0"
12
12
  arrow-schema = "54.0.0"
13
13
  bytes = "^1.9"
14
+ either = "1.9"
14
15
  itertools = "^0.14"
15
- jiff = "0.1.19"
16
+ jiff = "0.2"
16
17
  magnus = { version = "0.7", features = ["rb-sys"] }
17
18
  parquet = { version = "^54.0", features = ["json"] }
18
19
  rand = "0.9"
19
20
  rb-sys = "^0.9"
20
- thiserror = "2.0"
21
- tempfile = "^3.15"
22
21
  simdutf8 = "0.1.5"
22
+ tempfile = "^3.15"
23
+ thiserror = "2.0"
23
24
 
24
25
  [target.'cfg(target_os = "linux")'.dependencies]
25
26
  jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
@@ -7,6 +7,7 @@ pub struct RowEnumeratorArgs {
7
7
  pub result_type: ParserResultType,
8
8
  pub columns: Option<Vec<String>>,
9
9
  pub strict: bool,
10
+ pub logger: Option<Value>,
10
11
  }
11
12
 
12
13
  /// Creates an enumerator for lazy Parquet row parsing
@@ -22,6 +23,9 @@ pub fn create_row_enumerator(args: RowEnumeratorArgs) -> Result<magnus::Enumerat
22
23
  if args.strict {
23
24
  kwargs.aset(Symbol::new("strict"), true)?;
24
25
  }
26
+ if let Some(logger) = args.logger {
27
+ kwargs.aset(Symbol::new("logger"), logger)?;
28
+ }
25
29
  Ok(args
26
30
  .rb_self
27
31
  .enumeratorize("each_row", (args.to_read, KwArgs(kwargs))))
@@ -34,6 +38,7 @@ pub struct ColumnEnumeratorArgs {
34
38
  pub columns: Option<Vec<String>>,
35
39
  pub batch_size: Option<usize>,
36
40
  pub strict: bool,
41
+ pub logger: Option<Value>,
37
42
  }
38
43
 
39
44
  #[inline]
@@ -54,6 +59,9 @@ pub fn create_column_enumerator(
54
59
  if args.strict {
55
60
  kwargs.aset(Symbol::new("strict"), true)?;
56
61
  }
62
+ if let Some(logger) = args.logger {
63
+ kwargs.aset(Symbol::new("logger"), logger)?;
64
+ }
57
65
  Ok(args
58
66
  .rb_self
59
67
  .enumeratorize("each_column", (args.to_read, KwArgs(kwargs))))
@@ -20,6 +20,8 @@ use thiserror::Error;
20
20
  pub enum CacheError {
21
21
  #[error("Failed to acquire lock: {0}")]
22
22
  LockError(String),
23
+ #[error("Failed to convert Ruby String to interned string: {0}")]
24
+ RStringConversion(String),
23
25
  }
24
26
 
25
27
  static STRING_CACHE: LazyLock<Mutex<HashMap<&'static str, (StringCacheKey, AtomicU32)>>> =
@@ -31,10 +33,12 @@ pub struct StringCache;
31
33
  pub struct StringCacheKey(&'static str);
32
34
 
33
35
  impl StringCacheKey {
34
- pub fn new(string: &str) -> Self {
36
+ pub fn new(string: &str) -> Result<Self, CacheError> {
35
37
  let rstr = RString::new(string);
36
38
  let fstr = rstr.to_interned_str();
37
- Self(fstr.as_str().unwrap())
39
+ Ok(Self(fstr.as_str().map_err(|e| {
40
+ CacheError::RStringConversion(e.to_string())
41
+ })?))
38
42
  }
39
43
  }
40
44
 
@@ -90,7 +94,7 @@ impl StringCache {
90
94
  counter.fetch_add(1, Ordering::Relaxed);
91
95
  result.push(*interned_string);
92
96
  } else {
93
- let interned = StringCacheKey::new(string.as_ref());
97
+ let interned = StringCacheKey::new(string.as_ref())?;
94
98
  cache.insert(interned.0, (interned, AtomicU32::new(1)));
95
99
  result.push(interned);
96
100
  }
@@ -1,6 +1,7 @@
1
1
  mod allocator;
2
2
  mod enumerator;
3
3
  pub mod header_cache;
4
+ mod logger;
4
5
  mod reader;
5
6
  mod ruby_reader;
6
7
  mod types;