parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
data/ext/parquet-core/tests/writer_tests.rs
@@ -0,0 +1,545 @@
+ use bytes::Bytes;
+ use ordered_float::OrderedFloat;
+ use parquet::basic::Compression;
+ use parquet::file::properties::WriterProperties;
+ use parquet_core::*;
+ use std::sync::Arc;
+
+ mod test_helpers;
+ use test_helpers::*;
+
+ // =============================================================================
+ // Basic Writer Functionality Tests
+ // =============================================================================
+
+ #[test]
+ fn test_writer_basic_functionality() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![
+                 SchemaNode::Primitive {
+                     name: "id".to_string(),
+                     primitive_type: PrimitiveType::Int32,
+                     nullable: false,
+                     format: None,
+                 },
+                 SchemaNode::Primitive {
+                     name: "name".to_string(),
+                     primitive_type: PrimitiveType::String,
+                     nullable: true,
+                     format: None,
+                 },
+             ],
+         })
+         .build()
+         .unwrap();
+
+     let rows = vec![
+         vec![ParquetValue::Int32(1), ParquetValue::String("Alice".into())],
+         vec![
+             ParquetValue::Int32(2),
+             ParquetValue::Null, // nullable field
+         ],
+     ];
+
+     test_roundtrip(rows, schema).unwrap();
+ }
+
+ // =============================================================================
+ // Batch Size Configuration Tests
+ // =============================================================================
+
+ #[test]
+ fn test_writer_fixed_batch_sizes() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![
+                 SchemaNode::Primitive {
+                     name: "id".to_string(),
+                     primitive_type: PrimitiveType::Int64,
+                     nullable: false,
+                     format: None,
+                 },
+                 SchemaNode::Primitive {
+                     name: "data".to_string(),
+                     primitive_type: PrimitiveType::String,
+                     nullable: false,
+                     format: None,
+                 },
+             ],
+         })
+         .build()
+         .unwrap();
+
+     // Test different batch sizes
+     let batch_sizes = vec![10, 100, 1000, 5000];
+
+     for batch_size in batch_sizes {
+         // Generate test data
+         let rows: Vec<Vec<ParquetValue>> = (0..10000)
+             .map(|i| {
+                 vec![
+                     ParquetValue::Int64(i),
+                     ParquetValue::String(Arc::from(format!("Row {}", i))),
+                 ]
+             })
+             .collect();
+
+         // Use test_roundtrip_with_options for batch size testing
+         let result = test_roundtrip_with_options(
+             rows,
+             schema.clone(),
+             Compression::UNCOMPRESSED,
+             Some(batch_size),
+         );
+
+         assert!(
+             result.is_ok(),
+             "Batch size {} failed: {:?}",
+             batch_size,
+             result
+         );
+     }
+ }
+
+ #[test]
+ fn test_writer_adaptive_batch_sizing() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![
+                 SchemaNode::Primitive {
+                     name: "id".to_string(),
+                     primitive_type: PrimitiveType::Int32,
+                     nullable: false,
+                     format: None,
+                 },
+                 SchemaNode::Primitive {
+                     name: "variable_string".to_string(),
+                     primitive_type: PrimitiveType::String,
+                     nullable: false,
+                     format: None,
+                 },
+             ],
+         })
+         .build()
+         .unwrap();
+
+     let mut buffer = Vec::new();
+     {
+         // Don't set a fixed batch size - let it adapt
+         let mut writer = WriterBuilder::new()
+             .with_sample_size(50)
+             .build(&mut buffer, schema)
+             .unwrap();
+
+         // Write rows with varying sizes
+         for i in 0..1000 {
+             let string_size = if i % 100 == 0 {
+                 10000 // Large string every 100 rows
+             } else {
+                 100 // Normal string
+             };
+
+             let row = vec![
+                 ParquetValue::Int32(i),
+                 ParquetValue::String(Arc::from("x".repeat(string_size))),
+             ];
+
+             writer.write_row(row).unwrap();
+         }
+
+         writer.close().unwrap();
+     }
+
+     // Verify all data was written
+     let bytes = Bytes::from(buffer);
+     let reader = Reader::new(bytes);
+
+     let read_rows: Vec<_> = reader
+         .read_rows()
+         .unwrap()
+         .collect::<Result<Vec<_>>>()
+         .unwrap();
+
+     assert_eq!(read_rows.len(), 1000);
+
+     // Verify variable string sizes
+     for (i, row) in read_rows.iter().enumerate() {
+         match &row[1] {
+             ParquetValue::String(s) => {
+                 let expected_len = if i % 100 == 0 { 10000 } else { 100 };
+                 assert_eq!(s.len(), expected_len, "Wrong string length at row {}", i);
+             }
+             _ => panic!("Expected string value"),
+         }
+     }
+ }
+
+ // =============================================================================
+ // Memory Management Tests
+ // =============================================================================
+
+ #[test]
+ fn test_memory_threshold_configuration() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![SchemaNode::Primitive {
+                 name: "large_string".to_string(),
+                 primitive_type: PrimitiveType::String,
+                 nullable: false,
+                 format: None,
+             }],
+         })
+         .build()
+         .unwrap();
+
+     // Test with different memory thresholds
+     let thresholds = vec![
+         1024 * 1024,      // 1MB
+         10 * 1024 * 1024, // 10MB
+         50 * 1024 * 1024, // 50MB
+     ];
+
+     for threshold in thresholds {
+         let mut buffer = Vec::new();
+         {
+             let mut writer = WriterBuilder::new()
+                 .with_memory_threshold(threshold)
+                 .build(&mut buffer, schema.clone())
+                 .unwrap();
+
+             // Write large strings that will trigger memory-based flushing
+             let large_string: Arc<str> = Arc::from("x".repeat(1024)); // 1KB string
+             let rows: Vec<Vec<ParquetValue>> = (0..5000)
+                 .map(|_| vec![ParquetValue::String(large_string.clone())])
+                 .collect();
+
+             writer.write_rows(rows).unwrap();
+             writer.close().unwrap();
+         }
+
+         // Verify data was written correctly
+         let bytes = Bytes::from(buffer);
+         let reader = Reader::new(bytes);
+         let read_count = reader.read_rows().unwrap().count();
+         assert_eq!(read_count, 5000);
+     }
+ }
+
+ #[test]
+ fn test_writer_memory_flushing_with_binary() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![SchemaNode::Primitive {
+                 name: "data".to_string(),
+                 primitive_type: PrimitiveType::Binary,
+                 nullable: true,
+                 format: None,
+             }],
+         })
+         .build()
+         .unwrap();
+
+     // Generate test data
+     let rows: Vec<Vec<ParquetValue>> = (0..100)
+         .map(|i| {
+             let size = if i % 10 == 0 { 500 } else { 50 };
+             vec![ParquetValue::Bytes(Bytes::from(vec![i as u8; size]))]
+         })
+         .collect();
+
+     // Use a custom writer with memory threshold
+     let mut buffer = Vec::new();
+     {
+         let mut writer = WriterBuilder::new()
+             .with_memory_threshold(1024) // 1KB threshold
+             .with_sample_size(5)
+             .build(&mut buffer, schema.clone())
+             .unwrap();
+
+         writer.write_rows(rows.clone()).unwrap();
+         writer.close().unwrap();
+     }
+
+     // Verify data was written correctly
+     let reader = Reader::new(Bytes::from(buffer));
+     let read_rows: Vec<_> = reader
+         .read_rows()
+         .unwrap()
+         .collect::<Result<Vec<_>>>()
+         .unwrap();
+
+     assert_eq!(read_rows, rows);
+ }
+
+ // =============================================================================
+ // Advanced Configuration Tests
+ // =============================================================================
+
+ #[test]
+ fn test_writer_properties_direct() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![SchemaNode::Primitive {
+                 name: "value".to_string(),
+                 primitive_type: PrimitiveType::String,
+                 nullable: false,
+                 format: None,
+             }],
+         })
+         .build()
+         .unwrap();
+
+     // Test custom writer properties
+     let props = WriterProperties::builder()
+         .set_writer_version(parquet::file::properties::WriterVersion::PARQUET_2_0)
+         .set_compression(Compression::ZSTD(
+             parquet::basic::ZstdLevel::try_new(3).unwrap(),
+         ))
+         .set_data_page_size_limit(1024) // Small page size
+         .set_dictionary_enabled(true)
+         .set_statistics_enabled(parquet::file::properties::EnabledStatistics::Page)
+         .build();
+
+     let mut buffer = Vec::new();
+     {
+         let mut writer = Writer::new_with_properties(&mut buffer, schema, props).unwrap();
+
+         // Write data with repeated values to test dictionary encoding
+         let rows: Vec<Vec<ParquetValue>> = (0..1000)
+             .map(|i| {
+                 vec![ParquetValue::String(Arc::from(format!(
+                     "Category_{}",
+                     i % 10
+                 )))]
+             })
+             .collect();
+
+         writer.write_rows(rows).unwrap();
+         writer.close().unwrap();
+     }
+
+     // Read back and verify
+     let bytes = Bytes::from(buffer);
+     let mut reader = Reader::new(bytes);
+
+     // Check metadata
+     let metadata = reader.metadata().unwrap();
+     assert!(metadata.num_rows() == 1000);
+
+     // Verify data integrity
+     let read_count = reader.read_rows().unwrap().count();
+     assert_eq!(read_count, 1000);
+ }
+
+ #[test]
+ fn test_writer_version_compatibility() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![SchemaNode::Primitive {
+                 name: "value".to_string(),
+                 primitive_type: PrimitiveType::Int32,
+                 nullable: false,
+                 format: None,
+             }],
+         })
+         .build()
+         .unwrap();
+
+     let rows: Vec<Vec<ParquetValue>> = (0..100).map(|i| vec![ParquetValue::Int32(i)]).collect();
+
+     // Test different writer versions
+     let versions = vec![
+         parquet::file::properties::WriterVersion::PARQUET_1_0,
+         parquet::file::properties::WriterVersion::PARQUET_2_0,
+     ];
+
+     for version in versions {
+         let mut buffer = Vec::new();
+         {
+             let props = WriterProperties::builder()
+                 .set_writer_version(version)
+                 .build();
+
+             let mut writer =
+                 Writer::new_with_properties(&mut buffer, schema.clone(), props).unwrap();
+             writer.write_rows(rows.clone()).unwrap();
+             writer.close().unwrap();
+         }
+
+         // Verify we can read both versions
+         let bytes = Bytes::from(buffer);
+         let reader = Reader::new(bytes);
+
+         let read_rows: Vec<_> = reader
+             .read_rows()
+             .unwrap()
+             .collect::<Result<Vec<_>>>()
+             .unwrap();
+
+         assert_eq!(read_rows.len(), 100);
+     }
+ }
+
+ // =============================================================================
+ // Large Data Handling Tests
+ // =============================================================================
+
+ #[test]
+ fn test_large_string_handling() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![
+                 SchemaNode::Primitive {
+                     name: "id".to_string(),
+                     primitive_type: PrimitiveType::Int32,
+                     nullable: false,
+                     format: None,
+                 },
+                 SchemaNode::Primitive {
+                     name: "content".to_string(),
+                     primitive_type: PrimitiveType::String,
+                     nullable: true,
+                     format: None,
+                 },
+             ],
+         })
+         .build()
+         .unwrap();
+
+     // Create strings of various sizes
+     let small = "a".repeat(100);
+     let medium = "b".repeat(10_000);
+     let large = "c".repeat(100_000);
+
+     // Generate test data
+     let rows: Vec<Vec<ParquetValue>> = (0..30)
+         .map(|i| {
+             let content = match i % 3 {
+                 0 => ParquetValue::String(small.clone().into()),
+                 1 => ParquetValue::String(medium.clone().into()),
+                 2 => ParquetValue::String(large.clone().into()),
+                 _ => unreachable!(),
+             };
+
+             vec![ParquetValue::Int32(i), content]
+         })
+         .collect();
+
+     // Use custom writer with memory threshold
+     let mut buffer = Vec::new();
+     {
+         let mut writer = WriterBuilder::new()
+             .with_memory_threshold(1024 * 1024) // 1MB
+             .build(&mut buffer, schema.clone())
+             .unwrap();
+
+         writer.write_rows(rows.clone()).unwrap();
+         writer.close().unwrap();
+     }
+
+     // Verify all data was written
+     let reader = Reader::new(Bytes::from(buffer));
+     let read_rows: Vec<_> = reader.read_rows().unwrap().collect::<Result<_>>().unwrap();
+     assert_eq!(read_rows, rows);
+ }
+
+ #[test]
+ fn test_complex_nested_data_memory() {
+     let schema = SchemaBuilder::new()
+         .with_root(SchemaNode::Struct {
+             name: "root".to_string(),
+             nullable: false,
+             fields: vec![
+                 SchemaNode::Primitive {
+                     name: "id".to_string(),
+                     primitive_type: PrimitiveType::Int32,
+                     nullable: false,
+                     format: None,
+                 },
+                 SchemaNode::List {
+                     name: "items".to_string(),
+                     nullable: true,
+                     item: Box::new(SchemaNode::Struct {
+                         name: "item".to_string(),
+                         nullable: false,
+                         fields: vec![
+                             SchemaNode::Primitive {
+                                 name: "key".to_string(),
+                                 primitive_type: PrimitiveType::String,
+                                 nullable: false,
+                                 format: None,
+                             },
+                             SchemaNode::Primitive {
+                                 name: "value".to_string(),
+                                 primitive_type: PrimitiveType::Float64,
+                                 nullable: true,
+                                 format: None,
+                             },
+                         ],
+                     }),
+                 },
+             ],
+         })
+         .build()
+         .unwrap();
+
+     // Generate test data
+     let rows: Vec<Vec<ParquetValue>> = (0..100)
+         .map(|i| {
+             let num_items = (i % 10 + 1) as usize;
+             let mut items = Vec::new();
+
+             for j in 0..num_items {
+                 items.push(ParquetValue::Record(indexmap::indexmap! {
+                     "key".into() => ParquetValue::String(format!("key_{}_{}", i, j).into()),
+                     "value".into() => if j % 2 == 0 {
+                         ParquetValue::Float64(OrderedFloat(j as f64 * 1.5))
+                     } else {
+                         ParquetValue::Null
+                     },
+                 }));
+             }
+
+             vec![ParquetValue::Int32(i), ParquetValue::List(items)]
+         })
+         .collect();
+
+     // Use custom writer with memory threshold
+     let mut buffer = Vec::new();
+     {
+         let mut writer = WriterBuilder::new()
+             .with_memory_threshold(500 * 1024) // 500KB
+             .build(&mut buffer, schema.clone())
+             .unwrap();
+
+         writer.write_rows(rows.clone()).unwrap();
+         writer.close().unwrap();
+     }
+
+     // Read back and verify
+     let reader = Reader::new(Bytes::from(buffer));
+     let read_rows: Vec<_> = reader
+         .read_rows()
+         .unwrap()
+         .collect::<Result<Vec<_>>>()
+         .unwrap();
+
+     assert_eq!(read_rows, rows);
+ }
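Note: every test above reduces to the same write/read roundtrip. The following is a minimal sketch of that pattern, using only the parquet-core API exercised in the tests (SchemaBuilder, WriterBuilder, Reader, ParquetValue); treat it as illustrative, since these are internal crate APIs introduced in this release, not the gem's documented Ruby surface:

    use bytes::Bytes;
    use parquet_core::*;

    fn roundtrip() -> Result<()> {
        // Single non-nullable Int32 column named "id"
        let schema = SchemaBuilder::new()
            .with_root(SchemaNode::Struct {
                name: "root".to_string(),
                nullable: false,
                fields: vec![SchemaNode::Primitive {
                    name: "id".to_string(),
                    primitive_type: PrimitiveType::Int32,
                    nullable: false,
                    format: None,
                }],
            })
            .build()?;

        // Write rows into an in-memory buffer; batches flush once the
        // estimated buffered size crosses the configured threshold
        let mut buffer = Vec::new();
        {
            let mut writer = WriterBuilder::new()
                .with_memory_threshold(1024 * 1024) // 1MB
                .build(&mut buffer, schema)?;
            writer.write_rows((0..10).map(|i| vec![ParquetValue::Int32(i)]).collect())?;
            writer.close()?;
        }

        // Read everything back and check the row count
        let reader = Reader::new(Bytes::from(buffer));
        assert_eq!(reader.read_rows()?.count(), 10);
        Ok(())
    }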
data/ext/parquet-ruby-adapter/Cargo.toml
@@ -0,0 +1,22 @@
+ [package]
+ name = "parquet-ruby-adapter"
+ version = "0.1.0"
+ edition = "2021"
+
+ [build-dependencies]
+ rb-sys-env = "^0.2"
+
+ [dependencies]
+ arrow-array = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
+ arrow-buffer = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
+ arrow-schema = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader" }
+ bytes = "1.5"
+ magnus = { version = "0.7", features = ["rb-sys"] }
+ num = "0.4.3"
+ ordered-float = "5.0.0"
+ parquet = { git = "https://github.com/njaremko/arrow-rs", branch = "nathan_06-24-remove_primitive_map_key_assertion_on_record_reader", features = ["arrow"] }
+ parquet-core = { path = "../parquet-core" }
+ rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
+ tempfile = "^3.15"
+ thiserror = "2.0"
+ indexmap = "2.2"
data/ext/parquet-ruby-adapter/build.rs
@@ -0,0 +1,5 @@
+ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
+     rb_sys_env::activate()?;
+
+     Ok(())
+ }
data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs
@@ -0,0 +1,98 @@
+ use magnus::{IntoValue, Ruby, Value};
+ use parquet_ruby_adapter::{Result, RubyAdapterError, TryIntoValue};
+
+ /// Example struct that can fail during conversion to Ruby
+ struct ComplexData {
+     name: String,
+     values: Vec<i32>,
+     metadata: std::collections::HashMap<String, String>,
+ }
+
+ impl TryIntoValue for ComplexData {
+     fn try_into_value(self, handle: &Ruby) -> Result<Value> {
+         let hash = handle.hash_new();
+
+         // Set name
+         hash.aset("name", self.name)
+             .map_err(|e| RubyAdapterError::type_conversion(format!("Failed to set name: {}", e)))?;
+
+         // Convert values array
+         let values_array = handle.ary_new();
+         for value in self.values {
+             values_array.push(value).map_err(|e| {
+                 RubyAdapterError::type_conversion(format!("Failed to push value: {}", e))
+             })?;
+         }
+         hash.aset("values", values_array).map_err(|e| {
+             RubyAdapterError::type_conversion(format!("Failed to set values: {}", e))
+         })?;
+
+         // Convert metadata hash
+         let metadata_hash = handle.hash_new();
+         for (key, value) in self.metadata {
+             metadata_hash.aset(key.clone(), value).map_err(|e| {
+                 RubyAdapterError::type_conversion(format!(
+                     "Failed to set metadata key {}: {}",
+                     key, e
+                 ))
+             })?;
+         }
+         hash.aset("metadata", metadata_hash).map_err(|e| {
+             RubyAdapterError::type_conversion(format!("Failed to set metadata: {}", e))
+         })?;
+
+         Ok(handle.into_value(hash))
+     }
+ }
+
+ // Example of a type that might fail validation during conversion
+ struct ValidatedNumber {
+     value: i32,
+ }
+
+ impl TryIntoValue for ValidatedNumber {
+     fn try_into_value(self, handle: &Ruby) -> Result<Value> {
+         // Validate the number is positive
+         if self.value < 0 {
+             return Err(RubyAdapterError::type_conversion(format!(
+                 "ValidatedNumber must be positive, got {}",
+                 self.value
+             )));
+         }
+
+         // If valid, convert to Ruby
+         Ok(self.value.into_value_with(handle))
+     }
+ }
+
+ fn main() -> Result<()> {
+     // Example usage:
+     let ruby = Ruby::get().map_err(|_| RubyAdapterError::runtime("Failed to get Ruby runtime"))?;
+
+     // Success case
+     let data = ComplexData {
+         name: "example".to_string(),
+         values: vec![1, 2, 3],
+         metadata: std::collections::HashMap::from([
+             ("key1".to_string(), "value1".to_string()),
+             ("key2".to_string(), "value2".to_string()),
+         ]),
+     };
+
+     let _ruby_value = data.try_into_value(&ruby)?;
+     println!("Successfully converted ComplexData to Ruby value");
+
+     // Validation failure case
+     let invalid_number = ValidatedNumber { value: -5 };
+     match invalid_number.try_into_value(&ruby) {
+         Ok(_) => println!("This shouldn't happen"),
+         Err(e) => println!("Expected validation error: {}", e),
+     }
+
+     // Using the convenience method
+     let valid_number = ValidatedNumber { value: 42 };
+     let _ruby_value = valid_number.try_into_value_with_current_thread()?;
+     println!("Successfully converted ValidatedNumber to Ruby value");
+
+     Ok(())
+ }