parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
@@ -0,0 +1,67 @@
1
+ use magnus::value::ReprValue;
2
+ use magnus::{Error as MagnusError, Ruby, Value};
3
+
4
+ pub struct RubyLogger {
5
+ logger: Option<Value>,
6
+ }
7
+
8
+ impl RubyLogger {
9
+ pub fn new(logger: Option<Value>) -> Result<Self, MagnusError> {
10
+ // Validate logger has required methods if provided
11
+ if let Some(ref log) = logger {
12
+ // `respond_to` below already requires the GVL, so a Ruby handle is
13
+ // always available on this path.
14
+ let ruby = Ruby::get().expect("RubyLogger::new runs while the Ruby GVL is held");
15
+ for method in &["debug", "info", "warn", "error"] {
16
+ if !log.respond_to(*method, false)? {
17
+ return Err(MagnusError::new(
18
+ ruby.exception_arg_error(),
19
+ format!("Logger must respond to {}", method),
20
+ ));
21
+ }
22
+ }
23
+ }
24
+ Ok(Self { logger })
25
+ }
26
+
27
+ pub fn debug<F: FnOnce() -> String>(&self, msg_fn: F) -> Result<(), MagnusError> {
28
+ if let Some(ref logger) = self.logger {
29
+ logger.funcall::<_, _, Value>("debug", (msg_fn(),))?;
30
+ }
31
+ Ok(())
32
+ }
33
+
34
+ pub fn info<F: FnOnce() -> String>(&self, msg_fn: F) -> Result<(), MagnusError> {
35
+ if let Some(ref logger) = self.logger {
36
+ logger.funcall::<_, _, Value>("info", (msg_fn(),))?;
37
+ }
38
+ Ok(())
39
+ }
40
+
41
+ pub fn warn<F: FnOnce() -> String>(&self, msg_fn: F) -> Result<(), MagnusError> {
42
+ if let Some(ref logger) = self.logger {
43
+ logger.funcall::<_, _, Value>("warn", (msg_fn(),))?;
44
+ }
45
+ Ok(())
46
+ }
47
+
48
+ pub fn error<F: FnOnce() -> String>(&self, msg_fn: F) -> Result<(), MagnusError> {
49
+ if let Some(ref logger) = self.logger {
50
+ logger.funcall::<_, _, Value>("error", (msg_fn(),))?;
51
+ }
52
+ Ok(())
53
+ }
54
+
55
+ pub fn inner(&self) -> Option<Value> {
56
+ self.logger
57
+ }
58
+ }
59
+
60
+ // Make RubyLogger cloneable for passing to multiple functions
61
+ impl Clone for RubyLogger {
62
+ fn clone(&self) -> Self {
63
+ Self {
64
+ logger: self.logger,
65
+ }
66
+ }
67
+ }
@@ -0,0 +1,529 @@
1
+ use magnus::value::ReprValue;
2
+ use magnus::{Error as MagnusError, IntoValue, Ruby, Value};
3
+ use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
4
+ use std::fs::File;
5
+
6
+ use crate::error::{IntoMagnusError, Result, RubyAdapterError};
7
+ use crate::io::{RubyIOReader, ThreadSafeRubyIOReader};
8
+ use crate::TryIntoValue;
9
+
10
+ fn parquet_time_unit_name(unit: &parquet::basic::TimeUnit) -> &'static str {
11
+ match unit {
12
+ parquet::basic::TimeUnit::MILLIS => "millis",
13
+ parquet::basic::TimeUnit::MICROS => "micros",
14
+ parquet::basic::TimeUnit::NANOS => "nanos",
15
+ }
16
+ }
17
+
18
+ /// Wrapper for ParquetMetaData to implement IntoValue trait
19
+ pub struct RubyParquetMetaData(pub ParquetMetaData);
20
+
21
+ impl TryIntoValue for RubyParquetMetaData {
22
+ fn try_into_value(self, handle: &Ruby) -> Result<Value> {
23
+ let metadata = &self.0;
24
+ let file_metadata = metadata.file_metadata();
25
+ let row_groups = metadata.row_groups();
26
+
27
+ // Construct a hash with the metadata
28
+ let hash = handle.hash_new();
29
+ hash.aset("num_rows", file_metadata.num_rows())
30
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set num_rows: {}", e)))?;
31
+ hash.aset("created_by", file_metadata.created_by())
32
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set created_by: {}", e)))?;
33
+
34
+ // Convert key_value_metadata to a Ruby array if it exists
35
+ if let Some(key_value_metadata) = file_metadata.key_value_metadata() {
36
+ let kv_array = handle.ary_new();
37
+ for kv in key_value_metadata {
38
+ let kv_hash = handle.hash_new();
39
+ kv_hash
40
+ .aset("key", kv.key.clone())
41
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set key: {}", e)))?;
42
+ kv_hash.aset("value", kv.value.clone()).map_err(|e| {
43
+ RubyAdapterError::metadata(format!("Failed to set value: {}", e))
44
+ })?;
45
+ kv_array.push(kv_hash).map_err(|e| {
46
+ RubyAdapterError::metadata(format!("Failed to push kv_hash: {}", e))
47
+ })?;
48
+ }
49
+ hash.aset("key_value_metadata", kv_array).map_err(|e| {
50
+ RubyAdapterError::metadata(format!("Failed to set key_value_metadata: {}", e))
51
+ })?;
52
+ } else {
53
+ hash.aset("key_value_metadata", None::<Value>)
54
+ .map_err(|e| {
55
+ RubyAdapterError::metadata(format!("Failed to set key_value_metadata: {}", e))
56
+ })?;
57
+ }
58
+
59
+ // Convert schema to a Ruby hash since &Type doesn't implement IntoValue
60
+ let schema_hash = handle.hash_new();
61
+ let schema = file_metadata.schema();
62
+ schema_hash
63
+ .aset("name", schema.name())
64
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set schema name: {}", e)))?;
65
+
66
+ // Add schema fields information
67
+ let fields_array = handle.ary_new();
68
+ for field in schema.get_fields() {
69
+ let field_hash = handle.hash_new();
70
+ field_hash.aset("name", field.name()).map_err(|e| {
71
+ RubyAdapterError::metadata(format!("Failed to set field name: {}", e))
72
+ })?;
73
+
74
+ // Handle different field types
75
+ match field.as_ref() {
76
+ parquet::schema::types::Type::PrimitiveType {
77
+ physical_type,
78
+ type_length,
79
+ scale,
80
+ precision,
81
+ ..
82
+ } => {
83
+ field_hash.aset("type", "primitive").map_err(|e| {
84
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
85
+ })?;
86
+ field_hash
87
+ .aset("physical_type", format!("{:?}", physical_type))
88
+ .map_err(|e| {
89
+ RubyAdapterError::metadata(format!(
90
+ "Failed to set physical_type: {}",
91
+ e
92
+ ))
93
+ })?;
94
+ field_hash.aset("type_length", *type_length).map_err(|e| {
95
+ RubyAdapterError::metadata(format!("Failed to set type_length: {}", e))
96
+ })?;
97
+ field_hash.aset("scale", *scale).map_err(|e| {
98
+ RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
99
+ })?;
100
+ field_hash.aset("precision", *precision).map_err(|e| {
101
+ RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
102
+ })?;
103
+ }
104
+ parquet::schema::types::Type::GroupType { .. } => {
105
+ field_hash.aset("type", "group").map_err(|e| {
106
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
107
+ })?;
108
+ }
109
+ }
110
+
111
+ // Add basic info
112
+ let basic_info = field.get_basic_info();
113
+ field_hash
114
+ .aset("repetition", format!("{:?}", basic_info.repetition()))
115
+ .map_err(|e| {
116
+ RubyAdapterError::metadata(format!("Failed to set repetition: {}", e))
117
+ })?;
118
+ field_hash
119
+ .aset(
120
+ "converted_type",
121
+ format!("{:?}", basic_info.converted_type()),
122
+ )
123
+ .map_err(|e| {
124
+ RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
125
+ })?;
126
+
127
+ if let Some(logical_type) = basic_info.logical_type_ref() {
128
+ let logical_type_value = match logical_type {
129
+ parquet::basic::LogicalType::Decimal { scale, precision } => {
130
+ let logical_hash = handle.hash_new();
131
+ logical_hash.aset("type", "Decimal").map_err(|e| {
132
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
133
+ })?;
134
+ logical_hash.aset("scale", *scale).map_err(|e| {
135
+ RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
136
+ })?;
137
+ logical_hash.aset("precision", *precision).map_err(|e| {
138
+ RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
139
+ })?;
140
+ logical_hash.as_value()
141
+ }
142
+ parquet::basic::LogicalType::Time {
143
+ is_adjusted_to_u_t_c,
144
+ unit,
145
+ } => {
146
+ let logical_hash = handle.hash_new();
147
+ logical_hash.aset("type", "Time").map_err(|e| {
148
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
149
+ })?;
150
+ logical_hash
151
+ .aset(
152
+ "is_adjusted_to_utc",
153
+ is_adjusted_to_u_t_c.to_string().as_str(),
154
+ )
155
+ .map_err(|e| {
156
+ RubyAdapterError::metadata(format!(
157
+ "Failed to set is_adjusted_to_u_t_c: {}",
158
+ e
159
+ ))
160
+ })?;
161
+
162
+ let unit_str = parquet_time_unit_name(unit);
163
+ logical_hash.aset("unit", unit_str).map_err(|e| {
164
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
165
+ })?;
166
+ logical_hash.as_value()
167
+ }
168
+ parquet::basic::LogicalType::Timestamp {
169
+ is_adjusted_to_u_t_c,
170
+ unit,
171
+ } => {
172
+ let logical_hash = handle.hash_new();
173
+ logical_hash.aset("type", "Timestamp").map_err(|e| {
174
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
175
+ })?;
176
+ logical_hash
177
+ .aset("is_adjusted_to_utc", *is_adjusted_to_u_t_c)
178
+ .map_err(|e| {
179
+ RubyAdapterError::metadata(format!(
180
+ "Failed to set is_adjusted_to_u_t_c: {}",
181
+ e
182
+ ))
183
+ })?;
184
+ let unit_str = parquet_time_unit_name(unit);
185
+ logical_hash.aset("unit", unit_str).map_err(|e| {
186
+ RubyAdapterError::metadata(format!("Failed to set unit: {}", e))
187
+ })?;
188
+ logical_hash.as_value()
189
+ }
190
+ parquet::basic::LogicalType::Integer {
191
+ bit_width,
192
+ is_signed,
193
+ } => {
194
+ let logical_hash = handle.hash_new();
195
+ logical_hash.aset("type", "Integer").map_err(|e| {
196
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
197
+ })?;
198
+ logical_hash.aset("bit_width", *bit_width).map_err(|e| {
199
+ RubyAdapterError::metadata(format!("Failed to set bit_width: {}", e))
200
+ })?;
201
+ logical_hash
202
+ .aset("is_signed", is_signed.to_string().as_str())
203
+ .map_err(|e| {
204
+ RubyAdapterError::metadata(format!(
205
+ "Failed to set is_signed: {}",
206
+ e
207
+ ))
208
+ })?;
209
+ logical_hash.as_value()
210
+ }
211
+ _ => {
212
+ let logical_hash = handle.hash_new();
213
+ logical_hash
214
+ .aset("type", format!("{:?}", logical_type))
215
+ .map_err(|e| {
216
+ RubyAdapterError::metadata(format!("Failed to set type: {}", e))
217
+ })?;
218
+ logical_hash.as_value()
219
+ }
220
+ };
221
+ field_hash
222
+ .aset("logical_type", logical_type_value)
223
+ .map_err(|e| {
224
+ RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
225
+ })?;
226
+ }
227
+
228
+ fields_array.push(field_hash).map_err(|e| {
229
+ RubyAdapterError::metadata(format!("Failed to push field_hash: {}", e))
230
+ })?;
231
+ }
232
+ schema_hash
233
+ .aset("fields", fields_array)
234
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set fields: {}", e)))?;
235
+
236
+ hash.aset("schema", schema_hash)
237
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set schema: {}", e)))?;
238
+
239
+ // Convert row_groups to a Ruby array since &[RowGroupMetaData] doesn't implement IntoValue
240
+ let row_groups_array = handle.ary_new();
241
+ for row_group in row_groups.iter() {
242
+ let rg_hash = handle.hash_new();
243
+ rg_hash
244
+ .aset("num_columns", row_group.num_columns())
245
+ .map_err(|e| {
246
+ RubyAdapterError::metadata(format!("Failed to set num_columns: {}", e))
247
+ })?;
248
+ rg_hash
249
+ .aset("num_rows", row_group.num_rows())
250
+ .map_err(|e| {
251
+ RubyAdapterError::metadata(format!("Failed to set num_rows: {}", e))
252
+ })?;
253
+ rg_hash
254
+ .aset("total_byte_size", row_group.total_byte_size())
255
+ .map_err(|e| {
256
+ RubyAdapterError::metadata(format!("Failed to set total_byte_size: {}", e))
257
+ })?;
258
+ rg_hash
259
+ .aset("file_offset", row_group.file_offset())
260
+ .map_err(|e| {
261
+ RubyAdapterError::metadata(format!("Failed to set file_offset: {}", e))
262
+ })?;
263
+ rg_hash
264
+ .aset("ordinal", row_group.ordinal())
265
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set ordinal: {}", e)))?;
266
+ rg_hash
267
+ .aset("compressed_size", row_group.compressed_size())
268
+ .map_err(|e| {
269
+ RubyAdapterError::metadata(format!("Failed to set compressed_size: {}", e))
270
+ })?;
271
+
272
+ // Add column chunks metadata
273
+ let columns_array = handle.ary_new();
274
+ for col_idx in 0..row_group.num_columns() {
275
+ let column = row_group.column(col_idx);
276
+ let col_hash = handle.hash_new();
277
+
278
+ col_hash
279
+ .aset("column_path", column.column_path().string())
280
+ .map_err(|e| {
281
+ RubyAdapterError::metadata(format!("Failed to set column_path: {}", e))
282
+ })?;
283
+ col_hash
284
+ .aset("file_path", column.file_path())
285
+ .map_err(|e| {
286
+ RubyAdapterError::metadata(format!("Failed to set file_path: {}", e))
287
+ })?;
288
+ col_hash
289
+ .aset("file_offset", column.file_offset())
290
+ .map_err(|e| {
291
+ RubyAdapterError::metadata(format!("Failed to set file_offset: {}", e))
292
+ })?;
293
+ col_hash
294
+ .aset("num_values", column.num_values())
295
+ .map_err(|e| {
296
+ RubyAdapterError::metadata(format!("Failed to set num_values: {}", e))
297
+ })?;
298
+ col_hash
299
+ .aset("compression", format!("{:?}", column.compression()))
300
+ .map_err(|e| {
301
+ RubyAdapterError::metadata(format!("Failed to set compression: {}", e))
302
+ })?;
303
+ col_hash
304
+ .aset("total_compressed_size", column.compressed_size())
305
+ .map_err(|e| {
306
+ RubyAdapterError::metadata(format!(
307
+ "Failed to set total_compressed_size: {}",
308
+ e
309
+ ))
310
+ })?;
311
+ col_hash
312
+ .aset("total_uncompressed_size", column.uncompressed_size())
313
+ .map_err(|e| {
314
+ RubyAdapterError::metadata(format!(
315
+ "Failed to set total_uncompressed_size: {}",
316
+ e
317
+ ))
318
+ })?;
319
+ col_hash
320
+ .aset("data_page_offset", column.data_page_offset())
321
+ .map_err(|e| {
322
+ RubyAdapterError::metadata(format!("Failed to set data_page_offset: {}", e))
323
+ })?;
324
+
325
+ if let Some(offset) = column.dictionary_page_offset() {
326
+ col_hash
327
+ .aset("dictionary_page_offset", offset)
328
+ .map_err(|e| {
329
+ RubyAdapterError::metadata(format!(
330
+ "Failed to set dictionary_page_offset: {}",
331
+ e
332
+ ))
333
+ })?;
334
+ }
335
+
336
+ if let Some(offset) = column.bloom_filter_offset() {
337
+ col_hash.aset("bloom_filter_offset", offset).map_err(|e| {
338
+ RubyAdapterError::metadata(format!(
339
+ "Failed to set bloom_filter_offset: {}",
340
+ e
341
+ ))
342
+ })?;
343
+ }
344
+
345
+ if let Some(length) = column.bloom_filter_length() {
346
+ col_hash.aset("bloom_filter_length", length).map_err(|e| {
347
+ RubyAdapterError::metadata(format!(
348
+ "Failed to set bloom_filter_length: {}",
349
+ e
350
+ ))
351
+ })?;
352
+ }
353
+
354
+ if let Some(offset) = column.offset_index_offset() {
355
+ col_hash.aset("offset_index_offset", offset).map_err(|e| {
356
+ RubyAdapterError::metadata(format!(
357
+ "Failed to set offset_index_offset: {}",
358
+ e
359
+ ))
360
+ })?;
361
+ }
362
+
363
+ if let Some(length) = column.offset_index_length() {
364
+ col_hash.aset("offset_index_length", length).map_err(|e| {
365
+ RubyAdapterError::metadata(format!(
366
+ "Failed to set offset_index_length: {}",
367
+ e
368
+ ))
369
+ })?;
370
+ }
371
+
372
+ if let Some(offset) = column.column_index_offset() {
373
+ col_hash.aset("column_index_offset", offset).map_err(|e| {
374
+ RubyAdapterError::metadata(format!(
375
+ "Failed to set column_index_offset: {}",
376
+ e
377
+ ))
378
+ })?;
379
+ }
380
+
381
+ if let Some(length) = column.column_index_length() {
382
+ col_hash.aset("column_index_length", length).map_err(|e| {
383
+ RubyAdapterError::metadata(format!(
384
+ "Failed to set column_index_length: {}",
385
+ e
386
+ ))
387
+ })?;
388
+ }
389
+
390
+ // Add encodings
391
+ let encodings_array = handle.ary_new();
392
+ for encoding in column.encodings() {
393
+ encodings_array
394
+ .push(format!("{:?}", encoding))
395
+ .map_err(|e| {
396
+ RubyAdapterError::metadata(format!("Failed to push encoding: {}", e))
397
+ })?;
398
+ }
399
+ col_hash.aset("encodings", encodings_array).map_err(|e| {
400
+ RubyAdapterError::metadata(format!("Failed to set encodings: {}", e))
401
+ })?;
402
+
403
+ // Add statistics if available
404
+ if let Some(stats) = column.statistics() {
405
+ let stats_hash = handle.hash_new();
406
+ stats_hash
407
+ .aset("min_is_exact", stats.min_is_exact())
408
+ .map_err(|e| {
409
+ RubyAdapterError::metadata(format!("Failed to set min_is_exact: {}", e))
410
+ })?;
411
+ stats_hash
412
+ .aset("max_is_exact", stats.max_is_exact())
413
+ .map_err(|e| {
414
+ RubyAdapterError::metadata(format!("Failed to set max_is_exact: {}", e))
415
+ })?;
416
+
417
+ col_hash.aset("statistics", stats_hash).map_err(|e| {
418
+ RubyAdapterError::metadata(format!("Failed to set statistics: {}", e))
419
+ })?;
420
+ }
421
+
422
+ // Add page encoding stats if available
423
+ if let Some(page_encoding_stats) = column.page_encoding_stats() {
424
+ let page_stats_array = handle.ary_new();
425
+ for stat in page_encoding_stats {
426
+ let stat_hash = handle.hash_new();
427
+ stat_hash
428
+ .aset("page_type", format!("{:?}", stat.page_type))
429
+ .map_err(|e| {
430
+ RubyAdapterError::metadata(format!(
431
+ "Failed to set page_type: {}",
432
+ e
433
+ ))
434
+ })?;
435
+ stat_hash
436
+ .aset("encoding", format!("{:?}", stat.encoding))
437
+ .map_err(|e| {
438
+ RubyAdapterError::metadata(format!("Failed to set encoding: {}", e))
439
+ })?;
440
+ stat_hash.aset("count", stat.count).map_err(|e| {
441
+ RubyAdapterError::metadata(format!("Failed to set count: {}", e))
442
+ })?;
443
+ page_stats_array.push(stat_hash).map_err(|e| {
444
+ RubyAdapterError::metadata(format!("Failed to push stat_hash: {}", e))
445
+ })?;
446
+ }
447
+ col_hash
448
+ .aset("page_encoding_stats", page_stats_array)
449
+ .map_err(|e| {
450
+ RubyAdapterError::metadata(format!(
451
+ "Failed to set page_encoding_stats: {}",
452
+ e
453
+ ))
454
+ })?;
455
+ }
456
+
457
+ columns_array.push(col_hash).map_err(|e| {
458
+ RubyAdapterError::metadata(format!("Failed to push col_hash: {}", e))
459
+ })?;
460
+ }
461
+ rg_hash
462
+ .aset("columns", columns_array)
463
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set columns: {}", e)))?;
464
+
465
+ row_groups_array.push(rg_hash).map_err(|e| {
466
+ RubyAdapterError::metadata(format!("Failed to push rg_hash: {}", e))
467
+ })?;
468
+ }
469
+ hash.aset("row_groups", row_groups_array)
470
+ .map_err(|e| RubyAdapterError::metadata(format!("Failed to set row_groups: {}", e)))?;
471
+
472
+ Ok(handle.into_value(hash))
473
+ }
474
+ }
475
+
476
+ // Also implement IntoValue for backwards compatibility
477
+ impl IntoValue for RubyParquetMetaData {
478
+ fn into_value_with(self, handle: &Ruby) -> Value {
479
+ // Use TryIntoValue and handle errors by returning an error hash
480
+ match self.try_into_value(handle) {
481
+ Ok(value) => value,
482
+ Err(e) => {
483
+ // Create an error hash instead of panicking
484
+ let error_hash = handle.hash_new();
485
+ let _ = error_hash.aset("error", true);
486
+ let _ = error_hash.aset("message", e.to_string());
487
+ handle.into_value(error_hash)
488
+ }
489
+ }
490
+ }
491
+ }
492
+
493
+ /// Parse metadata from a file path or Ruby IO object
494
+ pub fn parse_metadata(arg: Value) -> std::result::Result<Value, MagnusError> {
495
+ parse_metadata_impl(arg).into_magnus_error()
496
+ }
497
+
498
+ fn parse_metadata_impl(arg: Value) -> Result<Value> {
499
+ let ruby = Ruby::get().map_err(|_| RubyAdapterError::runtime("Failed to get Ruby runtime"))?;
500
+
501
+ let mut reader = ParquetMetaDataReader::new();
502
+ if arg.is_kind_of(ruby.class_string()) {
503
+ let path = arg
504
+ .to_r_string()
505
+ .map_err(|e| {
506
+ RubyAdapterError::invalid_input(format!("Failed to convert to string: {}", e))
507
+ })?
508
+ .to_string()
509
+ .map_err(|e| {
510
+ RubyAdapterError::invalid_input(format!("Failed to convert to Rust string: {}", e))
511
+ })?;
512
+ let file = File::open(path).map_err(RubyAdapterError::Io)?;
513
+ reader
514
+ .try_parse(&file)
515
+ .map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
516
+ } else {
517
+ let file = RubyIOReader::new(arg).map_err(RubyAdapterError::Io)?;
518
+ reader
519
+ .try_parse(&ThreadSafeRubyIOReader::new(file))
520
+ .map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
521
+ }
522
+
523
+ let metadata = reader
524
+ .finish()
525
+ .map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
526
+
527
+ // Use TryIntoValue instead of IntoValue
528
+ RubyParquetMetaData(metadata).try_into_value(&ruby)
529
+ }