parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -0,0 +1,1685 @@
1
+ use crate::string_cache::StringCache;
2
+ use bytes::Bytes;
3
+ use indexmap::IndexMap;
4
+ use magnus::r_hash::ForEach;
5
+ use magnus::value::ReprValue;
6
+ use magnus::{
7
+ Error as MagnusError, IntoValue, Module, RArray, RHash, RString, Ruby, Symbol, TryConvert,
8
+ Value,
9
+ };
10
+ use ordered_float::OrderedFloat;
11
+ use parquet_core::{ParquetError, ParquetValue, Result};
12
+ use std::cell::RefCell;
13
+ use std::sync::Arc;
14
+
15
+ /// Ruby value converter
16
+ ///
17
+ /// Note: This converter is not thread-safe due to Ruby's GIL requirements.
18
+ /// It should only be used within Ruby's thread context.
19
+ #[derive(Default)]
20
+ pub struct RubyValueConverter {
21
+ string_cache: RefCell<Option<StringCache>>,
22
+ }
23
+
24
+ impl RubyValueConverter {
25
+ pub fn new() -> Self {
26
+ Self {
27
+ string_cache: RefCell::new(None),
28
+ }
29
+ }
30
+
31
+ pub fn with_string_cache(cache: StringCache) -> Self {
32
+ Self {
33
+ string_cache: RefCell::new(Some(cache)),
34
+ }
35
+ }
36
+
37
+ pub fn string_cache_stats(&self) -> Option<crate::string_cache::CacheStats> {
38
+ self.string_cache
39
+ .borrow()
40
+ .as_ref()
41
+ .map(|cache| cache.stats())
42
+ }
43
+
44
+ /// Convert a Ruby value to ParquetValue with type hint
45
+ /// This is the primary conversion method that handles all Ruby types
46
+ pub fn to_parquet_with_type_hint(
47
+ &mut self,
48
+ value: Value,
49
+ type_hint: Option<&parquet_core::PrimitiveType>,
50
+ ) -> Result<ParquetValue> {
51
+ // Handle nil values
52
+ if value.is_nil() {
53
+ return Ok(ParquetValue::Null);
54
+ }
55
+
56
+ // If we have a type hint, use it to guide conversion
57
+ if let Some(hint) = type_hint {
58
+ return self.convert_with_type_hint(value, hint);
59
+ }
60
+
61
+ // Otherwise, infer type from Ruby value
62
+ self.infer_and_convert(value)
63
+ }
64
+
65
+ /// Convert a Ruby value to ParquetValue with schema hint
66
+ /// This handles both primitive and complex types
67
+ pub fn to_parquet_with_schema_hint(
68
+ &mut self,
69
+ value: Value,
70
+ schema_hint: Option<&parquet_core::SchemaNode>,
71
+ ) -> Result<ParquetValue> {
72
+ // Handle nil values
73
+ if value.is_nil() {
74
+ return Ok(ParquetValue::Null);
75
+ }
76
+
77
+ // If we have a schema hint, use it to guide conversion
78
+ if let Some(schema) = schema_hint {
79
+ return self.convert_with_schema_hint(value, schema);
80
+ }
81
+
82
+ // Otherwise, infer type from Ruby value
83
+ self.infer_and_convert(value)
84
+ }
85
+
86
+ /// Convert with explicit schema hint
87
+ fn convert_with_schema_hint(
88
+ &mut self,
89
+ value: Value,
90
+ schema: &parquet_core::SchemaNode,
91
+ ) -> Result<ParquetValue> {
92
+ use parquet_core::SchemaNode;
93
+
94
+ match schema {
95
+ SchemaNode::Primitive {
96
+ primitive_type,
97
+ format,
98
+ ..
99
+ } => self.convert_with_type_hint_and_format(value, primitive_type, format.as_deref()),
100
+ SchemaNode::List { item, .. } => self.convert_to_list(value, item.as_ref()),
101
+ SchemaNode::Map {
102
+ key, value: val, ..
103
+ } => self.convert_to_map(value, key.as_ref(), val.as_ref()),
104
+ SchemaNode::Struct { fields, .. } => self.convert_to_struct(value, fields),
105
+ }
106
+ }
107
+
108
+ /// Convert with explicit type hint and optional format
109
+ fn convert_with_type_hint_and_format(
110
+ &mut self,
111
+ value: Value,
112
+ type_hint: &parquet_core::PrimitiveType,
113
+ format: Option<&str>,
114
+ ) -> Result<ParquetValue> {
115
+ use parquet_core::PrimitiveType::*;
116
+
117
+ // Special handling for UUID format
118
+ if let (Binary, Some("uuid")) = (type_hint, format) {
119
+ return self.convert_to_uuid_binary(value);
120
+ }
121
+
122
+ // Handle date types with format
123
+ match type_hint {
124
+ Date32 => return self.convert_to_date32(value, format),
125
+ Date64 => return self.convert_to_date64(value, format),
126
+ _ => {}
127
+ }
128
+
129
+ // Default type hint conversion
130
+ self.convert_with_type_hint(value, type_hint)
131
+ }
132
+
133
+ /// Convert with explicit type hint
134
+ fn convert_with_type_hint(
135
+ &mut self,
136
+ value: Value,
137
+ type_hint: &parquet_core::PrimitiveType,
138
+ ) -> Result<ParquetValue> {
139
+ use parquet_core::PrimitiveType::*;
140
+
141
+ match type_hint {
142
+ Boolean => self.convert_to_boolean(value),
143
+ Int8 => self.convert_to_int8(value),
144
+ Int16 => self.convert_to_int16(value),
145
+ Int32 => self.convert_to_int32(value),
146
+ Int64 => self.convert_to_int64(value),
147
+ UInt8 => self.convert_to_uint8(value),
148
+ UInt16 => self.convert_to_uint16(value),
149
+ UInt32 => self.convert_to_uint32(value),
150
+ UInt64 => self.convert_to_uint64(value),
151
+ Float32 => self.convert_to_float32(value),
152
+ Float64 => self.convert_to_float64(value),
153
+ String => self.convert_to_string(value),
154
+ Binary => self.convert_to_binary(value),
155
+ Date32 => self.convert_to_date32(value, None),
156
+ Date64 => self.convert_to_date64(value, None),
157
+ TimeMillis => self.convert_to_time_millis(value),
158
+ TimeMicros => self.convert_to_time_micros(value),
159
+ TimestampSecond(schema_tz) => {
160
+ self.convert_to_timestamp_second_with_tz(value, schema_tz.as_deref())
161
+ }
162
+ TimestampMillis(schema_tz) => {
163
+ self.convert_to_timestamp_millis_with_tz(value, schema_tz.as_deref())
164
+ }
165
+ TimestampMicros(schema_tz) => {
166
+ self.convert_to_timestamp_micros_with_tz(value, schema_tz.as_deref())
167
+ }
168
+ TimestampNanos(schema_tz) => {
169
+ self.convert_to_timestamp_nanos_with_tz(value, schema_tz.as_deref())
170
+ }
171
+ Decimal128(precision, scale) => self.convert_to_decimal128(value, *precision, *scale),
172
+ Decimal256(precision, scale) => self.convert_to_decimal256(value, *precision, *scale),
173
+ FixedLenByteArray(len) => self.convert_to_fixed_len_byte_array(value, *len),
174
+ }
175
+ }
176
+
177
+ /// Infer type from Ruby value and convert
178
+ fn infer_and_convert(&mut self, value: Value) -> Result<ParquetValue> {
179
+ let class_name = value.class().to_string();
180
+
181
+ match class_name.as_str() {
182
+ "Integer" => {
183
+ let i: i64 = TryConvert::try_convert(value)
184
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
185
+ Ok(ParquetValue::Int64(i))
186
+ }
187
+ "Float" => {
188
+ let f: f64 = TryConvert::try_convert(value)
189
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
190
+ Ok(ParquetValue::Float64(OrderedFloat(f)))
191
+ }
192
+ "String" => {
193
+ let s: String = TryConvert::try_convert(value)
194
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
195
+ Ok(ParquetValue::String(s.into()))
196
+ }
197
+ "TrueClass" | "FalseClass" => {
198
+ let b: bool = TryConvert::try_convert(value)
199
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
200
+ Ok(ParquetValue::Boolean(b))
201
+ }
202
+ "Array" => {
203
+ let array: RArray = TryConvert::try_convert(value)
204
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
205
+ let mut list = Vec::with_capacity(array.len());
206
+
207
+ for item in array.into_iter() {
208
+ list.push(self.infer_and_convert(item)?);
209
+ }
210
+
211
+ Ok(ParquetValue::List(list))
212
+ }
213
+ "Hash" => {
214
+ let hash: RHash = TryConvert::try_convert(value)
215
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
216
+ let mut map = Vec::new();
217
+ let mut conversion_error = None;
218
+
219
+ hash.foreach(|key: Value, val: Value| {
220
+ match (self.infer_and_convert(key), self.infer_and_convert(val)) {
221
+ (Ok(k), Ok(v)) => {
222
+ map.push((k, v));
223
+ Ok(ForEach::Continue)
224
+ }
225
+ (Err(e), _) | (_, Err(e)) => {
226
+ conversion_error = Some(e);
227
+ Ok(ForEach::Stop)
228
+ }
229
+ }
230
+ })
231
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
232
+
233
+ if let Some(err) = conversion_error {
234
+ return Err(err);
235
+ }
236
+
237
+ Ok(ParquetValue::Map(map))
238
+ }
239
+ "Time" => {
240
+ // Convert Ruby Time to timestamp millis
241
+ let millis = value
242
+ .funcall::<_, _, i64>("to_i", ())
243
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
244
+ * 1000
245
+ + value
246
+ .funcall::<_, _, i32>("nsec", ())
247
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
248
+ as i64
249
+ / 1_000_000;
250
+ let tz = self.extract_timezone(value)?;
251
+
252
+ Ok(ParquetValue::TimestampMillis(millis, tz))
253
+ }
254
+ "BigDecimal" => {
255
+ // Convert BigDecimal to Decimal128
256
+ let str_val: String = value
257
+ .funcall("to_s", ("F",))
258
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
259
+ self.parse_decimal128(&str_val, 38, 10) // Default precision and scale
260
+ }
261
+ _ => {
262
+ // Try to convert to string as fallback
263
+ let s: String = value.to_string();
264
+ Ok(ParquetValue::String(s.into()))
265
+ }
266
+ }
267
+ }
268
+
269
+ // Helper methods
270
+
271
+ /// Normalize timestamp for Parquet storage according to Parquet specification:
272
+ /// - WITH timezone in schema: Store as UTC (isAdjustedToUTC = true)
273
+ /// - WITHOUT timezone in schema: Store as local/unzoned time (isAdjustedToUTC = false)
274
+ ///
275
+ /// IMPORTANT: Parquet can ONLY store:
276
+ /// 1. UTC timestamps (when schema has ANY timezone)
277
+ /// 2. Local/unzoned timestamps (when schema has NO timezone)
278
+ ///
279
+ /// Non-UTC timezones like "+09:00" or "America/New_York" are NOT preserved.
280
+ fn normalize_timestamp_for_parquet(
281
+ &self,
282
+ time_value: Value,
283
+ schema_has_timezone: bool,
284
+ ) -> Result<Value> {
285
+ if schema_has_timezone {
286
+ // Schema has timezone -> MUST convert to UTC (Parquet limitation)
287
+ // The original timezone offset is lost - only UTC is stored
288
+ time_value
289
+ .funcall("utc", ())
290
+ .map_err(|e| ParquetError::Conversion(format!("Failed to convert to UTC: {}", e)))
291
+ } else {
292
+ // Schema has no timezone -> keep as local/unzoned time
293
+ // This represents a "wall clock" time without timezone information
294
+ Ok(time_value)
295
+ }
296
+ }
297
+
298
+ /// Extract timezone information from a Ruby Time object
299
+ fn extract_timezone(&self, time_value: Value) -> Result<Option<std::sync::Arc<str>>> {
300
+ let _ruby = Ruby::get()
301
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
302
+
303
+ // Check if the time is in UTC
304
+ let is_utc: bool = time_value
305
+ .funcall("utc?", ())
306
+ .map_err(|e| ParquetError::Conversion(format!("Failed to check UTC: {}", e)))?;
307
+
308
+ if is_utc {
309
+ return Ok(Some("UTC".into()));
310
+ }
311
+
312
+ // Get the UTC offset in seconds
313
+ let utc_offset: i32 = time_value
314
+ .funcall("utc_offset", ())
315
+ .map_err(|e| ParquetError::Conversion(format!("Failed to get UTC offset: {}", e)))?;
316
+
317
+ // If offset is 0 and not explicitly UTC, it might be local time
318
+ if utc_offset == 0 {
319
+ // Check if this is actually UTC or just happens to have 0 offset
320
+ // We already checked utc? above, so this is local time with 0 offset
321
+ return Ok(None);
322
+ }
323
+
324
+ // Convert offset to hours and minutes
325
+ let hours = utc_offset / 3600;
326
+ let minutes = (utc_offset.abs() % 3600) / 60;
327
+
328
+ // Format as +HH:MM or -HH:MM
329
+ let tz_string = if minutes == 0 {
330
+ format!("{:+03}:00", hours)
331
+ } else {
332
+ format!("{:+03}:{:02}", hours, minutes)
333
+ };
334
+
335
+ Ok(Some(tz_string.into()))
336
+ }
337
+
338
+ // Conversion methods for specific types
339
+
340
+ fn convert_to_boolean(&self, value: Value) -> Result<ParquetValue> {
341
+ if value.is_nil() {
342
+ return Ok(ParquetValue::Null);
343
+ }
344
+
345
+ let b: bool = TryConvert::try_convert(value)
346
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
347
+ Ok(ParquetValue::Boolean(b))
348
+ }
349
+
350
+ fn convert_to_int8(&self, value: Value) -> Result<ParquetValue> {
351
+ if value.is_nil() {
352
+ return Ok(ParquetValue::Null);
353
+ }
354
+
355
+ let i = self.convert_numeric::<i8>(value)?;
356
+ Ok(ParquetValue::Int8(i))
357
+ }
358
+
359
+ fn convert_to_int16(&self, value: Value) -> Result<ParquetValue> {
360
+ if value.is_nil() {
361
+ return Ok(ParquetValue::Null);
362
+ }
363
+
364
+ let i = self.convert_numeric::<i16>(value)?;
365
+ Ok(ParquetValue::Int16(i))
366
+ }
367
+
368
+ fn convert_to_int32(&self, value: Value) -> Result<ParquetValue> {
369
+ if value.is_nil() {
370
+ return Ok(ParquetValue::Null);
371
+ }
372
+
373
+ let i = self.convert_numeric::<i32>(value)?;
374
+ Ok(ParquetValue::Int32(i))
375
+ }
376
+
377
+ fn convert_to_int64(&self, value: Value) -> Result<ParquetValue> {
378
+ if value.is_nil() {
379
+ return Ok(ParquetValue::Null);
380
+ }
381
+
382
+ let i = self.convert_numeric::<i64>(value)?;
383
+ Ok(ParquetValue::Int64(i))
384
+ }
385
+
386
+ fn convert_to_uint8(&self, value: Value) -> Result<ParquetValue> {
387
+ if value.is_nil() {
388
+ return Ok(ParquetValue::Null);
389
+ }
390
+
391
+ let i = self.convert_numeric::<u8>(value)?;
392
+ Ok(ParquetValue::UInt8(i))
393
+ }
394
+
395
+ fn convert_to_uint16(&self, value: Value) -> Result<ParquetValue> {
396
+ if value.is_nil() {
397
+ return Ok(ParquetValue::Null);
398
+ }
399
+
400
+ let i = self.convert_numeric::<u16>(value)?;
401
+ Ok(ParquetValue::UInt16(i))
402
+ }
403
+
404
+ fn convert_to_uint32(&self, value: Value) -> Result<ParquetValue> {
405
+ if value.is_nil() {
406
+ return Ok(ParquetValue::Null);
407
+ }
408
+
409
+ let i = self.convert_numeric::<u32>(value)?;
410
+ Ok(ParquetValue::UInt32(i))
411
+ }
412
+
413
+ fn convert_to_uint64(&self, value: Value) -> Result<ParquetValue> {
414
+ if value.is_nil() {
415
+ return Ok(ParquetValue::Null);
416
+ }
417
+
418
+ let i = self.convert_numeric::<u64>(value)?;
419
+ Ok(ParquetValue::UInt64(i))
420
+ }
421
+
422
+ fn convert_to_float32(&self, value: Value) -> Result<ParquetValue> {
423
+ if value.is_nil() {
424
+ return Ok(ParquetValue::Null);
425
+ }
426
+
427
+ let f = self.convert_numeric::<f32>(value)?;
428
+ Ok(ParquetValue::Float32(OrderedFloat(f)))
429
+ }
430
+
431
+ fn convert_to_float64(&self, value: Value) -> Result<ParquetValue> {
432
+ if value.is_nil() {
433
+ return Ok(ParquetValue::Null);
434
+ }
435
+
436
+ let f = self.convert_numeric::<f64>(value)?;
437
+ Ok(ParquetValue::Float64(OrderedFloat(f)))
438
+ }
439
+
440
+ fn convert_to_string(&mut self, value: Value) -> Result<ParquetValue> {
441
+ if value.is_nil() {
442
+ return Ok(ParquetValue::Null);
443
+ }
444
+
445
+ // Convert any value to string using to_s
446
+ let s: String = value
447
+ .funcall("to_s", ())
448
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
449
+
450
+ // Use string cache if available for statistics tracking
451
+ // Note: Currently doesn't provide memory savings due to ParquetValue storing String
452
+ if let Some(ref mut cache) = self.string_cache.borrow_mut().as_mut() {
453
+ let interned = cache.intern(s);
454
+ Ok(ParquetValue::String(interned))
455
+ } else {
456
+ Ok(ParquetValue::String(s.into()))
457
+ }
458
+ }
459
+
460
+ fn convert_to_binary(&self, value: Value) -> Result<ParquetValue> {
461
+ if value.is_nil() {
462
+ return Ok(ParquetValue::Null);
463
+ }
464
+
465
+ let ruby = Ruby::get()
466
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
467
+ if value.is_kind_of(ruby.class_string()) {
468
+ let s: RString = TryConvert::try_convert(value)
469
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
470
+ let bytes = unsafe { Bytes::copy_from_slice(s.as_slice()) };
471
+ Ok(ParquetValue::Bytes(bytes))
472
+ } else {
473
+ // Try to convert to string first
474
+ let s: String = TryConvert::try_convert(value)
475
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
476
+ Ok(ParquetValue::Bytes(s.into()))
477
+ }
478
+ }
479
+
480
+ fn convert_to_uuid_binary(&self, value: Value) -> Result<ParquetValue> {
481
+ if value.is_nil() {
482
+ return Ok(ParquetValue::Null);
483
+ }
484
+
485
+ // Convert value to string
486
+ let uuid_str: String = value
487
+ .funcall("to_s", ())
488
+ .and_then(TryConvert::try_convert)
489
+ .map_err(|e: MagnusError| {
490
+ ParquetError::Conversion(format!("Failed to convert to UUID string: {}", e))
491
+ })?;
492
+
493
+ // Remove hyphens and validate length
494
+ let clean_uuid = uuid_str.replace('-', "");
495
+ if clean_uuid.len() != 32 {
496
+ return Err(ParquetError::Conversion(format!(
497
+ "Invalid UUID format: expected 32 hex characters (ignoring hyphens), got {}",
498
+ clean_uuid.len()
499
+ )));
500
+ }
501
+
502
+ // Parse hex string to bytes
503
+ let mut bytes = Vec::with_capacity(16);
504
+ for i in 0..16 {
505
+ let hex_byte = &clean_uuid[i * 2..i * 2 + 2];
506
+ let byte = u8::from_str_radix(hex_byte, 16).map_err(|_| {
507
+ ParquetError::Conversion(format!("Invalid hex character in UUID: {}", hex_byte))
508
+ })?;
509
+ bytes.push(byte);
510
+ }
511
+
512
+ Ok(ParquetValue::Bytes(bytes.into()))
513
+ }
514
+
515
+ fn convert_to_date32(&self, value: Value, date_format: Option<&str>) -> Result<ParquetValue> {
516
+ if value.is_nil() {
517
+ return Ok(ParquetValue::Null);
518
+ }
519
+
520
+ // Handle Time objects
521
+ let ruby = Ruby::get()
522
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
523
+ if value.is_kind_of(ruby.class_time()) {
524
+ let secs: i64 = value
525
+ .funcall("to_i", ())
526
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
527
+ let days = (secs / 86400) as i32;
528
+ return Ok(ParquetValue::Date32(days));
529
+ }
530
+
531
+ // Handle strings
532
+ if value.is_kind_of(ruby.class_string()) {
533
+ // Use Ruby's Date module
534
+ let _ = ruby.require("date");
535
+ let kernel = ruby.module_kernel();
536
+ let date_module = kernel
537
+ .const_get::<_, Value>("Date")
538
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
539
+
540
+ // Use strptime if format is provided, otherwise use parse
541
+ let date = if let Some(format) = date_format {
542
+ date_module
543
+ .funcall::<_, _, Value>("strptime", (value, format))
544
+ .map_err(|e| {
545
+ ParquetError::Conversion(format!(
546
+ "Failed to parse date with format '{}': {}",
547
+ format, e
548
+ ))
549
+ })?
550
+ } else {
551
+ date_module
552
+ .funcall::<_, _, Value>("parse", (value,))
553
+ .map_err(|e| ParquetError::Conversion(format!("Failed to parse date: {}", e)))?
554
+ };
555
+
556
+ // Convert to Time object then to days since epoch
557
+ let time = date
558
+ .funcall::<_, _, Value>("to_time", ())
559
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
560
+ let secs: i64 = time
561
+ .funcall("to_i", ())
562
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
563
+ let days = (secs / 86400) as i32;
564
+ return Ok(ParquetValue::Date32(days));
565
+ }
566
+
567
+ Err(ParquetError::Conversion(format!(
568
+ "Cannot convert {} to date32",
569
+ value.class()
570
+ )))
571
+ }
572
+
573
+ fn convert_to_date64(&self, value: Value, date_format: Option<&str>) -> Result<ParquetValue> {
574
+ if value.is_nil() {
575
+ return Ok(ParquetValue::Null);
576
+ }
577
+
578
+ // Similar to date32 but returns milliseconds since epoch
579
+ let ruby = Ruby::get()
580
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
581
+ if value.is_kind_of(ruby.class_time()) {
582
+ let millis: i64 = value
583
+ .funcall::<_, _, i64>("to_i", ())
584
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
585
+ * 1000;
586
+ return Ok(ParquetValue::Date64(millis));
587
+ }
588
+
589
+ // Handle strings
590
+ if value.is_kind_of(ruby.class_string()) {
591
+ // Use Ruby's Date module
592
+ let _ = ruby.require("date");
593
+ let kernel = ruby.module_kernel();
594
+ let date_module = kernel
595
+ .const_get::<_, Value>("Date")
596
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
597
+
598
+ // Use strptime if format is provided, otherwise use parse
599
+ let date = if let Some(format) = date_format {
600
+ date_module
601
+ .funcall::<_, _, Value>("strptime", (value, format))
602
+ .map_err(|e| {
603
+ ParquetError::Conversion(format!(
604
+ "Failed to parse date with format '{}': {}",
605
+ format, e
606
+ ))
607
+ })?
608
+ } else {
609
+ date_module
610
+ .funcall::<_, _, Value>("parse", (value,))
611
+ .map_err(|e| ParquetError::Conversion(format!("Failed to parse date: {}", e)))?
612
+ };
613
+
614
+ // Convert to Time object then to milliseconds since epoch
615
+ let time = date
616
+ .funcall::<_, _, Value>("to_time", ())
617
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
618
+ let secs: i64 = time
619
+ .funcall("to_i", ())
620
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
621
+ let millis = secs * 1000;
622
+ return Ok(ParquetValue::Date64(millis));
623
+ }
624
+
625
+ Err(ParquetError::Conversion(format!(
626
+ "Cannot convert {} to date64",
627
+ value.class()
628
+ )))
629
+ }
630
+
631
+ fn convert_to_time_millis(&self, value: Value) -> Result<ParquetValue> {
632
+ if value.is_nil() {
633
+ return Ok(ParquetValue::Null);
634
+ }
635
+
636
+ // Convert to milliseconds since midnight
637
+ let ruby = Ruby::get()
638
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
639
+ if value.is_kind_of(ruby.class_time()) {
640
+ let hour: i32 = value
641
+ .funcall("hour", ())
642
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
643
+ let min: i32 = value
644
+ .funcall("min", ())
645
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
646
+ let sec: i32 = value
647
+ .funcall("sec", ())
648
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
649
+ let nsec: i32 = value
650
+ .funcall("nsec", ())
651
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
652
+
653
+ let millis = (hour * 3600 + min * 60 + sec) * 1000 + nsec / 1_000_000;
654
+ return Ok(ParquetValue::TimeMillis(millis));
655
+ }
656
+
657
+ Err(ParquetError::Conversion(format!(
658
+ "Cannot convert {} to time_millis",
659
+ value.class()
660
+ )))
661
+ }
662
+
663
+ fn convert_to_time_micros(&self, value: Value) -> Result<ParquetValue> {
664
+ if value.is_nil() {
665
+ return Ok(ParquetValue::Null);
666
+ }
667
+
668
+ // Convert to microseconds since midnight
669
+ let ruby = Ruby::get()
670
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
671
+ if value.is_kind_of(ruby.class_time()) {
672
+ let hour: i64 = value
673
+ .funcall("hour", ())
674
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
675
+ let min: i64 = value
676
+ .funcall("min", ())
677
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
678
+ let sec: i64 = value
679
+ .funcall("sec", ())
680
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
681
+ let nsec: i64 = value
682
+ .funcall("nsec", ())
683
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
684
+
685
+ let micros = (hour * 3600 + min * 60 + sec) * 1_000_000 + nsec / 1000;
686
+ return Ok(ParquetValue::TimeMicros(micros));
687
+ }
688
+
689
+ Err(ParquetError::Conversion(format!(
690
+ "Cannot convert {} to time_micros",
691
+ value.class()
692
+ )))
693
+ }
694
+
695
+ // Timestamp conversion methods that respect schema timezone
696
+ fn convert_to_timestamp_second_with_tz(
697
+ &self,
698
+ value: Value,
699
+ schema_tz: Option<&str>,
700
+ ) -> Result<ParquetValue> {
701
+ if value.is_nil() {
702
+ return Ok(ParquetValue::Null);
703
+ }
704
+
705
+ let ruby = Ruby::get()
706
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
707
+ if value.is_kind_of(ruby.class_time()) {
708
+ // Normalize timestamp according to Parquet spec
709
+ let adjusted_time = self.normalize_timestamp_for_parquet(value, schema_tz.is_some())?;
710
+
711
+ let secs: i64 = adjusted_time
712
+ .funcall("to_i", ())
713
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
714
+
715
+ // PARQUET TIMESTAMP STORAGE:
716
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
717
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
718
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
719
+ let tz = if schema_tz.is_some() {
720
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
721
+ } else {
722
+ None // Unzoned/local timestamp
723
+ };
724
+
725
+ return Ok(ParquetValue::TimestampSecond(secs, tz));
726
+ }
727
+
728
+ // Handle strings
729
+ if value.is_kind_of(ruby.class_string()) {
730
+ // Use Ruby's Time.parse to handle timestamp strings
731
+ let time_class = ruby.class_time();
732
+ let time = time_class
733
+ .funcall::<_, _, Value>("parse", (value,))
734
+ .map_err(|e| {
735
+ ParquetError::Conversion(format!("Failed to parse timestamp: {}", e))
736
+ })?;
737
+
738
+ // Normalize timestamp according to Parquet spec
739
+ let adjusted_time = self.normalize_timestamp_for_parquet(time, schema_tz.is_some())?;
740
+
741
+ let secs: i64 = adjusted_time
742
+ .funcall("to_i", ())
743
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
744
+
745
+ // PARQUET TIMESTAMP STORAGE:
746
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
747
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
748
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
749
+ let tz = if schema_tz.is_some() {
750
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
751
+ } else {
752
+ None // Unzoned/local timestamp
753
+ };
754
+
755
+ return Ok(ParquetValue::TimestampSecond(secs, tz));
756
+ }
757
+
758
+ Err(ParquetError::Conversion(format!(
759
+ "Cannot convert {} to timestamp_second",
760
+ value.class()
761
+ )))
762
+ }
763
+
764
+ fn convert_to_timestamp_millis_with_tz(
765
+ &self,
766
+ value: Value,
767
+ schema_tz: Option<&str>,
768
+ ) -> Result<ParquetValue> {
769
+ if value.is_nil() {
770
+ return Ok(ParquetValue::Null);
771
+ }
772
+
773
+ let ruby = Ruby::get()
774
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
775
+ if value.is_kind_of(ruby.class_time()) {
776
+ // Normalize timestamp according to Parquet spec
777
+ let adjusted_time = self.normalize_timestamp_for_parquet(value, schema_tz.is_some())?;
778
+
779
+ let millis = adjusted_time
780
+ .funcall::<_, _, i64>("to_i", ())
781
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
782
+ * 1000
783
+ + adjusted_time
784
+ .funcall::<_, _, i32>("nsec", ())
785
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64
786
+ / 1_000_000;
787
+
788
+ // PARQUET TIMESTAMP STORAGE:
789
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
790
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
791
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
792
+ let tz = if schema_tz.is_some() {
793
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
794
+ } else {
795
+ None // Unzoned/local timestamp
796
+ };
797
+
798
+ return Ok(ParquetValue::TimestampMillis(millis, tz));
799
+ }
800
+
801
+ // Handle strings
802
+ if value.is_kind_of(ruby.class_string()) {
803
+ // Use Ruby's Time.parse to handle timestamp strings
804
+ let time_class = ruby.class_time();
805
+ let time = time_class
806
+ .funcall::<_, _, Value>("parse", (value,))
807
+ .map_err(|e| {
808
+ ParquetError::Conversion(format!("Failed to parse timestamp: {}", e))
809
+ })?;
810
+
811
+ // Normalize timestamp according to Parquet spec
812
+ let adjusted_time = self.normalize_timestamp_for_parquet(time, schema_tz.is_some())?;
813
+
814
+ let millis = adjusted_time
815
+ .funcall::<_, _, i64>("to_i", ())
816
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
817
+ * 1000
818
+ + adjusted_time
819
+ .funcall::<_, _, i32>("nsec", ())
820
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64
821
+ / 1_000_000;
822
+
823
+ // PARQUET TIMESTAMP STORAGE:
824
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
825
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
826
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
827
+ let tz = if schema_tz.is_some() {
828
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
829
+ } else {
830
+ None // Unzoned/local timestamp
831
+ };
832
+
833
+ return Ok(ParquetValue::TimestampMillis(millis, tz));
834
+ }
835
+
836
+ Err(ParquetError::Conversion(format!(
837
+ "Cannot convert {} to timestamp_millis",
838
+ value.class()
839
+ )))
840
+ }
841
+
842
+ fn convert_to_timestamp_micros_with_tz(
843
+ &self,
844
+ value: Value,
845
+ schema_tz: Option<&str>,
846
+ ) -> Result<ParquetValue> {
847
+ if value.is_nil() {
848
+ return Ok(ParquetValue::Null);
849
+ }
850
+
851
+ let ruby = Ruby::get()
852
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
853
+ if value.is_kind_of(ruby.class_time()) {
854
+ // Normalize timestamp according to Parquet spec
855
+ let adjusted_time = self.normalize_timestamp_for_parquet(value, schema_tz.is_some())?;
856
+
857
+ let micros = adjusted_time
858
+ .funcall::<_, _, i64>("to_i", ())
859
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
860
+ * 1_000_000
861
+ + adjusted_time
862
+ .funcall::<_, _, i32>("nsec", ())
863
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64
864
+ / 1000;
865
+
866
+ // PARQUET TIMESTAMP STORAGE:
867
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
868
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
869
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
870
+ let tz = if schema_tz.is_some() {
871
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
872
+ } else {
873
+ None // Unzoned/local timestamp
874
+ };
875
+
876
+ return Ok(ParquetValue::TimestampMicros(micros, tz));
877
+ }
878
+
879
+ // Handle strings
880
+ if value.is_kind_of(ruby.class_string()) {
881
+ // Use Ruby's Time.parse to handle timestamp strings
882
+ let time_class = ruby.class_time();
883
+ let time = time_class
884
+ .funcall::<_, _, Value>("parse", (value,))
885
+ .map_err(|e| {
886
+ ParquetError::Conversion(format!("Failed to parse timestamp: {}", e))
887
+ })?;
888
+
889
+ // Normalize timestamp according to Parquet spec
890
+ let adjusted_time = self.normalize_timestamp_for_parquet(time, schema_tz.is_some())?;
891
+
892
+ let micros = adjusted_time
893
+ .funcall::<_, _, i64>("to_i", ())
894
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
895
+ * 1_000_000
896
+ + adjusted_time
897
+ .funcall::<_, _, i32>("nsec", ())
898
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64
899
+ / 1000;
900
+
901
+ // PARQUET TIMESTAMP STORAGE:
902
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
903
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
904
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
905
+ let tz = if schema_tz.is_some() {
906
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
907
+ } else {
908
+ None // Unzoned/local timestamp
909
+ };
910
+
911
+ return Ok(ParquetValue::TimestampMicros(micros, tz));
912
+ }
913
+
914
+ Err(ParquetError::Conversion(format!(
915
+ "Cannot convert {} to timestamp_micros",
916
+ value.class()
917
+ )))
918
+ }
919
+
920
+ fn convert_to_timestamp_nanos_with_tz(
921
+ &self,
922
+ value: Value,
923
+ schema_tz: Option<&str>,
924
+ ) -> Result<ParquetValue> {
925
+ if value.is_nil() {
926
+ return Ok(ParquetValue::Null);
927
+ }
928
+
929
+ let ruby = Ruby::get()
930
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
931
+ if value.is_kind_of(ruby.class_time()) {
932
+ // Normalize timestamp according to Parquet spec
933
+ let adjusted_time = self.normalize_timestamp_for_parquet(value, schema_tz.is_some())?;
934
+
935
+ let nanos = adjusted_time
936
+ .funcall::<_, _, i64>("to_i", ())
937
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
938
+ * 1_000_000_000
939
+ + adjusted_time
940
+ .funcall::<_, _, i32>("nsec", ())
941
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64;
942
+
943
+ // PARQUET TIMESTAMP STORAGE:
944
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
945
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
946
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
947
+ let tz = if schema_tz.is_some() {
948
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
949
+ } else {
950
+ None // Unzoned/local timestamp
951
+ };
952
+
953
+ return Ok(ParquetValue::TimestampNanos(nanos, tz));
954
+ }
955
+
956
+ // Handle strings
957
+ if value.is_kind_of(ruby.class_string()) {
958
+ // Use Ruby's Time.parse to handle timestamp strings
959
+ let time_class = ruby.class_time();
960
+ let time = time_class
961
+ .funcall::<_, _, Value>("parse", (value,))
962
+ .map_err(|e| {
963
+ ParquetError::Conversion(format!("Failed to parse timestamp: {}", e))
964
+ })?;
965
+
966
+ // Normalize timestamp according to Parquet spec
967
+ let adjusted_time = self.normalize_timestamp_for_parquet(time, schema_tz.is_some())?;
968
+
969
+ let nanos = adjusted_time
970
+ .funcall::<_, _, i64>("to_i", ())
971
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
972
+ * 1_000_000_000
973
+ + adjusted_time
974
+ .funcall::<_, _, i32>("nsec", ())
975
+ .map_err(|e| ParquetError::Conversion(e.to_string()))? as i64;
976
+
977
+ // PARQUET TIMESTAMP STORAGE:
978
+ // - Schema WITH timezone -> Store as UTC (isAdjustedToUTC = true)
979
+ // - Schema WITHOUT timezone -> Store as unzoned (isAdjustedToUTC = false)
980
+ // NOTE: Original timezone like "+09:00" is converted to "UTC" for storage
981
+ let tz = if schema_tz.is_some() {
982
+ Some(Arc::from("UTC")) // Always UTC, never the original timezone
983
+ } else {
984
+ None // Unzoned/local timestamp
985
+ };
986
+
987
+ return Ok(ParquetValue::TimestampNanos(nanos, tz));
988
+ }
989
+
990
+ Err(ParquetError::Conversion(format!(
991
+ "Cannot convert {} to timestamp_nanos",
992
+ value.class()
993
+ )))
994
+ }
995
+
996
+ fn convert_to_decimal128(
997
+ &self,
998
+ value: Value,
999
+ precision: u8,
1000
+ scale: i8,
1001
+ ) -> Result<ParquetValue> {
1002
+ if value.is_nil() {
1003
+ return Ok(ParquetValue::Null);
1004
+ }
1005
+
1006
+ // For BigDecimal, use to_s("F") to get non-scientific notation
1007
+ let str_val: String = if value.class().to_string() == "BigDecimal" {
1008
+ value
1009
+ .funcall("to_s", ("F",))
1010
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
1011
+ } else {
1012
+ value
1013
+ .funcall("to_s", ())
1014
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
1015
+ };
1016
+
1017
+ self.parse_decimal128(&str_val, precision, scale)
1018
+ }
1019
+
1020
+ fn convert_to_decimal256(
1021
+ &self,
1022
+ value: Value,
1023
+ precision: u8,
1024
+ scale: i8,
1025
+ ) -> Result<ParquetValue> {
1026
+ if value.is_nil() {
1027
+ return Ok(ParquetValue::Null);
1028
+ }
1029
+
1030
+ // For BigDecimal, use to_s("F") to get non-scientific notation
1031
+ let str_val: String = if value.class().to_string() == "BigDecimal" {
1032
+ value
1033
+ .funcall("to_s", ("F",))
1034
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
1035
+ } else {
1036
+ value
1037
+ .funcall("to_s", ())
1038
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
1039
+ };
1040
+
1041
+ self.parse_decimal256(&str_val, precision, scale)
1042
+ }
1043
+
1044
+ fn convert_to_fixed_len_byte_array(&self, value: Value, len: i32) -> Result<ParquetValue> {
1045
+ if value.is_nil() {
1046
+ return Ok(ParquetValue::Null);
1047
+ }
1048
+
1049
+ let ruby = Ruby::get()
1050
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
1051
+ let bytes = if value.is_kind_of(ruby.class_string()) {
1052
+ let s: RString = TryConvert::try_convert(value)
1053
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1054
+ unsafe { s.as_slice() }.to_vec()
1055
+ } else {
1056
+ let s: String = TryConvert::try_convert(value)
1057
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1058
+ s.into_bytes()
1059
+ };
1060
+
1061
+ if bytes.len() != len as usize {
1062
+ return Err(ParquetError::Conversion(format!(
1063
+ "Expected {} bytes, got {}",
1064
+ len,
1065
+ bytes.len()
1066
+ )));
1067
+ }
1068
+
1069
+ Ok(ParquetValue::Bytes(bytes.into()))
1070
+ }
1071
+
1072
+ // Helper methods
1073
+
1074
+ fn convert_numeric<T>(&self, value: Value) -> Result<T>
1075
+ where
1076
+ T: TryConvert + std::str::FromStr,
1077
+ <T as std::str::FromStr>::Err: std::fmt::Display,
1078
+ {
1079
+ // Try direct conversion first
1080
+ if let Ok(val) = TryConvert::try_convert(value) {
1081
+ return Ok(val);
1082
+ }
1083
+
1084
+ // If that fails, try converting to i64/f64 first, then to target type
1085
+ let ruby = Ruby::get()
1086
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
1087
+ if value.is_kind_of(ruby.class_integer()) {
1088
+ // Convert Integer to i64 first, then to target type
1089
+ let i: i64 = TryConvert::try_convert(value)
1090
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1091
+ i.to_string().parse::<T>().map_err(|e| {
1092
+ ParquetError::Conversion(format!("Failed to convert {} to target type: {}", i, e))
1093
+ })
1094
+ } else if value.is_kind_of(ruby.class_float()) {
1095
+ // Convert Float to f64 first, then to target type
1096
+ let f: f64 = TryConvert::try_convert(value)
1097
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1098
+ f.to_string().parse::<T>().map_err(|e| {
1099
+ ParquetError::Conversion(format!("Failed to convert {} to target type: {}", f, e))
1100
+ })
1101
+ } else if value.is_kind_of(ruby.class_string()) {
1102
+ let s: String = TryConvert::try_convert(value)
1103
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1104
+ s.trim().parse::<T>().map_err(|e| {
1105
+ ParquetError::Conversion(format!("Failed to parse '{}' as numeric: {}", s, e))
1106
+ })
1107
+ } else {
1108
+ Err(ParquetError::Conversion(format!(
1109
+ "Cannot convert {} to numeric",
1110
+ value.class()
1111
+ )))
1112
+ }
1113
+ }
1114
+
1115
+ fn parse_decimal128(&self, s: &str, _precision: u8, scale: i8) -> Result<ParquetValue> {
1116
+ // Parse decimal string to i128
1117
+ let clean = s.trim();
1118
+
1119
+ // Handle scientific notation by converting to regular decimal format
1120
+ let normalized = if clean.to_lowercase().contains('e') {
1121
+ // Parse as f64 first to handle scientific notation
1122
+ let f: f64 = clean.parse().map_err(|e| {
1123
+ ParquetError::Conversion(format!("Failed to parse scientific notation: {}", e))
1124
+ })?;
1125
+ // Convert to string with enough precision
1126
+ format!("{:.15}", f)
1127
+ .trim_end_matches('0')
1128
+ .trim_end_matches('.')
1129
+ .to_string()
1130
+ } else {
1131
+ clean.to_string()
1132
+ };
1133
+
1134
+ let is_negative = normalized.starts_with('-');
1135
+ let clean_abs = normalized.trim_start_matches('-').trim_start_matches('+');
1136
+
1137
+ let parts: Vec<&str> = clean_abs.split('.').collect();
1138
+
1139
+ if parts.len() > 2 {
1140
+ return Err(ParquetError::Conversion(
1141
+ "Invalid decimal format".to_string(),
1142
+ ));
1143
+ }
1144
+
1145
+ let integer_part = if parts.is_empty() || parts[0].is_empty() {
1146
+ "0"
1147
+ } else {
1148
+ parts[0]
1149
+ };
1150
+ let fractional_part = if parts.len() == 2 { parts[1] } else { "" };
1151
+
1152
+ // Calculate the actual value considering the scale
1153
+ let current_scale = fractional_part.len() as i8;
1154
+
1155
+ if scale < 0 {
1156
+ return Err(ParquetError::Conversion(
1157
+ "Negative scale not supported".to_string(),
1158
+ ));
1159
+ }
1160
+
1161
+ // Parse integer and fractional parts
1162
+ let integer_value: i128 = integer_part.parse().map_err(|e| {
1163
+ ParquetError::Conversion(format!("Failed to parse integer part: {}", e))
1164
+ })?;
1165
+
1166
+ let fractional_value: i128 = if fractional_part.is_empty() {
1167
+ 0
1168
+ } else {
1169
+ fractional_part.parse().map_err(|e| {
1170
+ ParquetError::Conversion(format!("Failed to parse fractional part: {}", e))
1171
+ })?
1172
+ };
1173
+
1174
+ // Calculate the final value based on scale
1175
+ let scale_factor = 10_i128.pow(scale as u32);
1176
+ let current_scale_factor = 10_i128.pow(current_scale as u32);
1177
+
1178
+ let mut value = if current_scale <= scale {
1179
+ // Current scale is less than or equal to target scale - pad with zeros
1180
+ integer_value * scale_factor + fractional_value * (scale_factor / current_scale_factor)
1181
+ } else {
1182
+ // Current scale is greater than target scale - need to truncate/round
1183
+ let adjustment_factor = 10_i128.pow((current_scale - scale) as u32);
1184
+ let adjusted_fractional = fractional_value / adjustment_factor;
1185
+ integer_value * scale_factor + adjusted_fractional
1186
+ };
1187
+
1188
+ if is_negative {
1189
+ value = -value;
1190
+ }
1191
+
1192
+ Ok(ParquetValue::Decimal128(value, scale))
1193
+ }
1194
+
1195
+ fn parse_decimal256(&self, s: &str, _precision: u8, scale: i8) -> Result<ParquetValue> {
1196
+ // Parse decimal string to BigInt
1197
+ use num::{BigInt, Zero};
1198
+
1199
+ let clean = s.trim();
1200
+
1201
+ // Handle scientific notation by converting to regular decimal format
1202
+ let normalized = if clean.to_lowercase().contains('e') {
1203
+ // Parse as f64 first to handle scientific notation
1204
+ let f: f64 = clean.parse().map_err(|e| {
1205
+ ParquetError::Conversion(format!("Failed to parse scientific notation: {}", e))
1206
+ })?;
1207
+ // Convert to string with enough precision
1208
+ format!("{:.15}", f)
1209
+ .trim_end_matches('0')
1210
+ .trim_end_matches('.')
1211
+ .to_string()
1212
+ } else {
1213
+ clean.to_string()
1214
+ };
1215
+
1216
+ let is_negative = normalized.starts_with('-');
1217
+ let clean_abs = normalized.trim_start_matches('-').trim_start_matches('+');
1218
+
1219
+ let parts: Vec<&str> = clean_abs.split('.').collect();
1220
+
1221
+ if parts.len() > 2 {
1222
+ return Err(ParquetError::Conversion(
1223
+ "Invalid decimal format".to_string(),
1224
+ ));
1225
+ }
1226
+
1227
+ let integer_part = if parts.is_empty() || parts[0].is_empty() {
1228
+ "0"
1229
+ } else {
1230
+ parts[0]
1231
+ };
1232
+ let fractional_part = if parts.len() == 2 { parts[1] } else { "" };
1233
+
1234
+ // Calculate the actual value considering the scale
1235
+ let current_scale = fractional_part.len() as i8;
1236
+
1237
+ if scale < 0 {
1238
+ return Err(ParquetError::Conversion(
1239
+ "Negative scale not supported".to_string(),
1240
+ ));
1241
+ }
1242
+
1243
+ // Parse integer and fractional parts
1244
+ let integer_value: BigInt = integer_part.parse().map_err(|e| {
1245
+ ParquetError::Conversion(format!("Failed to parse integer part: {}", e))
1246
+ })?;
1247
+
1248
+ let fractional_value: BigInt = if fractional_part.is_empty() {
1249
+ BigInt::zero()
1250
+ } else {
1251
+ fractional_part.parse().map_err(|e| {
1252
+ ParquetError::Conversion(format!("Failed to parse fractional part: {}", e))
1253
+ })?
1254
+ };
1255
+
1256
+ // Calculate the final value based on scale
1257
+ let scale_factor = BigInt::from(10).pow(scale as u32);
1258
+ let current_scale_factor = BigInt::from(10).pow(current_scale as u32);
1259
+
1260
+ let mut value = if current_scale <= scale {
1261
+ // Current scale is less than or equal to target scale - pad with zeros
1262
+ integer_value * &scale_factor + fractional_value * (scale_factor / current_scale_factor)
1263
+ } else {
1264
+ // Current scale is greater than target scale - need to truncate/round
1265
+ let adjustment_factor = BigInt::from(10).pow((current_scale - scale) as u32);
1266
+ let adjusted_fractional = fractional_value / adjustment_factor;
1267
+ integer_value * &scale_factor + adjusted_fractional
1268
+ };
1269
+
1270
+ if is_negative {
1271
+ value = -value;
1272
+ }
1273
+
1274
+ Ok(ParquetValue::Decimal256(value, scale))
1275
+ }
1276
+
1277
+ /// Convert a Ruby array to a ParquetValue::List
1278
+ fn convert_to_list(
1279
+ &mut self,
1280
+ value: Value,
1281
+ item_schema: &parquet_core::SchemaNode,
1282
+ ) -> Result<ParquetValue> {
1283
+ if value.is_nil() {
1284
+ return Ok(ParquetValue::Null);
1285
+ }
1286
+
1287
+ let array: RArray = TryConvert::try_convert(value).map_err(|e: MagnusError| {
1288
+ ParquetError::Conversion(format!("Expected Array for List type: {}", e))
1289
+ })?;
1290
+
1291
+ let mut list = Vec::with_capacity(array.len());
1292
+ for item in array.into_iter() {
1293
+ list.push(self.convert_with_schema_hint(item, item_schema)?);
1294
+ }
1295
+
1296
+ Ok(ParquetValue::List(list))
1297
+ }
1298
+
1299
+ /// Convert a Ruby hash to a ParquetValue::Map
1300
+ fn convert_to_map(
1301
+ &mut self,
1302
+ value: Value,
1303
+ key_schema: &parquet_core::SchemaNode,
1304
+ value_schema: &parquet_core::SchemaNode,
1305
+ ) -> Result<ParquetValue> {
1306
+ if value.is_nil() {
1307
+ return Ok(ParquetValue::Null);
1308
+ }
1309
+
1310
+ let hash: RHash = TryConvert::try_convert(value).map_err(|e: MagnusError| {
1311
+ ParquetError::Conversion(format!("Expected Hash for Map type: {}", e))
1312
+ })?;
1313
+
1314
+ // Collect key-value pairs first
1315
+ let mut kv_pairs = Vec::new();
1316
+ hash.foreach(|k: Value, v: Value| {
1317
+ kv_pairs.push((k, v));
1318
+ Ok(ForEach::Continue)
1319
+ })
1320
+ .map_err(|e: MagnusError| ParquetError::Conversion(e.to_string()))?;
1321
+
1322
+ // Now convert them with mutable self
1323
+ let mut map = Vec::new();
1324
+ for (k, v) in kv_pairs {
1325
+ let key = self.convert_with_schema_hint(k, key_schema)?;
1326
+ let val = self.convert_with_schema_hint(v, value_schema)?;
1327
+ map.push((key, val));
1328
+ }
1329
+
1330
+ Ok(ParquetValue::Map(map))
1331
+ }
1332
+
1333
+ /// Convert a Ruby hash to a ParquetValue::Record (struct)
1334
+ fn convert_to_struct(
1335
+ &mut self,
1336
+ value: Value,
1337
+ fields: &[parquet_core::SchemaNode],
1338
+ ) -> Result<ParquetValue> {
1339
+ if value.is_nil() {
1340
+ return Ok(ParquetValue::Null);
1341
+ }
1342
+
1343
+ let hash: RHash = TryConvert::try_convert(value).map_err(|e: MagnusError| {
1344
+ ParquetError::Conversion(format!("Expected Hash for Struct type: {}", e))
1345
+ })?;
1346
+
1347
+ let mut record = IndexMap::new();
1348
+
1349
+ for field in fields {
1350
+ let field_name = field.name();
1351
+ let ruby_key = Symbol::new(field_name);
1352
+
1353
+ // Try symbol key first, then string key
1354
+ let field_value = if let Some(val) = hash.get(ruby_key) {
1355
+ val
1356
+ } else if let Some(val) = hash.get(field_name) {
1357
+ val
1358
+ } else {
1359
+ // Field not found, use null
1360
+ Ruby::get()
1361
+ .map_err(|_| {
1362
+ ParquetError::Conversion("Failed to get Ruby runtime".to_string())
1363
+ })?
1364
+ .qnil()
1365
+ .as_value()
1366
+ };
1367
+
1368
+ let converted = self.convert_with_schema_hint(field_value, field)?;
1369
+ record.insert(field_name.into(), converted);
1370
+ }
1371
+
1372
+ Ok(ParquetValue::Record(record))
1373
+ }
1374
+ }
1375
+
1376
+ // Helper functions for one-off conversions where we don't need string caching
1377
+
1378
+ pub fn ruby_to_parquet(value: Value) -> Result<ParquetValue> {
1379
+ let mut converter = RubyValueConverter::new();
1380
+ converter.infer_and_convert(value)
1381
+ }
1382
+
1383
+ pub fn parquet_to_ruby(value: ParquetValue) -> Result<Value> {
1384
+ let ruby = Ruby::get()
1385
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
1386
+
1387
+ match value {
1388
+ ParquetValue::Null => Ok(ruby.qnil().as_value()),
1389
+ ParquetValue::Boolean(b) => Ok(b.into_value_with(&ruby)),
1390
+ ParquetValue::Int8(i) => Ok((i as i64).into_value_with(&ruby)),
1391
+ ParquetValue::Int16(i) => Ok((i as i64).into_value_with(&ruby)),
1392
+ ParquetValue::Int32(i) => Ok((i as i64).into_value_with(&ruby)),
1393
+ ParquetValue::Int64(i) => Ok(i.into_value_with(&ruby)),
1394
+ ParquetValue::UInt8(i) => Ok((i as u64).into_value_with(&ruby)),
1395
+ ParquetValue::UInt16(i) => Ok((i as u64).into_value_with(&ruby)),
1396
+ ParquetValue::UInt32(i) => Ok((i as u64).into_value_with(&ruby)),
1397
+ ParquetValue::UInt64(i) => Ok(i.into_value_with(&ruby)),
1398
+ ParquetValue::Float16(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
1399
+ ParquetValue::Float32(OrderedFloat(f)) => Ok((f as f64).into_value_with(&ruby)),
1400
+ ParquetValue::Float64(OrderedFloat(f)) => Ok(f.into_value_with(&ruby)),
1401
+ ParquetValue::String(s) => Ok(s.into_value_with(&ruby)),
1402
+ ParquetValue::Bytes(b) => {
1403
+ // Check if this is a UUID (16 bytes)
1404
+ if b.len() == 16 {
1405
+ // Format as UUID string
1406
+ let uuid_str = format!(
1407
+ "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
1408
+ b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
1409
+ b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]
1410
+ );
1411
+ Ok(uuid_str.into_value_with(&ruby))
1412
+ } else {
1413
+ // Regular bytes - convert to string
1414
+ Ok(ruby.str_from_slice(&b).as_value())
1415
+ }
1416
+ }
1417
+ ParquetValue::Date32(days) => {
1418
+ // Convert days since epoch to Date object
1419
+ let _ = ruby.require("date");
1420
+ let kernel = ruby.module_kernel();
1421
+ let date_class = kernel
1422
+ .const_get::<_, Value>("Date")
1423
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1424
+ let secs = days as i64 * 86400;
1425
+ let time_class = ruby.class_time();
1426
+ let time = time_class
1427
+ .funcall::<_, _, Value>("at", (secs,))
1428
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?
1429
+ .funcall::<_, _, Value>("utc", ())
1430
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1431
+ let year: i32 = time
1432
+ .funcall("year", ())
1433
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1434
+ let month: i32 = time
1435
+ .funcall("month", ())
1436
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1437
+ let day: i32 = time
1438
+ .funcall("day", ())
1439
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1440
+ date_class
1441
+ .funcall("new", (year, month, day))
1442
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1443
+ }
1444
+ ParquetValue::Date64(millis) => {
1445
+ // Convert millis to Time object
1446
+ let time_class = ruby.class_time();
1447
+ let secs = millis / 1000;
1448
+ let nsec = (millis % 1000) * 1_000_000;
1449
+ time_class
1450
+ .funcall("at", (secs, nsec))
1451
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1452
+ }
1453
+ ParquetValue::TimeMillis(millis) => {
1454
+ // Convert to Time object for today with given time
1455
+ let time_class = ruby.class_time();
1456
+ let hours = millis / (3600 * 1000);
1457
+ let minutes = (millis % (3600 * 1000)) / (60 * 1000);
1458
+ let seconds = (millis % (60 * 1000)) / 1000;
1459
+ let ms = millis % 1000;
1460
+
1461
+ let now: Value = time_class
1462
+ .funcall("now", ())
1463
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1464
+ let year: i32 = now
1465
+ .funcall("year", ())
1466
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1467
+ let month: i32 = now
1468
+ .funcall("month", ())
1469
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1470
+ let day: i32 = now
1471
+ .funcall("day", ())
1472
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1473
+
1474
+ time_class
1475
+ .funcall(
1476
+ "utc",
1477
+ (year, month, day, hours, minutes, seconds, ms * 1000),
1478
+ )
1479
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1480
+ }
1481
+ ParquetValue::TimeMicros(micros) => {
1482
+ // Similar to TimeMillis but with microsecond precision
1483
+ let time_class = ruby.class_time();
1484
+ let hours = micros / (3600 * 1_000_000);
1485
+ let minutes = (micros % (3600 * 1_000_000)) / (60 * 1_000_000);
1486
+ let seconds = (micros % (60 * 1_000_000)) / 1_000_000;
1487
+ let us = micros % 1_000_000;
1488
+
1489
+ let now: Value = time_class
1490
+ .funcall("now", ())
1491
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1492
+ let year: i32 = now
1493
+ .funcall("year", ())
1494
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1495
+ let month: i32 = now
1496
+ .funcall("month", ())
1497
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1498
+ let day: i32 = now
1499
+ .funcall("day", ())
1500
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1501
+
1502
+ time_class
1503
+ .funcall("utc", (year, month, day, hours, minutes, seconds, us))
1504
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1505
+ }
1506
+ ParquetValue::TimestampSecond(secs, tz) => {
1507
+ let time_class = ruby.class_time();
1508
+ let time = time_class
1509
+ .funcall::<_, _, Value>("at", (secs,))
1510
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1511
+ apply_timezone(time, &tz)
1512
+ }
1513
+ ParquetValue::TimestampMillis(millis, tz) => {
1514
+ let time_class = ruby.class_time();
1515
+ let secs = millis / 1000;
1516
+ let usec = (millis % 1000) * 1000; // Convert millisecond remainder to microseconds
1517
+ let time = time_class
1518
+ .funcall::<_, _, Value>("at", (secs, usec))
1519
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1520
+ apply_timezone(time, &tz)
1521
+ }
1522
+ ParquetValue::TimestampMicros(micros, tz) => {
1523
+ let time_class = ruby.class_time();
1524
+ let secs = micros / 1_000_000;
1525
+ let usec = micros % 1_000_000; // Already in microseconds
1526
+ let time = time_class
1527
+ .funcall::<_, _, Value>("at", (secs, usec))
1528
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1529
+ apply_timezone(time, &tz)
1530
+ }
1531
+ ParquetValue::TimestampNanos(nanos, tz) => {
1532
+ let time_class = ruby.class_time();
1533
+ let secs = nanos / 1_000_000_000;
1534
+ let nsec = nanos % 1_000_000_000;
1535
+ // Use the nanosecond form of Time.at
1536
+ let time = time_class
1537
+ .funcall::<_, _, Value>("at", (secs, nsec, Symbol::new("nanosecond")))
1538
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1539
+ apply_timezone(time, &tz)
1540
+ }
1541
+ ParquetValue::Decimal128(val, scale) => {
1542
+ // Load BigDecimal if needed
1543
+ let _ = ruby.require("bigdecimal");
1544
+
1545
+ // Format decimal with scale
1546
+ let str_val = format_decimal128(val, scale);
1547
+ let kernel = ruby.module_kernel();
1548
+ kernel
1549
+ .funcall("BigDecimal", (str_val,))
1550
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1551
+ }
1552
+ ParquetValue::Decimal256(val, scale) => {
1553
+ // Load BigDecimal if needed
1554
+ let _ = ruby.require("bigdecimal");
1555
+
1556
+ // Format decimal with scale
1557
+ let str_val = format_decimal256(&val, scale);
1558
+ let kernel = ruby.module_kernel();
1559
+ kernel
1560
+ .funcall("BigDecimal", (str_val,))
1561
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1562
+ }
1563
+ ParquetValue::List(list) => {
1564
+ let array = ruby.ary_new_capa(list.len());
1565
+ for item in list {
1566
+ let ruby_val = parquet_to_ruby(item)?;
1567
+ array
1568
+ .push(ruby_val)
1569
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1570
+ }
1571
+ Ok(array.as_value())
1572
+ }
1573
+ ParquetValue::Map(map) => {
1574
+ let hash = ruby.hash_new();
1575
+ for (k, v) in map {
1576
+ let ruby_key = parquet_to_ruby(k)?;
1577
+ let ruby_val = parquet_to_ruby(v)?;
1578
+ hash.aset(ruby_key, ruby_val)
1579
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1580
+ }
1581
+ Ok(hash.as_value())
1582
+ }
1583
+ ParquetValue::Record(record) => {
1584
+ // Convert Record to Ruby Hash
1585
+ let hash = ruby.hash_new();
1586
+ for (field_name, field_value) in record {
1587
+ let ruby_key = ruby.str_new(&field_name);
1588
+ let ruby_val = parquet_to_ruby(field_value)?;
1589
+ hash.aset(ruby_key, ruby_val)
1590
+ .map_err(|e| ParquetError::Conversion(e.to_string()))?;
1591
+ }
1592
+ Ok(hash.as_value())
1593
+ }
1594
+ }
1595
+ }
1596
+
1597
+ // Helper functions for decimal formatting
1598
+
1599
/// Render an unscaled 128-bit decimal as plain decimal text.
///
/// - `scale == 0`: the integer itself.
/// - `scale > 0`: a '.' is inserted `scale` digits from the right, with
///   leading zeros added as needed (e.g. `(-5, 3)` -> `"-0.005"`).
/// - `scale < 0`: `-scale` zeros are appended (e.g. `(7, -2)` -> `"700"`);
///   zero stays `"0"`.
///
/// The work is done on the digit string rather than with `10^scale`
/// arithmetic, so it cannot overflow for `i128::MIN`, for scales of 39 or
/// more, or for `scale == i8::MIN`.
fn format_decimal128(value: i128, scale: i8) -> String {
    if scale == 0 {
        return value.to_string();
    }

    let sign = if value < 0 { "-" } else { "" };
    // unsigned_abs is total: it has no overflow case for i128::MIN / i8::MIN.
    let digits = value.unsigned_abs().to_string();
    let shift = usize::from(scale.unsigned_abs());

    if scale > 0 {
        // Left-pad so at least one digit remains in front of the point.
        let padded = format!("{:0>width$}", digits, width = shift + 1);
        let (integer_part, fractional_part) = padded.split_at(padded.len() - shift);
        format!("{}{}.{}", sign, integer_part, fractional_part)
    } else if digits == "0" {
        // 0 * 10^n is still 0; do not emit a run of zeros.
        digits
    } else {
        // Negative scale means multiply by 10^(-scale)
        format!("{}{}{}", sign, digits, "0".repeat(shift))
    }
}
1624
+
1625
+ fn format_decimal256(value: &num::BigInt, scale: i8) -> String {
1626
+ use num::{BigInt, Signed};
1627
+
1628
+ if scale == 0 {
1629
+ return value.to_string();
1630
+ }
1631
+
1632
+ let abs_value = value.abs();
1633
+ let sign = if value.is_negative() { "-" } else { "" };
1634
+
1635
+ if scale > 0 {
1636
+ let ten = BigInt::from(10);
1637
+ let divisor = ten.pow(scale as u32);
1638
+ let integer_part = &abs_value / &divisor;
1639
+ let fractional_part = &abs_value % &divisor;
1640
+
1641
+ // Format fractional part with leading zeros
1642
+ let frac_str = fractional_part.to_string();
1643
+ let padding = scale as usize - frac_str.len();
1644
+ let zeros = "0".repeat(padding);
1645
+
1646
+ format!("{}{}.{}{}", sign, integer_part, zeros, frac_str)
1647
+ } else {
1648
+ // Negative scale means multiply by 10^(-scale)
1649
+ let ten = BigInt::from(10);
1650
+ let multiplier = ten.pow((-scale) as u32);
1651
+ format!("{}{}", sign, abs_value * multiplier)
1652
+ }
1653
+ }
1654
+
1655
+ /// Apply timezone when reading timestamp from Parquet file
1656
+ ///
1657
+ /// PARQUET SPEC COMPLIANCE:
1658
+ /// - If schema has ANY timezone -> values are UTC (isAdjustedToUTC = true)
1659
+ /// - If schema has NO timezone -> values are local/unzoned (isAdjustedToUTC = false)
1660
+ ///
1661
+ /// NOTE: The actual timezone string in the schema is irrelevant for reading.
1662
+ /// Whether it's "UTC", "+09:00", or "America/New_York", the stored values
1663
+ /// are ALWAYS UTC-normalized. We return them as UTC Time objects.
1664
+ fn apply_timezone(time: Value, tz: &Option<std::sync::Arc<str>>) -> Result<Value> {
1665
+ let _ruby = Ruby::get()
1666
+ .map_err(|_| ParquetError::Conversion("Failed to get Ruby runtime".to_string()))?;
1667
+
1668
+ match tz {
1669
+ Some(_) => {
1670
+ // ANY timezone = UTC storage (Parquet spec requirement)
1671
+ // Original timezone like "+09:00" is NOT preserved
1672
+ time.funcall("utc", ())
1673
+ .map_err(|e| ParquetError::Conversion(e.to_string()))
1674
+ }
1675
+ None => {
1676
+ // No timezone = local/unzoned timestamp
1677
+ // This is a "wall clock" time without timezone context
1678
+ Ok(time)
1679
+ }
1680
+ }
1681
+ }
1682
+
1683
+ // Note: These wrapper functions are needed because ValueConverter is not thread-safe
1684
+ // due to Ruby's GIL requirements. They are called from Ruby FFI functions where we know
1685
+ // we're in the correct thread context.