parquet 0.5.12 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +8 -5
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -603
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -1,603 +0,0 @@
1
- use std::sync::OnceLock;
2
-
3
- use itertools::Itertools;
4
- use parquet::{
5
- basic::{ConvertedType, LogicalType},
6
- data_type::AsBytes,
7
- };
8
-
9
- use super::*;
10
-
11
- pub static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
12
-
13
- /// Format decimal value with appropriate scale for BigDecimal conversion
14
- /// Handles positive and negative scales correctly for i8 scale
15
- pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -> String {
16
- if scale >= 0 {
17
- // Positive scale means divide (move decimal point left)
18
- format!("{}e-{}", value, scale)
19
- } else {
20
- // Negative scale means multiply (move decimal point right)
21
- format!("{}e{}", value, -scale)
22
- }
23
- }
24
-
25
- /// Format i256 decimal value with appropriate scale for BigDecimal conversion
26
- /// Uses bytes conversion to preserve full precision
27
- pub fn format_i256_decimal_with_scale(
28
- value: arrow_buffer::i256,
29
- scale: i8,
30
- ) -> Result<String, ParquetGemError> {
31
- // Convert i256 to big-endian bytes
32
- let bytes = value.to_be_bytes();
33
-
34
- // Use the existing bytes_to_decimal function which handles full precision
35
- bytes_to_decimal(&bytes, scale as i32)
36
- }
37
-
38
- /// Format decimal value with appropriate scale for BigDecimal conversion
39
- /// Handles positive and negative scales correctly for i32 scale
40
- pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
41
- if scale >= 0 {
42
- // Positive scale means divide (move decimal point left)
43
- format!("{}e-{}", value, scale)
44
- } else {
45
- // Negative scale means multiply (move decimal point right)
46
- format!("{}e{}", value, -scale)
47
- }
48
- }
49
-
50
- /// Convert arbitrary-length big-endian byte array to decimal string
51
- /// Supports byte arrays from 1 to 32 bytes in length
52
- fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
53
- match bytes.len() {
54
- 0 => Err(ParquetGemError::InvalidDecimal(
55
- "Empty byte array for decimal".to_string(),
56
- )),
57
- 1 => {
58
- // For 1 byte, use i8
59
- let value = bytes[0] as i8;
60
- Ok(format_decimal_with_i32_scale(value, scale))
61
- }
62
- 2 => {
63
- // For 2 bytes, use i16
64
- let mut value: i16 = 0;
65
- let is_negative = bytes[0] & 0x80 != 0;
66
-
67
- for &byte in bytes {
68
- value = (value << 8) | (byte as i16);
69
- }
70
-
71
- // Sign extend if negative
72
- if is_negative {
73
- let shift = 16 - (bytes.len() * 8);
74
- value = (value << shift) >> shift;
75
- }
76
-
77
- Ok(format_decimal_with_i32_scale(value, scale))
78
- }
79
- 3..=4 => {
80
- // For 3-4 bytes, use i32
81
- let mut value: i32 = 0;
82
- let is_negative = bytes[0] & 0x80 != 0;
83
-
84
- for &byte in bytes {
85
- value = (value << 8) | (byte as i32);
86
- }
87
-
88
- // Sign extend if negative
89
- if is_negative {
90
- let shift = 32 - (bytes.len() * 8);
91
- value = (value << shift) >> shift;
92
- }
93
-
94
- Ok(format_decimal_with_i32_scale(value, scale))
95
- }
96
- 5..=8 => {
97
- // For 5-8 bytes, use i64
98
- let mut value: i64 = 0;
99
- let is_negative = bytes[0] & 0x80 != 0;
100
-
101
- for &byte in bytes {
102
- value = (value << 8) | (byte as i64);
103
- }
104
-
105
- // Sign extend if negative
106
- if is_negative {
107
- let shift = 64 - (bytes.len() * 8);
108
- value = (value << shift) >> shift;
109
- }
110
-
111
- Ok(format_decimal_with_i32_scale(value, scale))
112
- }
113
- 9..=16 => {
114
- // For 9-16 bytes, use i128
115
- let mut value: i128 = 0;
116
- let is_negative = bytes[0] & 0x80 != 0;
117
-
118
- for &byte in bytes {
119
- value = (value << 8) | (byte as i128);
120
- }
121
-
122
- // Sign extend if negative
123
- if is_negative {
124
- let shift = 128 - (bytes.len() * 8);
125
- value = (value << shift) >> shift;
126
- }
127
-
128
- Ok(format_decimal_with_i32_scale(value, scale))
129
- }
130
- 17..=32 => {
131
- // For 17-32 bytes, we need arbitrary precision handling
132
- // Check if the number is negative (MSB of first byte)
133
- let is_negative = bytes[0] & 0x80 != 0;
134
-
135
- if is_negative {
136
- // For negative numbers, we need to compute two's complement
137
- // First, invert all bits
138
- let mut inverted = Vec::with_capacity(bytes.len());
139
- for &byte in bytes {
140
- inverted.push(!byte);
141
- }
142
-
143
- // Then add 1
144
- let mut carry = 1u8;
145
- for i in (0..inverted.len()).rev() {
146
- let (sum, new_carry) = inverted[i].overflowing_add(carry);
147
- inverted[i] = sum;
148
- carry = if new_carry { 1 } else { 0 };
149
- }
150
-
151
- // Convert to decimal string
152
- let mut result = String::new();
153
- let mut remainder = inverted;
154
-
155
- // Repeatedly divide by 10 to get decimal digits
156
- while !remainder.iter().all(|&b| b == 0) {
157
- let mut carry = 0u16;
158
- for i in 0..remainder.len() {
159
- let temp = (carry << 8) | (remainder[i] as u16);
160
- remainder[i] = (temp / 10) as u8;
161
- carry = temp % 10;
162
- }
163
- result.push_str(&carry.to_string());
164
- }
165
-
166
- // The digits are in reverse order
167
- if result.is_empty() {
168
- result = "0".to_string();
169
- } else {
170
- result = result.chars().rev().collect();
171
- }
172
-
173
- // Add negative sign and format with scale
174
- Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
175
- } else {
176
- // For positive numbers, direct conversion
177
- let mut result = String::new();
178
- let mut remainder = bytes.to_vec();
179
-
180
- // Repeatedly divide by 10 to get decimal digits
181
- while !remainder.iter().all(|&b| b == 0) {
182
- let mut carry = 0u16;
183
- for i in 0..remainder.len() {
184
- let temp = (carry << 8) | (remainder[i] as u16);
185
- remainder[i] = (temp / 10) as u8;
186
- carry = temp % 10;
187
- }
188
- result.push_str(&carry.to_string());
189
- }
190
-
191
- // The digits are in reverse order
192
- if result.is_empty() {
193
- result = "0".to_string();
194
- } else {
195
- result = result.chars().rev().collect();
196
- }
197
-
198
- Ok(format_decimal_with_i32_scale(result, scale))
199
- }
200
- }
201
- _ => Err(ParquetGemError::InvalidDecimal(format!(
202
- "Unsupported decimal byte array size: {} (maximum 32 bytes)",
203
- bytes.len()
204
- ))),
205
- }
206
- }
207
-
208
- #[derive(Debug)]
209
- pub enum RowRecord<S: BuildHasher + Default> {
210
- Vec(Vec<ParquetField>),
211
- Map(HashMap<StringCacheKey, ParquetField, S>),
212
- }
213
-
214
- #[derive(Debug)]
215
- pub enum ColumnRecord<S: BuildHasher + Default> {
216
- Vec(Vec<Vec<ParquetValue>>),
217
- Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
218
- }
219
-
220
- #[derive(Debug)]
221
- pub struct ParquetField {
222
- pub field: Field,
223
- #[allow(dead_code)]
224
- pub converted_type: ConvertedType,
225
- pub logical_type: Option<LogicalType>,
226
- pub strict: bool,
227
- }
228
-
229
- impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
230
- fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
231
- match self {
232
- RowRecord::Vec(vec) => {
233
- let ary = handle.ary_new_capa(vec.len());
234
- vec.into_iter().try_for_each(|v| {
235
- ary.push(v.try_into_value_with(handle)?)?;
236
- Ok::<_, ParquetGemError>(())
237
- })?;
238
- Ok(handle.into_value(ary))
239
- }
240
- RowRecord::Map(map) => {
241
- #[cfg(ruby_lt_3_2)]
242
- let hash = handle.hash_new_capa(map.len());
243
-
244
- #[cfg(not(ruby_lt_3_2))]
245
- let hash = handle.hash_new();
246
-
247
- let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
248
- let mut i = 0;
249
-
250
- for chunk in &map.into_iter().chunks(64) {
251
- // Reduced to 64 to ensure space for pairs
252
- for (k, v) in chunk {
253
- if i + 1 >= values.len() {
254
- // Bulk insert current batch if array is full
255
- hash.bulk_insert(&values[..i])?;
256
- values[..i].fill(handle.qnil().as_value());
257
- i = 0;
258
- }
259
- values[i] = handle.into_value(k);
260
- values[i + 1] = v.try_into_value_with(handle)?;
261
- i += 2;
262
- }
263
- // Insert any remaining pairs
264
- if i > 0 {
265
- hash.bulk_insert(&values[..i])?;
266
- values[..i].fill(handle.qnil().as_value());
267
- i = 0;
268
- }
269
- }
270
-
271
- Ok(hash.into_value_with(handle))
272
- }
273
- }
274
- }
275
- }
276
-
277
- impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
278
- fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
279
- match self {
280
- ColumnRecord::Vec(vec) => {
281
- let ary = handle.ary_new_capa(vec.len());
282
- vec.into_iter().try_for_each(|v| {
283
- let nested_ary = handle.ary_new_capa(v.len());
284
- v.into_iter().try_for_each(|v| {
285
- nested_ary.push(v.try_into_value_with(handle)?)?;
286
- Ok::<_, ParquetGemError>(())
287
- })?;
288
- ary.push(nested_ary.into_value_with(handle))?;
289
- Ok::<_, ParquetGemError>(())
290
- })?;
291
- Ok(ary.into_value_with(handle))
292
- }
293
- ColumnRecord::Map(map) => {
294
- #[cfg(ruby_lt_3_2)]
295
- let hash = handle.hash_new_capa(map.len());
296
-
297
- #[cfg(not(ruby_lt_3_2))]
298
- let hash = handle.hash_new();
299
-
300
- let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
301
- let mut i = 0;
302
-
303
- for chunk in &map.into_iter().chunks(64) {
304
- // Reduced to 64 to ensure space for pairs
305
- for (k, v) in chunk {
306
- if i + 1 >= values.len() {
307
- // Bulk insert current batch if array is full
308
- hash.bulk_insert(&values[..i])?;
309
- values[..i].fill(handle.qnil().as_value());
310
- i = 0;
311
- }
312
- values[i] = handle.into_value(k);
313
- let ary = handle.ary_new_capa(v.len());
314
- v.into_iter().try_for_each(|v| {
315
- ary.push(v.try_into_value_with(handle)?)?;
316
- Ok::<_, ParquetGemError>(())
317
- })?;
318
- values[i + 1] = handle.into_value(ary);
319
- i += 2;
320
- }
321
- // Insert any remaining pairs
322
- if i > 0 {
323
- hash.bulk_insert(&values[..i])?;
324
- values[..i].fill(handle.qnil().as_value());
325
- i = 0;
326
- }
327
- }
328
-
329
- Ok(hash.into_value_with(handle))
330
- }
331
- }
332
- }
333
- }
334
-
335
- pub trait TryIntoValue {
336
- fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError>;
337
- }
338
-
339
- impl TryIntoValue for ParquetField {
340
- fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
341
- match self.field {
342
- Field::Null => Ok(handle.qnil().as_value()),
343
- Field::Bool(b) => Ok(b.into_value_with(handle)),
344
- Field::Short(s) => Ok(s.into_value_with(handle)),
345
- Field::Int(i) => Ok(i.into_value_with(handle)),
346
- Field::Long(l) => Ok(l.into_value_with(handle)),
347
- Field::UByte(ub) => Ok(ub.into_value_with(handle)),
348
- Field::UShort(us) => Ok(us.into_value_with(handle)),
349
- Field::UInt(ui) => Ok(ui.into_value_with(handle)),
350
- Field::ULong(ul) => Ok(ul.into_value_with(handle)),
351
- Field::Float16(f) => Ok(f32::from(f).into_value_with(handle)),
352
- Field::Float(f) => Ok(f.into_value_with(handle)),
353
- Field::Double(d) => Ok(d.into_value_with(handle)),
354
- Field::Str(s) => {
355
- if self.strict {
356
- Ok(simdutf8::basic::from_utf8(s.as_bytes())
357
- .map_err(ParquetGemError::Utf8Error)
358
- .map(|s| s.into_value_with(handle))?)
359
- } else {
360
- let s = String::from_utf8_lossy(s.as_bytes());
361
- Ok(s.into_value_with(handle))
362
- }
363
- }
364
- Field::Byte(b) => Ok(b.into_value_with(handle)),
365
- Field::Bytes(b) => {
366
- if matches!(self.logical_type, Some(parquet::basic::LogicalType::Uuid)) {
367
- let bytes = b.as_bytes();
368
- let uuid = uuid::Uuid::from_slice(bytes)?;
369
- Ok(uuid.to_string().into_value_with(handle))
370
- } else {
371
- Ok(handle.str_from_slice(b.data()).as_value())
372
- }
373
- }
374
- Field::Date(d) => {
375
- let ts = jiff::Timestamp::from_second((d as i64) * 86400)?;
376
- let formatted = ts.strftime("%Y-%m-%d").to_string();
377
- Ok(formatted.into_value_with(handle))
378
- }
379
- Field::TimeMillis(ts) => {
380
- let ts = jiff::Timestamp::from_millisecond(ts as i64)?;
381
- let time_class = handle.class_time();
382
- Ok(time_class
383
- .funcall::<_, _, Value>("parse", (ts.to_string(),))?
384
- .into_value_with(handle))
385
- }
386
- Field::TimestampMillis(ts) => {
387
- let ts = jiff::Timestamp::from_millisecond(ts)?;
388
- let time_class = handle.class_time();
389
- Ok(time_class
390
- .funcall::<_, _, Value>("parse", (ts.to_string(),))?
391
- .into_value_with(handle))
392
- }
393
- Field::TimestampMicros(ts) | Field::TimeMicros(ts) => {
394
- let ts = jiff::Timestamp::from_microsecond(ts)?;
395
- let time_class = handle.class_time();
396
- Ok(time_class
397
- .funcall::<_, _, Value>("parse", (ts.to_string(),))?
398
- .into_value_with(handle))
399
- }
400
- Field::ListInternal(list) => {
401
- let elements = list.elements();
402
- let ary = handle.ary_new_capa(elements.len());
403
- elements.iter().try_for_each(|e| {
404
- ary.push(
405
- ParquetField {
406
- field: e.clone(),
407
- logical_type: e.to_logical_type(),
408
- converted_type: e.to_converted_type(),
409
- strict: self.strict,
410
- }
411
- .try_into_value_with(handle)?,
412
- )?;
413
- Ok::<_, ParquetGemError>(())
414
- })?;
415
- Ok(ary.into_value_with(handle))
416
- }
417
- Field::MapInternal(map) => {
418
- #[cfg(ruby_lt_3_2)]
419
- let hash = handle.hash_new_capa(map.len());
420
-
421
- #[cfg(not(ruby_lt_3_2))]
422
- let hash = handle.hash_new();
423
-
424
- map.entries().iter().try_for_each(|(k, v)| {
425
- hash.aset(
426
- ParquetField {
427
- field: k.clone(),
428
- converted_type: k.to_converted_type(),
429
- logical_type: k.to_logical_type(),
430
- strict: self.strict,
431
- }
432
- .try_into_value_with(handle)?,
433
- ParquetField {
434
- field: v.clone(),
435
- converted_type: v.to_converted_type(),
436
- logical_type: v.to_logical_type(),
437
- strict: self.strict,
438
- }
439
- .try_into_value_with(handle)?,
440
- )?;
441
- Ok::<_, ParquetGemError>(())
442
- })?;
443
- Ok(hash.into_value_with(handle))
444
- }
445
- Field::Decimal(d) => {
446
- let value = match d {
447
- Decimal::Int32 { value, scale, .. } => {
448
- let unscaled = i32::from_be_bytes(value);
449
- format_decimal_with_i32_scale(unscaled, scale)
450
- }
451
- Decimal::Int64 { value, scale, .. } => {
452
- let unscaled = i64::from_be_bytes(value);
453
- format_decimal_with_i32_scale(unscaled, scale)
454
- }
455
- Decimal::Bytes { value, scale, .. } => {
456
- bytes_to_decimal(value.as_bytes(), scale)?
457
- }
458
- };
459
-
460
- // Load the bigdecimal gem if it's not already loaded
461
- LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
462
-
463
- let kernel = handle.module_kernel();
464
- Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
465
- }
466
- Field::Group(row) => {
467
- let hash = handle.hash_new();
468
- row.get_column_iter().try_for_each(|(k, v)| {
469
- hash.aset(
470
- k.clone().into_value_with(handle),
471
- ParquetField {
472
- field: v.clone(),
473
- converted_type: v.to_converted_type(),
474
- logical_type: v.to_logical_type(),
475
- strict: self.strict,
476
- }
477
- .try_into_value_with(handle)?,
478
- )?;
479
- Ok::<_, ParquetGemError>(())
480
- })?;
481
- Ok(hash.into_value_with(handle))
482
- }
483
- }
484
- }
485
- }
486
-
487
- trait ToTypeInfo {
488
- fn to_converted_type(&self) -> ConvertedType;
489
- fn to_logical_type(&self) -> Option<LogicalType>;
490
- }
491
-
492
- impl ToTypeInfo for &parquet::record::Field {
493
- fn to_converted_type(&self) -> ConvertedType {
494
- match self {
495
- Field::Null => ConvertedType::NONE,
496
- Field::Bool(_) => ConvertedType::INT_8,
497
- Field::Byte(_) => ConvertedType::INT_8,
498
- Field::Short(_) => ConvertedType::INT_16,
499
- Field::Int(_) => ConvertedType::INT_32,
500
- Field::Long(_) => ConvertedType::INT_64,
501
- Field::UByte(_) => ConvertedType::UINT_8,
502
- Field::UShort(_) => ConvertedType::UINT_16,
503
- Field::UInt(_) => ConvertedType::UINT_32,
504
- Field::ULong(_) => ConvertedType::UINT_64,
505
- Field::Float16(_) => ConvertedType::NONE,
506
- Field::Float(_) => ConvertedType::NONE,
507
- Field::Double(_) => ConvertedType::NONE,
508
- Field::Decimal(_) => ConvertedType::DECIMAL,
509
- Field::Str(_) => ConvertedType::UTF8,
510
- Field::Bytes(_) => ConvertedType::LIST,
511
- Field::Date(_) => ConvertedType::DATE,
512
- Field::TimeMillis(_) => ConvertedType::TIME_MILLIS,
513
- Field::TimeMicros(_) => ConvertedType::TIMESTAMP_MICROS,
514
- Field::TimestampMillis(_) => ConvertedType::TIMESTAMP_MILLIS,
515
- Field::TimestampMicros(_) => ConvertedType::TIMESTAMP_MICROS,
516
- Field::Group(_) => ConvertedType::NONE,
517
- Field::ListInternal(_) => ConvertedType::LIST,
518
- Field::MapInternal(_) => ConvertedType::MAP,
519
- }
520
- }
521
- fn to_logical_type(&self) -> Option<LogicalType> {
522
- Some(match self {
523
- Field::Null => LogicalType::Unknown,
524
- Field::Bool(_) => LogicalType::Integer {
525
- bit_width: 1,
526
- is_signed: false,
527
- },
528
- Field::Byte(_) => LogicalType::Integer {
529
- bit_width: 8,
530
- is_signed: false,
531
- },
532
- Field::Short(_) => LogicalType::Integer {
533
- bit_width: 16,
534
- is_signed: true,
535
- },
536
- Field::Int(_) => LogicalType::Integer {
537
- bit_width: 32,
538
- is_signed: true,
539
- },
540
- Field::Long(_) => LogicalType::Integer {
541
- bit_width: 64,
542
- is_signed: true,
543
- },
544
- Field::UByte(_) => LogicalType::Integer {
545
- bit_width: 8,
546
- is_signed: false,
547
- },
548
- Field::UShort(_) => LogicalType::Integer {
549
- bit_width: 16,
550
- is_signed: false,
551
- },
552
- Field::UInt(_) => LogicalType::Integer {
553
- bit_width: 32,
554
- is_signed: false,
555
- },
556
- Field::ULong(_) => LogicalType::Integer {
557
- bit_width: 64,
558
- is_signed: false,
559
- },
560
- Field::Float16(_) => LogicalType::Float16,
561
- Field::Float(_) => LogicalType::Decimal {
562
- scale: 7,
563
- precision: 7,
564
- },
565
- Field::Double(_) => LogicalType::Decimal {
566
- scale: 15,
567
- precision: 15,
568
- },
569
- Field::Decimal(decimal) => LogicalType::Decimal {
570
- scale: decimal.scale(),
571
- precision: decimal.precision(),
572
- },
573
- Field::Str(_) => LogicalType::String,
574
- Field::Bytes(b) => {
575
- if b.data().len() == 16 && uuid::Uuid::from_slice(b.as_bytes()).is_ok() {
576
- LogicalType::Uuid
577
- } else {
578
- LogicalType::Unknown
579
- }
580
- }
581
- Field::Date(_) => LogicalType::Date,
582
- Field::TimeMillis(_) => LogicalType::Time {
583
- is_adjusted_to_u_t_c: true,
584
- unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
585
- },
586
- Field::TimeMicros(_) => LogicalType::Time {
587
- is_adjusted_to_u_t_c: true,
588
- unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
589
- },
590
- Field::TimestampMillis(_) => LogicalType::Timestamp {
591
- is_adjusted_to_u_t_c: true,
592
- unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
593
- },
594
- Field::TimestampMicros(_) => LogicalType::Timestamp {
595
- is_adjusted_to_u_t_c: true,
596
- unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
597
- },
598
- Field::Group(_) => LogicalType::Unknown,
599
- Field::ListInternal(_) => LogicalType::List,
600
- Field::MapInternal(_) => LogicalType::Map,
601
- })
602
- }
603
- }