parquet 0.5.13 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +4 -4
  2. data/Cargo.lock +295 -98
  3. data/Cargo.toml +1 -1
  4. data/Gemfile +1 -0
  5. data/README.md +94 -3
  6. data/ext/parquet/Cargo.toml +3 -0
  7. data/ext/parquet/src/adapter_ffi.rs +156 -0
  8. data/ext/parquet/src/lib.rs +13 -21
  9. data/ext/parquet-core/Cargo.toml +23 -0
  10. data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
  11. data/ext/parquet-core/src/error.rs +163 -0
  12. data/ext/parquet-core/src/lib.rs +60 -0
  13. data/ext/parquet-core/src/reader.rs +263 -0
  14. data/ext/parquet-core/src/schema.rs +283 -0
  15. data/ext/parquet-core/src/test_utils.rs +308 -0
  16. data/ext/parquet-core/src/traits/mod.rs +5 -0
  17. data/ext/parquet-core/src/traits/schema.rs +151 -0
  18. data/ext/parquet-core/src/value.rs +209 -0
  19. data/ext/parquet-core/src/writer.rs +839 -0
  20. data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
  21. data/ext/parquet-core/tests/binary_data.rs +437 -0
  22. data/ext/parquet-core/tests/column_projection.rs +557 -0
  23. data/ext/parquet-core/tests/complex_types.rs +821 -0
  24. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  25. data/ext/parquet-core/tests/concurrent_access.rs +430 -0
  26. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  27. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  28. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
  29. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  30. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  31. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  32. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  33. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  34. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
  35. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  36. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  37. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  38. data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
  39. data/ext/parquet-ruby-adapter/build.rs +5 -0
  40. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  41. data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
  42. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  43. data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
  44. data/ext/parquet-ruby-adapter/src/error.rs +148 -0
  45. data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
  46. data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
  47. data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
  48. data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
  49. data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
  50. data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
  51. data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
  52. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  53. data/ext/parquet-ruby-adapter/src/types.rs +94 -0
  54. data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
  55. data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
  56. data/lib/parquet/schema.rb +19 -0
  57. data/lib/parquet/version.rb +1 -1
  58. metadata +50 -24
  59. data/ext/parquet/src/enumerator.rs +0 -68
  60. data/ext/parquet/src/header_cache.rs +0 -99
  61. data/ext/parquet/src/logger.rs +0 -171
  62. data/ext/parquet/src/reader/common.rs +0 -111
  63. data/ext/parquet/src/reader/mod.rs +0 -211
  64. data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
  65. data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
  66. data/ext/parquet/src/reader/unified/mod.rs +0 -363
  67. data/ext/parquet/src/types/core_types.rs +0 -120
  68. data/ext/parquet/src/types/mod.rs +0 -100
  69. data/ext/parquet/src/types/parquet_value.rs +0 -1275
  70. data/ext/parquet/src/types/record_types.rs +0 -605
  71. data/ext/parquet/src/types/schema_converter.rs +0 -290
  72. data/ext/parquet/src/types/schema_node.rs +0 -424
  73. data/ext/parquet/src/types/timestamp.rs +0 -285
  74. data/ext/parquet/src/types/type_conversion.rs +0 -1949
  75. data/ext/parquet/src/types/writer_types.rs +0 -329
  76. data/ext/parquet/src/utils.rs +0 -184
  77. data/ext/parquet/src/writer/mod.rs +0 -505
  78. data/ext/parquet/src/writer/write_columns.rs +0 -238
  79. data/ext/parquet/src/writer/write_rows.rs +0 -488
data/ext/parquet/src/types/record_types.rs
@@ -1,605 +0,0 @@
- use std::sync::OnceLock;
-
- use itertools::Itertools;
- use jiff::ToSpan;
- use parquet::{
-     basic::{ConvertedType, LogicalType},
-     data_type::AsBytes,
- };
-
- use super::*;
-
- pub static LOADED_BIGDECIMAL: OnceLock<bool> = OnceLock::new();
-
- /// Format decimal value with appropriate scale for BigDecimal conversion
- /// Handles positive and negative scales correctly for i8 scale
- pub fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -> String {
-     if scale >= 0 {
-         // Positive scale means divide (move decimal point left)
-         format!("{}e-{}", value, scale)
-     } else {
-         // Negative scale means multiply (move decimal point right)
-         format!("{}e{}", value, -scale)
-     }
- }
-
- /// Format i256 decimal value with appropriate scale for BigDecimal conversion
- /// Uses bytes conversion to preserve full precision
- pub fn format_i256_decimal_with_scale(
-     value: arrow_buffer::i256,
-     scale: i8,
- ) -> Result<String, ParquetGemError> {
-     // Convert i256 to big-endian bytes
-     let bytes = value.to_be_bytes();
-
-     // Use the existing bytes_to_decimal function which handles full precision
-     bytes_to_decimal(&bytes, scale as i32)
- }
-
- /// Format decimal value with appropriate scale for BigDecimal conversion
- /// Handles positive and negative scales correctly for i32 scale
- pub fn format_decimal_with_i32_scale<T: std::fmt::Display>(value: T, scale: i32) -> String {
-     if scale >= 0 {
-         // Positive scale means divide (move decimal point left)
-         format!("{}e-{}", value, scale)
-     } else {
-         // Negative scale means multiply (move decimal point right)
-         format!("{}e{}", value, -scale)
-     }
- }
-
- /// Convert arbitrary-length big-endian byte array to decimal string
- /// Supports byte arrays from 1 to 32 bytes in length
- fn bytes_to_decimal(bytes: &[u8], scale: i32) -> Result<String, ParquetGemError> {
-     match bytes.len() {
-         0 => Err(ParquetGemError::InvalidDecimal(
-             "Empty byte array for decimal".to_string(),
-         )),
-         1 => {
-             // For 1 byte, use i8
-             let value = bytes[0] as i8;
-             Ok(format_decimal_with_i32_scale(value, scale))
-         }
-         2 => {
-             // For 2 bytes, use i16
-             let mut value: i16 = 0;
-             let is_negative = bytes[0] & 0x80 != 0;
-
-             for &byte in bytes {
-                 value = (value << 8) | (byte as i16);
-             }
-
-             // Sign extend if negative
-             if is_negative {
-                 let shift = 16 - (bytes.len() * 8);
-                 value = (value << shift) >> shift;
-             }
-
-             Ok(format_decimal_with_i32_scale(value, scale))
-         }
-         3..=4 => {
-             // For 3-4 bytes, use i32
-             let mut value: i32 = 0;
-             let is_negative = bytes[0] & 0x80 != 0;
-
-             for &byte in bytes {
-                 value = (value << 8) | (byte as i32);
-             }
-
-             // Sign extend if negative
-             if is_negative {
-                 let shift = 32 - (bytes.len() * 8);
-                 value = (value << shift) >> shift;
-             }
-
-             Ok(format_decimal_with_i32_scale(value, scale))
-         }
-         5..=8 => {
-             // For 5-8 bytes, use i64
-             let mut value: i64 = 0;
-             let is_negative = bytes[0] & 0x80 != 0;
-
-             for &byte in bytes {
-                 value = (value << 8) | (byte as i64);
-             }
-
-             // Sign extend if negative
-             if is_negative {
-                 let shift = 64 - (bytes.len() * 8);
-                 value = (value << shift) >> shift;
-             }
-
-             Ok(format_decimal_with_i32_scale(value, scale))
-         }
-         9..=16 => {
-             // For 9-16 bytes, use i128
-             let mut value: i128 = 0;
-             let is_negative = bytes[0] & 0x80 != 0;
-
-             for &byte in bytes {
-                 value = (value << 8) | (byte as i128);
-             }
-
-             // Sign extend if negative
-             if is_negative {
-                 let shift = 128 - (bytes.len() * 8);
-                 value = (value << shift) >> shift;
-             }
-
-             Ok(format_decimal_with_i32_scale(value, scale))
-         }
-         17..=32 => {
-             // For 17-32 bytes, we need arbitrary precision handling
-             // Check if the number is negative (MSB of first byte)
-             let is_negative = bytes[0] & 0x80 != 0;
-
-             if is_negative {
-                 // For negative numbers, we need to compute two's complement
-                 // First, invert all bits
-                 let mut inverted = Vec::with_capacity(bytes.len());
-                 for &byte in bytes {
-                     inverted.push(!byte);
-                 }
-
-                 // Then add 1
-                 let mut carry = 1u8;
-                 for i in (0..inverted.len()).rev() {
-                     let (sum, new_carry) = inverted[i].overflowing_add(carry);
-                     inverted[i] = sum;
-                     carry = if new_carry { 1 } else { 0 };
-                 }
-
-                 // Convert to decimal string
-                 let mut result = String::new();
-                 let mut remainder = inverted;
-
-                 // Repeatedly divide by 10 to get decimal digits
-                 while !remainder.iter().all(|&b| b == 0) {
-                     let mut carry = 0u16;
-                     for i in 0..remainder.len() {
-                         let temp = (carry << 8) | (remainder[i] as u16);
-                         remainder[i] = (temp / 10) as u8;
-                         carry = temp % 10;
-                     }
-                     result.push_str(&carry.to_string());
-                 }
-
-                 // The digits are in reverse order
-                 if result.is_empty() {
-                     result = "0".to_string();
-                 } else {
-                     result = result.chars().rev().collect();
-                 }
-
-                 // Add negative sign and format with scale
-                 Ok(format_decimal_with_i32_scale(format!("-{}", result), scale))
-             } else {
-                 // For positive numbers, direct conversion
-                 let mut result = String::new();
-                 let mut remainder = bytes.to_vec();
-
-                 // Repeatedly divide by 10 to get decimal digits
-                 while !remainder.iter().all(|&b| b == 0) {
-                     let mut carry = 0u16;
-                     for i in 0..remainder.len() {
-                         let temp = (carry << 8) | (remainder[i] as u16);
-                         remainder[i] = (temp / 10) as u8;
-                         carry = temp % 10;
-                     }
-                     result.push_str(&carry.to_string());
-                 }
-
-                 // The digits are in reverse order
-                 if result.is_empty() {
-                     result = "0".to_string();
-                 } else {
-                     result = result.chars().rev().collect();
-                 }
-
-                 Ok(format_decimal_with_i32_scale(result, scale))
-             }
-         }
-         _ => Err(ParquetGemError::InvalidDecimal(format!(
-             "Unsupported decimal byte array size: {} (maximum 32 bytes)",
-             bytes.len()
-         ))),
-     }
- }
-
- #[derive(Debug)]
- pub enum RowRecord<S: BuildHasher + Default> {
-     Vec(Vec<ParquetField>),
-     Map(HashMap<StringCacheKey, ParquetField, S>),
- }
-
- #[derive(Debug)]
- pub enum ColumnRecord<S: BuildHasher + Default> {
-     Vec(Vec<Vec<ParquetValue>>),
-     Map(HashMap<StringCacheKey, Vec<ParquetValue>, S>),
- }
-
- #[derive(Debug)]
- pub struct ParquetField {
-     pub field: Field,
-     #[allow(dead_code)]
-     pub converted_type: ConvertedType,
-     pub logical_type: Option<LogicalType>,
-     pub strict: bool,
- }
-
- impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
-     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
-         match self {
-             RowRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| {
-                     ary.push(v.try_into_value_with(handle)?)?;
-                     Ok::<_, ParquetGemError>(())
-                 })?;
-                 Ok(handle.into_value(ary))
-             }
-             RowRecord::Map(map) => {
-                 #[cfg(ruby_lt_3_2)]
-                 let hash = handle.hash_new_capa(map.len());
-
-                 #[cfg(not(ruby_lt_3_2))]
-                 let hash = handle.hash_new();
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i])?;
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         values[i + 1] = v.try_into_value_with(handle)?;
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i])?;
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 Ok(hash.into_value_with(handle))
-             }
-         }
-     }
- }
-
- impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
-     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
-         match self {
-             ColumnRecord::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| {
-                     let nested_ary = handle.ary_new_capa(v.len());
-                     v.into_iter().try_for_each(|v| {
-                         nested_ary.push(v.try_into_value_with(handle)?)?;
-                         Ok::<_, ParquetGemError>(())
-                     })?;
-                     ary.push(nested_ary.into_value_with(handle))?;
-                     Ok::<_, ParquetGemError>(())
-                 })?;
-                 Ok(ary.into_value_with(handle))
-             }
-             ColumnRecord::Map(map) => {
-                 #[cfg(ruby_lt_3_2)]
-                 let hash = handle.hash_new_capa(map.len());
-
-                 #[cfg(not(ruby_lt_3_2))]
-                 let hash = handle.hash_new();
-
-                 let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
-                 let mut i = 0;
-
-                 for chunk in &map.into_iter().chunks(64) {
-                     // Reduced to 64 to ensure space for pairs
-                     for (k, v) in chunk {
-                         if i + 1 >= values.len() {
-                             // Bulk insert current batch if array is full
-                             hash.bulk_insert(&values[..i])?;
-                             values[..i].fill(handle.qnil().as_value());
-                             i = 0;
-                         }
-                         values[i] = handle.into_value(k);
-                         let ary = handle.ary_new_capa(v.len());
-                         v.into_iter().try_for_each(|v| {
-                             ary.push(v.try_into_value_with(handle)?)?;
-                             Ok::<_, ParquetGemError>(())
-                         })?;
-                         values[i + 1] = handle.into_value(ary);
-                         i += 2;
-                     }
-                     // Insert any remaining pairs
-                     if i > 0 {
-                         hash.bulk_insert(&values[..i])?;
-                         values[..i].fill(handle.qnil().as_value());
-                         i = 0;
-                     }
-                 }
-
-                 Ok(hash.into_value_with(handle))
-             }
-         }
-     }
- }
-
- pub trait TryIntoValue {
-     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError>;
- }
-
- impl TryIntoValue for ParquetField {
-     fn try_into_value_with(self, handle: &Ruby) -> Result<Value, ParquetGemError> {
-         match self.field {
-             Field::Null => Ok(handle.qnil().as_value()),
-             Field::Bool(b) => Ok(b.into_value_with(handle)),
-             Field::Short(s) => Ok(s.into_value_with(handle)),
-             Field::Int(i) => Ok(i.into_value_with(handle)),
-             Field::Long(l) => Ok(l.into_value_with(handle)),
-             Field::UByte(ub) => Ok(ub.into_value_with(handle)),
-             Field::UShort(us) => Ok(us.into_value_with(handle)),
-             Field::UInt(ui) => Ok(ui.into_value_with(handle)),
-             Field::ULong(ul) => Ok(ul.into_value_with(handle)),
-             Field::Float16(f) => Ok(f32::from(f).into_value_with(handle)),
-             Field::Float(f) => Ok(f.into_value_with(handle)),
-             Field::Double(d) => Ok(d.into_value_with(handle)),
-             Field::Str(s) => {
-                 if self.strict {
-                     Ok(simdutf8::basic::from_utf8(s.as_bytes())
-                         .map_err(ParquetGemError::Utf8Error)
-                         .map(|s| s.into_value_with(handle))?)
-                 } else {
-                     let s = String::from_utf8_lossy(s.as_bytes());
-                     Ok(s.into_value_with(handle))
-                 }
-             }
-             Field::Byte(b) => Ok(b.into_value_with(handle)),
-             Field::Bytes(b) => {
-                 if matches!(self.logical_type, Some(parquet::basic::LogicalType::Uuid)) {
-                     let bytes = b.as_bytes();
-                     let uuid = uuid::Uuid::from_slice(bytes)?;
-                     Ok(uuid.to_string().into_value_with(handle))
-                 } else {
-                     Ok(handle.str_from_slice(b.data()).as_value())
-                 }
-             }
-             Field::Date(d) => {
-                 let epoch = jiff::civil::Date::new(1970, 1, 1)?;
-                 let date = epoch.checked_add(d.days()).map_err(ParquetGemError::Jiff)?;
-                 let formatted = date.to_string();
-                 Ok(formatted.into_value_with(handle))
-             }
-             Field::TimeMillis(ts) => {
-                 let ts = jiff::Timestamp::from_millisecond(ts as i64)?;
-                 let time_class = handle.class_time();
-                 Ok(time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))?
-                     .into_value_with(handle))
-             }
-             Field::TimestampMillis(ts) => {
-                 let ts = jiff::Timestamp::from_millisecond(ts)?;
-                 let time_class = handle.class_time();
-                 Ok(time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))?
-                     .into_value_with(handle))
-             }
-             Field::TimestampMicros(ts) | Field::TimeMicros(ts) => {
-                 let ts = jiff::Timestamp::from_microsecond(ts)?;
-                 let time_class = handle.class_time();
-                 Ok(time_class
-                     .funcall::<_, _, Value>("parse", (ts.to_string(),))?
-                     .into_value_with(handle))
-             }
-             Field::ListInternal(list) => {
-                 let elements = list.elements();
-                 let ary = handle.ary_new_capa(elements.len());
-                 elements.iter().try_for_each(|e| {
-                     ary.push(
-                         ParquetField {
-                             field: e.clone(),
-                             logical_type: e.to_logical_type(),
-                             converted_type: e.to_converted_type(),
-                             strict: self.strict,
-                         }
-                         .try_into_value_with(handle)?,
-                     )?;
-                     Ok::<_, ParquetGemError>(())
-                 })?;
-                 Ok(ary.into_value_with(handle))
-             }
-             Field::MapInternal(map) => {
-                 #[cfg(ruby_lt_3_2)]
-                 let hash = handle.hash_new_capa(map.len());
-
-                 #[cfg(not(ruby_lt_3_2))]
-                 let hash = handle.hash_new();
-
-                 map.entries().iter().try_for_each(|(k, v)| {
-                     hash.aset(
-                         ParquetField {
-                             field: k.clone(),
-                             converted_type: k.to_converted_type(),
-                             logical_type: k.to_logical_type(),
-                             strict: self.strict,
-                         }
-                         .try_into_value_with(handle)?,
-                         ParquetField {
-                             field: v.clone(),
-                             converted_type: v.to_converted_type(),
-                             logical_type: v.to_logical_type(),
-                             strict: self.strict,
-                         }
-                         .try_into_value_with(handle)?,
-                     )?;
-                     Ok::<_, ParquetGemError>(())
-                 })?;
-                 Ok(hash.into_value_with(handle))
-             }
-             Field::Decimal(d) => {
-                 let value = match d {
-                     Decimal::Int32 { value, scale, .. } => {
-                         let unscaled = i32::from_be_bytes(value);
-                         format_decimal_with_i32_scale(unscaled, scale)
-                     }
-                     Decimal::Int64 { value, scale, .. } => {
-                         let unscaled = i64::from_be_bytes(value);
-                         format_decimal_with_i32_scale(unscaled, scale)
-                     }
-                     Decimal::Bytes { value, scale, .. } => {
-                         bytes_to_decimal(value.as_bytes(), scale)?
-                     }
-                 };
-
-                 // Load the bigdecimal gem if it's not already loaded
-                 LOADED_BIGDECIMAL.get_or_init(|| handle.require("bigdecimal").unwrap_or_default());
-
-                 let kernel = handle.module_kernel();
-                 Ok(kernel.funcall::<_, _, Value>("BigDecimal", (value,))?)
-             }
-             Field::Group(row) => {
-                 let hash = handle.hash_new();
-                 row.get_column_iter().try_for_each(|(k, v)| {
-                     hash.aset(
-                         k.clone().into_value_with(handle),
-                         ParquetField {
-                             field: v.clone(),
-                             converted_type: v.to_converted_type(),
-                             logical_type: v.to_logical_type(),
-                             strict: self.strict,
-                         }
-                         .try_into_value_with(handle)?,
-                     )?;
-                     Ok::<_, ParquetGemError>(())
-                 })?;
-                 Ok(hash.into_value_with(handle))
-             }
-         }
-     }
- }
-
- trait ToTypeInfo {
-     fn to_converted_type(&self) -> ConvertedType;
-     fn to_logical_type(&self) -> Option<LogicalType>;
- }
-
- impl ToTypeInfo for &parquet::record::Field {
-     fn to_converted_type(&self) -> ConvertedType {
-         match self {
-             Field::Null => ConvertedType::NONE,
-             Field::Bool(_) => ConvertedType::INT_8,
-             Field::Byte(_) => ConvertedType::INT_8,
-             Field::Short(_) => ConvertedType::INT_16,
-             Field::Int(_) => ConvertedType::INT_32,
-             Field::Long(_) => ConvertedType::INT_64,
-             Field::UByte(_) => ConvertedType::UINT_8,
-             Field::UShort(_) => ConvertedType::UINT_16,
-             Field::UInt(_) => ConvertedType::UINT_32,
-             Field::ULong(_) => ConvertedType::UINT_64,
-             Field::Float16(_) => ConvertedType::NONE,
-             Field::Float(_) => ConvertedType::NONE,
-             Field::Double(_) => ConvertedType::NONE,
-             Field::Decimal(_) => ConvertedType::DECIMAL,
-             Field::Str(_) => ConvertedType::UTF8,
-             Field::Bytes(_) => ConvertedType::LIST,
-             Field::Date(_) => ConvertedType::DATE,
-             Field::TimeMillis(_) => ConvertedType::TIME_MILLIS,
-             Field::TimeMicros(_) => ConvertedType::TIMESTAMP_MICROS,
-             Field::TimestampMillis(_) => ConvertedType::TIMESTAMP_MILLIS,
-             Field::TimestampMicros(_) => ConvertedType::TIMESTAMP_MICROS,
-             Field::Group(_) => ConvertedType::NONE,
-             Field::ListInternal(_) => ConvertedType::LIST,
-             Field::MapInternal(_) => ConvertedType::MAP,
-         }
-     }
-     fn to_logical_type(&self) -> Option<LogicalType> {
-         Some(match self {
-             Field::Null => LogicalType::Unknown,
-             Field::Bool(_) => LogicalType::Integer {
-                 bit_width: 1,
-                 is_signed: false,
-             },
-             Field::Byte(_) => LogicalType::Integer {
-                 bit_width: 8,
-                 is_signed: false,
-             },
-             Field::Short(_) => LogicalType::Integer {
-                 bit_width: 16,
-                 is_signed: true,
-             },
-             Field::Int(_) => LogicalType::Integer {
-                 bit_width: 32,
-                 is_signed: true,
-             },
-             Field::Long(_) => LogicalType::Integer {
-                 bit_width: 64,
-                 is_signed: true,
-             },
-             Field::UByte(_) => LogicalType::Integer {
-                 bit_width: 8,
-                 is_signed: false,
-             },
-             Field::UShort(_) => LogicalType::Integer {
-                 bit_width: 16,
-                 is_signed: false,
-             },
-             Field::UInt(_) => LogicalType::Integer {
-                 bit_width: 32,
-                 is_signed: false,
-             },
-             Field::ULong(_) => LogicalType::Integer {
-                 bit_width: 64,
-                 is_signed: false,
-             },
-             Field::Float16(_) => LogicalType::Float16,
-             Field::Float(_) => LogicalType::Decimal {
-                 scale: 7,
-                 precision: 7,
-             },
-             Field::Double(_) => LogicalType::Decimal {
-                 scale: 15,
-                 precision: 15,
-             },
-             Field::Decimal(decimal) => LogicalType::Decimal {
-                 scale: decimal.scale(),
-                 precision: decimal.precision(),
-             },
-             Field::Str(_) => LogicalType::String,
-             Field::Bytes(b) => {
-                 if b.data().len() == 16 && uuid::Uuid::from_slice(b.as_bytes()).is_ok() {
-                     LogicalType::Uuid
-                 } else {
-                     LogicalType::Unknown
-                 }
-             }
-             Field::Date(_) => LogicalType::Date,
-             Field::TimeMillis(_) => LogicalType::Time {
-                 is_adjusted_to_u_t_c: true,
-                 unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
-             },
-             Field::TimeMicros(_) => LogicalType::Time {
-                 is_adjusted_to_u_t_c: true,
-                 unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
-             },
-             Field::TimestampMillis(_) => LogicalType::Timestamp {
-                 is_adjusted_to_u_t_c: true,
-                 unit: parquet::basic::TimeUnit::MILLIS(parquet::format::MilliSeconds {}),
-             },
-             Field::TimestampMicros(_) => LogicalType::Timestamp {
-                 is_adjusted_to_u_t_c: true,
-                 unit: parquet::basic::TimeUnit::MICROS(parquet::format::MicroSeconds {}),
-             },
-             Field::Group(_) => LogicalType::Unknown,
-             Field::ListInternal(_) => LogicalType::List,
-             Field::MapInternal(_) => LogicalType::Map,
-         })
-     }
- }
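
Note on the removed decimal helpers above: they convert an unscaled integer plus a scale into an exponent-notation string that Ruby's Kernel#BigDecimal parses exactly. A minimal standalone sketch of that scale handling follows; the example values are hypothetical, it simply mirrors the removed format_decimal_with_i8_scale and is not part of the new parquet-core API.

    // Sketch of the removed helper: scale >= 0 shifts the decimal point left,
    // scale < 0 shifts it right, by emitting e-notation for BigDecimal to parse.
    fn format_decimal_with_i8_scale<T: std::fmt::Display>(value: T, scale: i8) -> String {
        if scale >= 0 {
            // e.g. unscaled 12345 with scale 2 -> "12345e-2" (i.e. 123.45)
            format!("{}e-{}", value, scale)
        } else {
            // e.g. unscaled 12345 with scale -2 -> "12345e2" (i.e. 1234500)
            format!("{}e{}", value, -scale)
        }
    }

    fn main() {
        assert_eq!(format_decimal_with_i8_scale(12345, 2), "12345e-2");
        assert_eq!(format_decimal_with_i8_scale(12345, -2), "12345e2");
        // On the Ruby side these strings were handed to Kernel#BigDecimal,
        // which parses the e-notation into an exact decimal value.
    }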