parquet 0.5.10 → 0.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,107 @@
1
1
  use super::*;
2
+ use magnus::{TryConvert, Value};
3
+
4
+ /// Parses a fixed offset timezone string (e.g., "+09:00", "-05:30", "+0800")
5
+ /// Returns the offset in minutes from UTC
6
+ fn parse_fixed_offset(tz: &str) -> Result<i32, ParquetGemError> {
7
+ // Remove any whitespace
8
+ let tz = tz.trim();
9
+
10
+ // Check if it starts with + or -
11
+ if !tz.starts_with('+') && !tz.starts_with('-') {
12
+ return Err(MagnusError::new(
13
+ magnus::exception::arg_error(),
14
+ format!(
15
+ "Invalid timezone offset format: '{}'. Expected format like '+09:00' or '-0530'",
16
+ tz
17
+ ),
18
+ ))?;
19
+ }
20
+
21
+ let sign = if tz.starts_with('-') { -1 } else { 1 };
22
+ let offset_str = &tz[1..]; // Remove the sign
23
+
24
+ // Parse different formats: "+09:00", "+0900", "+09"
25
+ let (hours, minutes) = if offset_str.contains(':') {
26
+ // Format: "+09:00" or "+9:30"
27
+ let parts: Vec<&str> = offset_str.split(':').collect();
28
+ if parts.len() != 2 {
29
+ return Err(MagnusError::new(
30
+ magnus::exception::arg_error(),
31
+ format!("Invalid timezone offset format: '{}'. Expected HH:MM", tz),
32
+ ))?;
33
+ }
34
+
35
+ let h = parts[0].parse::<i32>().map_err(|e| {
36
+ MagnusError::new(
37
+ magnus::exception::arg_error(),
38
+ format!("Invalid hour in timezone offset '{}': {}", tz, e),
39
+ )
40
+ })?;
41
+
42
+ let m = parts[1].parse::<i32>().map_err(|e| {
43
+ MagnusError::new(
44
+ magnus::exception::arg_error(),
45
+ format!("Invalid minute in timezone offset '{}': {}", tz, e),
46
+ )
47
+ })?;
48
+
49
+ (h, m)
50
+ } else if offset_str.len() == 4 {
51
+ // Format: "+0900"
52
+ let h = offset_str[0..2].parse::<i32>().map_err(|e| {
53
+ MagnusError::new(
54
+ magnus::exception::arg_error(),
55
+ format!("Invalid hour in timezone offset '{}': {}", tz, e),
56
+ )
57
+ })?;
58
+
59
+ let m = offset_str[2..4].parse::<i32>().map_err(|e| {
60
+ MagnusError::new(
61
+ magnus::exception::arg_error(),
62
+ format!("Invalid minute in timezone offset '{}': {}", tz, e),
63
+ )
64
+ })?;
65
+
66
+ (h, m)
67
+ } else if offset_str.len() == 2
68
+ || (offset_str.len() == 1 && offset_str.chars().all(|c| c.is_numeric()))
69
+ {
70
+ // Format: "+09" or "+9"
71
+ let h = offset_str.parse::<i32>().map_err(|e| {
72
+ MagnusError::new(
73
+ magnus::exception::arg_error(),
74
+ format!("Invalid hour in timezone offset '{}': {}", tz, e),
75
+ )
76
+ })?;
77
+ (h, 0)
78
+ } else {
79
+ return Err(MagnusError::new(
80
+ magnus::exception::arg_error(),
81
+ format!("Invalid timezone offset format: '{}'. Expected formats: '+HH:MM', '+HHMM', or '+HH'", tz),
82
+ ))?;
83
+ };
84
+
85
+ // Validate ranges
86
+ if hours < 0 || hours > 23 {
87
+ return Err(MagnusError::new(
88
+ magnus::exception::arg_error(),
89
+ format!("Invalid hour in timezone offset: {}. Must be 0-23", hours),
90
+ ))?;
91
+ }
92
+
93
+ if minutes < 0 || minutes > 59 {
94
+ return Err(MagnusError::new(
95
+ magnus::exception::arg_error(),
96
+ format!(
97
+ "Invalid minute in timezone offset: {}. Must be 0-59",
98
+ minutes
99
+ ),
100
+ ))?;
101
+ }
102
+
103
+ Ok(sign * (hours * 60 + minutes))
104
+ }
2
105
 
3
106
  pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, ParquetGemError> {
4
107
  let (ts, tz) = match value {
@@ -18,37 +121,40 @@ pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, Pa
18
121
 
19
122
  // If timezone is provided, convert to zoned timestamp
20
123
  if let Some(tz) = tz {
21
- // Handle fixed offset timezones like "+09:00" first
124
+ // Handle fixed offset timezones first
22
125
  if tz.starts_with('+') || tz.starts_with('-') {
23
- // Parse the offset string into hours and minutes
24
- let (hours, minutes) = if tz.len() >= 5 && tz.contains(':') {
25
- // Format: "+09:00" or "-09:00"
26
- let h = tz[1..3].parse::<i32>().unwrap_or(0);
27
- let m = tz[4..6].parse::<i32>().unwrap_or(0);
28
- (h, m)
29
- } else if tz.len() >= 3 {
30
- // Format: "+09" or "-09"
31
- let h = tz[1..3].parse::<i32>().unwrap_or(0);
32
- (h, 0)
33
- } else {
34
- (0, 0)
35
- };
126
+ let total_minutes = parse_fixed_offset(tz)?;
127
+
128
+ // Create fixed timezone using the parsed offset
129
+ let offset_hours = total_minutes / 60;
130
+ let offset_minutes = total_minutes % 60;
36
131
 
37
- // Apply sign
38
- let total_minutes = if tz.starts_with('-') {
39
- -(hours * 60 + minutes)
132
+ // jiff expects offset in hours, but we can be more precise
133
+ let tz = if offset_minutes == 0 {
134
+ jiff::tz::TimeZone::fixed(jiff::tz::offset(offset_hours as i8))
40
135
  } else {
41
- hours * 60 + minutes
136
+ // For non-zero minutes, we need to create a custom offset
137
+ // jiff doesn't directly support minute-precision offsets in the simple API,
138
+ // so we'll use the timestamp directly with the offset applied
139
+ return Ok(ts);
42
140
  };
43
141
 
44
- // Create fixed timezone
45
- let tz = jiff::tz::TimeZone::fixed(jiff::tz::offset((total_minutes / 60) as i8));
46
142
  Ok(ts.to_zoned(tz).timestamp())
143
+ } else if tz.eq_ignore_ascii_case("UTC") || tz.eq_ignore_ascii_case("GMT") {
144
+ // Common UTC aliases
145
+ Ok(ts)
47
146
  } else {
48
147
  // Try IANA timezone
49
148
  match ts.in_tz(tz) {
50
149
  Ok(zoned) => Ok(zoned.timestamp()),
51
- Err(_) => Ok(ts), // Fall back to UTC if timezone is invalid
150
+ Err(e) => {
151
+ // Log the error but don't fail - fall back to UTC
152
+ eprintln!(
153
+ "Warning: Failed to parse timezone '{}': {}. Using UTC.",
154
+ tz, e
155
+ );
156
+ Ok(ts)
157
+ }
52
158
  }
53
159
  }
54
160
  } else {
@@ -57,21 +163,112 @@ pub fn parse_zoned_timestamp(value: &ParquetValue) -> Result<jiff::Timestamp, Pa
57
163
  }
58
164
  }
59
165
 
166
+ /// Validates and normalizes a timezone string
167
+ /// Returns the normalized timezone string or None if invalid
168
+ pub fn validate_timezone(tz: &str) -> Option<String> {
169
+ let tz = tz.trim();
170
+
171
+ // Check for empty timezone
172
+ if tz.is_empty() {
173
+ return None;
174
+ }
175
+
176
+ // Fixed offset timezones
177
+ if tz.starts_with('+') || tz.starts_with('-') {
178
+ // Validate it can be parsed
179
+ if parse_fixed_offset(tz).is_ok() {
180
+ return Some(tz.to_string());
181
+ }
182
+ }
183
+
184
+ // Common UTC aliases
185
+ if tz.eq_ignore_ascii_case("UTC")
186
+ || tz.eq_ignore_ascii_case("GMT")
187
+ || tz.eq_ignore_ascii_case("Z")
188
+ {
189
+ return Some("UTC".to_string());
190
+ }
191
+
192
+ // Try to validate as IANA timezone by attempting to use it
193
+ // This is a bit expensive but ensures we only store valid timezones
194
+ if let Ok(tz_obj) = jiff::tz::TimeZone::get(tz) {
195
+ // Use the canonical name from jiff
196
+ return Some(
197
+ tz_obj
198
+ .iana_name()
199
+ .map(|s| s.to_string())
200
+ .unwrap_or_else(|| tz.to_string()),
201
+ );
202
+ }
203
+
204
+ None
205
+ }
206
+
207
+ /// Converts a Ruby Time object to a timestamp with timezone
208
+ pub fn ruby_time_to_timestamp_with_tz(
209
+ value: Value,
210
+ unit: &str,
211
+ ) -> Result<(i64, Option<Arc<str>>), MagnusError> {
212
+ // Get seconds and microseconds
213
+ let secs = i64::try_convert(value.funcall::<_, _, Value>("to_i", ())?)?;
214
+ let usecs = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
215
+
216
+ // Get timezone information from Ruby Time object
217
+ let tz_str = if let Ok(zone) = value.funcall::<_, _, Value>("zone", ()) {
218
+ if zone.is_nil() {
219
+ None
220
+ } else if let Ok(s) = String::try_convert(zone) {
221
+ validate_timezone(&s).map(|tz| Arc::from(tz.as_str()))
222
+ } else {
223
+ None
224
+ }
225
+ } else {
226
+ None
227
+ };
228
+
229
+ // Convert to appropriate unit
230
+ let timestamp = match unit {
231
+ "millis" => secs * 1000 + (usecs / 1000),
232
+ "micros" => secs * 1_000_000 + usecs,
233
+ "seconds" => secs,
234
+ "nanos" => secs * 1_000_000_000 + (usecs * 1000),
235
+ _ => {
236
+ return Err(MagnusError::new(
237
+ magnus::exception::arg_error(),
238
+ format!("Invalid timestamp unit: {}", unit),
239
+ ))
240
+ }
241
+ };
242
+
243
+ Ok((timestamp, tz_str))
244
+ }
245
+
60
246
  // Macro for handling timestamp conversions
61
247
  #[macro_export]
62
248
  macro_rules! impl_timestamp_conversion {
63
249
  ($value:expr, $unit:ident, $handle:expr) => {{
64
250
  match $value {
65
251
  ParquetValue::$unit(ts, tz) => {
66
- let ts = parse_zoned_timestamp(&ParquetValue::$unit(ts, tz))?;
252
+ let ts = parse_zoned_timestamp(&ParquetValue::$unit(ts, tz.clone()))?;
67
253
  let time_class = $handle.class_time();
68
- Ok(time_class
254
+
255
+ // Convert timestamp to Time object
256
+ let time_obj = time_class
69
257
  .funcall::<_, _, Value>("parse", (ts.to_string(),))?
70
- .into_value_with($handle))
258
+ .into_value_with($handle);
259
+
260
+ // If we have timezone info, we've already handled it in parse_zoned_timestamp
261
+ // The resulting Time object will be in the correct timezone
262
+
263
+ Ok(time_obj)
71
264
  }
72
265
  _ => Err(MagnusError::new(
73
266
  magnus::exception::type_error(),
74
- "Invalid timestamp type".to_string(),
267
+ format!(
268
+ "Invalid timestamp type. Expected {}, got {:?}",
269
+ stringify!($unit),
270
+ $value
271
+ ),
75
272
  ))?,
76
273
  }
77
274
  }};
@@ -250,6 +250,8 @@ pub fn parquet_schema_type_to_arrow_data_type(
250
250
  PrimitiveType::Date32 => DataType::Date32,
251
251
  PrimitiveType::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None),
252
252
  PrimitiveType::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None),
253
+ PrimitiveType::TimeMillis => DataType::Time32(TimeUnit::Millisecond),
254
+ PrimitiveType::TimeMicros => DataType::Time64(TimeUnit::Microsecond),
253
255
  },
254
256
  // For a List<T>, create a standard List in Arrow with nullable items
255
257
  ParquetSchemaType::List(list_field) => {
@@ -416,6 +418,12 @@ fn create_arrow_builder_for_type(
416
418
  ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros) => {
417
419
  Ok(Box::new(TimestampMicrosecondBuilder::with_capacity(cap)))
418
420
  }
421
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
422
+ Ok(Box::new(Time32MillisecondBuilder::with_capacity(cap)))
423
+ }
424
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
425
+ Ok(Box::new(Time64MicrosecondBuilder::with_capacity(cap)))
426
+ }
419
427
  ParquetSchemaType::List(list_field) => {
420
428
  // For a list, we create a ListBuilder whose child builder is determined by item_type.
421
429
  // Pass through capacity to ensure consistent sizing
@@ -1165,6 +1173,44 @@ fn fill_builder(
1165
1173
  }
1166
1174
  Ok(())
1167
1175
  }
1176
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => {
1177
+ let typed_builder = builder
1178
+ .as_any_mut()
1179
+ .downcast_mut::<Time32MillisecondBuilder>()
1180
+ .expect("Builder mismatch: expected Time32MillisecondBuilder");
1181
+ for val in values {
1182
+ match val {
1183
+ ParquetValue::TimeMillis(t) => typed_builder.append_value(*t),
1184
+ ParquetValue::Null => typed_builder.append_null(),
1185
+ other => {
1186
+ return Err(MagnusError::new(
1187
+ magnus::exception::type_error(),
1188
+ format!("Expected TimeMillis, got {:?}", other),
1189
+ ))
1190
+ }
1191
+ }
1192
+ }
1193
+ Ok(())
1194
+ }
1195
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => {
1196
+ let typed_builder = builder
1197
+ .as_any_mut()
1198
+ .downcast_mut::<Time64MicrosecondBuilder>()
1199
+ .expect("Builder mismatch: expected Time64MicrosecondBuilder");
1200
+ for val in values {
1201
+ match val {
1202
+ ParquetValue::TimeMicros(t) => typed_builder.append_value(*t),
1203
+ ParquetValue::Null => typed_builder.append_null(),
1204
+ other => {
1205
+ return Err(MagnusError::new(
1206
+ magnus::exception::type_error(),
1207
+ format!("Expected TimeMicros, got {:?}", other),
1208
+ ))
1209
+ }
1210
+ }
1211
+ }
1212
+ Ok(())
1213
+ }
1168
1214
 
1169
1215
  // ------------------
1170
1216
  // NESTED LIST - using helper function
@@ -1433,6 +1479,24 @@ fn fill_builder(
1433
1479
  )
1434
1480
  })?
1435
1481
  .append_value(*x),
1482
+ ParquetValue::TimeMillis(x) => typed_builder
1483
+ .field_builder::<Time32MillisecondBuilder>(i)
1484
+ .ok_or_else(|| {
1485
+ MagnusError::new(
1486
+ magnus::exception::type_error(),
1487
+ "Failed to coerce into Time32MillisecondBuilder",
1488
+ )
1489
+ })?
1490
+ .append_value(*x),
1491
+ ParquetValue::TimeMicros(x) => typed_builder
1492
+ .field_builder::<Time64MicrosecondBuilder>(i)
1493
+ .ok_or_else(|| {
1494
+ MagnusError::new(
1495
+ magnus::exception::type_error(),
1496
+ "Failed to coerce into Time64MicrosecondBuilder",
1497
+ )
1498
+ })?
1499
+ .append_value(*x),
1436
1500
  ParquetValue::List(items) => {
1437
1501
  let list_builder = typed_builder
1438
1502
  .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
@@ -1647,6 +1711,24 @@ fn fill_builder(
1647
1711
  )
1648
1712
  })?
1649
1713
  .append_null(),
1714
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMillis) => typed_builder
1715
+ .field_builder::<Time32MillisecondBuilder>(i)
1716
+ .ok_or_else(|| {
1717
+ MagnusError::new(
1718
+ magnus::exception::type_error(),
1719
+ "Failed to coerce into Time32MillisecondBuilder",
1720
+ )
1721
+ })?
1722
+ .append_null(),
1723
+ ParquetSchemaType::Primitive(PrimitiveType::TimeMicros) => typed_builder
1724
+ .field_builder::<Time64MicrosecondBuilder>(i)
1725
+ .ok_or_else(|| {
1726
+ MagnusError::new(
1727
+ magnus::exception::type_error(),
1728
+ "Failed to coerce into Time64MicrosecondBuilder",
1729
+ )
1730
+ })?
1731
+ .append_null(),
1650
1732
  ParquetSchemaType::List(_) => typed_builder
1651
1733
  .field_builder::<ListBuilder<Box<dyn ArrayBuilder>>>(i)
1652
1734
  .ok_or_else(|| {
@@ -1743,3 +1825,125 @@ pub fn convert_ruby_array_to_arrow(
1743
1825
  }
1744
1826
  convert_parquet_values_to_arrow(parquet_values, type_)
1745
1827
  }
1828
+
1829
+ pub fn convert_to_time_millis(
1830
+ ruby: &Ruby,
1831
+ value: Value,
1832
+ format: Option<&str>,
1833
+ ) -> Result<i32, MagnusError> {
1834
+ if value.is_kind_of(ruby.class_time()) {
1835
+ // Extract time components
1836
+ let hour = i32::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
1837
+ let min = i32::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
1838
+ let sec = i32::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
1839
+ let usec = i32::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
1840
+
1841
+ // Convert to milliseconds since midnight
1842
+ Ok(hour * 3600000 + min * 60000 + sec * 1000 + usec / 1000)
1843
+ } else if value.is_kind_of(ruby.class_string()) {
1844
+ let s = String::try_convert(value)?;
1845
+
1846
+ if let Some(fmt) = format {
1847
+ // Parse using the provided format
1848
+ match jiff::civil::Time::strptime(fmt, &s) {
1849
+ Ok(time) => {
1850
+ let millis = time.hour() as i32 * 3600000
1851
+ + time.minute() as i32 * 60000
1852
+ + time.second() as i32 * 1000
1853
+ + time.millisecond() as i32;
1854
+ Ok(millis)
1855
+ }
1856
+ Err(e) => Err(MagnusError::new(
1857
+ magnus::exception::type_error(),
1858
+ format!(
1859
+ "Failed to parse '{}' with format '{}' as time: {}",
1860
+ s, fmt, e
1861
+ ),
1862
+ )),
1863
+ }
1864
+ } else {
1865
+ // Try to parse as standard time format
1866
+ match s.parse::<jiff::civil::Time>() {
1867
+ Ok(time) => {
1868
+ let millis = time.hour() as i32 * 3600000
1869
+ + time.minute() as i32 * 60000
1870
+ + time.second() as i32 * 1000
1871
+ + time.millisecond() as i32;
1872
+ Ok(millis)
1873
+ }
1874
+ Err(e) => Err(MagnusError::new(
1875
+ magnus::exception::type_error(),
1876
+ format!("Failed to parse '{}' as time: {}", s, e),
1877
+ )),
1878
+ }
1879
+ }
1880
+ } else {
1881
+ Err(MagnusError::new(
1882
+ magnus::exception::type_error(),
1883
+ format!("Cannot convert {} to time_millis", unsafe {
1884
+ value.classname()
1885
+ }),
1886
+ ))
1887
+ }
1888
+ }
1889
+
1890
+ pub fn convert_to_time_micros(
1891
+ ruby: &Ruby,
1892
+ value: Value,
1893
+ format: Option<&str>,
1894
+ ) -> Result<i64, MagnusError> {
1895
+ if value.is_kind_of(ruby.class_time()) {
1896
+ // Extract time components
1897
+ let hour = i64::try_convert(value.funcall::<_, _, Value>("hour", ())?)?;
1898
+ let min = i64::try_convert(value.funcall::<_, _, Value>("min", ())?)?;
1899
+ let sec = i64::try_convert(value.funcall::<_, _, Value>("sec", ())?)?;
1900
+ let usec = i64::try_convert(value.funcall::<_, _, Value>("usec", ())?)?;
1901
+
1902
+ // Convert to microseconds since midnight
1903
+ Ok(hour * 3600000000 + min * 60000000 + sec * 1000000 + usec)
1904
+ } else if value.is_kind_of(ruby.class_string()) {
1905
+ let s = String::try_convert(value)?;
1906
+
1907
+ if let Some(fmt) = format {
1908
+ // Parse using the provided format
1909
+ match jiff::civil::Time::strptime(fmt, &s) {
1910
+ Ok(time) => {
1911
+ let micros = time.hour() as i64 * 3600000000
1912
+ + time.minute() as i64 * 60000000
1913
+ + time.second() as i64 * 1000000
1914
+ + time.microsecond() as i64;
1915
+ Ok(micros)
1916
+ }
1917
+ Err(e) => Err(MagnusError::new(
1918
+ magnus::exception::type_error(),
1919
+ format!(
1920
+ "Failed to parse '{}' with format '{}' as time: {}",
1921
+ s, fmt, e
1922
+ ),
1923
+ )),
1924
+ }
1925
+ } else {
1926
+ // Try to parse as standard time format
1927
+ match s.parse::<jiff::civil::Time>() {
1928
+ Ok(time) => {
1929
+ let micros = time.hour() as i64 * 3600000000
1930
+ + time.minute() as i64 * 60000000
1931
+ + time.second() as i64 * 1000000
1932
+ + time.microsecond() as i64;
1933
+ Ok(micros)
1934
+ }
1935
+ Err(e) => Err(MagnusError::new(
1936
+ magnus::exception::type_error(),
1937
+ format!("Failed to parse '{}' as time: {}", s, e),
1938
+ )),
1939
+ }
1940
+ }
1941
+ } else {
1942
+ Err(MagnusError::new(
1943
+ magnus::exception::type_error(),
1944
+ format!("Cannot convert {} to time_micros", unsafe {
1945
+ value.classname()
1946
+ }),
1947
+ ))
1948
+ }
1949
+ }
@@ -146,7 +146,10 @@ impl FromStr for ParquetSchemaType<'_> {
146
146
  }
147
147
 
148
148
  // Check if it's a decimal256 type with precision and scale
149
- if let Some(decimal_params) = s.strip_prefix("decimal256(").and_then(|s| s.strip_suffix(")")) {
149
+ if let Some(decimal_params) = s
150
+ .strip_prefix("decimal256(")
151
+ .and_then(|s| s.strip_suffix(")"))
152
+ {
150
153
  let parts: Vec<&str> = decimal_params.split(',').collect();
151
154
 
152
155
  // Handle both single parameter (precision only) and two parameters (precision and scale)
@@ -210,6 +213,8 @@ impl FromStr for ParquetSchemaType<'_> {
210
213
  "date32" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Date32)),
211
214
  "timestamp_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMillis)),
212
215
  "timestamp_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimestampMicros)),
216
+ "time_millis" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimeMillis)),
217
+ "time_micros" => Ok(ParquetSchemaType::Primitive(PrimitiveType::TimeMicros)),
213
218
  "decimal" => Ok(ParquetSchemaType::Primitive(PrimitiveType::Decimal128(
214
219
  38, 0,
215
220
  ))),
@@ -230,6 +230,16 @@ fn arrow_data_type_to_parquet_schema_type(dt: &DataType) -> Result<ParquetSchema
230
230
  "TimestampNanos not supported, please adjust your schema or code.",
231
231
  ))
232
232
  }
233
+ DataType::Time32(TimeUnit::Millisecond) => Ok(PST::Primitive(PrimitiveType::TimeMillis)),
234
+ DataType::Time64(TimeUnit::Microsecond) => Ok(PST::Primitive(PrimitiveType::TimeMicros)),
235
+ DataType::Time32(_) => Err(MagnusError::new(
236
+ magnus::exception::runtime_error(),
237
+ "Time32 only supports millisecond unit",
238
+ )),
239
+ DataType::Time64(_) => Err(MagnusError::new(
240
+ magnus::exception::runtime_error(),
241
+ "Time64 only supports microsecond unit",
242
+ )),
233
243
  DataType::Utf8 => Ok(PST::Primitive(PrimitiveType::String)),
234
244
  DataType::Binary => Ok(PST::Primitive(PrimitiveType::Binary)),
235
245
  DataType::LargeUtf8 => {
@@ -170,6 +170,12 @@ fn write_columns_impl(ruby: Rc<Ruby>, args: &[Value]) -> Result<(), ParquetGemEr
170
170
  PrimitiveType::TimestampMicros => {
171
171
  PST::Primitive(PrimitiveType::TimestampMicros)
172
172
  }
173
+ PrimitiveType::TimeMillis => {
174
+ PST::Primitive(PrimitiveType::TimeMillis)
175
+ }
176
+ PrimitiveType::TimeMicros => {
177
+ PST::Primitive(PrimitiveType::TimeMicros)
178
+ }
173
179
  PrimitiveType::Decimal256(precision, scale) => {
174
180
  PST::Primitive(PrimitiveType::Decimal256(precision, scale))
175
181
  }
@@ -259,9 +259,11 @@ pub fn estimate_value_size(
259
259
  PST::Primitive(PrimitiveType::Boolean) => Ok(1),
260
260
  PST::Primitive(PrimitiveType::Decimal128(_, _)) => Ok(16),
261
261
  PST::Primitive(PrimitiveType::Decimal256(_, _)) => Ok(32),
262
- PST::Primitive(PrimitiveType::Date32)
263
- | PST::Primitive(PrimitiveType::TimestampMillis)
264
- | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8),
262
+ PST::Primitive(PrimitiveType::Date32) => Ok(4), // Date32 is 4 bytes
263
+ PST::Primitive(PrimitiveType::TimestampMillis)
264
+ | PST::Primitive(PrimitiveType::TimestampMicros) => Ok(8), // Timestamps are 8 bytes
265
+ PST::Primitive(PrimitiveType::TimeMillis) => Ok(4), // TimeMillis is 4 bytes
266
+ PST::Primitive(PrimitiveType::TimeMicros) => Ok(8), // TimeMicros is 8 bytes
265
267
  PST::Primitive(PrimitiveType::String) | PST::Primitive(PrimitiveType::Binary) => {
266
268
  if let Ok(s) = String::try_convert(value) {
267
269
  // Account for string length plus Rust String's capacity+pointer overhead
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.10"
2
+ VERSION = "0.5.12"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.10
4
+ version: 0.5.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-17 00:00:00.000000000 Z
11
+ date: 2025-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -62,9 +62,7 @@ files:
62
62
  - ext/parquet/src/header_cache.rs
63
63
  - ext/parquet/src/lib.rs
64
64
  - ext/parquet/src/logger.rs
65
- - ext/parquet/src/reader/arrow_reader.rs
66
65
  - ext/parquet/src/reader/common.rs
67
- - ext/parquet/src/reader/format_detector.rs
68
66
  - ext/parquet/src/reader/mod.rs
69
67
  - ext/parquet/src/reader/parquet_column_reader.rs
70
68
  - ext/parquet/src/reader/parquet_row_reader.rs