parquet 0.5.12 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +295 -98
- data/Cargo.toml +1 -1
- data/Gemfile +1 -0
- data/README.md +94 -3
- data/ext/parquet/Cargo.toml +8 -5
- data/ext/parquet/src/adapter_ffi.rs +156 -0
- data/ext/parquet/src/lib.rs +13 -21
- data/ext/parquet-core/Cargo.toml +23 -0
- data/ext/parquet-core/src/arrow_conversion.rs +1133 -0
- data/ext/parquet-core/src/error.rs +163 -0
- data/ext/parquet-core/src/lib.rs +60 -0
- data/ext/parquet-core/src/reader.rs +263 -0
- data/ext/parquet-core/src/schema.rs +283 -0
- data/ext/parquet-core/src/test_utils.rs +308 -0
- data/ext/parquet-core/src/traits/mod.rs +5 -0
- data/ext/parquet-core/src/traits/schema.rs +151 -0
- data/ext/parquet-core/src/value.rs +209 -0
- data/ext/parquet-core/src/writer.rs +839 -0
- data/ext/parquet-core/tests/arrow_conversion_tests.rs +423 -0
- data/ext/parquet-core/tests/binary_data.rs +437 -0
- data/ext/parquet-core/tests/column_projection.rs +557 -0
- data/ext/parquet-core/tests/complex_types.rs +821 -0
- data/ext/parquet-core/tests/compression_tests.rs +434 -0
- data/ext/parquet-core/tests/concurrent_access.rs +430 -0
- data/ext/parquet-core/tests/decimal_tests.rs +488 -0
- data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
- data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +547 -0
- data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
- data/ext/parquet-core/tests/performance_memory.rs +181 -0
- data/ext/parquet-core/tests/primitive_types.rs +547 -0
- data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
- data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
- data/ext/parquet-core/tests/schema_comprehensive_tests.rs +534 -0
- data/ext/parquet-core/tests/temporal_tests.rs +518 -0
- data/ext/parquet-core/tests/test_helpers.rs +132 -0
- data/ext/parquet-core/tests/writer_tests.rs +545 -0
- data/ext/parquet-ruby-adapter/Cargo.toml +22 -0
- data/ext/parquet-ruby-adapter/build.rs +5 -0
- data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
- data/ext/parquet-ruby-adapter/src/batch_manager.rs +116 -0
- data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
- data/ext/parquet-ruby-adapter/src/converter.rs +1685 -0
- data/ext/parquet-ruby-adapter/src/error.rs +148 -0
- data/ext/{parquet/src/ruby_reader.rs → parquet-ruby-adapter/src/io.rs} +190 -56
- data/ext/parquet-ruby-adapter/src/lib.rs +90 -0
- data/ext/parquet-ruby-adapter/src/logger.rs +64 -0
- data/ext/parquet-ruby-adapter/src/metadata.rs +427 -0
- data/ext/parquet-ruby-adapter/src/reader.rs +317 -0
- data/ext/parquet-ruby-adapter/src/schema.rs +810 -0
- data/ext/parquet-ruby-adapter/src/string_cache.rs +106 -0
- data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
- data/ext/parquet-ruby-adapter/src/types.rs +94 -0
- data/ext/parquet-ruby-adapter/src/utils.rs +186 -0
- data/ext/parquet-ruby-adapter/src/writer.rs +435 -0
- data/lib/parquet/schema.rb +19 -0
- data/lib/parquet/version.rb +1 -1
- metadata +50 -24
- data/ext/parquet/src/enumerator.rs +0 -68
- data/ext/parquet/src/header_cache.rs +0 -99
- data/ext/parquet/src/logger.rs +0 -171
- data/ext/parquet/src/reader/common.rs +0 -111
- data/ext/parquet/src/reader/mod.rs +0 -211
- data/ext/parquet/src/reader/parquet_column_reader.rs +0 -44
- data/ext/parquet/src/reader/parquet_row_reader.rs +0 -43
- data/ext/parquet/src/reader/unified/mod.rs +0 -363
- data/ext/parquet/src/types/core_types.rs +0 -120
- data/ext/parquet/src/types/mod.rs +0 -100
- data/ext/parquet/src/types/parquet_value.rs +0 -1275
- data/ext/parquet/src/types/record_types.rs +0 -603
- data/ext/parquet/src/types/schema_converter.rs +0 -290
- data/ext/parquet/src/types/schema_node.rs +0 -424
- data/ext/parquet/src/types/timestamp.rs +0 -285
- data/ext/parquet/src/types/type_conversion.rs +0 -1949
- data/ext/parquet/src/types/writer_types.rs +0 -329
- data/ext/parquet/src/utils.rs +0 -184
- data/ext/parquet/src/writer/mod.rs +0 -505
- data/ext/parquet/src/writer/write_columns.rs +0 -238
- data/ext/parquet/src/writer/write_rows.rs +0 -488
@@ -0,0 +1,427 @@
|
|
1
|
+
use magnus::value::ReprValue;
|
2
|
+
use magnus::{Error as MagnusError, IntoValue, Ruby, Value};
|
3
|
+
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
|
4
|
+
use std::fs::File;
|
5
|
+
|
6
|
+
use crate::error::{IntoMagnusError, Result, RubyAdapterError};
|
7
|
+
use crate::io::{RubyIOReader, ThreadSafeRubyIOReader};
|
8
|
+
use crate::TryIntoValue;
|
9
|
+
|
10
|
+
/// Newtype wrapper around `ParquetMetaData` so that this module can implement the
/// `TryIntoValue` and `IntoValue` conversion traits for it. The wrapped metadata is
/// exposed as the public field `.0`.
|
11
|
+
pub struct RubyParquetMetaData(pub ParquetMetaData);
|
12
|
+
|
13
|
+
impl TryIntoValue for RubyParquetMetaData {
|
14
|
+
fn try_into_value(self, handle: &Ruby) -> Result<Value> {
|
15
|
+
let metadata = &self.0;
|
16
|
+
let file_metadata = metadata.file_metadata();
|
17
|
+
let row_groups = metadata.row_groups();
|
18
|
+
|
19
|
+
// Construct a hash with the metadata
|
20
|
+
let hash = handle.hash_new();
|
21
|
+
hash.aset("num_rows", file_metadata.num_rows())
|
22
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set num_rows: {}", e)))?;
|
23
|
+
hash.aset("created_by", file_metadata.created_by())
|
24
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set created_by: {}", e)))?;
|
25
|
+
|
26
|
+
// Convert key_value_metadata to a Ruby array if it exists
|
27
|
+
if let Some(key_value_metadata) = file_metadata.key_value_metadata() {
|
28
|
+
let kv_array = handle.ary_new();
|
29
|
+
for kv in key_value_metadata {
|
30
|
+
let kv_hash = handle.hash_new();
|
31
|
+
kv_hash
|
32
|
+
.aset("key", kv.key.clone())
|
33
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set key: {}", e)))?;
|
34
|
+
kv_hash.aset("value", kv.value.clone()).map_err(|e| {
|
35
|
+
RubyAdapterError::metadata(format!("Failed to set value: {}", e))
|
36
|
+
})?;
|
37
|
+
kv_array.push(kv_hash).map_err(|e| {
|
38
|
+
RubyAdapterError::metadata(format!("Failed to push kv_hash: {}", e))
|
39
|
+
})?;
|
40
|
+
}
|
41
|
+
hash.aset("key_value_metadata", kv_array).map_err(|e| {
|
42
|
+
RubyAdapterError::metadata(format!("Failed to set key_value_metadata: {}", e))
|
43
|
+
})?;
|
44
|
+
} else {
|
45
|
+
hash.aset("key_value_metadata", None::<Value>)
|
46
|
+
.map_err(|e| {
|
47
|
+
RubyAdapterError::metadata(format!("Failed to set key_value_metadata: {}", e))
|
48
|
+
})?;
|
49
|
+
}
|
50
|
+
|
51
|
+
// Convert schema to a Ruby hash since &Type doesn't implement IntoValue
|
52
|
+
let schema_hash = handle.hash_new();
|
53
|
+
let schema = file_metadata.schema();
|
54
|
+
schema_hash
|
55
|
+
.aset("name", schema.name())
|
56
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set schema name: {}", e)))?;
|
57
|
+
|
58
|
+
// Add schema fields information
|
59
|
+
let fields_array = handle.ary_new();
|
60
|
+
for field in schema.get_fields() {
|
61
|
+
let field_hash = handle.hash_new();
|
62
|
+
field_hash.aset("name", field.name()).map_err(|e| {
|
63
|
+
RubyAdapterError::metadata(format!("Failed to set field name: {}", e))
|
64
|
+
})?;
|
65
|
+
|
66
|
+
// Handle different field types
|
67
|
+
match field.as_ref() {
|
68
|
+
parquet::schema::types::Type::PrimitiveType {
|
69
|
+
physical_type,
|
70
|
+
type_length,
|
71
|
+
scale,
|
72
|
+
precision,
|
73
|
+
..
|
74
|
+
} => {
|
75
|
+
field_hash.aset("type", "primitive").map_err(|e| {
|
76
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
77
|
+
})?;
|
78
|
+
field_hash
|
79
|
+
.aset("physical_type", format!("{:?}", physical_type))
|
80
|
+
.map_err(|e| {
|
81
|
+
RubyAdapterError::metadata(format!(
|
82
|
+
"Failed to set physical_type: {}",
|
83
|
+
e
|
84
|
+
))
|
85
|
+
})?;
|
86
|
+
field_hash.aset("type_length", *type_length).map_err(|e| {
|
87
|
+
RubyAdapterError::metadata(format!("Failed to set type_length: {}", e))
|
88
|
+
})?;
|
89
|
+
field_hash.aset("scale", *scale).map_err(|e| {
|
90
|
+
RubyAdapterError::metadata(format!("Failed to set scale: {}", e))
|
91
|
+
})?;
|
92
|
+
field_hash.aset("precision", *precision).map_err(|e| {
|
93
|
+
RubyAdapterError::metadata(format!("Failed to set precision: {}", e))
|
94
|
+
})?;
|
95
|
+
}
|
96
|
+
parquet::schema::types::Type::GroupType { .. } => {
|
97
|
+
field_hash.aset("type", "group").map_err(|e| {
|
98
|
+
RubyAdapterError::metadata(format!("Failed to set type: {}", e))
|
99
|
+
})?;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
// Add basic info
|
104
|
+
let basic_info = field.get_basic_info();
|
105
|
+
field_hash
|
106
|
+
.aset("repetition", format!("{:?}", basic_info.repetition()))
|
107
|
+
.map_err(|e| {
|
108
|
+
RubyAdapterError::metadata(format!("Failed to set repetition: {}", e))
|
109
|
+
})?;
|
110
|
+
field_hash
|
111
|
+
.aset(
|
112
|
+
"converted_type",
|
113
|
+
format!("{:?}", basic_info.converted_type()),
|
114
|
+
)
|
115
|
+
.map_err(|e| {
|
116
|
+
RubyAdapterError::metadata(format!("Failed to set converted_type: {}", e))
|
117
|
+
})?;
|
118
|
+
if let Some(logical_type) = basic_info.logical_type() {
|
119
|
+
field_hash
|
120
|
+
.aset("logical_type", format!("{:?}", logical_type))
|
121
|
+
.map_err(|e| {
|
122
|
+
RubyAdapterError::metadata(format!("Failed to set logical_type: {}", e))
|
123
|
+
})?;
|
124
|
+
}
|
125
|
+
|
126
|
+
fields_array.push(field_hash).map_err(|e| {
|
127
|
+
RubyAdapterError::metadata(format!("Failed to push field_hash: {}", e))
|
128
|
+
})?;
|
129
|
+
}
|
130
|
+
schema_hash
|
131
|
+
.aset("fields", fields_array)
|
132
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set fields: {}", e)))?;
|
133
|
+
|
134
|
+
hash.aset("schema", schema_hash)
|
135
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set schema: {}", e)))?;
|
136
|
+
|
137
|
+
// Convert row_groups to a Ruby array since &[RowGroupMetaData] doesn't implement IntoValue
|
138
|
+
let row_groups_array = handle.ary_new();
|
139
|
+
for row_group in row_groups.iter() {
|
140
|
+
let rg_hash = handle.hash_new();
|
141
|
+
rg_hash
|
142
|
+
.aset("num_columns", row_group.num_columns())
|
143
|
+
.map_err(|e| {
|
144
|
+
RubyAdapterError::metadata(format!("Failed to set num_columns: {}", e))
|
145
|
+
})?;
|
146
|
+
rg_hash
|
147
|
+
.aset("num_rows", row_group.num_rows())
|
148
|
+
.map_err(|e| {
|
149
|
+
RubyAdapterError::metadata(format!("Failed to set num_rows: {}", e))
|
150
|
+
})?;
|
151
|
+
rg_hash
|
152
|
+
.aset("total_byte_size", row_group.total_byte_size())
|
153
|
+
.map_err(|e| {
|
154
|
+
RubyAdapterError::metadata(format!("Failed to set total_byte_size: {}", e))
|
155
|
+
})?;
|
156
|
+
rg_hash
|
157
|
+
.aset("file_offset", row_group.file_offset())
|
158
|
+
.map_err(|e| {
|
159
|
+
RubyAdapterError::metadata(format!("Failed to set file_offset: {}", e))
|
160
|
+
})?;
|
161
|
+
rg_hash
|
162
|
+
.aset("ordinal", row_group.ordinal())
|
163
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set ordinal: {}", e)))?;
|
164
|
+
rg_hash
|
165
|
+
.aset("compressed_size", row_group.compressed_size())
|
166
|
+
.map_err(|e| {
|
167
|
+
RubyAdapterError::metadata(format!("Failed to set compressed_size: {}", e))
|
168
|
+
})?;
|
169
|
+
|
170
|
+
// Add column chunks metadata
|
171
|
+
let columns_array = handle.ary_new();
|
172
|
+
for col_idx in 0..row_group.num_columns() {
|
173
|
+
let column = row_group.column(col_idx);
|
174
|
+
let col_hash = handle.hash_new();
|
175
|
+
|
176
|
+
col_hash
|
177
|
+
.aset("column_path", column.column_path().string())
|
178
|
+
.map_err(|e| {
|
179
|
+
RubyAdapterError::metadata(format!("Failed to set column_path: {}", e))
|
180
|
+
})?;
|
181
|
+
col_hash
|
182
|
+
.aset("file_path", column.file_path())
|
183
|
+
.map_err(|e| {
|
184
|
+
RubyAdapterError::metadata(format!("Failed to set file_path: {}", e))
|
185
|
+
})?;
|
186
|
+
col_hash
|
187
|
+
.aset("file_offset", column.file_offset())
|
188
|
+
.map_err(|e| {
|
189
|
+
RubyAdapterError::metadata(format!("Failed to set file_offset: {}", e))
|
190
|
+
})?;
|
191
|
+
col_hash
|
192
|
+
.aset("num_values", column.num_values())
|
193
|
+
.map_err(|e| {
|
194
|
+
RubyAdapterError::metadata(format!("Failed to set num_values: {}", e))
|
195
|
+
})?;
|
196
|
+
col_hash
|
197
|
+
.aset("compression", format!("{:?}", column.compression()))
|
198
|
+
.map_err(|e| {
|
199
|
+
RubyAdapterError::metadata(format!("Failed to set compression: {}", e))
|
200
|
+
})?;
|
201
|
+
col_hash
|
202
|
+
.aset("total_compressed_size", column.compressed_size())
|
203
|
+
.map_err(|e| {
|
204
|
+
RubyAdapterError::metadata(format!(
|
205
|
+
"Failed to set total_compressed_size: {}",
|
206
|
+
e
|
207
|
+
))
|
208
|
+
})?;
|
209
|
+
col_hash
|
210
|
+
.aset("total_uncompressed_size", column.uncompressed_size())
|
211
|
+
.map_err(|e| {
|
212
|
+
RubyAdapterError::metadata(format!(
|
213
|
+
"Failed to set total_uncompressed_size: {}",
|
214
|
+
e
|
215
|
+
))
|
216
|
+
})?;
|
217
|
+
col_hash
|
218
|
+
.aset("data_page_offset", column.data_page_offset())
|
219
|
+
.map_err(|e| {
|
220
|
+
RubyAdapterError::metadata(format!("Failed to set data_page_offset: {}", e))
|
221
|
+
})?;
|
222
|
+
|
223
|
+
if let Some(offset) = column.dictionary_page_offset() {
|
224
|
+
col_hash
|
225
|
+
.aset("dictionary_page_offset", offset)
|
226
|
+
.map_err(|e| {
|
227
|
+
RubyAdapterError::metadata(format!(
|
228
|
+
"Failed to set dictionary_page_offset: {}",
|
229
|
+
e
|
230
|
+
))
|
231
|
+
})?;
|
232
|
+
}
|
233
|
+
|
234
|
+
if let Some(offset) = column.bloom_filter_offset() {
|
235
|
+
col_hash.aset("bloom_filter_offset", offset).map_err(|e| {
|
236
|
+
RubyAdapterError::metadata(format!(
|
237
|
+
"Failed to set bloom_filter_offset: {}",
|
238
|
+
e
|
239
|
+
))
|
240
|
+
})?;
|
241
|
+
}
|
242
|
+
|
243
|
+
if let Some(length) = column.bloom_filter_length() {
|
244
|
+
col_hash.aset("bloom_filter_length", length).map_err(|e| {
|
245
|
+
RubyAdapterError::metadata(format!(
|
246
|
+
"Failed to set bloom_filter_length: {}",
|
247
|
+
e
|
248
|
+
))
|
249
|
+
})?;
|
250
|
+
}
|
251
|
+
|
252
|
+
if let Some(offset) = column.offset_index_offset() {
|
253
|
+
col_hash.aset("offset_index_offset", offset).map_err(|e| {
|
254
|
+
RubyAdapterError::metadata(format!(
|
255
|
+
"Failed to set offset_index_offset: {}",
|
256
|
+
e
|
257
|
+
))
|
258
|
+
})?;
|
259
|
+
}
|
260
|
+
|
261
|
+
if let Some(length) = column.offset_index_length() {
|
262
|
+
col_hash.aset("offset_index_length", length).map_err(|e| {
|
263
|
+
RubyAdapterError::metadata(format!(
|
264
|
+
"Failed to set offset_index_length: {}",
|
265
|
+
e
|
266
|
+
))
|
267
|
+
})?;
|
268
|
+
}
|
269
|
+
|
270
|
+
if let Some(offset) = column.column_index_offset() {
|
271
|
+
col_hash.aset("column_index_offset", offset).map_err(|e| {
|
272
|
+
RubyAdapterError::metadata(format!(
|
273
|
+
"Failed to set column_index_offset: {}",
|
274
|
+
e
|
275
|
+
))
|
276
|
+
})?;
|
277
|
+
}
|
278
|
+
|
279
|
+
if let Some(length) = column.column_index_length() {
|
280
|
+
col_hash.aset("column_index_length", length).map_err(|e| {
|
281
|
+
RubyAdapterError::metadata(format!(
|
282
|
+
"Failed to set column_index_length: {}",
|
283
|
+
e
|
284
|
+
))
|
285
|
+
})?;
|
286
|
+
}
|
287
|
+
|
288
|
+
// Add encodings
|
289
|
+
let encodings_array = handle.ary_new();
|
290
|
+
for encoding in column.encodings() {
|
291
|
+
encodings_array
|
292
|
+
.push(format!("{:?}", encoding))
|
293
|
+
.map_err(|e| {
|
294
|
+
RubyAdapterError::metadata(format!("Failed to push encoding: {}", e))
|
295
|
+
})?;
|
296
|
+
}
|
297
|
+
col_hash.aset("encodings", encodings_array).map_err(|e| {
|
298
|
+
RubyAdapterError::metadata(format!("Failed to set encodings: {}", e))
|
299
|
+
})?;
|
300
|
+
|
301
|
+
// Add statistics if available
|
302
|
+
if let Some(stats) = column.statistics() {
|
303
|
+
let stats_hash = handle.hash_new();
|
304
|
+
stats_hash
|
305
|
+
.aset("min_is_exact", stats.min_is_exact())
|
306
|
+
.map_err(|e| {
|
307
|
+
RubyAdapterError::metadata(format!("Failed to set min_is_exact: {}", e))
|
308
|
+
})?;
|
309
|
+
stats_hash
|
310
|
+
.aset("max_is_exact", stats.max_is_exact())
|
311
|
+
.map_err(|e| {
|
312
|
+
RubyAdapterError::metadata(format!("Failed to set max_is_exact: {}", e))
|
313
|
+
})?;
|
314
|
+
|
315
|
+
col_hash.aset("statistics", stats_hash).map_err(|e| {
|
316
|
+
RubyAdapterError::metadata(format!("Failed to set statistics: {}", e))
|
317
|
+
})?;
|
318
|
+
}
|
319
|
+
|
320
|
+
// Add page encoding stats if available
|
321
|
+
if let Some(page_encoding_stats) = column.page_encoding_stats() {
|
322
|
+
let page_stats_array = handle.ary_new();
|
323
|
+
for stat in page_encoding_stats {
|
324
|
+
let stat_hash = handle.hash_new();
|
325
|
+
stat_hash
|
326
|
+
.aset("page_type", format!("{:?}", stat.page_type))
|
327
|
+
.map_err(|e| {
|
328
|
+
RubyAdapterError::metadata(format!(
|
329
|
+
"Failed to set page_type: {}",
|
330
|
+
e
|
331
|
+
))
|
332
|
+
})?;
|
333
|
+
stat_hash
|
334
|
+
.aset("encoding", format!("{:?}", stat.encoding))
|
335
|
+
.map_err(|e| {
|
336
|
+
RubyAdapterError::metadata(format!("Failed to set encoding: {}", e))
|
337
|
+
})?;
|
338
|
+
stat_hash.aset("count", stat.count).map_err(|e| {
|
339
|
+
RubyAdapterError::metadata(format!("Failed to set count: {}", e))
|
340
|
+
})?;
|
341
|
+
page_stats_array.push(stat_hash).map_err(|e| {
|
342
|
+
RubyAdapterError::metadata(format!("Failed to push stat_hash: {}", e))
|
343
|
+
})?;
|
344
|
+
}
|
345
|
+
col_hash
|
346
|
+
.aset("page_encoding_stats", page_stats_array)
|
347
|
+
.map_err(|e| {
|
348
|
+
RubyAdapterError::metadata(format!(
|
349
|
+
"Failed to set page_encoding_stats: {}",
|
350
|
+
e
|
351
|
+
))
|
352
|
+
})?;
|
353
|
+
}
|
354
|
+
|
355
|
+
columns_array.push(col_hash).map_err(|e| {
|
356
|
+
RubyAdapterError::metadata(format!("Failed to push col_hash: {}", e))
|
357
|
+
})?;
|
358
|
+
}
|
359
|
+
rg_hash
|
360
|
+
.aset("columns", columns_array)
|
361
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set columns: {}", e)))?;
|
362
|
+
|
363
|
+
row_groups_array.push(rg_hash).map_err(|e| {
|
364
|
+
RubyAdapterError::metadata(format!("Failed to push rg_hash: {}", e))
|
365
|
+
})?;
|
366
|
+
}
|
367
|
+
hash.aset("row_groups", row_groups_array)
|
368
|
+
.map_err(|e| RubyAdapterError::metadata(format!("Failed to set row_groups: {}", e)))?;
|
369
|
+
|
370
|
+
Ok(handle.into_value(hash))
|
371
|
+
}
|
372
|
+
}
|
373
|
+
|
374
|
+
// Also implement IntoValue for backwards compatibility
|
375
|
+
impl IntoValue for RubyParquetMetaData {
|
376
|
+
fn into_value_with(self, handle: &Ruby) -> Value {
|
377
|
+
// Use TryIntoValue and handle errors by returning an error hash
|
378
|
+
match self.try_into_value(handle) {
|
379
|
+
Ok(value) => value,
|
380
|
+
Err(e) => {
|
381
|
+
// Create an error hash instead of panicking
|
382
|
+
let error_hash = handle.hash_new();
|
383
|
+
let _ = error_hash.aset("error", true);
|
384
|
+
let _ = error_hash.aset("message", e.to_string());
|
385
|
+
handle.into_value(error_hash)
|
386
|
+
}
|
387
|
+
}
|
388
|
+
}
|
389
|
+
}
|
390
|
+
|
391
|
+
/// Parse metadata from a file path or Ruby IO object
|
392
|
+
pub fn parse_metadata(arg: Value) -> std::result::Result<Value, MagnusError> {
|
393
|
+
parse_metadata_impl(arg).into_magnus_error()
|
394
|
+
}
|
395
|
+
|
396
|
+
fn parse_metadata_impl(arg: Value) -> Result<Value> {
|
397
|
+
let ruby = Ruby::get().map_err(|_| RubyAdapterError::runtime("Failed to get Ruby runtime"))?;
|
398
|
+
|
399
|
+
let mut reader = ParquetMetaDataReader::new();
|
400
|
+
if arg.is_kind_of(ruby.class_string()) {
|
401
|
+
let path = arg
|
402
|
+
.to_r_string()
|
403
|
+
.map_err(|e| {
|
404
|
+
RubyAdapterError::invalid_input(format!("Failed to convert to string: {}", e))
|
405
|
+
})?
|
406
|
+
.to_string()
|
407
|
+
.map_err(|e| {
|
408
|
+
RubyAdapterError::invalid_input(format!("Failed to convert to Rust string: {}", e))
|
409
|
+
})?;
|
410
|
+
let file = File::open(path).map_err(RubyAdapterError::Io)?;
|
411
|
+
reader
|
412
|
+
.try_parse(&file)
|
413
|
+
.map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
|
414
|
+
} else {
|
415
|
+
let file = RubyIOReader::new(arg).map_err(RubyAdapterError::Io)?;
|
416
|
+
reader
|
417
|
+
.try_parse(&ThreadSafeRubyIOReader::new(file))
|
418
|
+
.map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
|
419
|
+
}
|
420
|
+
|
421
|
+
let metadata = reader
|
422
|
+
.finish()
|
423
|
+
.map_err(|e| RubyAdapterError::Parquet(parquet_core::ParquetError::Parquet(e)))?;
|
424
|
+
|
425
|
+
// Use TryIntoValue instead of IntoValue
|
426
|
+
RubyParquetMetaData(metadata).try_into_value(&ruby)
|
427
|
+
}
|