parquet 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
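The change below splits the old single `parse_parquet` entry point into `parse_parquet_rows` and `parse_parquet_columns`, swaps the `xxhash_rust` Xxh3 hasher for `ahash::RandomState`, moves shared types (`ForgottenFileHandle`, `SeekableRubyValue`, `ParquetField`, the enumerator helpers) out of this file into the crate root, and adds optional column projection, plus batch sizing on the column path, on top of the parquet crate's Arrow reader. A minimal standalone sketch of that builder pattern, assuming the same parquet crate APIs the diff uses (the file path, column names, and batch size are placeholders, not values from the gem):

    use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
    use parquet::arrow::ProjectionMask;
    use std::fs::File;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Placeholder input; the gem also accepts IO objects and raw file descriptors.
        let file = File::open("data.parquet")?;
        let mut builder = ParquetRecordBatchReaderBuilder::try_new(file)?;

        // Restrict reading to the named columns, as the gem does when the
        // caller passes a `columns` argument.
        let mask = ProjectionMask::columns(builder.parquet_schema(), ["id", "name"]);
        builder = builder.with_projection(mask).with_batch_size(1024);

        for batch in builder.build()? {
            let batch = batch?; // one arrow RecordBatch per iteration
            println!("{} rows x {} columns", batch.num_rows(), batch.num_columns());
        }
        Ok(())
    }

The diff applies these same builder calls three times, once per input kind: plain files, borrowed file descriptors, and arbitrary seekable Ruby IO values.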
@@ -1,122 +1,62 @@
- use crate::header_cache::{CacheError, StringCache};
- use crate::ruby_reader::{build_ruby_reader, SeekableRead};
- use crate::utils::*;
- use bytes::Bytes;
+ // =============================================================================
+ // Imports and Dependencies
+ // =============================================================================
+ use crate::header_cache::{CacheError, HeaderCacheCleanupIter, StringCache};
+ use crate::{
+     create_column_enumerator, create_row_enumerator, utils::*, ColumnEnumeratorArgs, ColumnRecord,
+     ForgottenFileHandle, ParquetField, ParquetValueVec, RowEnumeratorArgs, RowRecord,
+     SeekableRubyValue,
+ };
+ use ahash::RandomState;
  use magnus::rb_sys::AsRawValue;
  use magnus::value::{Opaque, ReprValue};
- use magnus::IntoValue;
- use magnus::{block::Yield, Error as MagnusError, KwArgs, RHash, Ruby, Symbol, Value};
+ use magnus::{block::Yield, Error as MagnusError, Ruby, Value};
+ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+ use parquet::arrow::ProjectionMask;
  use parquet::errors::ParquetError;
- use parquet::file::reader::{ChunkReader, Length, SerializedFileReader};
- use parquet::record::Field;
+ use parquet::file::reader::FileReader;
+ use parquet::file::reader::SerializedFileReader;
+ use parquet::record::reader::RowIter as ParquetRowIter;
+ use parquet::schema::types::{Type as SchemaType, TypePtr};
  use std::collections::HashMap;
  use std::fs::File;
- use std::io::{self, BufReader, Read, Seek, SeekFrom};
+ use std::io::{self};
  use std::mem::ManuallyDrop;
  use std::os::fd::FromRawFd;
  use std::sync::OnceLock;
- use std::{borrow::Cow, hash::BuildHasher};
  use thiserror::Error;
- use xxhash_rust::xxh3::Xxh3Builder;
-
- use parquet::record::reader::RowIter as ParquetRowIter;
-
- #[derive(Error, Debug)]
- pub enum ReaderError {
-     #[error("Failed to get file descriptor: {0}")]
-     FileDescriptor(String),
-     #[error("Invalid file descriptor")]
-     InvalidFileDescriptor,
-     #[error("Failed to open file: {0}")]
-     FileOpen(#[from] io::Error),
-     #[error("Failed to intern headers: {0}")]
-     HeaderIntern(#[from] CacheError),
-     #[error("Ruby error: {0}")]
-     Ruby(String),
- }
-
- impl From<MagnusError> for ReaderError {
-     fn from(err: MagnusError) -> Self {
-         Self::Ruby(err.to_string())
-     }
- }
-
- impl From<ReaderError> for MagnusError {
-     fn from(err: ReaderError) -> Self {
-         MagnusError::new(
-             Ruby::get().unwrap().exception_runtime_error(),
-             err.to_string(),
-         )
-     }
- }
-
- struct ForgottenFileHandle(ManuallyDrop<File>);
-
- impl Length for ForgottenFileHandle {
-     fn len(&self) -> u64 {
-         self.0.len()
-     }
- }
-
- impl ChunkReader for ForgottenFileHandle {
-     type T = BufReader<File>;
-
-     fn get_read(&self, start: u64) -> parquet::errors::Result<Self::T> {
-         self.0.get_read(start)
-     }
-
-     fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
-         self.0.get_bytes(start, length)
-     }
- }
-
- struct HeaderCacheCleanupIter<I> {
-     inner: I,
-     headers: OnceLock<Vec<&'static str>>,
- }
-
- impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
-     type Item = I::Item;

-     fn next(&mut self) -> Option<Self::Item> {
-         self.inner.next()
-     }
- }
-
- impl<I> Drop for HeaderCacheCleanupIter<I> {
-     fn drop(&mut self) {
-         if let Some(headers) = self.headers.get() {
-             StringCache::clear(&headers).unwrap();
-         }
-     }
- }
-
- pub fn parse_parquet<'a>(
+ #[inline]
+ pub fn parse_parquet_rows<'a>(
      rb_self: Value,
      args: &[Value],
- ) -> Result<Yield<Box<dyn Iterator<Item = Record<Xxh3Builder>>>>, MagnusError> {
+ ) -> Result<Yield<Box<dyn Iterator<Item = RowRecord<RandomState>>>>, MagnusError> {
      let original = unsafe { Ruby::get_unchecked() };
      let ruby: &'static Ruby = Box::leak(Box::new(original));

-     let ParquetArgs {
+     let ParquetRowsArgs {
          to_read,
          result_type,
-     } = parse_parquet_args(&ruby, args)?;
+         columns,
+     } = parse_parquet_rows_args(&ruby, args)?;

      if !ruby.block_given() {
-         return create_enumerator(EnumeratorArgs {
+         return create_row_enumerator(RowEnumeratorArgs {
              rb_self,
              to_read,
              result_type,
+             columns,
          });
      }

-     let iter = if to_read.is_kind_of(ruby.class_string()) {
+     let (schema, mut iter) = if to_read.is_kind_of(ruby.class_string()) {
          let path_string = to_read.to_r_string()?;
          let file_path = unsafe { path_string.as_str()? };
          let file = File::open(file_path).unwrap();
          let reader = SerializedFileReader::new(file).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      } else if to_read.is_kind_of(ruby.class_io()) {
          let raw_value = to_read.as_raw();
          let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
@@ -131,14 +71,28 @@ pub fn parse_parquet<'a>(
          let file = unsafe { File::from_raw_fd(fd) };
          let file = ForgottenFileHandle(ManuallyDrop::new(file));
          let reader = SerializedFileReader::new(file).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      } else {
          let readable = SeekableRubyValue(Opaque::from(to_read));
          let reader = SerializedFileReader::new(readable).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      };

-     let iter: Box<dyn Iterator<Item = Record<Xxh3Builder>>> = match result_type.as_str() {
+     if let Some(cols) = columns {
+         let projection = create_projection_schema(&schema, &cols);
+         iter = iter.project(Some(projection.to_owned())).map_err(|e| {
+             MagnusError::new(
+                 ruby.exception_runtime_error(),
+                 format!("Failed to create projection: {}", e),
+             )
+         })?;
+     }
+
+     let iter: Box<dyn Iterator<Item = RowRecord<RandomState>>> = match result_type.as_str() {
          "hash" => {
              let headers = OnceLock::new();
              let headers_clone = headers.clone();
@@ -146,21 +100,27 @@ pub fn parse_parquet<'a>(
              .filter_map(move |row| {
                  row.ok().map(|row| {
                      let headers = headers_clone.get_or_init(|| {
-                         row.get_column_iter()
-                             .map(|(k, _)| StringCache::intern(k.to_owned()).unwrap())
-                             .collect::<Vec<_>>()
+                         let column_count = row.get_column_iter().count();
+
+                         let mut header_string = Vec::with_capacity(column_count);
+                         for (k, _) in row.get_column_iter() {
+                             header_string.push(k.to_owned());
+                         }
+
+                         let headers = StringCache::intern_many(&header_string).unwrap();
+
+                         headers
                      });

-                     row.get_column_iter()
-                         .enumerate()
-                         .map(|(i, (_, v))| {
-                             let key = headers[i];
-                             (key, ParquetField(v.clone()))
-                         })
-                         .collect::<HashMap<&'static str, ParquetField, Xxh3Builder>>()
+                     let mut map =
+                         HashMap::with_capacity_and_hasher(headers.len(), Default::default());
+                     row.get_column_iter().enumerate().for_each(|(i, (_, v))| {
+                         map.insert(headers[i], ParquetField(v.clone()));
+                     });
+                     map
                  })
              })
-             .map(|row| Record::Map(row));
+             .map(RowRecord::Map);

              Box::new(HeaderCacheCleanupIter {
                  inner: iter,
@@ -170,12 +130,14 @@ pub fn parse_parquet<'a>(
          "array" => Box::new(
              iter.filter_map(|row| {
                  row.ok().map(|row| {
+                     let column_count = row.get_column_iter().count();
+                     let mut vec = Vec::with_capacity(column_count);
                      row.get_column_iter()
-                         .map(|(_, v)| ParquetField(v.clone()))
-                         .collect::<Vec<ParquetField>>()
+                         .for_each(|(_, v)| vec.push(ParquetField(v.clone())));
+                     vec
                  })
              })
-             .map(|row| Record::Vec(row)),
+             .map(RowRecord::Vec),
          ),
          _ => {
              return Err(MagnusError::new(
@@ -188,150 +150,246 @@ pub fn parse_parquet<'a>(
      Ok(Yield::Iter(iter))
  }

- struct EnumeratorArgs {
+ #[inline]
+ pub fn parse_parquet_columns<'a>(
      rb_self: Value,
-     to_read: Value,
-     result_type: String,
- }
+     args: &[Value],
+ ) -> Result<Yield<Box<dyn Iterator<Item = ColumnRecord<RandomState>>>>, MagnusError> {
+     let original = unsafe { Ruby::get_unchecked() };
+     let ruby: &'static Ruby = Box::leak(Box::new(original));

- fn create_enumerator(
-     args: EnumeratorArgs,
- ) -> Result<Yield<Box<dyn Iterator<Item = Record<Xxh3Builder>>>>, MagnusError> {
-     let kwargs = RHash::new();
+     let ParquetColumnsArgs {
+         to_read,
+         result_type,
+         columns,
+         batch_size,
+     } = parse_parquet_columns_args(&ruby, args)?;

-     kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
+     if !ruby.block_given() {
+         return create_column_enumerator(ColumnEnumeratorArgs {
+             rb_self,
+             to_read,
+             result_type,
+             columns,
+             batch_size,
+         });
+     }

-     let enumerator = args
-         .rb_self
-         .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
-     Ok(Yield::Enumerator(enumerator))
- }
+     let (batch_reader, schema, num_rows) = if to_read.is_kind_of(ruby.class_string()) {
+         let path_string = to_read.to_r_string()?;
+         let file_path = unsafe { path_string.as_str()? };
+         let file = File::open(file_path).map_err(|e| ReaderError::FileOpen(e))?;

- #[derive(Debug)]
- pub enum Record<S: BuildHasher + Default> {
-     Vec(Vec<ParquetField>),
-     Map(HashMap<&'static str, ParquetField, S>),
- }
+         let mut builder =
+             ParquetRecordBatchReaderBuilder::try_new(file).map_err(|e| ReaderError::Parquet(e))?;
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();

- impl<S: BuildHasher + Default> IntoValue for Record<S> {
-     #[inline]
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             Record::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                 ary.into_value_with(handle)
-             }
-             Record::Map(map) => {
-                 // Pre-allocate the hash with the known size
-                 let hash = handle.hash_new_capa(map.len());
-                 map.into_iter()
-                     .try_for_each(|(k, v)| hash.aset(k, v))
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
          }
-     }
- }

- #[derive(Debug, Clone)]
- pub struct CowValue<'a>(pub Cow<'a, str>);
+         let reader = builder.build().unwrap();

- impl<'a> IntoValue for CowValue<'a> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         self.0.into_value_with(handle)
+         (reader, schema, num_rows)
+     } else if to_read.is_kind_of(ruby.class_io()) {
+         let raw_value = to_read.as_raw();
+         let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
+             .map_err(|_| {
+                 ReaderError::FileDescriptor("Failed to get file descriptor".to_string())
+             })?;
+
+         if fd < 0 {
+             return Err(ReaderError::InvalidFileDescriptor.into());
+         }
+
+         let file = unsafe { File::from_raw_fd(fd) };
+         let file = ForgottenFileHandle(ManuallyDrop::new(file));
+
+         let mut builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
+         }
+
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         let reader = builder.build().unwrap();
+
+         (reader, schema, num_rows)
+     } else {
+         let readable = SeekableRubyValue(Opaque::from(to_read));
+
+         let mut builder = ParquetRecordBatchReaderBuilder::try_new(readable).unwrap();
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
+         }
+
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         let reader = builder.build().unwrap();
+
+         (reader, schema, num_rows)
+     };
+
+     if num_rows == 0 {
+         let mut map =
+             HashMap::with_capacity_and_hasher(schema.fields().len(), RandomState::default());
+         for field in schema.fields() {
+             map.insert(
+                 StringCache::intern(field.name().to_string()).unwrap(),
+                 vec![],
+             );
+         }
+         let column_record = vec![ColumnRecord::Map(map)];
+         return Ok(Yield::Iter(Box::new(column_record.into_iter())));
      }
- }

- #[derive(Debug)]
- pub struct ParquetField(Field);
-
- impl<'a> IntoValue for ParquetField {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self.0 {
-             Field::Byte(b) => b.into_value_with(handle),
-             Field::Bool(b) => b.into_value_with(handle),
-             Field::Short(s) => s.into_value_with(handle),
-             Field::Int(i) => i.into_value_with(handle),
-             Field::Long(l) => l.into_value_with(handle),
-             Field::UByte(ub) => ub.into_value_with(handle),
-             Field::UShort(us) => us.into_value_with(handle),
-             Field::UInt(ui) => ui.into_value_with(handle),
-             Field::ULong(ul) => ul.into_value_with(handle),
-             Field::Float16(f) => f32::from(f).into_value_with(handle),
-             Field::Float(f) => f.into_value_with(handle),
-             Field::Double(d) => d.into_value_with(handle),
-
-             Field::Str(s) => s.into_value_with(handle),
-             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
-             Field::Date(d) => d.into_value_with(handle),
-             Field::TimestampMillis(ts) => ts.into_value_with(handle),
-             Field::TimestampMicros(ts) => ts.into_value_with(handle),
-             Field::ListInternal(list) => {
-                 let ary = handle.ary_new_capa(list.elements().len());
-                 list.elements()
-                     .iter()
-                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             Field::MapInternal(map) => {
-                 let hash = handle.hash_new_capa(map.entries().len());
-                 map.entries()
-                     .iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             ParquetField(k.clone()).into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
+     let iter: Box<dyn Iterator<Item = ColumnRecord<RandomState>>> = match result_type.as_str() {
+         "hash" => {
+             let headers = OnceLock::new();
+             let headers_clone = headers.clone();
+             let iter = batch_reader
+                 .filter_map(move |batch| {
+                     batch.ok().map(|batch| {
+                         let headers = headers_clone.get_or_init(|| {
+                             let schema = batch.schema();
+                             let fields = schema.fields();
+                             let mut header_string = Vec::with_capacity(fields.len());
+                             for field in fields {
+                                 header_string.push(field.name().to_owned());
+                             }
+                             StringCache::intern_many(&header_string).unwrap()
+                         });
+
+                         let mut map =
+                             HashMap::with_capacity_and_hasher(headers.len(), Default::default());
+
+                         batch.columns().iter().enumerate().for_each(|(i, column)| {
+                             let header = headers[i];
+                             let values = ParquetValueVec::try_from(column.clone()).unwrap();
+                             map.insert(header, values.into_inner());
+                         });
+
+                         map
                      })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             // Field::Decimal(d) => d.to_string().into_value_with(handle),
-             // Field::Group(row) => row.into_value_with(handle),
-             Field::Null => handle.qnil().as_value(),
-             _ => panic!("Unsupported field type"),
+                 })
+                 .map(ColumnRecord::Map);
+
+             Box::new(HeaderCacheCleanupIter {
+                 inner: iter,
+                 headers,
+             })
          }
-     }
- }
+         "array" => Box::new(
+             batch_reader
+                 .filter_map(|batch| {
+                     batch.ok().map(|batch| {
+                         batch
+                             .columns()
+                             .into_iter()
+                             .map(|column| {
+                                 let values = ParquetValueVec::try_from(column.clone()).unwrap();
+                                 values.into_inner()
+                             })
+                             .collect()
+                     })
+                 })
+                 .map(ColumnRecord::Vec),
+         ),
+         _ => {
+             return Err(MagnusError::new(
+                 ruby.exception_runtime_error(),
+                 "Invalid result type",
+             ))
+         }
+     };

- struct SeekableRubyValue(Opaque<Value>);
+     Ok(Yield::Iter(iter))
+ }

- impl Length for SeekableRubyValue {
-     fn len(&self) -> u64 {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         let current_pos = reader.seek(SeekFrom::Current(0)).unwrap();
-         let file_len = reader.seek(SeekFrom::End(0)).unwrap();
-         reader.seek(SeekFrom::Start(current_pos)).unwrap();
-         file_len
+ fn create_projection_schema(schema: &SchemaType, columns: &[String]) -> SchemaType {
+     if let SchemaType::GroupType { fields, .. } = schema {
+         let projected_fields: Vec<TypePtr> = fields
+             .iter()
+             .filter(|field| columns.contains(&field.name().to_string()))
+             .cloned()
+             .collect();
+
+         SchemaType::GroupType {
+             basic_info: schema.get_basic_info().clone(),
+             fields: projected_fields,
+         }
+     } else {
+         // Return original schema if not a group type
+         schema.clone()
      }
  }

- impl ChunkReader for SeekableRubyValue {
-     type T = BufReader<Box<dyn SeekableRead>>;
+ #[derive(Error, Debug)]
+ pub enum ReaderError {
+     #[error("Failed to get file descriptor: {0}")]
+     FileDescriptor(String),
+     #[error("Invalid file descriptor")]
+     InvalidFileDescriptor,
+     #[error("Failed to open file: {0}")]
+     FileOpen(#[from] io::Error),
+     #[error("Failed to intern headers: {0}")]
+     HeaderIntern(#[from] CacheError),
+     #[error("Ruby error: {0}")]
+     Ruby(String),
+     #[error("Parquet error: {0}")]
+     Parquet(#[from] ParquetError),
+ }

-     fn get_read(&self, start: u64) -> parquet::errors::Result<Self::T> {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         reader.seek(SeekFrom::Start(start))?;
-         Ok(BufReader::new(reader))
+ impl From<MagnusError> for ReaderError {
+     fn from(err: MagnusError) -> Self {
+         Self::Ruby(err.to_string())
      }
+ }

-     fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut buffer = Vec::with_capacity(length);
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         reader.seek(SeekFrom::Start(start))?;
-         let read = reader.take(length as _).read_to_end(&mut buffer)?;
-
-         if read != length {
-             return Err(ParquetError::EOF(format!(
-                 "Expected to read {} bytes, read only {}",
-                 length, read
-             )));
-         }
-         Ok(buffer.into())
+ impl From<ReaderError> for MagnusError {
+     fn from(err: ReaderError) -> Self {
+         MagnusError::new(
+             Ruby::get().unwrap().exception_runtime_error(),
+             err.to_string(),
+         )
      }
  }
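The row-oriented path projects differently: it prunes the root group schema and hands the result to `RowIter::project`, which is what the new `create_projection_schema` helper above does. A minimal standalone sketch of the same idea, assuming the parquet crate's serialized reader API (placeholder path and column names; errors are collapsed into `Box<dyn Error>` here rather than routed through the gem's `ReaderError`):

    use parquet::file::reader::{FileReader, SerializedFileReader};
    use parquet::record::reader::RowIter;
    use parquet::schema::types::Type;
    use std::fs::File;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let file = File::open("data.parquet")?; // placeholder path
        let reader = SerializedFileReader::new(file)?;
        let root = reader.metadata().file_metadata().schema().clone();

        // Keep only the requested fields of the root group, mirroring
        // create_projection_schema in the diff; unknown names simply drop out.
        let wanted = ["id", "name"]; // placeholder column names
        let projection = match root {
            Type::GroupType { basic_info, fields } => Type::GroupType {
                basic_info,
                fields: fields
                    .into_iter()
                    .filter(|f| wanted.contains(&f.name()))
                    .collect(),
            },
            primitive => primitive,
        };

        for row in RowIter::from_file_into(Box::new(reader)).project(Some(projection))? {
            println!("{}", row?);
        }
        Ok(())
    }

Pruning the schema before iteration means unprojected columns are never decoded, which is the point of both projection paths in this release.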