parquet 0.0.1 → 0.0.3

@@ -1,122 +1,62 @@
- use crate::header_cache::{CacheError, StringCache};
- use crate::ruby_reader::{build_ruby_reader, SeekableRead};
- use crate::utils::*;
- use bytes::Bytes;
+ // =============================================================================
+ // Imports and Dependencies
+ // =============================================================================
+ use crate::header_cache::{CacheError, HeaderCacheCleanupIter, StringCache};
+ use crate::{
+     create_column_enumerator, create_row_enumerator, utils::*, ColumnEnumeratorArgs, ColumnRecord,
+     ForgottenFileHandle, ParquetField, ParquetValueVec, RowEnumeratorArgs, RowRecord,
+     SeekableRubyValue,
+ };
+ use ahash::RandomState;
  use magnus::rb_sys::AsRawValue;
  use magnus::value::{Opaque, ReprValue};
- use magnus::IntoValue;
- use magnus::{block::Yield, Error as MagnusError, KwArgs, RHash, Ruby, Symbol, Value};
+ use magnus::{block::Yield, Error as MagnusError, Ruby, Value};
+ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+ use parquet::arrow::ProjectionMask;
  use parquet::errors::ParquetError;
- use parquet::file::reader::{ChunkReader, Length, SerializedFileReader};
- use parquet::record::Field;
+ use parquet::file::reader::FileReader;
+ use parquet::file::reader::SerializedFileReader;
+ use parquet::record::reader::RowIter as ParquetRowIter;
+ use parquet::schema::types::{Type as SchemaType, TypePtr};
  use std::collections::HashMap;
  use std::fs::File;
- use std::io::{self, BufReader, Read, Seek, SeekFrom};
+ use std::io::{self};
  use std::mem::ManuallyDrop;
  use std::os::fd::FromRawFd;
  use std::sync::OnceLock;
- use std::{borrow::Cow, hash::BuildHasher};
  use thiserror::Error;
- use xxhash_rust::xxh3::Xxh3Builder;
-
- use parquet::record::reader::RowIter as ParquetRowIter;
-
- #[derive(Error, Debug)]
- pub enum ReaderError {
-     #[error("Failed to get file descriptor: {0}")]
-     FileDescriptor(String),
-     #[error("Invalid file descriptor")]
-     InvalidFileDescriptor,
-     #[error("Failed to open file: {0}")]
-     FileOpen(#[from] io::Error),
-     #[error("Failed to intern headers: {0}")]
-     HeaderIntern(#[from] CacheError),
-     #[error("Ruby error: {0}")]
-     Ruby(String),
- }
-
- impl From<MagnusError> for ReaderError {
-     fn from(err: MagnusError) -> Self {
-         Self::Ruby(err.to_string())
-     }
- }
-
- impl From<ReaderError> for MagnusError {
-     fn from(err: ReaderError) -> Self {
-         MagnusError::new(
-             Ruby::get().unwrap().exception_runtime_error(),
-             err.to_string(),
-         )
-     }
- }
-
- struct ForgottenFileHandle(ManuallyDrop<File>);
-
- impl Length for ForgottenFileHandle {
-     fn len(&self) -> u64 {
-         self.0.len()
-     }
- }
-
- impl ChunkReader for ForgottenFileHandle {
-     type T = BufReader<File>;
-
-     fn get_read(&self, start: u64) -> parquet::errors::Result<Self::T> {
-         self.0.get_read(start)
-     }
-
-     fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
-         self.0.get_bytes(start, length)
-     }
- }
-
- struct HeaderCacheCleanupIter<I> {
-     inner: I,
-     headers: OnceLock<Vec<&'static str>>,
- }
-
- impl<I: Iterator> Iterator for HeaderCacheCleanupIter<I> {
-     type Item = I::Item;

-     fn next(&mut self) -> Option<Self::Item> {
-         self.inner.next()
-     }
- }
-
- impl<I> Drop for HeaderCacheCleanupIter<I> {
-     fn drop(&mut self) {
-         if let Some(headers) = self.headers.get() {
-             StringCache::clear(&headers).unwrap();
-         }
-     }
- }
-
- pub fn parse_parquet<'a>(
+ #[inline]
+ pub fn parse_parquet_rows<'a>(
      rb_self: Value,
      args: &[Value],
- ) -> Result<Yield<Box<dyn Iterator<Item = Record<Xxh3Builder>>>>, MagnusError> {
+ ) -> Result<Yield<Box<dyn Iterator<Item = RowRecord<RandomState>>>>, MagnusError> {
      let original = unsafe { Ruby::get_unchecked() };
      let ruby: &'static Ruby = Box::leak(Box::new(original));

-     let ParquetArgs {
+     let ParquetRowsArgs {
          to_read,
          result_type,
-     } = parse_parquet_args(&ruby, args)?;
+         columns,
+     } = parse_parquet_rows_args(&ruby, args)?;

      if !ruby.block_given() {
-         return create_enumerator(EnumeratorArgs {
+         return create_row_enumerator(RowEnumeratorArgs {
              rb_self,
              to_read,
              result_type,
+             columns,
          });
      }

-     let iter = if to_read.is_kind_of(ruby.class_string()) {
+     let (schema, mut iter) = if to_read.is_kind_of(ruby.class_string()) {
          let path_string = to_read.to_r_string()?;
          let file_path = unsafe { path_string.as_str()? };
          let file = File::open(file_path).unwrap();
          let reader = SerializedFileReader::new(file).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      } else if to_read.is_kind_of(ruby.class_io()) {
          let raw_value = to_read.as_raw();
          let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
@@ -131,14 +71,28 @@ pub fn parse_parquet<'a>(
          let file = unsafe { File::from_raw_fd(fd) };
          let file = ForgottenFileHandle(ManuallyDrop::new(file));
          let reader = SerializedFileReader::new(file).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      } else {
          let readable = SeekableRubyValue(Opaque::from(to_read));
          let reader = SerializedFileReader::new(readable).unwrap();
-         ParquetRowIter::from_file_into(Box::new(reader))
+         let schema = reader.metadata().file_metadata().schema().clone();
+
+         (schema, ParquetRowIter::from_file_into(Box::new(reader)))
      };

-     let iter: Box<dyn Iterator<Item = Record<Xxh3Builder>>> = match result_type.as_str() {
+     if let Some(cols) = columns {
+         let projection = create_projection_schema(&schema, &cols);
+         iter = iter.project(Some(projection.to_owned())).map_err(|e| {
+             MagnusError::new(
+                 ruby.exception_runtime_error(),
+                 format!("Failed to create projection: {}", e),
+             )
+         })?;
+     }
+
+     let iter: Box<dyn Iterator<Item = RowRecord<RandomState>>> = match result_type.as_str() {
          "hash" => {
              let headers = OnceLock::new();
              let headers_clone = headers.clone();
@@ -146,21 +100,27 @@ pub fn parse_parquet<'a>(
                  .filter_map(move |row| {
                      row.ok().map(|row| {
                          let headers = headers_clone.get_or_init(|| {
-                             row.get_column_iter()
-                                 .map(|(k, _)| StringCache::intern(k.to_owned()).unwrap())
-                                 .collect::<Vec<_>>()
+                             let column_count = row.get_column_iter().count();
+
+                             let mut header_string = Vec::with_capacity(column_count);
+                             for (k, _) in row.get_column_iter() {
+                                 header_string.push(k.to_owned());
+                             }
+
+                             let headers = StringCache::intern_many(&header_string).unwrap();
+
+                             headers
                          });

-                         row.get_column_iter()
-                             .enumerate()
-                             .map(|(i, (_, v))| {
-                                 let key = headers[i];
-                                 (key, ParquetField(v.clone()))
-                             })
-                             .collect::<HashMap<&'static str, ParquetField, Xxh3Builder>>()
+                         let mut map =
+                             HashMap::with_capacity_and_hasher(headers.len(), Default::default());
+                         row.get_column_iter().enumerate().for_each(|(i, (_, v))| {
+                             map.insert(headers[i], ParquetField(v.clone()));
+                         });
+                         map
                      })
                  })
-                 .map(|row| Record::Map(row));
+                 .map(RowRecord::Map);

              Box::new(HeaderCacheCleanupIter {
                  inner: iter,
@@ -170,12 +130,14 @@ pub fn parse_parquet<'a>(
          "array" => Box::new(
              iter.filter_map(|row| {
                  row.ok().map(|row| {
+                     let column_count = row.get_column_iter().count();
+                     let mut vec = Vec::with_capacity(column_count);
                      row.get_column_iter()
-                         .map(|(_, v)| ParquetField(v.clone()))
-                         .collect::<Vec<ParquetField>>()
+                         .for_each(|(_, v)| vec.push(ParquetField(v.clone())));
+                     vec
                  })
              })
-             .map(|row| Record::Vec(row)),
+             .map(RowRecord::Vec),
          ),
          _ => {
              return Err(MagnusError::new(
@@ -188,150 +150,246 @@ pub fn parse_parquet<'a>(
      Ok(Yield::Iter(iter))
  }

- struct EnumeratorArgs {
+ #[inline]
+ pub fn parse_parquet_columns<'a>(
      rb_self: Value,
-     to_read: Value,
-     result_type: String,
- }
+     args: &[Value],
+ ) -> Result<Yield<Box<dyn Iterator<Item = ColumnRecord<RandomState>>>>, MagnusError> {
+     let original = unsafe { Ruby::get_unchecked() };
+     let ruby: &'static Ruby = Box::leak(Box::new(original));

- fn create_enumerator(
-     args: EnumeratorArgs,
- ) -> Result<Yield<Box<dyn Iterator<Item = Record<Xxh3Builder>>>>, MagnusError> {
-     let kwargs = RHash::new();
+     let ParquetColumnsArgs {
+         to_read,
+         result_type,
+         columns,
+         batch_size,
+     } = parse_parquet_columns_args(&ruby, args)?;

-     kwargs.aset(Symbol::new("result_type"), Symbol::new(args.result_type))?;
+     if !ruby.block_given() {
+         return create_column_enumerator(ColumnEnumeratorArgs {
+             rb_self,
+             to_read,
+             result_type,
+             columns,
+             batch_size,
+         });
+     }

-     let enumerator = args
-         .rb_self
-         .enumeratorize("for_each", (args.to_read, KwArgs(kwargs)));
-     Ok(Yield::Enumerator(enumerator))
- }
+     let (batch_reader, schema, num_rows) = if to_read.is_kind_of(ruby.class_string()) {
+         let path_string = to_read.to_r_string()?;
+         let file_path = unsafe { path_string.as_str()? };
+         let file = File::open(file_path).map_err(|e| ReaderError::FileOpen(e))?;

- #[derive(Debug)]
- pub enum Record<S: BuildHasher + Default> {
-     Vec(Vec<ParquetField>),
-     Map(HashMap<&'static str, ParquetField, S>),
- }
+         let mut builder =
+             ParquetRecordBatchReaderBuilder::try_new(file).map_err(|e| ReaderError::Parquet(e))?;
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();

- impl<S: BuildHasher + Default> IntoValue for Record<S> {
-     #[inline]
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self {
-             Record::Vec(vec) => {
-                 let ary = handle.ary_new_capa(vec.len());
-                 vec.into_iter().try_for_each(|v| ary.push(v)).unwrap();
-                 ary.into_value_with(handle)
-             }
-             Record::Map(map) => {
-                 // Pre-allocate the hash with the known size
-                 let hash = handle.hash_new_capa(map.len());
-                 map.into_iter()
-                     .try_for_each(|(k, v)| hash.aset(k, v))
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
          }
-     }
- }

- #[derive(Debug, Clone)]
- pub struct CowValue<'a>(pub Cow<'a, str>);
+         let reader = builder.build().unwrap();

- impl<'a> IntoValue for CowValue<'a> {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         self.0.into_value_with(handle)
+         (reader, schema, num_rows)
+     } else if to_read.is_kind_of(ruby.class_io()) {
+         let raw_value = to_read.as_raw();
+         let fd = std::panic::catch_unwind(|| unsafe { rb_sys::rb_io_descriptor(raw_value) })
+             .map_err(|_| {
+                 ReaderError::FileDescriptor("Failed to get file descriptor".to_string())
+             })?;
+
+         if fd < 0 {
+             return Err(ReaderError::InvalidFileDescriptor.into());
+         }
+
+         let file = unsafe { File::from_raw_fd(fd) };
+         let file = ForgottenFileHandle(ManuallyDrop::new(file));
+
+         let mut builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
+         }
+
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         let reader = builder.build().unwrap();
+
+         (reader, schema, num_rows)
+     } else {
+         let readable = SeekableRubyValue(Opaque::from(to_read));
+
+         let mut builder = ParquetRecordBatchReaderBuilder::try_new(readable).unwrap();
+         let schema = builder.schema().clone();
+         let num_rows = builder.metadata().file_metadata().num_rows();
+
+         if let Some(batch_size) = batch_size {
+             builder = builder.with_batch_size(batch_size);
+         }
+
+         // If columns are specified, project only those columns
+         if let Some(cols) = &columns {
+             // Get the parquet schema
+             let parquet_schema = builder.parquet_schema();
+
+             // Create a projection mask from column names
+             let projection =
+                 ProjectionMask::columns(parquet_schema, cols.iter().map(|s| s.as_str()));
+
+             builder = builder.with_projection(projection);
+         }
+
+         let reader = builder.build().unwrap();
+
+         (reader, schema, num_rows)
+     };
+
+     if num_rows == 0 {
+         let mut map =
+             HashMap::with_capacity_and_hasher(schema.fields().len(), RandomState::default());
+         for field in schema.fields() {
+             map.insert(
+                 StringCache::intern(field.name().to_string()).unwrap(),
+                 vec![],
+             );
+         }
+         let column_record = vec![ColumnRecord::Map(map)];
+         return Ok(Yield::Iter(Box::new(column_record.into_iter())));
      }
- }

- #[derive(Debug)]
- pub struct ParquetField(Field);
-
- impl<'a> IntoValue for ParquetField {
-     fn into_value_with(self, handle: &Ruby) -> Value {
-         match self.0 {
-             Field::Byte(b) => b.into_value_with(handle),
-             Field::Bool(b) => b.into_value_with(handle),
-             Field::Short(s) => s.into_value_with(handle),
-             Field::Int(i) => i.into_value_with(handle),
-             Field::Long(l) => l.into_value_with(handle),
-             Field::UByte(ub) => ub.into_value_with(handle),
-             Field::UShort(us) => us.into_value_with(handle),
-             Field::UInt(ui) => ui.into_value_with(handle),
-             Field::ULong(ul) => ul.into_value_with(handle),
-             Field::Float16(f) => f32::from(f).into_value_with(handle),
-             Field::Float(f) => f.into_value_with(handle),
-             Field::Double(d) => d.into_value_with(handle),
-
-             Field::Str(s) => s.into_value_with(handle),
-             Field::Bytes(b) => handle.str_from_slice(b.data()).as_value(),
-             Field::Date(d) => d.into_value_with(handle),
-             Field::TimestampMillis(ts) => ts.into_value_with(handle),
-             Field::TimestampMicros(ts) => ts.into_value_with(handle),
-             Field::ListInternal(list) => {
-                 let ary = handle.ary_new_capa(list.elements().len());
-                 list.elements()
-                     .iter()
-                     .try_for_each(|e| ary.push(ParquetField(e.clone()).into_value_with(handle)))
-                     .unwrap();
-                 ary.into_value_with(handle)
-             }
-             Field::MapInternal(map) => {
-                 let hash = handle.hash_new_capa(map.entries().len());
-                 map.entries()
-                     .iter()
-                     .try_for_each(|(k, v)| {
-                         hash.aset(
-                             ParquetField(k.clone()).into_value_with(handle),
-                             ParquetField(v.clone()).into_value_with(handle),
-                         )
+     let iter: Box<dyn Iterator<Item = ColumnRecord<RandomState>>> = match result_type.as_str() {
+         "hash" => {
+             let headers = OnceLock::new();
+             let headers_clone = headers.clone();
+             let iter = batch_reader
+                 .filter_map(move |batch| {
+                     batch.ok().map(|batch| {
+                         let headers = headers_clone.get_or_init(|| {
+                             let schema = batch.schema();
+                             let fields = schema.fields();
+                             let mut header_string = Vec::with_capacity(fields.len());
+                             for field in fields {
+                                 header_string.push(field.name().to_owned());
+                             }
+                             StringCache::intern_many(&header_string).unwrap()
+                         });
+
+                         let mut map =
+                             HashMap::with_capacity_and_hasher(headers.len(), Default::default());
+
+                         batch.columns().iter().enumerate().for_each(|(i, column)| {
+                             let header = headers[i];
+                             let values = ParquetValueVec::try_from(column.clone()).unwrap();
+                             map.insert(header, values.into_inner());
+                         });
+
+                         map
                      })
-                     .unwrap();
-                 hash.into_value_with(handle)
-             }
-             // Field::Decimal(d) => d.to_string().into_value_with(handle),
-             // Field::Group(row) => row.into_value_with(handle),
-             Field::Null => handle.qnil().as_value(),
-             _ => panic!("Unsupported field type"),
+                 })
+                 .map(ColumnRecord::Map);
+
+             Box::new(HeaderCacheCleanupIter {
+                 inner: iter,
+                 headers,
+             })
          }
-     }
- }
+         "array" => Box::new(
+             batch_reader
+                 .filter_map(|batch| {
+                     batch.ok().map(|batch| {
+                         batch
+                             .columns()
+                             .into_iter()
+                             .map(|column| {
+                                 let values = ParquetValueVec::try_from(column.clone()).unwrap();
+                                 values.into_inner()
+                             })
+                             .collect()
+                     })
+                 })
+                 .map(ColumnRecord::Vec),
+         ),
+         _ => {
+             return Err(MagnusError::new(
+                 ruby.exception_runtime_error(),
+                 "Invalid result type",
+             ))
+         }
+     };

- struct SeekableRubyValue(Opaque<Value>);
+     Ok(Yield::Iter(iter))
+ }

- impl Length for SeekableRubyValue {
-     fn len(&self) -> u64 {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         let current_pos = reader.seek(SeekFrom::Current(0)).unwrap();
-         let file_len = reader.seek(SeekFrom::End(0)).unwrap();
-         reader.seek(SeekFrom::Start(current_pos)).unwrap();
-         file_len
+ fn create_projection_schema(schema: &SchemaType, columns: &[String]) -> SchemaType {
+     if let SchemaType::GroupType { fields, .. } = schema {
+         let projected_fields: Vec<TypePtr> = fields
+             .iter()
+             .filter(|field| columns.contains(&field.name().to_string()))
+             .cloned()
+             .collect();
+
+         SchemaType::GroupType {
+             basic_info: schema.get_basic_info().clone(),
+             fields: projected_fields,
+         }
+     } else {
+         // Return original schema if not a group type
+         schema.clone()
      }
  }

- impl ChunkReader for SeekableRubyValue {
-     type T = BufReader<Box<dyn SeekableRead>>;
+ #[derive(Error, Debug)]
+ pub enum ReaderError {
+     #[error("Failed to get file descriptor: {0}")]
+     FileDescriptor(String),
+     #[error("Invalid file descriptor")]
+     InvalidFileDescriptor,
+     #[error("Failed to open file: {0}")]
+     FileOpen(#[from] io::Error),
+     #[error("Failed to intern headers: {0}")]
+     HeaderIntern(#[from] CacheError),
+     #[error("Ruby error: {0}")]
+     Ruby(String),
+     #[error("Parquet error: {0}")]
+     Parquet(#[from] ParquetError),
+ }

-     fn get_read(&self, start: u64) -> parquet::errors::Result<Self::T> {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         reader.seek(SeekFrom::Start(start))?;
-         Ok(BufReader::new(reader))
+ impl From<MagnusError> for ReaderError {
+     fn from(err: MagnusError) -> Self {
+         Self::Ruby(err.to_string())
      }
+ }

-     fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
-         let ruby = unsafe { Ruby::get_unchecked() };
-         let mut buffer = Vec::with_capacity(length);
-         let mut reader = build_ruby_reader(&ruby, ruby.get_inner(self.0)).unwrap();
-         reader.seek(SeekFrom::Start(start))?;
-         let read = reader.take(length as _).read_to_end(&mut buffer)?;
-
-         if read != length {
-             return Err(ParquetError::EOF(format!(
-                 "Expected to read {} bytes, read only {}",
-                 length, read
-             )));
-         }
-         Ok(buffer.into())
+ impl From<ReaderError> for MagnusError {
+     fn from(err: ReaderError) -> Self {
+         MagnusError::new(
+             Ruby::get().unwrap().exception_runtime_error(),
+             err.to_string(),
+         )
      }
  }
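
For context on the row path in this diff: when a columns list is given, the new code rebuilds the root group schema with only the requested fields (create_projection_schema) and hands it to RowIter::project, so skipped columns are never decoded into Row values. Below is a minimal standalone sketch of that same pattern against the parquet crate, not the gem's code itself; the example.parquet path and the "id" column are hypothetical placeholders, and error handling is reduced to `?`.

use parquet::file::reader::{FileReader, SerializedFileReader};
use parquet::record::reader::RowIter as ParquetRowIter;
use parquet::schema::types::{Type as SchemaType, TypePtr};
use std::fs::File;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical input: any local parquet file with an "id" column.
    let file = File::open("example.parquet")?;
    let reader = SerializedFileReader::new(file)?;
    let schema = reader.metadata().file_metadata().schema().clone();

    // Rebuild the root group with only the wanted fields, mirroring
    // create_projection_schema in the diff above.
    let wanted = ["id".to_string()];
    let projection = if let SchemaType::GroupType { fields, .. } = &schema {
        let projected: Vec<TypePtr> = fields
            .iter()
            .filter(|f| wanted.contains(&f.name().to_string()))
            .cloned()
            .collect();
        SchemaType::GroupType {
            basic_info: schema.get_basic_info().clone(),
            fields: projected,
        }
    } else {
        schema.clone()
    };

    // From here on, RowIter decodes only the projected columns.
    let iter = ParquetRowIter::from_file_into(Box::new(reader)).project(Some(projection))?;
    for row in iter {
        println!("{:?}", row?);
    }
    Ok(())
}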
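The new column path instead goes through the Arrow record-batch reader: projection becomes a ProjectionMask over the file's leaf columns, and batch_size bounds how many rows each RecordBatch holds. Here is a standalone sketch of that builder flow, under the same hypothetical file and column name as above:

use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ProjectionMask;
use std::fs::File;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Same hypothetical input file as the previous sketch.
    let file = File::open("example.parquet")?;
    let mut builder = ParquetRecordBatchReaderBuilder::try_new(file)?;

    // Select leaf columns by name, as each branch of parse_parquet_columns does.
    let mask = ProjectionMask::columns(builder.parquet_schema(), ["id"]);
    builder = builder.with_projection(mask).with_batch_size(1024);

    // The reader yields arrow RecordBatches of up to 1024 rows each.
    for batch in builder.build()? {
        let batch = batch?;
        println!("{} columns x {} rows", batch.num_columns(), batch.num_rows());
    }
    Ok(())
}

Reading whole columns per batch is what lets the gem's "hash" result type hand Ruby one vector of values per column (via ParquetValueVec) rather than one object per cell, which is the practical difference between the rows and columns entry points added in this release.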