parquet 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d72c16371c10a011af5118f2915de9bbeb33cde133369bdac2050e3c035572e
4
- data.tar.gz: b39c6ec9a8232eca5b5b156bf28992ed59c05e9a36e4c13db2b8933a74485ba0
3
+ metadata.gz: 896f2833b6db8e4466af8fc9d43eb5c695e25a207a6f8050d22052458edded36
4
+ data.tar.gz: 38de2831bf7013e0194b2e61a91b26a1283fef65a04309dfbe125c570d64e9ed
5
5
  SHA512:
6
- metadata.gz: c7f338b1d010fa59c2344065b233ff20a08d4a17c6ca987ef72677150dd1cbf55d134855585d68e187b748dc5121f13d5e86cb82aabc1eeb3562a3326aca459c
7
- data.tar.gz: 69eaa6b133123944138a826612a7b48d9f87acb202ecbe172e253be02a1a1c7009e3d7182e8bb31ae423098bc34bb5dddc4ce042453f0d1cb41505d56d02c21e
6
+ metadata.gz: 52d83bc198f789856eac4bff7ff985a82c3f03f75e5de79efc5b388ce5afc63cb507b4cacc90625ee321619ade1ddc16f66f6c437ca0f60d144bc593bbec8cc5
7
+ data.tar.gz: c6dd98694fd2a1d29ceebec6b58d63220f3992fe4dc63dae1c28ea27f8a353764f87a16a13c95e3e2111bca811c57fde45da9f008a93d8868112b4be608d46ee
@@ -19,12 +19,10 @@ use crate::types::ParquetGemError;
19
19
  /// and provide a standard Read implementation for them.
20
20
  pub enum RubyReader {
21
21
  String {
22
- ruby: Arc<Ruby>,
23
22
  inner: Opaque<RString>,
24
23
  offset: usize,
25
24
  },
26
25
  RubyIoLike {
27
- ruby: Arc<Ruby>,
28
26
  inner: Opaque<Value>,
29
27
  },
30
28
  NativeProxyIoLike {
@@ -40,7 +38,6 @@ impl RubyReader {
40
38
  pub fn new(ruby: Arc<Ruby>, value: Value) -> Result<Self, ParquetGemError> {
41
39
  if RubyReader::is_seekable_io_like(&value) {
42
40
  Ok(RubyReader::RubyIoLike {
43
- ruby,
44
41
  inner: Opaque::from(value),
45
42
  })
46
43
  } else if RubyReader::is_io_like(&value) {
@@ -49,7 +46,6 @@ impl RubyReader {
49
46
 
50
47
  // This is safe, because we won't call seek
51
48
  let inner_readable = RubyReader::RubyIoLike {
52
- ruby: ruby.clone(),
53
49
  inner: Opaque::from(value),
54
50
  };
55
51
  let mut reader = BufReader::new(inner_readable);
@@ -68,7 +64,6 @@ impl RubyReader {
68
64
  .funcall::<_, _, RString>("to_str", ())
69
65
  .or_else(|_| value.funcall::<_, _, RString>("to_s", ()))?;
70
66
  Ok(RubyReader::String {
71
- ruby,
72
67
  inner: Opaque::from(string_content),
73
68
  offset: 0,
74
69
  })
@@ -89,10 +84,10 @@ impl RubyReader {
89
84
 
90
85
  impl Seek for RubyReader {
91
86
  fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
87
+ let ruby = unsafe { Ruby::get_unchecked() };
92
88
  match self {
93
89
  RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.seek(pos),
94
90
  RubyReader::String {
95
- ruby,
96
91
  inner,
97
92
  offset: original_offset,
98
93
  } => {
@@ -113,7 +108,7 @@ impl Seek for RubyReader {
113
108
  *original_offset = new_offset.min(unwrapped_inner.len());
114
109
  Ok(*original_offset as u64)
115
110
  }
116
- RubyReader::RubyIoLike { ruby, inner } => {
111
+ RubyReader::RubyIoLike { inner } => {
117
112
  let unwrapped_inner = ruby.get_inner(*inner);
118
113
 
119
114
  let (whence, ruby_offset) = match pos {
@@ -138,13 +133,10 @@ impl Seek for RubyReader {
138
133
 
139
134
  impl Read for RubyReader {
140
135
  fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
136
+ let ruby = unsafe { Ruby::get_unchecked() };
141
137
  match self {
142
138
  RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.read(buf),
143
- RubyReader::String {
144
- ruby,
145
- inner,
146
- offset,
147
- } => {
139
+ RubyReader::String { inner, offset } => {
148
140
  let unwrapped_inner = ruby.get_inner(*inner);
149
141
 
150
142
  let string_buffer = unsafe { unwrapped_inner.as_slice() };
@@ -160,7 +152,7 @@ impl Read for RubyReader {
160
152
 
161
153
  Ok(copy_size)
162
154
  }
163
- RubyReader::RubyIoLike { ruby, inner } => {
155
+ RubyReader::RubyIoLike { inner } => {
164
156
  let unwrapped_inner = ruby.get_inner(*inner);
165
157
 
166
158
  let bytes = unwrapped_inner
@@ -184,17 +176,14 @@ impl Read for RubyReader {
184
176
 
185
177
  impl Length for RubyReader {
186
178
  fn len(&self) -> u64 {
179
+ let ruby = unsafe { Ruby::get_unchecked() };
187
180
  match self {
188
181
  RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.len(),
189
- RubyReader::String {
190
- ruby,
191
- inner,
192
- offset: _,
193
- } => {
182
+ RubyReader::String { inner, offset: _ } => {
194
183
  let unwrapped_inner = ruby.get_inner(*inner);
195
184
  unwrapped_inner.len() as u64
196
185
  }
197
- RubyReader::RubyIoLike { ruby, inner } => {
186
+ RubyReader::RubyIoLike { inner } => {
198
187
  let unwrapped_inner = ruby.get_inner(*inner);
199
188
 
200
189
  // Get current position
@@ -160,7 +160,12 @@ impl TryIntoValue for ParquetValue {
160
160
  Ok(ary.into_value_with(handle))
161
161
  }
162
162
  ParquetValue::Map(m) => {
163
+ #[cfg(ruby_lt_3_2)]
163
164
  let hash = handle.hash_new_capa(m.len());
165
+
166
+ #[cfg(not(ruby_lt_3_2))]
167
+ let hash = handle.hash_new();
168
+
164
169
  m.into_iter().try_for_each(|(k, v)| {
165
170
  hash.aset(
166
171
  k.try_into_value_with(handle)?,
@@ -29,8 +29,12 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
29
29
  Ok(handle.into_value(ary))
30
30
  }
31
31
  RowRecord::Map(map) => {
32
+ #[cfg(ruby_lt_3_2)]
32
33
  let hash = handle.hash_new_capa(map.len());
33
34
 
35
+ #[cfg(not(ruby_lt_3_2))]
36
+ let hash = handle.hash_new();
37
+
34
38
  let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
35
39
  let mut i = 0;
36
40
 
@@ -78,8 +82,12 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
78
82
  Ok(ary.into_value_with(handle))
79
83
  }
80
84
  ColumnRecord::Map(map) => {
85
+ #[cfg(ruby_lt_3_2)]
81
86
  let hash = handle.hash_new_capa(map.len());
82
87
 
88
+ #[cfg(not(ruby_lt_3_2))]
89
+ let hash = handle.hash_new();
90
+
83
91
  let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
84
92
  let mut i = 0;
85
93
 
@@ -175,9 +183,13 @@ impl TryIntoValue for ParquetField {
175
183
  Ok(ary.into_value_with(handle))
176
184
  }
177
185
  Field::MapInternal(map) => {
178
- let entries = map.entries();
179
- let hash = handle.hash_new_capa(entries.len());
180
- entries.iter().try_for_each(|(k, v)| {
186
+ #[cfg(ruby_lt_3_2)]
187
+ let hash = handle.hash_new_capa(map.len());
188
+
189
+ #[cfg(not(ruby_lt_3_2))]
190
+ let hash = handle.hash_new();
191
+
192
+ map.entries().iter().try_for_each(|(k, v)| {
181
193
  hash.aset(
182
194
  ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
183
195
  ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
@@ -29,7 +29,6 @@ use crate::{
29
29
  IoLikeValue, ParquetSchemaType as PST, ParquetWriteArgs, SchemaField, SendableWrite,
30
30
  };
31
31
 
32
- const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
33
32
  const SAMPLE_SIZE: usize = 100;
34
33
  const MIN_BATCH_SIZE: usize = 10;
35
34
  const INITIAL_BATCH_SIZE: usize = 100;
@@ -1,7 +1,7 @@
1
1
  use super::{
2
2
  build_column_collectors_from_dsl, copy_temp_file_to_io_like, create_writer,
3
3
  parse_parquet_write_args, DEFAULT_MEMORY_THRESHOLD, INITIAL_BATCH_SIZE, MIN_BATCH_SIZE,
4
- MIN_SAMPLES_FOR_ESTIMATE, SAMPLE_SIZE,
4
+ SAMPLE_SIZE,
5
5
  };
6
6
  use crate::{
7
7
  logger::RubyLogger,
@@ -18,6 +18,8 @@ use magnus::{
18
18
  use rand::Rng;
19
19
  use std::sync::Arc;
20
20
 
21
+ const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
22
+
21
23
  #[inline]
22
24
  pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
23
25
  let ruby = unsafe { Ruby::get_unchecked() };
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-27 00:00:00.000000000 Z
11
+ date: 2025-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys