parquet 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/parquet/src/ruby_reader.rs +8 -19
- data/ext/parquet/src/types/parquet_value.rs +5 -0
- data/ext/parquet/src/types/record_types.rs +15 -3
- data/ext/parquet/src/writer/mod.rs +0 -1
- data/ext/parquet/src/writer/write_rows.rs +3 -1
- data/lib/parquet/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 896f2833b6db8e4466af8fc9d43eb5c695e25a207a6f8050d22052458edded36
|
4
|
+
data.tar.gz: 38de2831bf7013e0194b2e61a91b26a1283fef65a04309dfbe125c570d64e9ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52d83bc198f789856eac4bff7ff985a82c3f03f75e5de79efc5b388ce5afc63cb507b4cacc90625ee321619ade1ddc16f66f6c437ca0f60d144bc593bbec8cc5
|
7
|
+
data.tar.gz: c6dd98694fd2a1d29ceebec6b58d63220f3992fe4dc63dae1c28ea27f8a353764f87a16a13c95e3e2111bca811c57fde45da9f008a93d8868112b4be608d46ee
|
@@ -19,12 +19,10 @@ use crate::types::ParquetGemError;
|
|
19
19
|
/// and provide a standard Read implementation for them.
|
20
20
|
pub enum RubyReader {
|
21
21
|
String {
|
22
|
-
ruby: Arc<Ruby>,
|
23
22
|
inner: Opaque<RString>,
|
24
23
|
offset: usize,
|
25
24
|
},
|
26
25
|
RubyIoLike {
|
27
|
-
ruby: Arc<Ruby>,
|
28
26
|
inner: Opaque<Value>,
|
29
27
|
},
|
30
28
|
NativeProxyIoLike {
|
@@ -40,7 +38,6 @@ impl RubyReader {
|
|
40
38
|
pub fn new(ruby: Arc<Ruby>, value: Value) -> Result<Self, ParquetGemError> {
|
41
39
|
if RubyReader::is_seekable_io_like(&value) {
|
42
40
|
Ok(RubyReader::RubyIoLike {
|
43
|
-
ruby,
|
44
41
|
inner: Opaque::from(value),
|
45
42
|
})
|
46
43
|
} else if RubyReader::is_io_like(&value) {
|
@@ -49,7 +46,6 @@ impl RubyReader {
|
|
49
46
|
|
50
47
|
// This is safe, because we won't call seek
|
51
48
|
let inner_readable = RubyReader::RubyIoLike {
|
52
|
-
ruby: ruby.clone(),
|
53
49
|
inner: Opaque::from(value),
|
54
50
|
};
|
55
51
|
let mut reader = BufReader::new(inner_readable);
|
@@ -68,7 +64,6 @@ impl RubyReader {
|
|
68
64
|
.funcall::<_, _, RString>("to_str", ())
|
69
65
|
.or_else(|_| value.funcall::<_, _, RString>("to_s", ()))?;
|
70
66
|
Ok(RubyReader::String {
|
71
|
-
ruby,
|
72
67
|
inner: Opaque::from(string_content),
|
73
68
|
offset: 0,
|
74
69
|
})
|
@@ -89,10 +84,10 @@ impl RubyReader {
|
|
89
84
|
|
90
85
|
impl Seek for RubyReader {
|
91
86
|
fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
|
87
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
92
88
|
match self {
|
93
89
|
RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.seek(pos),
|
94
90
|
RubyReader::String {
|
95
|
-
ruby,
|
96
91
|
inner,
|
97
92
|
offset: original_offset,
|
98
93
|
} => {
|
@@ -113,7 +108,7 @@ impl Seek for RubyReader {
|
|
113
108
|
*original_offset = new_offset.min(unwrapped_inner.len());
|
114
109
|
Ok(*original_offset as u64)
|
115
110
|
}
|
116
|
-
RubyReader::RubyIoLike {
|
111
|
+
RubyReader::RubyIoLike { inner } => {
|
117
112
|
let unwrapped_inner = ruby.get_inner(*inner);
|
118
113
|
|
119
114
|
let (whence, ruby_offset) = match pos {
|
@@ -138,13 +133,10 @@ impl Seek for RubyReader {
|
|
138
133
|
|
139
134
|
impl Read for RubyReader {
|
140
135
|
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
136
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
141
137
|
match self {
|
142
138
|
RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.read(buf),
|
143
|
-
RubyReader::String {
|
144
|
-
ruby,
|
145
|
-
inner,
|
146
|
-
offset,
|
147
|
-
} => {
|
139
|
+
RubyReader::String { inner, offset } => {
|
148
140
|
let unwrapped_inner = ruby.get_inner(*inner);
|
149
141
|
|
150
142
|
let string_buffer = unsafe { unwrapped_inner.as_slice() };
|
@@ -160,7 +152,7 @@ impl Read for RubyReader {
|
|
160
152
|
|
161
153
|
Ok(copy_size)
|
162
154
|
}
|
163
|
-
RubyReader::RubyIoLike {
|
155
|
+
RubyReader::RubyIoLike { inner } => {
|
164
156
|
let unwrapped_inner = ruby.get_inner(*inner);
|
165
157
|
|
166
158
|
let bytes = unwrapped_inner
|
@@ -184,17 +176,14 @@ impl Read for RubyReader {
|
|
184
176
|
|
185
177
|
impl Length for RubyReader {
|
186
178
|
fn len(&self) -> u64 {
|
179
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
187
180
|
match self {
|
188
181
|
RubyReader::NativeProxyIoLike { proxy_file } => proxy_file.len(),
|
189
|
-
RubyReader::String {
|
190
|
-
ruby,
|
191
|
-
inner,
|
192
|
-
offset: _,
|
193
|
-
} => {
|
182
|
+
RubyReader::String { inner, offset: _ } => {
|
194
183
|
let unwrapped_inner = ruby.get_inner(*inner);
|
195
184
|
unwrapped_inner.len() as u64
|
196
185
|
}
|
197
|
-
RubyReader::RubyIoLike {
|
186
|
+
RubyReader::RubyIoLike { inner } => {
|
198
187
|
let unwrapped_inner = ruby.get_inner(*inner);
|
199
188
|
|
200
189
|
// Get current position
|
@@ -160,7 +160,12 @@ impl TryIntoValue for ParquetValue {
|
|
160
160
|
Ok(ary.into_value_with(handle))
|
161
161
|
}
|
162
162
|
ParquetValue::Map(m) => {
|
163
|
+
#[cfg(ruby_lt_3_2)]
|
163
164
|
let hash = handle.hash_new_capa(m.len());
|
165
|
+
|
166
|
+
#[cfg(not(ruby_lt_3_2))]
|
167
|
+
let hash = handle.hash_new();
|
168
|
+
|
164
169
|
m.into_iter().try_for_each(|(k, v)| {
|
165
170
|
hash.aset(
|
166
171
|
k.try_into_value_with(handle)?,
|
@@ -29,8 +29,12 @@ impl<S: BuildHasher + Default> TryIntoValue for RowRecord<S> {
|
|
29
29
|
Ok(handle.into_value(ary))
|
30
30
|
}
|
31
31
|
RowRecord::Map(map) => {
|
32
|
+
#[cfg(ruby_lt_3_2)]
|
32
33
|
let hash = handle.hash_new_capa(map.len());
|
33
34
|
|
35
|
+
#[cfg(not(ruby_lt_3_2))]
|
36
|
+
let hash = handle.hash_new();
|
37
|
+
|
34
38
|
let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
|
35
39
|
let mut i = 0;
|
36
40
|
|
@@ -78,8 +82,12 @@ impl<S: BuildHasher + Default> TryIntoValue for ColumnRecord<S> {
|
|
78
82
|
Ok(ary.into_value_with(handle))
|
79
83
|
}
|
80
84
|
ColumnRecord::Map(map) => {
|
85
|
+
#[cfg(ruby_lt_3_2)]
|
81
86
|
let hash = handle.hash_new_capa(map.len());
|
82
87
|
|
88
|
+
#[cfg(not(ruby_lt_3_2))]
|
89
|
+
let hash = handle.hash_new();
|
90
|
+
|
83
91
|
let mut values: [Value; 128] = [handle.qnil().as_value(); 128];
|
84
92
|
let mut i = 0;
|
85
93
|
|
@@ -175,9 +183,13 @@ impl TryIntoValue for ParquetField {
|
|
175
183
|
Ok(ary.into_value_with(handle))
|
176
184
|
}
|
177
185
|
Field::MapInternal(map) => {
|
178
|
-
|
179
|
-
let hash = handle.hash_new_capa(
|
180
|
-
|
186
|
+
#[cfg(ruby_lt_3_2)]
|
187
|
+
let hash = handle.hash_new_capa(map.len());
|
188
|
+
|
189
|
+
#[cfg(not(ruby_lt_3_2))]
|
190
|
+
let hash = handle.hash_new();
|
191
|
+
|
192
|
+
map.entries().iter().try_for_each(|(k, v)| {
|
181
193
|
hash.aset(
|
182
194
|
ParquetField(k.clone(), self.1).try_into_value_with(handle)?,
|
183
195
|
ParquetField(v.clone(), self.1).try_into_value_with(handle)?,
|
@@ -29,7 +29,6 @@ use crate::{
|
|
29
29
|
IoLikeValue, ParquetSchemaType as PST, ParquetWriteArgs, SchemaField, SendableWrite,
|
30
30
|
};
|
31
31
|
|
32
|
-
const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
|
33
32
|
const SAMPLE_SIZE: usize = 100;
|
34
33
|
const MIN_BATCH_SIZE: usize = 10;
|
35
34
|
const INITIAL_BATCH_SIZE: usize = 100;
|
@@ -1,7 +1,7 @@
|
|
1
1
|
use super::{
|
2
2
|
build_column_collectors_from_dsl, copy_temp_file_to_io_like, create_writer,
|
3
3
|
parse_parquet_write_args, DEFAULT_MEMORY_THRESHOLD, INITIAL_BATCH_SIZE, MIN_BATCH_SIZE,
|
4
|
-
|
4
|
+
SAMPLE_SIZE,
|
5
5
|
};
|
6
6
|
use crate::{
|
7
7
|
logger::RubyLogger,
|
@@ -18,6 +18,8 @@ use magnus::{
|
|
18
18
|
use rand::Rng;
|
19
19
|
use std::sync::Arc;
|
20
20
|
|
21
|
+
const MIN_SAMPLES_FOR_ESTIMATE: usize = 10;
|
22
|
+
|
21
23
|
#[inline]
|
22
24
|
pub fn write_rows(args: &[Value]) -> Result<(), MagnusError> {
|
23
25
|
let ruby = unsafe { Ruby::get_unchecked() };
|
data/lib/parquet/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parquet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|