parquet 0.3.2 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported public registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 259f41f1ea1b111a0b0fdec15d17b54ac5a4efee750157159a33a4272b0b1310
4
- data.tar.gz: 71579d25b3ec411208103e4afe116285bd75a30003af3f21b7d7972ef8942ff6
3
+ metadata.gz: bb28e49c647fcf9dddba5d18806b366e73ecaa6fb3bf8a3534eb7df4749710f0
4
+ data.tar.gz: 44953fd9a1f3fc89f24754a6cb555f4d32a90d4af4bb4aa3c14fc373a1c389df
5
5
  SHA512:
6
- metadata.gz: 792a6653554393a94de0572ad2164e5da89c7b49b9476599b9d45efd38448f6e7065fa5b7b0082e036ea19da19441b462083ca9e61226f82dd38a812905a189e
7
- data.tar.gz: 1520073d0668751a5c449dde08deea5dd06e50845e8e8fead398d7015d5cc979885897a258f1ab86eb405203fcaf6ebe0164ccdb31e4033e4911ad4b68268d11
6
+ metadata.gz: eaa9767d45deab2c3ba3fb0fc4c7aeb222b4446494262ff01083bdc488cca3164feee0f2f07bff644a3196479da0db4049dee1395b8b89f87f5aeba24a97b60b
7
+ data.tar.gz: 805a9cad6698f6ade3a61b40eb514a948ed6b07682db0bd3e724aad533468a114053b9589ec6261c31e8fe6b244c4808457f9e801a6991dffa86a36326736ab0
@@ -34,9 +34,10 @@ impl RubyReader {
34
34
  }
35
35
 
36
36
  // For now, don't use this. Having to use seek in length is scary.
37
- fn is_seekable_io_like(_value: &Value) -> bool {
38
- // Self::is_io_like(value) && value.respond_to("seek", false).unwrap_or(false)
39
- false
37
+ fn is_seekable_io_like(value: &Value) -> bool {
38
+ Self::is_io_like(value)
39
+ && value.respond_to("seek", false).unwrap_or(false)
40
+ && value.respond_to("pos", false).unwrap_or(false)
40
41
  }
41
42
  }
42
43
 
@@ -92,10 +93,14 @@ impl Seek for RubyReader {
92
93
  let unwrapped_inner = ruby.get_inner(*inner);
93
94
 
94
95
  let new_offset = match pos {
95
- io::SeekFrom::Start(offset) => offset as usize,
96
- io::SeekFrom::Current(offset) => (*original_offset as i64 + offset) as usize,
97
- io::SeekFrom::End(offset) => {
98
- unwrapped_inner.len().saturating_sub(offset as usize)
96
+ SeekFrom::Start(off) => off as usize,
97
+ SeekFrom::Current(off) => {
98
+ let signed = *original_offset as i64 + off;
99
+ signed.max(0) as usize
100
+ }
101
+ SeekFrom::End(off) => {
102
+ let signed = unwrapped_inner.len() as i64 + off;
103
+ signed.max(0) as usize
99
104
  }
100
105
  };
101
106
 
@@ -111,8 +116,12 @@ impl Seek for RubyReader {
111
116
  SeekFrom::End(i) => (2, i),
112
117
  };
113
118
 
119
+ unwrapped_inner
120
+ .funcall::<_, _, u64>("seek", (ruby_offset, whence))
121
+ .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
122
+
114
123
  let new_position = unwrapped_inner
115
- .funcall("seek", (ruby_offset, whence))
124
+ .funcall::<_, _, u64>("pos", ())
116
125
  .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
117
126
 
118
127
  Ok(new_position)
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
136
136
  .map_err(|e| ReaderError::Utf8Error(e))
137
137
  .and_then(|s| Ok(s.into_value_with(handle)))?)
138
138
  } else {
139
- Ok(handle.str_from_slice(s.as_bytes()).as_value())
139
+ let s = String::from_utf8_lossy(s.as_bytes());
140
+ Ok(s.into_value_with(handle))
140
141
  }
141
142
  }
142
143
  Field::Byte(b) => Ok(b.into_value_with(handle)),
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
83
83
  None => ParserResultType::Hash,
84
84
  };
85
85
 
86
- let strict = kwargs.optional.2.flatten().unwrap_or(false);
86
+ let strict = kwargs.optional.2.flatten().unwrap_or(true);
87
87
 
88
88
  Ok(ParquetRowsArgs {
89
89
  to_read,
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
159
159
  result_type,
160
160
  columns: kwargs.optional.1.flatten(),
161
161
  batch_size: kwargs.optional.2.flatten(),
162
- strict: kwargs.optional.3.flatten().unwrap_or(false),
162
+ strict: kwargs.optional.3.flatten().unwrap_or(true),
163
163
  })
164
164
  }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -11,7 +11,8 @@ module Parquet
11
11
  params(
12
12
  input: T.any(String, File, StringIO, IO),
13
13
  result_type: T.nilable(T.any(String, Symbol)),
14
- columns: T.nilable(T::Array[String])
14
+ columns: T.nilable(T::Array[String]),
15
+ strict: T.nilable(T::Boolean)
15
16
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
16
17
  end
17
18
  sig do
@@ -19,10 +20,11 @@ module Parquet
19
20
  input: T.any(String, File, StringIO, IO),
20
21
  result_type: T.nilable(T.any(String, Symbol)),
21
22
  columns: T.nilable(T::Array[String]),
23
+ strict: T.nilable(T::Boolean),
22
24
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
23
25
  ).returns(NilClass)
24
26
  end
25
- def self.each_row(input, result_type: nil, columns: nil, &blk)
27
+ def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
26
28
  end
27
29
 
28
30
  # Options:
@@ -36,7 +38,8 @@ module Parquet
36
38
  input: T.any(String, File, StringIO, IO),
37
39
  result_type: T.nilable(T.any(String, Symbol)),
38
40
  columns: T.nilable(T::Array[String]),
39
- batch_size: T.nilable(Integer)
41
+ batch_size: T.nilable(Integer),
42
+ strict: T.nilable(T::Boolean)
40
43
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
41
44
  end
42
45
  sig do
@@ -45,11 +48,12 @@ module Parquet
45
48
  result_type: T.nilable(T.any(String, Symbol)),
46
49
  columns: T.nilable(T::Array[String]),
47
50
  batch_size: T.nilable(Integer),
51
+ strict: T.nilable(T::Boolean),
48
52
  blk:
49
53
  T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
50
54
  ).returns(NilClass)
51
55
  end
52
- def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
56
+ def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
53
57
  end
54
58
 
55
59
  # Options:
@@ -80,7 +84,15 @@ module Parquet
80
84
  sample_size: T.nilable(Integer)
81
85
  ).void
82
86
  end
83
- def self.write_rows(read_from, schema:, write_to:, batch_size: nil, flush_threshold: nil, compression: nil, sample_size: nil)
87
+ def self.write_rows(
88
+ read_from,
89
+ schema:,
90
+ write_to:,
91
+ batch_size: nil,
92
+ flush_threshold: nil,
93
+ compression: nil,
94
+ sample_size: nil
95
+ )
84
96
  end
85
97
 
86
98
  # Options:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys