parquet 0.3.3 → 0.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b44b6f3ca56a0f4318c361b309c1af213186dcf09e09acbe54561a2dcc920042
4
- data.tar.gz: 68aa0636e1467e008ec29bbdb43737904b8f0a26f2d91a5169005f896950687f
3
+ metadata.gz: bb28e49c647fcf9dddba5d18806b366e73ecaa6fb3bf8a3534eb7df4749710f0
4
+ data.tar.gz: 44953fd9a1f3fc89f24754a6cb555f4d32a90d4af4bb4aa3c14fc373a1c389df
5
5
  SHA512:
6
- metadata.gz: 30966ae0335b9caa4458a79e5d627b920f2368605785a978690490a7ed05beb1f7ab25eeeed7349cd3c05b49573dc68405cb8c52af5aad5240a42ce4a56b184b
7
- data.tar.gz: 5ea6021844baac3bb31acf41f0fdc3795710f9eff21e9c3556692e70f10398249d802b6f3bb22f414d282f2ff33a2ea951fe9d3040625002752d86311eeae160
6
+ metadata.gz: eaa9767d45deab2c3ba3fb0fc4c7aeb222b4446494262ff01083bdc488cca3164feee0f2f07bff644a3196479da0db4049dee1395b8b89f87f5aeba24a97b60b
7
+ data.tar.gz: 805a9cad6698f6ade3a61b40eb514a948ed6b07682db0bd3e724aad533468a114053b9589ec6261c31e8fe6b244c4808457f9e801a6991dffa86a36326736ab0
@@ -136,7 +136,8 @@ impl TryIntoValue for ParquetField {
136
136
  .map_err(|e| ReaderError::Utf8Error(e))
137
137
  .and_then(|s| Ok(s.into_value_with(handle)))?)
138
138
  } else {
139
- Ok(handle.str_from_slice(s.as_bytes()).as_value())
139
+ let s = String::from_utf8_lossy(s.as_bytes());
140
+ Ok(s.into_value_with(handle))
140
141
  }
141
142
  }
142
143
  Field::Byte(b) => Ok(b.into_value_with(handle)),
@@ -83,7 +83,7 @@ pub fn parse_parquet_rows_args(ruby: &Ruby, args: &[Value]) -> Result<ParquetRow
83
83
  None => ParserResultType::Hash,
84
84
  };
85
85
 
86
- let strict = kwargs.optional.2.flatten().unwrap_or(false);
86
+ let strict = kwargs.optional.2.flatten().unwrap_or(true);
87
87
 
88
88
  Ok(ParquetRowsArgs {
89
89
  to_read,
@@ -159,6 +159,6 @@ pub fn parse_parquet_columns_args(
159
159
  result_type,
160
160
  columns: kwargs.optional.1.flatten(),
161
161
  batch_size: kwargs.optional.2.flatten(),
162
- strict: kwargs.optional.3.flatten().unwrap_or(false),
162
+ strict: kwargs.optional.3.flatten().unwrap_or(true),
163
163
  })
164
164
  }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -11,7 +11,8 @@ module Parquet
11
11
  params(
12
12
  input: T.any(String, File, StringIO, IO),
13
13
  result_type: T.nilable(T.any(String, Symbol)),
14
- columns: T.nilable(T::Array[String])
14
+ columns: T.nilable(T::Array[String]),
15
+ strict: T.nilable(T::Boolean)
15
16
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
16
17
  end
17
18
  sig do
@@ -19,10 +20,11 @@ module Parquet
19
20
  input: T.any(String, File, StringIO, IO),
20
21
  result_type: T.nilable(T.any(String, Symbol)),
21
22
  columns: T.nilable(T::Array[String]),
23
+ strict: T.nilable(T::Boolean),
22
24
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
23
25
  ).returns(NilClass)
24
26
  end
25
- def self.each_row(input, result_type: nil, columns: nil, &blk)
27
+ def self.each_row(input, result_type: nil, columns: nil, strict: nil, &blk)
26
28
  end
27
29
 
28
30
  # Options:
@@ -36,7 +38,8 @@ module Parquet
36
38
  input: T.any(String, File, StringIO, IO),
37
39
  result_type: T.nilable(T.any(String, Symbol)),
38
40
  columns: T.nilable(T::Array[String]),
39
- batch_size: T.nilable(Integer)
41
+ batch_size: T.nilable(Integer),
42
+ strict: T.nilable(T::Boolean)
40
43
  ).returns(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])])
41
44
  end
42
45
  sig do
@@ -45,11 +48,12 @@ module Parquet
45
48
  result_type: T.nilable(T.any(String, Symbol)),
46
49
  columns: T.nilable(T::Array[String]),
47
50
  batch_size: T.nilable(Integer),
51
+ strict: T.nilable(T::Boolean),
48
52
  blk:
49
53
  T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
50
54
  ).returns(NilClass)
51
55
  end
52
- def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
56
+ def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, strict: nil, &blk)
53
57
  end
54
58
 
55
59
  # Options:
@@ -80,7 +84,15 @@ module Parquet
80
84
  sample_size: T.nilable(Integer)
81
85
  ).void
82
86
  end
83
- def self.write_rows(read_from, schema:, write_to:, batch_size: nil, flush_threshold: nil, compression: nil, sample_size: nil)
87
+ def self.write_rows(
88
+ read_from,
89
+ schema:,
90
+ write_to:,
91
+ batch_size: nil,
92
+ flush_threshold: nil,
93
+ compression: nil,
94
+ sample_size: nil
95
+ )
84
96
  end
85
97
 
86
98
  # Options:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys