parquet 0.0.5 → 0.2.5

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/lib/parquet.rbi CHANGED
@@ -1,4 +1,5 @@
1
1
  # typed: strict
2
+
2
3
  module Parquet
3
4
  # Options:
4
5
  # - `input`: String, File, or IO object containing parquet data
@@ -12,7 +13,7 @@ module Parquet
12
13
  result_type: T.nilable(T.any(String, Symbol)),
13
14
  columns: T.nilable(T::Array[String]),
14
15
  blk: T.nilable(T.proc.params(row: T.any(T::Hash[String, T.untyped], T::Array[T.untyped])).void)
15
- ).returns(T.any(Enumerator, NilClass))
16
+ ).returns(T.any(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])], NilClass))
16
17
  end
17
18
  def self.each_row(input, result_type: nil, columns: nil, &blk)
18
19
  end
@@ -31,8 +32,38 @@ module Parquet
31
32
  batch_size: T.nilable(Integer),
32
33
  blk:
33
34
  T.nilable(T.proc.params(batch: T.any(T::Hash[String, T::Array[T.untyped]], T::Array[T::Array[T.untyped]])).void)
34
- ).returns(T.any(Enumerator, NilClass))
35
+ ).returns(T.any(T::Enumerator[T.any(T::Hash[String, T.untyped], T::Array[T.untyped])], NilClass))
35
36
  end
36
37
  def self.each_column(input, result_type: nil, columns: nil, batch_size: nil, &blk)
37
38
  end
39
+
40
+ # Options:
41
+ # - `read_from`: An Enumerator yielding arrays of values representing each row
42
+ # - `schema`: Array of hashes specifying column names and types
43
+ # - `write_to`: String path or IO object to write the parquet file to
44
+ # - `batch_size`: Optional batch size for writing (defaults to 1000)
45
+ sig do
46
+ params(
47
+ read_from: T::Enumerator[T::Array[T.untyped]],
48
+ schema: T::Array[T::Hash[String, String]],
49
+ write_to: T.any(String, IO),
50
+ batch_size: T.nilable(Integer)
51
+ ).void
52
+ end
53
+ def self.write_rows(read_from, schema:, write_to:, batch_size: nil)
54
+ end
55
+
56
+ # Options:
57
+ # - `read_from`: An Enumerator yielding arrays of column batches
58
+ # - `schema`: Array of hashes specifying column names and types
59
+ # - `write_to`: String path or IO object to write the parquet file to
60
+ sig do
61
+ params(
62
+ read_from: T::Enumerator[T::Array[T::Array[T.untyped]]],
63
+ schema: T::Array[T::Hash[String, String]],
64
+ write_to: T.any(String, IO)
65
+ ).void
66
+ end
67
+ def self.write_columns(read_from, schema:, write_to:)
68
+ end
38
69
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-06 00:00:00.000000000 Z
11
+ date: 2025-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -60,13 +60,20 @@ files:
60
60
  - ext/parquet/src/enumerator.rs
61
61
  - ext/parquet/src/header_cache.rs
62
62
  - ext/parquet/src/lib.rs
63
- - ext/parquet/src/parquet_column_reader.rs
64
- - ext/parquet/src/parquet_row_reader.rs
65
- - ext/parquet/src/reader.rs
63
+ - ext/parquet/src/reader/mod.rs
64
+ - ext/parquet/src/reader/parquet_column_reader.rs
65
+ - ext/parquet/src/reader/parquet_row_reader.rs
66
66
  - ext/parquet/src/ruby_integration.rs
67
67
  - ext/parquet/src/ruby_reader.rs
68
- - ext/parquet/src/types.rs
68
+ - ext/parquet/src/types/core_types.rs
69
+ - ext/parquet/src/types/mod.rs
70
+ - ext/parquet/src/types/parquet_value.rs
71
+ - ext/parquet/src/types/record_types.rs
72
+ - ext/parquet/src/types/timestamp.rs
73
+ - ext/parquet/src/types/type_conversion.rs
74
+ - ext/parquet/src/types/writer_types.rs
69
75
  - ext/parquet/src/utils.rs
76
+ - ext/parquet/src/writer/mod.rs
70
77
  - lib/parquet.rb
71
78
  - lib/parquet.rbi
72
79
  - lib/parquet/version.rb