parquet 0.5.1-arm64-darwin → 0.5.3-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4699979e4f19d30fc231e9b5a786696033a10695e2b8eed42cedbfd166b3242f
4
- data.tar.gz: 49a7d0b6aa605c709ca36b7cf3abe5bbcaff7d2c2a920b74afe9400dd6bf5259
3
+ metadata.gz: b7eb67be4612a8c8d5e197ba5b054264e97a428409d6c090c743d70b608b3308
4
+ data.tar.gz: 8c07288afe0c2211306c7785557fd92429fd954f9ef1861456bae610de10c2b5
5
5
  SHA512:
6
- metadata.gz: 5640d3e1d37205cb442b4193d9639edc9ceabb806a0a07fcc63c74b8fc1636ae0f17387d90707aafb8cd2e0533df9e9378ef3d923d7436ddeac721fe89c0220e
7
- data.tar.gz: c679658ca9341ed0b74a29725c47f65e9dbbf2fe5b1893380723fe396479987407e295fa2c41535c4dd2550d419497ec335a84f572a9d7873411db6ace33be19
6
+ metadata.gz: eb9693596f72f2659dae5caffe35c13a781b3231589875ddaba9a22fa16e1d6a3901bf8ced9e84ec1ab2991b759b48643d16da2af5a7042c218f89482727d907
7
+ data.tar.gz: b7695b0dfea66a3535961caf69f5f9f7b36db31a54e56086b803d39d6bba7e19dc4a066bc3dd58dc7c28b3d312d12c30923555e675d2445b677981e3696a629a
Binary file
Binary file
Binary file
@@ -11,6 +11,9 @@ module Parquet
11
11
  # field :id, :int64, nullable: false # ID cannot be null
12
12
  # field :name, :string # Default nullable: true
13
13
  #
14
+ # # Decimal field with precision and scale
15
+ # field :price, :decimal, precision: 10, scale: 2
16
+ #
14
17
  # # List with non-nullable items
15
18
  # field :scores, :list, item: :float, item_nullable: false
16
19
  #
@@ -45,7 +48,7 @@ module Parquet
45
48
 
46
49
  # Define a field in the schema
47
50
  # @param name [String, Symbol] field name
48
- # @param type [Symbol] data type (:int32, :int64, :string, :list, :map, :struct, etc)
51
+ # @param type [Symbol] data type (:int32, :int64, :string, :list, :map, :struct, :decimal, etc)
49
52
  # @param nullable [Boolean] whether the field can be null (default: true)
50
53
  # @param kwargs [Hash] additional options depending on type
51
54
  #
@@ -55,6 +58,7 @@ module Parquet
55
58
  # - `key:, value:` if type == :map
56
59
  # - `key_nullable:, value_nullable:` controls nullability of map keys/values (default: true)
57
60
  # - `format:` if you want to store some format string
61
+ # - `precision:, scale:` if type == :decimal (precision defaults to 38, scale to 0)
58
62
  # - `nullable:` default to true if not specified
59
63
  def field(name, type, nullable: true, **kwargs, &block)
60
64
  field_hash = { name: name.to_s, type: type, nullable: !!nullable }
@@ -73,7 +77,15 @@ module Parquet
73
77
  raise ArgumentError, "list field `#{name}` requires `item:` type" unless item_type
74
78
  # Pass item_nullable if provided, otherwise use true as default
75
79
  item_nullable = kwargs[:item_nullable].nil? ? true : !!kwargs[:item_nullable]
76
- field_hash[:item] = wrap_subtype(item_type, nullable: item_nullable, &block)
80
+
81
+ # Pass precision and scale if type is decimal
82
+ if item_type == :decimal
83
+ precision = kwargs[:precision]
84
+ scale = kwargs[:scale]
85
+ field_hash[:item] = wrap_subtype(item_type, nullable: item_nullable, precision: precision, scale: scale, &block)
86
+ else
87
+ field_hash[:item] = wrap_subtype(item_type, nullable: item_nullable, &block)
88
+ end
77
89
  when :map
78
90
  # user must specify key:, value:
79
91
  key_type = kwargs[:key]
@@ -82,8 +94,41 @@ module Parquet
82
94
  # Pass key_nullable and value_nullable if provided, otherwise use true as default
83
95
  key_nullable = kwargs[:key_nullable].nil? ? true : !!kwargs[:key_nullable]
84
96
  value_nullable = kwargs[:value_nullable].nil? ? true : !!kwargs[:value_nullable]
97
+
85
98
  field_hash[:key] = wrap_subtype(key_type, nullable: key_nullable)
86
- field_hash[:value] = wrap_subtype(value_type, nullable: value_nullable, &block)
99
+
100
+ # Pass precision and scale if value type is decimal
101
+ if value_type == :decimal
102
+ precision = kwargs[:precision]
103
+ scale = kwargs[:scale]
104
+ field_hash[:value] = wrap_subtype(value_type, nullable: value_nullable, precision: precision, scale: scale, &block)
105
+ else
106
+ field_hash[:value] = wrap_subtype(value_type, nullable: value_nullable, &block)
107
+ end
108
+ when :decimal
109
+ # Store precision and scale for decimal type according to rules:
110
+ # 1. When neither precision nor scale is provided, use maximum precision (38) with scale 0
111
+ # 2. When only precision is provided, scale defaults to 0
112
+ # 3. When only scale is provided, use maximum precision (38)
113
+ # 4. When both are provided, use the provided values
114
+
115
+ if kwargs[:precision].nil? && kwargs[:scale].nil?
116
+ # No precision or scale provided - use maximum precision
117
+ field_hash[:precision] = 38
118
+ field_hash[:scale] = 0
119
+ elsif kwargs[:precision] && kwargs[:scale].nil?
120
+ # Precision only - scale defaults to 0
121
+ field_hash[:precision] = kwargs[:precision]
122
+ field_hash[:scale] = 0
123
+ elsif kwargs[:precision].nil? && kwargs[:scale]
124
+ # Scale only - use maximum precision
125
+ field_hash[:precision] = 38
126
+ field_hash[:scale] = kwargs[:scale]
127
+ else
128
+ # Both provided
129
+ field_hash[:precision] = kwargs[:precision]
130
+ field_hash[:scale] = kwargs[:scale]
131
+ end
87
132
  else
88
133
  # primitive type: :int32, :int64, :string, etc.
89
134
  # do nothing else special
@@ -122,7 +167,7 @@ module Parquet
122
167
  # If user said: field "something", :list, item: :struct do ... end
123
168
  # we want to recursively parse that sub-struct from the block.
124
169
  # So wrap_subtype might be:
125
- def wrap_subtype(t, nullable: true, &block)
170
+ def wrap_subtype(t, nullable: true, precision: nil, scale: nil, &block)
126
171
  if t == :struct
127
172
  sub_builder = SchemaBuilder.new
128
173
  sub_builder.instance_eval(&block) if block
@@ -144,6 +189,34 @@ module Parquet
144
189
  end
145
190
 
146
191
  { type: :list, nullable: nullable, name: "item", item: sub_builder.fields[0] }
192
+ elsif t == :decimal
193
+ # Handle decimal type with precision and scale
194
+ result = { type: t, nullable: nullable, name: "item" }
195
+
196
+ # Follow the same rules as in field() method:
197
+ # 1. When neither precision nor scale is provided, use maximum precision (38) with scale 0
198
+ # 2. When only precision is provided, scale defaults to 0
199
+ # 3. When only scale is provided, use maximum precision (38)
200
+ # 4. When both are provided, use the provided values
201
+ if precision.nil? && scale.nil?
202
+ # No precision or scale provided - use maximum precision
203
+ result[:precision] = 38
204
+ result[:scale] = 0
205
+ elsif precision && scale.nil?
206
+ # Precision only - scale defaults to 0
207
+ result[:precision] = precision
208
+ result[:scale] = 0
209
+ elsif precision.nil? && scale
210
+ # Scale only - use maximum precision
211
+ result[:precision] = 38
212
+ result[:scale] = scale
213
+ else
214
+ # Both provided
215
+ result[:precision] = precision
216
+ result[:scale] = scale
217
+ end
218
+
219
+ result
147
220
  else
148
221
  # e.g. :int32 => { type: :int32, nullable: true }
149
222
  { type: t, nullable: nullable, name: "item" }
@@ -1,3 +1,3 @@
1
1
  module Parquet
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/parquet.rbi CHANGED
@@ -1,6 +1,17 @@
1
1
  # typed: true
2
2
 
3
3
  module Parquet
4
+ # Returns metadata information about a Parquet file
5
+ #
6
+ # The returned hash contains information about:
7
+ # - Basic file metadata (num_rows, created_by)
8
+ # - Schema information (fields, types, etc.)
9
+ # - Row group details
10
+ # - Column chunk information (compression, encodings, statistics)
11
+ sig { params(path: String).returns(T::Hash[String, T.untyped]) }
12
+ def self.metadata(path)
13
+ end
14
+
4
15
  # Options:
5
16
  # - `input`: String, File, or IO object containing parquet data
6
17
  # - `result_type`: String specifying the output format
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parquet
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Nathan Jaremko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-27 00:00:00.000000000 Z
11
+ date: 2025-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler