parquet-tyfoom 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +1854 -0
  3. data/Cargo.toml +3 -0
  4. data/Gemfile +21 -0
  5. data/LICENSE +21 -0
  6. data/README.md +428 -0
  7. data/Rakefile +43 -0
  8. data/ext/parquet/Cargo.toml +39 -0
  9. data/ext/parquet/build.rs +5 -0
  10. data/ext/parquet/extconf.rb +4 -0
  11. data/ext/parquet/src/adapter_ffi.rs +297 -0
  12. data/ext/parquet/src/allocator.rs +13 -0
  13. data/ext/parquet/src/lib.rs +24 -0
  14. data/ext/parquet-core/Cargo.toml +24 -0
  15. data/ext/parquet-core/src/arrow_conversion.rs +1243 -0
  16. data/ext/parquet-core/src/error.rs +189 -0
  17. data/ext/parquet-core/src/lib.rs +60 -0
  18. data/ext/parquet-core/src/reader.rs +368 -0
  19. data/ext/parquet-core/src/schema.rs +452 -0
  20. data/ext/parquet-core/src/test_utils.rs +308 -0
  21. data/ext/parquet-core/src/traits/mod.rs +5 -0
  22. data/ext/parquet-core/src/traits/schema.rs +190 -0
  23. data/ext/parquet-core/src/value.rs +220 -0
  24. data/ext/parquet-core/src/writer.rs +1241 -0
  25. data/ext/parquet-core/tests/arrow_conversion_tests.rs +484 -0
  26. data/ext/parquet-core/tests/binary_data.rs +437 -0
  27. data/ext/parquet-core/tests/column_projection.rs +557 -0
  28. data/ext/parquet-core/tests/complex_types.rs +821 -0
  29. data/ext/parquet-core/tests/compression_tests.rs +434 -0
  30. data/ext/parquet-core/tests/concurrent_access.rs +431 -0
  31. data/ext/parquet-core/tests/decimal_tests.rs +488 -0
  32. data/ext/parquet-core/tests/edge_cases_corner_cases.rs +322 -0
  33. data/ext/parquet-core/tests/error_handling_comprehensive_tests.rs +540 -0
  34. data/ext/parquet-core/tests/null_handling_tests.rs +430 -0
  35. data/ext/parquet-core/tests/performance_memory.rs +181 -0
  36. data/ext/parquet-core/tests/primitive_types.rs +547 -0
  37. data/ext/parquet-core/tests/real_world_patterns.rs +777 -0
  38. data/ext/parquet-core/tests/review_regressions.rs +787 -0
  39. data/ext/parquet-core/tests/roundtrip_correctness.rs +279 -0
  40. data/ext/parquet-core/tests/schema_comprehensive_tests.rs +542 -0
  41. data/ext/parquet-core/tests/temporal_tests.rs +518 -0
  42. data/ext/parquet-core/tests/test_helpers.rs +132 -0
  43. data/ext/parquet-core/tests/writer_tests.rs +545 -0
  44. data/ext/parquet-ruby-adapter/Cargo.toml +24 -0
  45. data/ext/parquet-ruby-adapter/build.rs +5 -0
  46. data/ext/parquet-ruby-adapter/examples/try_into_value_demo.rs +98 -0
  47. data/ext/parquet-ruby-adapter/src/chunk_reader.rs +237 -0
  48. data/ext/parquet-ruby-adapter/src/converter.rs +1734 -0
  49. data/ext/parquet-ruby-adapter/src/error.rs +141 -0
  50. data/ext/parquet-ruby-adapter/src/io.rs +432 -0
  51. data/ext/parquet-ruby-adapter/src/lib.rs +91 -0
  52. data/ext/parquet-ruby-adapter/src/logger.rs +67 -0
  53. data/ext/parquet-ruby-adapter/src/metadata.rs +529 -0
  54. data/ext/parquet-ruby-adapter/src/reader.rs +339 -0
  55. data/ext/parquet-ruby-adapter/src/schema.rs +884 -0
  56. data/ext/parquet-ruby-adapter/src/string_cache.rs +115 -0
  57. data/ext/parquet-ruby-adapter/src/string_cache_test.rs +122 -0
  58. data/ext/parquet-ruby-adapter/src/string_storage.rs +632 -0
  59. data/ext/parquet-ruby-adapter/src/try_into_value.rs +91 -0
  60. data/ext/parquet-ruby-adapter/src/types.rs +98 -0
  61. data/ext/parquet-ruby-adapter/src/utils.rs +280 -0
  62. data/ext/parquet-ruby-adapter/src/writer.rs +625 -0
  63. data/lib/parquet/schema.rb +262 -0
  64. data/lib/parquet/version.rb +3 -0
  65. data/lib/parquet.rb +11 -0
  66. data/lib/parquet.rbi +181 -0
  67. metadata +165 -0
data/Cargo.toml ADDED
@@ -0,0 +1,3 @@
1
+ [workspace]
2
+ members = ["./ext/parquet", "./ext/parquet-core", "./ext/parquet-ruby-adapter"]
3
+ resolver = "2"
data/Gemfile ADDED
@@ -0,0 +1,21 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem "rb_sys", "~> 0.9.56"
4
+ gem "rake"
5
+ gem "bigdecimal"
6
+
7
+ # Use local version of parquet
8
+ gemspec
9
+
10
+ group :development do
11
+ # gem "benchmark-ips", "~> 2.12"
12
+ # gem "polars-df"
13
+ # gem "duckdb"
14
+ gem "benchmark-memory"
15
+ end
16
+
17
+ group :test do
18
+ gem "csv"
19
+ gem "logger"
20
+ gem "minitest", "~> 5.0"
21
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Nathan Jaremko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,428 @@
1
+ # parquet-ruby
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/parquet.svg)](https://badge.fury.io/rb/parquet)
4
+
5
+ Read and write [Apache Parquet](https://parquet.apache.org/) files from Ruby. This gem wraps the official Apache [`parquet`](https://github.com/apache/arrow-rs/tree/main/parquet) Rust crate, providing:
6
+
7
+ - **High performance** columnar data storage and retrieval
8
+ - **Memory-efficient** streaming APIs for large datasets
9
+ - **Full compatibility** with the Apache Parquet specification
10
+ - **Simple, Ruby-native** APIs that feel natural
11
+
12
+ ## Why Use This Library?
13
+
14
+ Apache Parquet is the de facto standard for analytical data storage, offering:
15
+ - **Efficient compression** - typically 2-10x smaller than CSV
16
+ - **Fast columnar access** - read only the columns you need
17
+ - **Rich type system** - preserves data types, including nested structures
18
+ - **Wide ecosystem support** - works with Spark, Pandas, DuckDB, and more
19
+
20
+ ## Installation
21
+
22
+ Add this line to your application's Gemfile:
23
+
24
+ ```ruby
25
+ gem 'parquet'
26
+ ```
27
+
28
+ Then execute:
29
+
30
+ ```bash
31
+ $ bundle install
32
+ ```
33
+
34
+ Or install it directly:
35
+
36
+ ```bash
37
+ $ gem install parquet
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ### Reading Data
43
+
44
+ ```ruby
45
+ require "parquet"
46
+
47
+ # Read Parquet files row by row
48
+ Parquet.each_row("data.parquet") do |row|
49
+ puts row # => {"id" => 1, "name" => "Alice", "score" => 95.5}
50
+ end
51
+
52
+ # Or column by column for better performance
53
+ Parquet.each_column("data.parquet", batch_size: 1000) do |batch|
54
+ puts batch # => {"id" => [1, 2, ...], "name" => ["Alice", "Bob", ...]}
55
+ end
56
+ ```
57
+
58
+ ### Writing Data
59
+
60
+ ```ruby
61
+ # Define your schema
62
+ schema = [
63
+ { "id" => "int64" },
64
+ { "name" => "string" },
65
+ { "score" => "double" }
66
+ ]
67
+
68
+ # Write row by row
69
+ rows = [
70
+ [1, "Alice", 95.5],
71
+ [2, "Bob", 82.3]
72
+ ]
73
+
74
+ Parquet.write_rows(rows.each, schema: schema, write_to: "output.parquet")
75
+ ```
76
+
77
+ ## Reading Parquet Files
78
+
79
+ The library provides two APIs for reading data, each optimized for different use cases:
80
+
81
+ ### Row-wise Reading (Sequential Access)
82
+
83
+ Best for: Processing records one at a time, data transformations, ETL pipelines
84
+
85
+ ```ruby
86
+ # Basic usage - returns hashes
87
+ Parquet.each_row("data.parquet") do |row|
88
+ puts row # => {"id" => 1, "name" => "Alice"}
89
+ end
90
+
91
+ # Memory-efficient array format
92
+ Parquet.each_row("data.parquet", result_type: :array) do |row|
93
+ puts row # => [1, "Alice"]
94
+ end
95
+
96
+ # Read specific columns only
97
+ Parquet.each_row("data.parquet", columns: ["id", "name"]) do |row|
98
+ # Only requested columns are loaded from disk
99
+ end
100
+
101
+ # Works with IO objects
102
+ File.open("data.parquet", "rb") do |file|
103
+ Parquet.each_row(file) do |row|
104
+ # Process row
105
+ end
106
+ end
107
+ ```
108
+
109
+ ### Column-wise Reading (Analytical Access)
110
+
111
+ Best for: Analytics, aggregations, when you need few columns from wide tables
112
+
113
+ ```ruby
114
+ # Process data in column batches
115
+ Parquet.each_column("data.parquet", batch_size: 1000) do |batch|
116
+ # batch is a hash of column_name => array_of_values
117
+ ids = batch["id"] # => [1, 2, 3, ..., 1000]
118
+ names = batch["name"] # => ["Alice", "Bob", ...]
119
+
120
+ # Perform columnar operations
121
+ avg_id = ids.sum.to_f / ids.length
122
+ end
123
+
124
+ # Array format for more control
125
+ Parquet.each_column("data.parquet",
126
+ result_type: :array,
127
+ columns: ["id", "name"]) do |batch|
128
+ # batch is an array of arrays
129
+ # [[1, 2, ...], ["Alice", "Bob", ...]]
130
+ end
131
+ ```
132
+
133
+ ### File Metadata
134
+
135
+ Inspect file structure without reading data:
136
+
137
+ ```ruby
138
+ metadata = Parquet.metadata("data.parquet")
139
+
140
+ puts metadata["num_rows"] # Total row count
141
+ puts metadata["created_by"] # Writer identification
142
+ puts metadata["schema"]["fields"] # Column definitions
143
+ puts metadata["row_groups"].size # Number of row groups
144
+ ```
145
+
146
+ ## Writing Parquet Files
147
+
148
+ ### Row-wise Writing
149
+
150
+ Best for: Streaming data, converting from other formats, memory-constrained environments
151
+
152
+ ```ruby
153
+ # Basic schema definition
154
+ schema = [
155
+ { "id" => "int64" },
156
+ { "name" => "string" },
157
+ { "active" => "boolean" },
158
+ { "balance" => "double" }
159
+ ]
160
+
161
+ # Stream data from any enumerable
162
+ rows = CSV.foreach("input.csv").map do |row|
163
+ [row[0].to_i, row[1], row[2] == "true", row[3].to_f]
164
+ end
165
+
166
+ Parquet.write_rows(rows,
167
+ schema: schema,
168
+ write_to: "output.parquet",
169
+ batch_size: 5000 # Rows per batch; must be positive (default: 1000)
170
+ )
171
+ ```
172
+
173
+ ### Column-wise Writing
174
+
175
+ Best for: Pre-columnar data, better compression, higher performance
176
+
177
+ ```ruby
178
+ # Prepare columnar data
179
+ ids = [1, 2, 3, 4, 5]
180
+ names = ["Alice", "Bob", "Charlie", "Diana", "Eve"]
181
+ scores = [95.5, 82.3, 88.7, 91.2, 79.8]
182
+
183
+ # Create batches
184
+ batches = [[
185
+ ids, # First column
186
+ names, # Second column
187
+ scores # Third column
188
+ ]]
189
+
190
+ schema = [
191
+ { "id" => "int64" },
192
+ { "name" => "string" },
193
+ { "score" => "double" }
194
+ ]
195
+
196
+ Parquet.write_columns(batches.each,
197
+ schema: schema,
198
+ write_to: "output.parquet",
199
+ compression: "snappy" # Options: none, snappy, gzip, lz4, zstd
200
+ )
201
+ ```
202
+
203
+ `write_columns` also accepts `logger:` with the same Ruby logger interface as
204
+ row writes.
205
+
206
+ ## Data Types
207
+
208
+ ### Basic Types
209
+
210
+ ```ruby
211
+ schema = [
212
+ # Integers
213
+ { "tiny" => "int8" }, # -128 to 127
214
+ { "small" => "int16" }, # -32,768 to 32,767
215
+ { "medium" => "int32" }, # ±2 billion
216
+ { "large" => "int64" }, # ±9 quintillion
217
+
218
+ # Unsigned integers
219
+ { "ubyte" => "uint8" }, # 0 to 255
220
+ { "ushort" => "uint16" }, # 0 to 65,535
221
+ { "uint" => "uint32" }, # 0 to 4 billion
222
+ { "ulong" => "uint64" }, # 0 to 18 quintillion
223
+
224
+ # Floating point
225
+ { "price" => "float" }, # 32-bit precision
226
+ { "amount" => "double" }, # 64-bit precision
227
+
228
+ # Other basics
229
+ { "name" => "string" },
230
+ { "data" => "binary" },
231
+ { "active" => "boolean" }
232
+ ]
233
+ ```
234
+
235
+ ### Date and Time Types
236
+
237
+ ```ruby
238
+ schema = [
239
+ # Date (days since Unix epoch)
240
+ { "date" => "date32" },
241
+
242
+ # Timestamps (with different precisions)
243
+ { "created_sec" => "timestamp_second" },
244
+ { "created_ms" => "timestamp_millis" }, # Most common
245
+ { "created_us" => "timestamp_micros" },
246
+ { "created_ns" => "timestamp_nanos" },
247
+
248
+ # Time of day (without date)
249
+ { "time_ms" => "time_millis" }, # Milliseconds since midnight
250
+ { "time_us" => "time_micros" } # Microseconds since midnight
251
+ ]
252
+ ```
253
+
254
+ ### Decimal Type (Financial Data)
255
+
256
+ For exact decimal arithmetic (no floating-point errors):
257
+
258
+ ```ruby
259
+ require "bigdecimal"
260
+
261
+ schema = [
262
+ # Financial amounts with 2 decimal places
263
+ { "price" => "decimal", "precision" => 10, "scale" => 2 }, # Up to 99,999,999.99
264
+ { "balance" => "decimal", "precision" => 15, "scale" => 2 }, # Larger amounts
265
+
266
+ # High-precision calculations
267
+ { "rate" => "decimal", "precision" => 10, "scale" => 8 } # 8 decimal places
268
+ ]
269
+
270
+ # Use BigDecimal for exact values
271
+ data = [[
272
+ BigDecimal("19.99"),
273
+ BigDecimal("1234567.89"),
274
+ BigDecimal("0.00000123")
275
+ ]]
276
+ ```
277
+
278
+ ## Complex Data Structures
279
+
280
+ The library includes a powerful Schema DSL for defining nested data:
281
+
282
+ ### Using the Schema DSL
283
+
284
+ ```ruby
285
+ schema = Parquet::Schema.define do
286
+ # Simple fields
287
+ field :id, :int64, nullable: false # Required field
288
+ field :name, :string # Optional by default
289
+
290
+ # Nested structure
291
+ field :address, :struct do
292
+ field :street, :string
293
+ field :city, :string
294
+ field :location, :struct do
295
+ field :lat, :double
296
+ field :lng, :double
297
+ end
298
+ end
299
+
300
+ # Lists
301
+ field :tags, :list, item: :string
302
+ field :scores, :list, item: :int32
303
+
304
+ # Maps (dictionaries)
305
+ field :metadata, :map, key: :string, value: :string
306
+
307
+ # Complex combinations
308
+ field :contacts, :list, item: :struct do
309
+ field :name, :string
310
+ field :email, :string
311
+ field :primary, :boolean
312
+ end
313
+ end
314
+ ```
315
+
316
+ ### Writing Complex Data
317
+
318
+ ```ruby
319
+ data = [[
320
+ 1, # id
321
+ "Alice Johnson", # name
322
+ { # address
323
+ "street" => "123 Main St",
324
+ "city" => "Springfield",
325
+ "location" => {
326
+ "lat" => 40.7128,
327
+ "lng" => -74.0060
328
+ }
329
+ },
330
+ ["ruby", "parquet", "data"], # tags
331
+ [85, 92, 88], # scores
332
+ { "dept" => "Engineering" }, # metadata
333
+ [ # contacts
334
+ { "name" => "Bob", "email" => "bob@example.com", "primary" => true },
335
+ { "name" => "Carol", "email" => "carol@example.com", "primary" => false }
336
+ ]
337
+ ]]
338
+
339
+ Parquet.write_rows(data.each, schema: schema, write_to: "complex.parquet")
340
+ ```
341
+
342
+ ## ⚠️ Important Limitations
343
+
344
+ ### Timezone Handling in Parquet
345
+
346
+ The Parquet specification has a fundamental limitation with timezone storage. A timestamp can be stored in only one of two ways:
347
+
348
+ 1. **UTC-normalized**: Any timestamp with timezone info (including "+09:00" or "America/New_York") is converted to UTC
349
+ 2. **Local/unzoned**: Timestamps without timezone info are stored as-is
350
+
351
+ **The original timezone information is permanently lost.** This is not a limitation of this library but of the Parquet format itself.
352
+
353
+ ```ruby
354
+ schema = Parquet::Schema.define do
355
+ # BOTH of these are stored in UTC - timezone info is lost!
356
+ field :timestamp_utc, :timestamp_millis, timezone: "UTC"
357
+ field :timestamp_tokyo, :timestamp_millis, timezone: "+09:00"
358
+
359
+ # This stores as local time (no timezone)
360
+ field :timestamp_local, :timestamp_millis
361
+ end
362
+
363
+ # If you need timezone preservation, store it separately:
364
+ schema = Parquet::Schema.define do
365
+ field :timestamp, :timestamp_millis, has_timezone: true # UTC storage
366
+ field :original_tz, :string # "America/New_York"
367
+ end
368
+ ```
369
+
370
+ ## Performance Tips
371
+
372
+ 1. **Use column-wise reading** when you need only a few columns from wide tables
373
+ 2. **Specify columns parameter** to avoid reading unnecessary data
374
+ 3. **Choose appropriate batch sizes**:
375
+ - Larger batches = better throughput but more memory
376
+ - Smaller batches = less memory but more overhead
377
+ 4. **Pre-sort data** by commonly filtered columns for better compression
378
+
379
+
380
+ ## Memory Management
381
+
382
+ Writes are streamed: an Enumerator (or any Enumerable) passed to `write_rows`
383
+ is consumed in bounded slices rather than materialized up front, and completed
384
+ row groups are flushed to the destination while the input is still being
385
+ enumerated. Peak memory is bounded by `batch_size` and `flush_threshold`, not
386
+ by the total dataset size:
387
+
388
+ ```ruby
389
+ Parquet.write_rows(huge_dataset.each,
390
+ schema: schema,
391
+ write_to: "output.parquet",
392
+ batch_size: 1000, # Rows buffered per write batch (also the
393
+ # slice size pulled from an Enumerator)
394
+ flush_threshold: 32 * 1024**2 # Flush a row group to the destination once
395
+ # ~32MB of raw row data is staged (default 100MB)
396
+ )
397
+ ```
398
+
399
+ `flush_threshold` bounds both the raw bytes staged since the last flush and
400
+ the writer's encoded in-progress buffer, so row groups reach the destination
401
+ incrementally even when compression shrinks the encoded data dramatically.
402
+ `write_columns` flushes the same way after each batch of columns.
403
+
404
+ When `write_to:` is an IO object instead of a file path, output is staged in a
405
+ temporary file on disk (memory stays bounded) and copied to the IO after the
406
+ write completes, so the IO receives its bytes only at the end.
407
+
408
+ Write batch and sample sizes are bounded before buffer allocation. Very large
409
+ batch sizes are rejected, and wide schemas have a lower effective batch cap so
410
+ the writer cannot reserve unbounded per-column value slots.
411
+
412
+ ## Architecture
413
+
414
+ This gem uses a modular architecture:
415
+
416
+ - **parquet-core**: Language-agnostic Rust core for Parquet operations
417
+ - **parquet-ruby-adapter**: Ruby-specific FFI adapter layer
418
+ - **parquet gem**: High-level Ruby API
419
+
420
+ Take a look at [ARCH.md](./ARCH.md)
421
+
422
+ ## Contributing
423
+
424
+ Bug reports and pull requests are welcome on GitHub at https://github.com/njaremko/parquet-ruby.
425
+
426
+ ## License
427
+
428
+ The gem is available as open source under the terms of the MIT License.
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake/testtask"
4
+ require "rb_sys/extensiontask"
5
+
6
+ task default: :test
7
+
8
+ GEMSPEC = Gem::Specification.load("parquet.gemspec")
9
+
10
+
11
+ platforms = [
12
+ "x86_64-linux",
13
+ "x86_64-linux-musl",
14
+ "aarch64-linux",
15
+ "aarch64-linux-musl",
16
+ "x86_64-darwin",
17
+ "arm64-darwin"
18
+ ]
19
+
20
+ RbSys::ExtensionTask.new("parquet", GEMSPEC) do |ext|
21
+ ext.lib_dir = "lib/parquet"
22
+ ext.ext_dir = "ext/parquet"
23
+ ext.cross_compile = true
24
+ ext.cross_platform = platforms
25
+ ext.cross_compiling do |spec|
26
+ spec.dependencies.reject! { |dep| dep.name == "rb_sys" }
27
+ spec.files.reject! { |file| File.fnmatch?("ext/*", file, File::FNM_EXTGLOB) }
28
+ end
29
+ end
30
+
31
+ Rake::TestTask.new do |t|
32
+ t.deps << :compile
33
+ t.test_files = FileList[File.expand_path("test/*_test.rb", __dir__)]
34
+ t.libs << "lib"
35
+ t.libs << "test"
36
+ end
37
+
38
+ task :release do
39
+ sh "bundle exec rake test"
40
+ sh "mkdir -p pkg"
41
+ sh "gem build parquet.gemspec -o pkg/parquet-#{Parquet::VERSION}.gem"
42
+ sh "gem push pkg/parquet-#{Parquet::VERSION}.gem"
43
+ end
data/ext/parquet/Cargo.toml ADDED
@@ -0,0 +1,39 @@
1
+ [package]
2
+ name = "parquet"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+
6
+ [lib]
7
+ crate-type = ["cdylib"]
8
+
9
+ [build-dependencies]
10
+ rb-sys-env = "^0.2"
11
+
12
+ [dependencies]
13
+ ahash = "0.8"
14
+ arrow-array = "58.3.0"
15
+ arrow-buffer = "58.3.0"
16
+ arrow-ipc = { version = "58.3.0", features = ["lz4"] }
17
+ arrow-schema = "58.3.0"
18
+ bytes = "^1.9"
19
+ either = "1.9"
20
+ itertools = "^0.14"
21
+ jiff = "0.2"
22
+ magnus = { version = "0.8", features = ["rb-sys"] }
23
+ parquet = { version = "58.3.0", features = ["json"] }
24
+ parquet-ruby-adapter = { path = "../parquet-ruby-adapter" }
25
+ rand = "0.9"
26
+ rb-sys = "^0.9"
27
+ simdutf8 = "0.1.5"
28
+ tempfile = "^3.15"
29
+ thiserror = "2.0"
30
+ num = "0.4.3"
31
+ uuid = "1.16.0"
32
+ ordered-float = "5.0.0"
33
+
34
+
35
+ [target.'cfg(target_os = "linux")'.dependencies]
36
+ jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
37
+
38
+ [target.'cfg(not(any(target_os = "linux", target_os = "windows")))'.dependencies]
39
+ mimalloc = { version = "0.1", default-features = false }
data/ext/parquet/build.rs ADDED
@@ -0,0 +1,5 @@
1
+ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
2
+ rb_sys_env::activate()?;
3
+
4
+ Ok(())
5
+ }
data/ext/parquet/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ require "mkmf"
2
+ require "rb_sys/mkmf"
3
+
4
+ create_rust_makefile("parquet/parquet")