red-arrow-activerecord 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40f485293bb04645002b8b113c077728fc300c5a292ce3d85d26bf703b09845c
4
- data.tar.gz: 2139a0c5b8a850f943060d69b083d2604c83366aa709607884cc21154d6cb62f
3
+ metadata.gz: e8d253180a9bf6653d4034c162efd4b092e4ea9e44f983c2f4cb8ea2aa95c8e1
4
+ data.tar.gz: d5d7ea4de22c17357d4eefa6518fadc8c88420e161be91df946e85dfdf7964c9
5
5
  SHA512:
6
- metadata.gz: de3dfa57565917e173b24c9bdcba48dd8d6f285439c096e4e6853dd01252c5bd5002874fee28a054c0c2232f04e36548d16410145728fe9759a20e44fe372ff9
7
- data.tar.gz: fb52b67b4f2ff43d5bbd01acd08edc934eb036d116a8e46f250988f72a0b1452884a944c0c17c9b5f7ff87d9f6e3ed267b67d630116399c6645bad2ff7a9171f
6
+ metadata.gz: 90502eefb44fd4d6d6ffa6e66ce9864b5c6e0b384885ff5f5512f640629b19100a3d708af5b2644711bb6a25988f26bda90ee88c3f5adaad698288bda81a2f6c
7
+ data.tar.gz: 38f2b95f2da0f994ee554edeedaff59d5a5ec19a344c2c0957b74647f78c0bb20538d8d0119766f728356bd9e54850c95212a13160a8a64e61c914791eb73029
data/doc/text/news.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 0.1.3 - 2026-05-22
4
+
5
+ ### Improvements
6
+
7
+ * Added `#each_record_batch`.
8
+
3
9
  ## 0.1.2 - 2026-05-22
4
10
 
5
11
  ### Improvements
@@ -3,20 +3,15 @@ require 'arrow'
3
3
  module ArrowActiveRecord
4
4
  module Arrowable
5
5
  def to_arrow(batch_size: 10000)
6
- target_column_names = select_values
7
- target_column_names = column_names if select_values.empty?
6
+ record_batches = each_record_batch(batch_size:).to_a
7
+ Arrow::Table.new(record_batches.first.schema, record_batches)
8
+ end
8
9
 
9
- fields = []
10
- target_column_names.each do |name|
11
- name = name.to_s
12
- target_column = columns.find do |column|
13
- column.name == name
14
- end
15
- fields << {name: name, data_type: extract_arrow_data_type(target_column)}
16
- end
17
- schema = Arrow::Schema.new(fields)
10
+ def each_record_batch(batch_size: 10000, &block)
11
+ return to_enum(__method__, batch_size:) unless block_given?
18
12
 
19
- record_batches = []
13
+ schema = build_arrow_schema
14
+ target_column_names = schema.fields.collect(&:name)
20
15
  record_batch_builder = Arrow::RecordBatchBuilder.new(schema)
21
16
  in_batches(of: batch_size).each do |relation|
22
17
  records = relation.pluck(*target_column_names)
@@ -27,12 +22,26 @@ module ArrowActiveRecord
27
22
  else
28
23
  record_batch_builder.append(records)
29
24
  end
30
- record_batches << record_batch_builder.flush
25
+ yield(record_batch_builder.flush)
31
26
  end
32
- Arrow::Table.new(schema, record_batches)
33
27
  end
34
28
 
35
29
  private
30
+ def build_arrow_schema
31
+ target_column_names = select_values
32
+ target_column_names = column_names if select_values.empty?
33
+
34
+ fields = []
35
+ target_column_names.each do |name|
36
+ name = name.to_s
37
+ target_column = columns.find do |column|
38
+ column.name == name
39
+ end
40
+ fields << {name: name, data_type: extract_arrow_data_type(target_column)}
41
+ end
42
+ Arrow::Schema.new(fields)
43
+ end
44
+
36
45
  def extract_arrow_data_type(column)
37
46
  type = nil
38
47
  if column
@@ -1,3 +1,3 @@
1
1
  module ArrowActiverecord
2
- VERSION = '0.1.2'
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-activerecord
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - hatappi