med_pipe 0.1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 2d62e07ad3b1678e874749b5e34e930992476342d40ddad20be2dfd5b1da6593
4
+ data.tar.gz: 6d9c9dbffe01a6e7d4dd47c59a61f8233139d140600dc30d361cbe8d33c965ae
5
+ SHA512:
6
+ metadata.gz: 198784f564acaa4e36cbfdfb8e8ded498645124ca6d6a74f07b32a6e0515f2fbb0ddb3a9df9c2c79e983f6cc3f3811c4aac5d03518237fdb4e1eaf9de6f36731
7
+ data.tar.gz: 51f1a3a9e7eaca4c62e304874022da628e4bf9c85de965c24328d12b5ea5c87abe0755f3b05e8f59a857d4d5615c869f058af9780e50f75af963118562ac67c5
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright mpg-taichi-sato
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # MedPipe <sup>BETA</sup>
2
+ 100万 ~ 数10億程度のデータを処理するための仕組みを提供する Rails エンジンです。
3
+
4
+ ## Concept
5
+ ### MedPipe::Pipeline
6
+ apply で後述する PipelineTask を登録し、run で順番に実行します。
7
+
8
+ ### MedPipe::PipelineTask
9
+ Pipeline に登録する処理の単位です。
10
+ DB からの読み込みや、S3 へのアップロード等やることを分割してタスク化します。
11
+ 大量データを扱う際には Enumerable::Lazy を使うことで分割して処理をすることができます。
12
+ call を実装する必要があります
13
+
14
+ ```ruby
15
+ # @param context [Hash] Stores data during pipeline execution
16
+ # @param prev_result [Object] The result of the previous task
17
+ def call(context, prev_result)
18
+ yield 次のTaskに渡すデータ
19
+ end
20
+ ```
21
+
22
+ ### MedPipe::PipelinePlan
23
+ Pipeline の状態、オプション、結果を保存するためのモデルです。
24
+ Task で使うためのオプションを渡す方法は PipelinePlan から取得するか、contextで伝搬するかの二択です。
25
+
26
+ ### MedPipe::PipelineGroup
27
+ 一つのジョブで実行する Plan をまとめるためのモデルです。
28
+ 実行中に parallel_limit を 0 にすることで中断することができます。
29
+
30
+ ## Usage
31
+
32
+ 1. Reader, Uploader 等の PipelineTask を作成 [Samples](https://github.com/medpeer-dev/med_pipe/tree/main/spec/dummy/app/models/pipeline_task)
33
+ 2. PipelineRunner を作成 [Sample](https://github.com/medpeer-dev/med_pipe/blob/main/spec/dummy/app/models/sample_pipeline_runner.rb)
34
+ 3. Pipeline を並列実行するためのジョブを作成 [Sample](https://github.com/medpeer-dev/med_pipe/blob/main/spec/dummy/app/jobs/sample_execute_pipeline_job.rb)
35
+ 4. PipelinePlan を登録するコードを記述
36
+ 5. 実行
37
+
38
+ ## Installation
39
+ Add this line to your application's Gemfile:
40
+
41
+ ```ruby
42
+ gem "med_pipe"
43
+ ```
44
+
45
+ ### migrationファイルの追加
46
+
47
+ ```shell
48
+ $ rails med_pipe:install:migrations
49
+ ```
50
+
51
+ ## Contributing
52
+ Bug reports and pull requests are welcome.
53
+
54
+ ## License
55
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/setup"
4
+
5
+ APP_RAKEFILE = File.expand_path("spec/dummy/Rakefile", __dir__)
6
+ load "rails/tasks/engine.rake"
7
+
8
+ load "rails/tasks/statistics.rake"
9
+
10
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Abstract base class shared by the models of this engine.
  class ApplicationRecord < ActiveRecord::Base
    self.abstract_class = true

    class << self
      # Prefix used for the table names of the models inheriting from this class.
      #
      # @return [String]
      def table_name_prefix
        "med_pipe_"
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Groups the pipeline plans that are executed together.
  # Destroying a group also destroys the plans that belong to it.
  class PipelineGroup < MedPipe::ApplicationRecord
    has_many :pipeline_plans, dependent: :destroy, class_name: "MedPipe::PipelinePlan"
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Stores the state, options and results of a single pipeline run.
  class PipelinePlan < MedPipe::ApplicationRecord
    # NOTE(review): the association is optional here, but the migration adds the
    # pipeline_group reference with `null: false` - confirm which one is intended.
    belongs_to :pipeline_group, optional: true, class_name: "MedPipe::PipelineGroup"

    # Plans that are either waiting in the queue or currently being executed.
    scope :active, -> { where(status: %i[enqueued running]) }

    validates :name, :output_unit, :status, presence: true

    # TODO: This is the Rails 6 enum syntax; change the definition style when upgrading to Rails 8.
    # https://zenn.dev/kanazawa/articles/8bc1fcbba3ef1d#enum%E3%81%AE%E5%AE%9A%E7%BE%A9%E6%96%B9%E6%B3%95%E3%81%8C%E5%A4%89%E3%82%8F%E3%82%8B
    enum status: {
      waiting: "waiting",
      enqueued: "enqueued",
      running: "running",
      finished: "finished",
      failed: "failed"
    }, _prefix: true

    enum output_unit: {
      daily: "daily",
      all: "all"
    }, _prefix: true
  end
end
data/config/routes.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ Rails.application.routes.draw do
4
+ # No routes are defined.
5
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
# Creates the med_pipe_pipeline_plans table.
#
# The migration targets the 6.1 migration API because the gem declares support
# for Rails >= 6.1.7; ActiveRecord::Migration[7.2] cannot be loaded by any
# Rails version older than 7.2.
class CreateMedPipePipelinePlans < ActiveRecord::Migration[6.1]
  def change
    create_table :med_pipe_pipeline_plans do |t|
      t.string :name, null: false, comment: "パイプライン名"
      t.integer :priority, null: false, default: 0, comment: "実行優先度"
      t.string :status, null: false
      t.string :output_unit, null: false, comment: "実行単位. 日ごと、全て等"
      t.date :target_date, comment: "実行対象日. output_unit が daily の場合に指定"
      t.bigint :data_count
      t.string :file_name
      t.bigint :file_size
      t.string :upload_to
      # The precision is explicit so that the columns keep the precision the
      # 7.x migration API applies by default to datetime columns.
      t.datetime :started_at, precision: 6
      t.datetime :finished_at, precision: 6

      t.timestamps
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
# Creates the med_pipe_pipeline_groups table and links pipeline plans to it.
#
# The migration targets the 6.1 migration API because the gem declares support
# for Rails >= 6.1.7; ActiveRecord::Migration[7.2] cannot be loaded by any
# Rails version older than 7.2.
class CreateMedPipePipelineGroups < ActiveRecord::Migration[6.1]
  def change
    create_table :med_pipe_pipeline_groups do |t|
      t.integer :parallel_limit, null: false, default: 1, comment: "並列実行数"
      t.timestamps
    end

    # NOTE(review): the column is NOT NULL while MedPipe::PipelinePlan declares
    # the association as optional - confirm which one is intended.
    add_reference :med_pipe_pipeline_plans, :pipeline_group, null: false
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Fetches ids in ascending order, loading at most max_load_size ids per query
  # and yielding them in arrays of batch_size ids.
  #
  # When to use:
  # - 100,000 or more ids have to be fetched
  # - in_batches should be avoided to improve speed
  class BatchIdFetcher
    # @param relation [#where, #order, #limit, #ids] relation the ids are read from
    # @param batch_size [Integer] number of ids in each yielded array
    # @param max_load_size [Integer] maximum number of ids loaded by a single query
    # @raise [ArgumentError] if batch_size or max_load_size is not greater than 0
    def initialize(relation, batch_size: 1_000, max_load_size: 100_000)
      @relation = relation
      @batch_size = batch_size
      @max_load_size = max_load_size
      validate_parameters
    end

    # @yieldparam [Array] up to batch_size ids; only the last array may be shorter
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each
      return enum_for(:each) unless block_given?

      # nil means "nothing loaded yet". A numeric sentinel such as 0 would be
      # mistaken for a real id and make the fetcher start over from the beginning.
      last_id = nil
      cached_ids = []

      loop do
        loaded_ids = load_ids(last_id)
        break if loaded_ids.empty?

        last_id = loaded_ids.last
        cached_ids.concat(loaded_ids)
        yield(cached_ids.shift(@batch_size)) while cached_ids.size >= @batch_size

        # A short load means the relation is exhausted: flush the remainder.
        if loaded_ids.size < @max_load_size
          yield(cached_ids) unless cached_ids.empty?
          break
        end
      end
    end

    private

    def validate_parameters
      raise ArgumentError, "batch_size must be greater than 0" if @batch_size <= 0
      raise ArgumentError, "max_load_size must be greater than 0" if @max_load_size <= 0
    end

    # Loads the next chunk of ids that come after last_id (or the first chunk
    # when last_id is nil).
    def load_ids(last_id)
      scope = last_id.nil? ? @relation : @relation.where("id > ?", last_id)
      scope.order(:id).limit(@max_load_size).ids
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ # 大量データを分割取得するためのクラス
4
+ # in_batches では scope が全クエリに含まれるが、本クラスではidの取得でのみ scope を使用する
5
+ class MedPipe::BatchReader
6
+ def initialize(model_class, scope: nil, pluck_columns: [:id], batch_size: 1_000,
7
+ max_id_load_size: 100_000)
8
+ @model_class = model_class
9
+ @scope = scope || model_class.all
10
+ @pluck_columns = pluck_columns
11
+ @batch_size = batch_size
12
+ @max_id_load_size = max_id_load_size
13
+ @around_load_callback = nil
14
+ validate_parameters
15
+ end
16
+
17
+ # EXAMPLE:
18
+ # MedPipe::BatchReader.new(User)
19
+ # .around_load { |&block| ApplicationRecord.connected_to(role: :reading, &block) }
20
+ def around_load(&block)
21
+ @around_load_callback = block
22
+ self
23
+ end
24
+
25
+ # @yieldparam [Array] pluck結果を1件ずつ渡す
26
+ def each(&block)
27
+ return enum_for(:each) unless block
28
+
29
+ each_ids = MedPipe::BatchIdFetcher.new(@scope, batch_size: @batch_size, max_load_size: @max_id_load_size).each
30
+ loop do
31
+ records = @around_load_callback&.call { batch_load(each_ids) } || batch_load(each_ids)
32
+ records.each(&block)
33
+ rescue StopIteration
34
+ break
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def validate_parameters
41
+ raise ArgumentError, "model_class must be a subclass of ApplicationRecord" unless @model_class < ApplicationRecord
42
+ end
43
+
44
+ def batch_load(each_ids)
45
+ # in_batches ではクエリキャッシュが無効になっているため、それに倣う
46
+ @model_class.uncached do
47
+ @model_class.where(id: each_ids.next).pluck(*@pluck_columns)
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  class Engine < Rails::Engine
    # Changes the command that generates the migration files from
    # med_pipe_engine:install:migrations to med_pipe:install:migrations.
    # https://edgeapi.rubyonrails.org/classes/Rails/Engine.html#class-Rails::Engine-label-Engine+name
    engine_name "med_pipe"
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Runs tasks that are chained in series, one after another.
  class Pipeline
    def initialize
      @tasks = []
    end

    # Appends a task to the end of the chain.
    #
    # @param task [Object] instance of a class implementing def call(context, prev_result, &block)
    # @return [self]
    def apply(task)
      @tasks.push(task)
      self
    end

    # Runs the registered tasks. Expanded, the execution looks like this:
    #   @tasks[0].call(context, nil) do |prev_result|
    #     @tasks[1].call(context, prev_result) do |prev_result|
    #       @tasks[2].call(context, prev_result) do |prev_result|
    #         nil
    #       end
    #     end
    #   end
    #
    # @param context [Hash] Stores data during pipeline execution
    def run(context = {})
      run_task_recursive(context)
    end

    private

    # Calls the task at task_index and runs the following task from inside the
    # block that task yields to; past the last task the previous result is returned.
    def run_task_recursive(context, prev_result = nil, task_index = 0)
      if task_index < @tasks.size
        current_task = @tasks[task_index]
        current_task&.call(context, prev_result) { |result| run_task_recursive(context, result, task_index + 1) }
      else
        prev_result
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Fetches one enqueued pipeline plan and runs it.
  class PipelinePlanConsumer
    # @param pipeline_group [Object] group whose pipeline plans are consumed
    # @param pipeline_runner [Proc] builds a pipeline from a pipeline plan and runs it
    def initialize(pipeline_group:, pipeline_runner:)
      @pipeline_group = pipeline_group
      @pipeline_runner = pipeline_runner
    end

    # @return [PipelinePlan] the pipeline plan that was run, or nil if there was none
    # @raise [StandardError] re-raises any error after marking the fetched plan as failed
    def run
      pipeline_plan = fetch_and_run_pipeline_plan
      return nil if pipeline_plan.nil?

      @pipeline_runner.call(pipeline_plan)
      complete_pipeline_plan(pipeline_plan)
      pipeline_plan
    rescue StandardError
      # pipeline_plan is nil when fetching itself failed; calling update! on nil
      # would replace the original error with a NoMethodError.
      error_pipeline_plan(pipeline_plan) if pipeline_plan
      raise
    end

    private

    # Locks the enqueued plan with the highest priority and marks it as running.
    # The block always ends normally (no `return`), so the transaction is never
    # left through a non-local exit.
    def fetch_and_run_pipeline_plan
      ::ApplicationRecord.transaction do
        target_pipeline_plan = @pipeline_group.pipeline_plans.lock.status_enqueued.order(priority: :desc).first
        target_pipeline_plan&.update!(status: :running)
        target_pipeline_plan
      end
    end

    def complete_pipeline_plan(pipeline_plan)
      pipeline_plan.update!(status: :finished, finished_at: Time.current)
    end

    def error_pipeline_plan(pipeline_plan)
      pipeline_plan.update!(status: :failed)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Changes the status of multiple PipelinePlans to enqueued, starting with the
  # highest priority and taking the number of parallel executions into account.
  class PipelinePlanProducer
    # @param pipeline_group [MedPipe::PipelineGroup]
    def initialize(pipeline_group)
      @pipeline_group = pipeline_group
    end

    # @return [Array<MedPipe::PipelinePlan>] Enqueued pipeline plans. Returns nil if nothing was enqueued
    def run
      return unless @pipeline_group.parallel_limit.positive?

      @pipeline_group.with_lock do
        free_slots = @pipeline_group.parallel_limit - @pipeline_group.pipeline_plans.active.count
        enqueue(free_slots) if free_slots.positive?
      end
    end

    private

    # Marks up to size waiting plans as enqueued.
    def enqueue(size)
      waiting_plans = fetch_target_pipeline_plans(size: size)
      return if waiting_plans.empty?

      waiting_plans.each { |plan| plan.update!(status: :enqueued) }
    end

    def fetch_target_pipeline_plans(size:)
      @pipeline_group.pipeline_plans.status_waiting.order(priority: :desc).limit(size)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Helper base class for building the PipelineRunner passed to PipelinePlanConsumer.
  # Implementing call(pipeline_plan) is all that is required, so using this class is optional.
  class PipelineRunnerBase
    # Called by PipelinePlanConsumer. Builds the pipeline for the plan and runs
    # it with a context that holds the plan under the :plan key.
    #
    # @param pipeline_plan [Object] plan to run
    # @return [Object] the result of running the pipeline
    def call(pipeline_plan)
      pipeline = build_pipeline(pipeline_plan)
      context = { plan: pipeline_plan }
      pipeline.run(context)
    end

    # Builds the pipeline for the given plan. Must be implemented by subclasses.
    #
    # @param pipeline_plan [#name]
    # @raise [NotImplementedError] unless overridden
    def build_pipeline(pipeline_plan)
      # `raise NotImplementedError("...")` would call a method named
      # NotImplementedError and fail with NoMethodError instead.
      raise NotImplementedError, "#{pipeline_plan.name}に対応するPipelineを作成する処理をサブクラスで実装してください"
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  module PipelineTask
    # Counts the elements flowing through the pipeline and publishes the
    # running total as context[:data_count].
    class Counter
      def initialize
        @count = 0
      end

      # @param context [Hash]
      # @param input [Enumerable<Array<Object>>]
      # @yieldparam [Enumerable<Array<Object>>] the input, passed on with its elements unchanged
      def call(context, input)
        counted = input.map do |element|
          increment(context)
          element
        end
        yield counted
      end

      # Adds one to the count and stores the new total in the context.
      def increment(context)
        @count += 1
        context[:data_count] = @count
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  module PipelineTask
    # Copies the results collected in the context onto the plan stored in context[:plan].
    class PlanUpdater
      # @param save [Boolean] when true, the plan is saved. The default is not to save here,
      #   because the plan is updated anyway when it is marked as finished
      def initialize(save: false)
        @save = save
      end

      # @param context [Hash]
      # @param input [Enumerable<Array<Object>>]
      # @yieldparam [Enumerable<Array<Object>>] the input, passed on as is
      def call(context, input)
        update_plan(context)
        return input unless block_given?

        yield(input)
      end

      private

      # Assigns every result present in the context to the attribute of the same name.
      def update_plan(context)
        plan = context[:plan]
        return unless plan

        %i[data_count file_name file_size upload_to].each do |attribute|
          value = context[attribute]
          plan.public_send(:"#{attribute}=", value) if value
        end
        plan.save if @save
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
# Tempfile is part of the standard library but is not loaded by default.
require "tempfile"

module MedPipe
  module PipelineTask
    # Writes the given lines to a temporary TSV file and yields that file.
    class TsvGenerater
      TSV_OPTION = { col_sep: "\t" }.freeze

      # @param lines [Enumerable<Array<Object>>] Enumerable of arrays of objects that respond to to_s
      # @yieldparam [File] the generated tsv file, rewound to its beginning
      # @return [Object] the value returned by the block
      def call(_context, lines)
        Tempfile.create do |file|
          lines.each do |line|
            # Replace empty strings with nil so that they are not written as the literal ""
            normalized_line = line.map { |v| v == "" ? nil : v }
            tsv_line = CSV.generate_line(normalized_line, **TSV_OPTION)
            file.puts(tsv_line)
          end
          file.rewind

          yield(file)
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
module MedPipe
  # Namespace for the pipeline tasks.
  module PipelineTask
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MedPipe
4
+ VERSION = "0.1.0.4"
5
+ end
data/lib/med_pipe.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "med_pipe/version"
4
+ require "med_pipe/engine"
5
+
6
+ require "med_pipe/batch_id_fetcher"
7
+ require "med_pipe/batch_reader"
8
+ require "med_pipe/pipeline_plan_consumer"
9
+ require "med_pipe/pipeline_plan_producer"
10
+ require "med_pipe/pipeline_runner_base"
11
+ require "med_pipe/pipeline"
12
+
13
+ require "med_pipe/pipeline_task"
14
+ require "med_pipe/pipeline_task/counter"
15
+ require "med_pipe/pipeline_task/tsv_generator"
16
+ require "med_pipe/pipeline_task/plan_updater"
17
+
18
+ module MedPipe
19
+ # Your code goes here...
20
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ # desc "Explaining what the task does"
4
+ # task :med_pipe do
5
+ # # Task goes here
6
+ # end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ FactoryBot.define do
4
+ factory :med_pipe_pipeline_group, class: "MedPipe::PipelineGroup" do
5
+ parallel_limit { 1 }
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ FactoryBot.define do
4
+ factory :med_pipe_pipeline_plan, class: "MedPipe::PipelinePlan" do
5
+ name { "dummy" }
6
+ output_unit { :all }
7
+ status { :waiting }
8
+ association :pipeline_group, factory: :med_pipe_pipeline_group
9
+ end
10
+ end
metadata ADDED
@@ -0,0 +1,107 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: med_pipe
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.4
5
+ platform: ruby
6
+ authors:
7
+ - mpg-taichi-sato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 6.1.7
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '8.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 6.1.7
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '8.0'
33
+ description: "# MedPipe <sup>BETA</sup>\n100万 ~ 数10億程度のデータを処理するための仕組みを提供する Rails エンジンです。\n\n##
34
+ Concept\n### MedPipe::Pipeline\napply で後述する PipelineTask を登録し、run で順番に実行します。\n\n###
35
+ MedPipe::PipelineTask\nPipeline に登録する処理の単位です。 \nDB からの読み込みや、S3 へのアップロード等やることを分割してタスク化します。
36
+ \ \n大量データを扱う際には Enumerable::Lazy を使うことで分割して処理をすることができます。 \ncall を実装する必要があります\n\n```.rb\n@param
37
+ context [Hash] Stores data during pipeline execution\n@param prev_result [Object]
38
+ The result of the previous task\ndef call(context, prev_result)\n yield 次のTaskに渡すデータ\nend\n```\n\n###
39
+ MedPipe::PipelinePlan\nPipeline の状態、オプション、結果を保存するためのモデルです。 \nTask で使うためのオプションを渡す方法は
40
+ PipelinePlan から取得するか、contextで伝搬するかの二択です。\n\n### MedPipe::PipelineGroup\n一つのジョブで実行する
41
+ Plan をまとめるためのモデルです。 \n実行中に parallel_limit を 0 にすることで中断することができます。\n\n## Usage\n\n1.
42
+ Reader, Uploader 等の PipelineTask を作成 [Samples](https://github.com/medpeer-dev/med_pipe/tree/main/spec/dummy/app/models/pipeline_task)\n2.
43
+ PipelineRunner を作成 [Sample](https://github.com/medpeer-dev/med_pipe/blob/main/spec/dummy/app/models/sample_pipeline_runner.rb)\n3.
44
+ Pipeline を並列実行するためのジョブを作成 [Sample](https://github.com/medpeer-dev/med_pipe/blob/main/spec/dummy/app/jobs/sample_execute_pipeline_job.rb)\n4.
45
+ PipelinePlan を登録するコードを記述\n5. 実行\n\n## Installation\nAdd this line to your application's
46
+ Gemfile:\n\n```ruby\ngem \"med_pipe\"\n```\n\n### migrationファイルの追加\n\n```shell\n$
47
+ rails med_pipe:install:migrations\n```\n\n## Contributing\nBug reports and pull
48
+ requests are welcome.\n\n## License\nThe gem is available as open source under the
49
+ terms of the [MIT License](https://opensource.org/licenses/MIT).\n"
50
+ email:
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - MIT-LICENSE
56
+ - README.md
57
+ - Rakefile
58
+ - app/models/med_pipe/application_record.rb
59
+ - app/models/med_pipe/pipeline_group.rb
60
+ - app/models/med_pipe/pipeline_plan.rb
61
+ - config/routes.rb
62
+ - db/migrate/20241118063336_create_med_pipe_pipeline_plans.rb
63
+ - db/migrate/20241122022123_create_med_pipe_pipeline_groups.rb
64
+ - lib/med_pipe.rb
65
+ - lib/med_pipe/batch_id_fetcher.rb
66
+ - lib/med_pipe/batch_reader.rb
67
+ - lib/med_pipe/engine.rb
68
+ - lib/med_pipe/pipeline.rb
69
+ - lib/med_pipe/pipeline_plan_consumer.rb
70
+ - lib/med_pipe/pipeline_plan_producer.rb
71
+ - lib/med_pipe/pipeline_runner_base.rb
72
+ - lib/med_pipe/pipeline_task.rb
73
+ - lib/med_pipe/pipeline_task/counter.rb
74
+ - lib/med_pipe/pipeline_task/plan_updater.rb
75
+ - lib/med_pipe/pipeline_task/tsv_generator.rb
76
+ - lib/med_pipe/version.rb
77
+ - lib/tasks/med_pipe_tasks.rake
78
+ - spec/factories/med_pipe_pipeline_groups.rb
79
+ - spec/factories/med_pipe_pipeline_plans.rb
80
+ homepage: https://github.com/medpeer-dev/med_pipe
81
+ licenses:
82
+ - MIT
83
+ metadata:
84
+ homepage_uri: https://github.com/medpeer-dev/med_pipe
85
+ source_code_uri: https://github.com/medpeer-dev/med_pipe
86
+ rubygems_mfa_required: 'true'
87
+ post_install_message:
88
+ rdoc_options: []
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: 3.0.0
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ requirements: []
102
+ rubygems_version: 3.5.22
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Provides a system for processing data ranging from 1 million to several billion
106
+ records
107
+ test_files: []