threaded_pipeline 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5a894c4ca00f14b3a16899f18115ece8958520a36e1cc8ea2ede67abf19a1ed9
4
+ data.tar.gz: e9aa1280acbeb6bf5bccb3812e315efe9912134b850a39c8048f599d001badab
5
+ SHA512:
6
+ metadata.gz: 6de0080d5b599d5aa40ef8d86649b0ad7af98c1962df936274e9d24b33105140f529c0333d041c6bc276ffc7175985c4e40bba170e200a0b05e4250e601e5928
7
+ data.tar.gz: e769f97578a2bf9a91dd761d21633e68eb0411a3af1daab5b4df1a52cd574e19b0799c3d47e435bc44ca2e6c84a14e16995bf6478152220aa743bcee6728de3e
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Kurt Werle
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'threaded_pipeline/version'
4
+
5
# Create a pipeline where each stage runs in its own thread. Each stage must
# accept a single argument (it is invoked as <tt>stage[element]</tt>) and its
# result is handed to the next stage. The results of the last stage are
# returned in input order, unless the pipeline was built with
# <tt>discard_results: true</tt>.
#
# = Example
#
#   threaded_pipeline = ThreadedPipeline.new
#   threaded_pipeline.stages << ->(url) { fetch_large_csv(url) }
#   threaded_pipeline.stages << ->(local_file) { process_local_file(local_file) }
#   results = threaded_pipeline.process([list, of, large, csv, urls])
#
# = Example
#
#   another_pipeline = ThreadedPipeline.new(discard_results: true)
#   another_pipeline.stages << ->(url) { api_query(url) }
#   another_pipeline.stages << ->(returned_data) { process_returned_data(returned_data) }
#   another_pipeline.stages << ->(processed_results) { record_results_in_database(processed_results) }
#   while url = web_crawl_urls
#     another_pipeline.feed(url)
#   end
#   another_pipeline.finish
#
class ThreadedPipeline
  # Ordered list of stages. Each stage processes the result of the previous one.
  #
  #   my_threaded_pipeline.stages << ->(arg) { process(arg) }
  #
  # The list is snapshotted when a run starts, so add every stage before
  # calling #process or #feed.
  attr_accessor :stages

  # true while a run is in progress (between the first #feed / #process call
  # and the end of #finish), false otherwise.
  attr_reader :started

  # @param discard_results [Boolean] when true, the output of the last stage
  #   is not collected and #process / #finish return nil.
  def initialize(discard_results: false)
    @stages = []
    @started = false
    @discard_results = discard_results
  end

  # Run every element of +enumerable+ through the pipeline. Work on the first
  # element begins as soon as it is queued.
  #
  # @param enumerable [#each] the work items
  # @return [Array, nil] results of the last stage (nil when discarding)
  # @raise [RuntimeError] if a run is already in progress
  def process(enumerable)
    initialize_run
    enumerable.each { |element| enqueue(element) }
    finish
  end

  # Process the enumerable list without using threads.
  # Maybe you have a bug you want to work on without threading. Or you have a
  # benchmark you want to run.
  #
  # @param enumerable [#map] the work items
  # @return [Array, nil] results of the last stage (nil when discarding)
  # @raise [RuntimeError] if a run is already in progress
  def process_unthreaded(enumerable)
    initialize_run(true) # true: do not spawn worker threads for this run
    begin
      @results = enumerable.map do |element|
        stages.reduce(element) { |value, stage| stage[value] }
      end
    rescue StandardError
      # Leave the pipeline reusable when a stage blows up.
      reset_run
      raise
    end
    finish
  end

  # Add another element to the list of work to be processed. Work will start
  # on the first element immediately (only feed once you have all your stages
  # added). You could use #process if you already have the full list.
  # This method is not thread safe (wrap access in a mutex if feeding from
  # multiple threads).
  #
  # @param element [Object] a single work item
  def feed(element)
    initialize_run unless @started
    enqueue(element)
  end

  # Wait for all the threads to finish and return the results.
  #
  # If a stage raised, Thread#join re-raises that exception here; the
  # pipeline is reset either way so it can be used again.
  #
  # @return results of last stage (unless discard_results was set to true)
  # @raise [RuntimeError] if the pipeline was never started
  def finish
    raise "You never started pipeline #{inspect}" unless @started

    begin
      # Tell the first worker there is no more input; each worker forwards
      # the marker to the next one when it exits.
      @queues.first << @finish_object unless @queues.empty?
      @threads.each(&:join)
    ensure
      reset_run
    end
    @results unless @discard_results
  end

  private

  # Mark the run as started and, unless +without_threads+ is true, create one
  # input queue and one worker thread per stage.
  def initialize_run(without_threads = false)
    raise "Already started pipeline #{inspect}" if @started

    @started = true
    @results = []
    # Created before any thread exists so every worker sees the same marker.
    @finish_object = Object.new
    @queues = []
    @threads = []
    return if without_threads

    run_stages = stages.dup # later changes to #stages must not affect this run
    # Queues are matched to stages by position, so the same callable may
    # appear in the pipeline more than once.
    @queues = run_stages.map { Queue.new }
    @threads = run_stages.each_with_index.map do |stage, index|
      start_worker(stage, @queues[index], @queues[index + 1])
    end
  end

  # Spawn the thread for one stage. +inbox+ is the stage's input queue and
  # +outbox+ is the next stage's input queue (nil for the last stage).
  def start_worker(stage, inbox, outbox)
    # Captured as locals so the worker never depends on instance state that
    # #finish resets.
    sentinel = @finish_object
    results = @results
    discard = @discard_results
    Thread.new do
      begin
        # Identity comparison: an element with an unusual == cannot be
        # mistaken for the end-of-input marker.
        until (element = inbox.pop).equal?(sentinel)
          # Stages are invoked with [] so lambdas, procs and methods all work.
          result = stage[element]
          if outbox
            outbox << result
          elsif !discard
            # Only the last stage's thread appends to the results.
            results << result
          end
        end
      ensure
        # Always pass the marker on, even when the stage raised, so the
        # downstream workers terminate instead of blocking forever.
        outbox << sentinel if outbox
      end
    end
  end

  # Hand one element to the first stage. With no worker queues (a pipeline
  # without stages) the element passes straight through to the results.
  def enqueue(element)
    if @queues.empty?
      @results << element unless @discard_results
    else
      @queues.first << element
    end
  end

  # Return to the idle state. @results is kept so #finish can return it.
  def reset_run
    @started = false
    @queues = nil
    @threads = []
    @finish_object = nil
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ class ThreadedPipeline # reopened here only to define the gem's version constant
4
+ VERSION = '0.0.1' # release number of this gem; the gem metadata lists the same 0.0.1
5
+ end
metadata ADDED
@@ -0,0 +1,191 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: threaded_pipeline
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Kurt Werle
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-01-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: byebug
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: guard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '5.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '5.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: minitest-color
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rake
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '10.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '10.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rubocop
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: yard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: If you are eg. downloading a large file, processing that file, then writing
154
+ the results to a database, ThreadedPipeline may be for you. Download in one thread,
155
+ process in another, and write in a third, etc.
156
+ email:
157
+ - kurt@CircleW.org
158
+ executables: []
159
+ extensions: []
160
+ extra_rdoc_files: []
161
+ files:
162
+ - LICENSE.txt
163
+ - lib/threaded_pipeline.rb
164
+ - lib/threaded_pipeline/version.rb
165
+ homepage: https://github.com/kwerle/threaded_pipeline
166
+ licenses:
167
+ - MIT
168
+ metadata:
169
+ homepage_uri: https://github.com/kwerle/threaded_pipeline
170
+ source_code_uri: https://github.com/kwerle/threaded_pipeline
171
+ changelog_uri: https://github.com/kwerle/threaded_pipeline
172
+ post_install_message:
173
+ rdoc_options: []
174
+ require_paths:
175
+ - lib
176
+ required_ruby_version: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ required_rubygems_version: !ruby/object:Gem::Requirement
182
+ requirements:
183
+ - - ">="
184
+ - !ruby/object:Gem::Version
185
+ version: '0'
186
+ requirements: []
187
+ rubygems_version: 3.0.1
188
+ signing_key:
189
+ specification_version: 4
190
+ summary: Run several stages of a pipeline in threads.
191
+ test_files: []