thimble 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +126 -0
- data/lib/thimble/version.rb +6 -0
- data/lib/thimble.rb +7 -1
- metadata +42 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9d5dba5ce3a256f520a9c75924f886fe3d94fd5e64369528570ef57d7b2e226a
|
|
4
|
+
data.tar.gz: d99df41c042a2360e4f8c36f8f9b00d4923174abc71b78ce6c35d06fa966cfff
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a9d708451d9b0fe90a133401f4f999930067abd4af81856ad039851f8d1c0bfc44be8da8c99fcfc19eb121927490b0b1283512c8be2ff445c63fcdbd6d80f64d
|
|
7
|
+
data.tar.gz: e8026d10b4f8d7265c282d59acce8c5e8186aff83b76b75f93f54ad7cceba2f24d428941629d14a3210b467381b531ac31c8b82c2a3ee2f9c6f237ae422ceb15
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 Andrew Kovanda
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Thimble
|
|
2
|
+
Thimble is a Ruby gem for parallelism and concurrency. It lets you choose threads (good for IO) or processes (good for CPU) and build pipelines using stages backed by a thread-safe queue.
|
|
3
|
+
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
Add this line to your application's Gemfile:
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
gem 'thimble'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Or install it yourself as:
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
gem install thimble
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Supported Ruby and platforms
|
|
26
|
+
- Ruby >= 3.0
|
|
27
|
+
- MRI: threads are limited by the GVL for CPU-bound work. Use `worker_type: :fork` for CPU-bound pipelines.
|
|
28
|
+
- JRuby/TruffleRuby: threads can run in parallel; `:thread` often suffices.
|
|
29
|
+
- Windows: `fork` is not available. Use `worker_type: :thread`.
|
|
30
|
+
|
|
31
|
+
## Quick start
|
|
32
|
+
|
|
33
|
+
Example 1: parallel map using forked processes (CPU-bound)
|
|
34
|
+
```
|
|
35
|
+
require 'thimble'
|
|
36
|
+
|
|
37
|
+
manager = Thimble::Manager.new(max_workers: 5, batch_size: 5, queue_size: 10, worker_type: :fork)
|
|
38
|
+
thimble = Thimble::Thimble.new((1..100).to_a, manager)
|
|
39
|
+
results = thimble.map { |x| x * 1000 }
|
|
40
|
+
# results is a Thimble::ThimbleQueue; consume it as needed
|
|
41
|
+
p results.to_a
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Example 2: feed an intermediate queue from a threaded stage (IO-bound)
|
|
45
|
+
```
|
|
46
|
+
require 'thimble'
|
|
47
|
+
# We create a queue to store intermediate work
|
|
48
|
+
queue = Thimble::ThimbleQueue.new(3, 'stage 2')
|
|
49
|
+
# Our array of data
|
|
50
|
+
ary = (1..10).to_a
|
|
51
|
+
# A separate worker thread that will process the intermediate queue
|
|
52
|
+
thread = Thimble::Thimble.async do
|
|
53
|
+
queue.each { |x| puts "I did work on #{x}!"; sleep 1 }
|
|
54
|
+
end
|
|
55
|
+
# Our Thimble, plus its manager. Note we are using Thread in this example.
|
|
56
|
+
thim = Thimble::Thimble.new(ary, Thimble::Manager.new(batch_size: 1, worker_type: :thread))
|
|
57
|
+
# Push data to the ThimbleQueue in parallel
|
|
58
|
+
thim.map { |e| queue.push(e); sleep 0.1; puts "I pushed #{e} to the queue!" }
|
|
59
|
+
# The queue is closed (no more work can come in)
|
|
60
|
+
queue.close
|
|
61
|
+
# join the thread
|
|
62
|
+
thread.join
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Manager quick reference
|
|
66
|
+
```
|
|
67
|
+
m = Thimble::Manager.new(max_workers: 10, batch_size: 100, worker_type: :fork)
|
|
68
|
+
Thimble::Thimble.new(array, m)
|
|
69
|
+
```
|
|
70
|
+
- max_workers: how many workers can run at the same time
|
|
71
|
+
- batch_size: how many items to send to each worker (tune for workload)
|
|
72
|
+
- worker_type: :thread or :fork
|
|
73
|
+
|
|
74
|
+
The same Manager can be shared across Thimble instances to coordinate concurrency limits.
|
|
75
|
+
|
|
76
|
+
All thimbles require an explicit manager.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## ThimbleQueue
|
|
81
|
+
ThimbleQueue is the queue underpinning Thimble. Taking from it is destructive. It is thread-safe for multi-thread producers/consumers.
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
q = Thimble::ThimbleQueue.new(10, 'name')
|
|
85
|
+
q.push(1)
|
|
86
|
+
q.close
|
|
87
|
+
q.each { |x| puts x }
|
|
88
|
+
# => 1
|
|
89
|
+
```
|
|
90
|
+
If you do not close the queue, consumers will wait for more data. Creating a Thimble creates a "closed" input queue; transformations create a new queue.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Caveats and best practices
|
|
95
|
+
These are common pitfalls and how Thimble helps you avoid them:
|
|
96
|
+
|
|
97
|
+
- MRI GVL and workload choice
|
|
98
|
+
- Threads do not run CPU-bound Ruby in parallel on MRI. Use `worker_type: :fork` for CPU-bound tasks; `:thread` shines for IO-bound tasks.
|
|
99
|
+
- Platform differences
|
|
100
|
+
- `fork` is Unix-only. On Windows, use `:thread`.
|
|
101
|
+
- Forking and safety
|
|
102
|
+
- Thimble forks child workers before creating additional threads inside children. Children trap HUP and exit cleanly; the parent detaches workers to avoid zombies.
|
|
103
|
+
- Recreate external resources in children (DB connections, sockets, clients). Don’t share them across a fork.
|
|
104
|
+
- Memory and copy-on-write
|
|
105
|
+
- Each process has its own heap and GC. Batching reduces IPC overhead. Freeze large constants to improve CoW where possible.
|
|
106
|
+
- Backpressure
|
|
107
|
+
- ThimbleQueue is bounded; tune `queue_size` to avoid unbounded growth.
|
|
108
|
+
- Shutdown
|
|
109
|
+
- ThimbleQueue supports `close` and `close(true)` for immediate close. Avoid closing from multiple places.
|
|
110
|
+
- Error propagation
|
|
111
|
+
- Exceptions in workers are propagated back through results. For `:thread`, thread exceptions are surfaced; for `:fork`, exceptions are marshaled back and re-raised when consumed.
|
|
112
|
+
- Signal handling
|
|
113
|
+
- The main process receives signals; Thimble sends HUP to child workers when their results are consumed.
|
|
114
|
+
- Ordering
|
|
115
|
+
- Parallel stages may reorder results. If you need original order, attach sequence numbers to items and reorder at the end.
|
|
116
|
+
- Tuning
|
|
117
|
+
- Start with `max_workers` ~ number of cores for CPU-bound, higher for IO-bound. Adjust `batch_size` to minimize overhead without starving workers.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Development
|
|
122
|
+
- Run tests: `bundle exec rspec`
|
|
123
|
+
- Linting: consider adding RuboCop (`rubocop`)
|
|
124
|
+
- Releasing: bump `Thimble::VERSION` in `lib/thimble/version.rb`, tag and push, then build and push the gem
|
|
125
|
+
|
|
126
|
+
Contributions welcome! Please open issues and PRs.
|
data/lib/thimble.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative 'manager'
|
|
4
4
|
require_relative 'thimble_queue'
|
|
5
5
|
require_relative 'queue_item'
|
|
6
|
+
require_relative 'thimble/version'
|
|
6
7
|
require 'io/wait'
|
|
7
8
|
require 'ostruct'
|
|
8
9
|
|
|
@@ -100,10 +101,15 @@ module Thimble
|
|
|
100
101
|
if @manager.worker_type == :fork
|
|
101
102
|
if tuple.reader.ready?
|
|
102
103
|
piped_result = tuple.reader.read
|
|
104
|
+
tuple.reader.close unless tuple.reader.closed?
|
|
103
105
|
loadedResult = Marshal.load(piped_result)
|
|
104
106
|
loadedResult.each { |r| raise r if r.class <= Exception }
|
|
105
107
|
push_result(loadedResult)
|
|
106
|
-
|
|
108
|
+
begin
|
|
109
|
+
Process.kill('HUP', tuple.pid)
|
|
110
|
+
rescue Errno::ESRCH
|
|
111
|
+
# Process already exited; nothing to do
|
|
112
|
+
end
|
|
107
113
|
@manager.rem_worker(tuple)
|
|
108
114
|
end
|
|
109
115
|
elsif tuple.done == true
|
metadata
CHANGED
|
@@ -1,15 +1,42 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: thimble
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kovanda
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
12
|
-
dependencies:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: logger
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: ostruct
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
13
40
|
description: Thimble is a ruby gem for parallelism and concurrency. It allows you
|
|
14
41
|
to decide if you want to use separate processes, or if you want to use threads in
|
|
15
42
|
ruby. It allows you to create stages with a thread safe queue, and break apart large
|
|
@@ -19,15 +46,22 @@ executables: []
|
|
|
19
46
|
extensions: []
|
|
20
47
|
extra_rdoc_files: []
|
|
21
48
|
files:
|
|
49
|
+
- LICENSE
|
|
50
|
+
- README.md
|
|
22
51
|
- lib/manager.rb
|
|
23
52
|
- lib/queue_item.rb
|
|
24
53
|
- lib/thimble.rb
|
|
54
|
+
- lib/thimble/version.rb
|
|
25
55
|
- lib/thimble_queue.rb
|
|
26
56
|
homepage: https://github.com/akovanda/thimble
|
|
27
57
|
licenses:
|
|
28
58
|
- MIT
|
|
29
|
-
metadata:
|
|
30
|
-
|
|
59
|
+
metadata:
|
|
60
|
+
source_code_uri: https://github.com/akovanda/thimble
|
|
61
|
+
bug_tracker_uri: https://github.com/akovanda/thimble/issues
|
|
62
|
+
changelog_uri: https://github.com/akovanda/thimble/releases
|
|
63
|
+
documentation_uri: https://github.com/akovanda/thimble#readme
|
|
64
|
+
rubygems_mfa_required: 'true'
|
|
31
65
|
rdoc_options: []
|
|
32
66
|
require_paths:
|
|
33
67
|
- lib
|
|
@@ -35,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
35
69
|
requirements:
|
|
36
70
|
- - ">="
|
|
37
71
|
- !ruby/object:Gem::Version
|
|
38
|
-
version:
|
|
72
|
+
version: 3.0.0
|
|
39
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
74
|
requirements:
|
|
41
75
|
- - ">="
|
|
42
76
|
- !ruby/object:Gem::Version
|
|
43
77
|
version: '0'
|
|
44
78
|
requirements: []
|
|
45
|
-
rubygems_version: 3.
|
|
46
|
-
signing_key:
|
|
79
|
+
rubygems_version: 3.6.7
|
|
47
80
|
specification_version: 4
|
|
48
81
|
summary: Concurrency and Parallelism gem that uses blocks to move data
|
|
49
82
|
test_files: []
|