datacraft 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0d5b93cedf58b0affe9e22abc1f1597a3de90c4e
4
- data.tar.gz: d7592ee90f256ad37cd2c6b8b90a5b71cc397581
3
+ metadata.gz: 5f512f220800ff71938c3d2dfbe7c640dab90f13
4
+ data.tar.gz: aad2c68ec6f1663b074dff6c4981d4594588c560
5
5
  SHA512:
6
- metadata.gz: e0eb59fa1bb9d2cb546425393bd07f97c754c343a4d685eeb2e11b3c55061617ff1ec22626a16814bd5a0c0ce29e188205834af64f8a570dd85abf566d2a7116
7
- data.tar.gz: 2cb72b1a6852382c61985f620c8542744af97dab13df5091d443e810cac397af54a46525e86a44169381b00748533a2eaa3c2b168a00f1fe90dd09a60d6596b0
6
+ metadata.gz: f1f18984edf137b33982301aadb15c7a78a268b1cc50bde6ed3ad1939315b6eb9dc456cd23b268be13a443c5eb1e508c3fe3341b9d5ad0bb71ab7e0643a607bf
7
+ data.tar.gz: 9d172bf503e1576535550d60a764114fc33f900f63cf234ef73ce7419d60e37879e4f4a0fa42f9aac601b39e915fb505b09349e29e70e6b8ca639fc54d05d574
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
23
23
  spec.require_paths = ['lib']
24
24
 
25
25
  spec.add_dependency 'thor', '~> 0'
26
+ spec.add_dependency 'thread', '~> 0'
26
27
 
27
28
  spec.add_development_dependency 'bundler', '~> 1.10'
28
29
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -1,5 +1,11 @@
1
1
  module Datacraft
2
2
  class Context
3
+ def initialize
4
+ options[:parallel] = false
5
+ options[:benchmark] = false
6
+ options[:n_threads] = 8
7
+ end
8
+
3
9
  def providers
4
10
  @providers ||= ProviderRegistry.new
5
11
  end
@@ -11,7 +11,7 @@ module Datacraft
11
11
  # common registry
12
12
  class Registry
13
13
  extend Forwardable
14
- def_delegators :instances, :each, :map
14
+ def_delegators :instances, :each, :map, :size
15
15
  def initialize
16
16
  @items = []
17
17
  end
@@ -5,33 +5,23 @@ module Datacraft
5
5
  module Runner
6
6
  # run the instruction
7
7
  def run(instruction)
8
- context = instruction.context
8
+ @context = instruction.context
9
9
  measurements = []
10
10
  measurements << Benchmark.measure('pre build:') do
11
- context.pre_hooks.each(&:call)
11
+ @context.pre_hooks.each(&:call)
12
12
  end
13
13
  measurements << Benchmark.measure('process rows:') do
14
- if context.options[:parallel]
15
- pprocess_rows(
16
- context.providers,
17
- context.tweakers,
18
- context.consumers)
19
- else
20
- process_rows(
21
- context.providers,
22
- context.tweakers,
23
- context.consumers)
24
- end
14
+ @context.options[:parallel] ? pprocess_rows : process_rows
25
15
  end
26
16
 
27
17
  measurements << Benchmark.measure('build:') do
28
- build context.consumers
18
+ build @context.consumers
29
19
  end
30
20
 
31
21
  measurements << Benchmark.measure('post build:') do
32
- context.post_hooks.each(&:call)
22
+ @context.post_hooks.each(&:call)
33
23
  end
34
- report measurements if context.options[:benchmark]
24
+ report measurements if @context.options[:benchmark]
35
25
  end
36
26
 
37
27
  # output benchmark results
@@ -44,35 +34,37 @@ module Datacraft
44
34
  end
45
35
 
46
36
  # process rows sequentially
47
- def process_rows(providers, tweakers, consumers)
48
- providers.each do |provider|
37
+ def process_rows
38
+ @context.providers.each do |provider|
49
39
  provider.each do |row|
50
- tweakers.each do |tweaker|
51
- row = tweaker.tweak row
52
- break unless row
53
- end
54
- # nil means to dismiss the row
55
- next unless row
56
- consumers.each do |consumer|
57
- consumer << row
58
- end
40
+ process row
59
41
  end
60
42
  end
61
43
  end
62
44
 
45
+ # tweak & consume one row
46
+ def process(row)
47
+ @context.tweakers.each do |tweaker|
48
+ row = tweaker.tweak row
49
+ return nil unless row
50
+ end
51
+ @context.consumers.each do |consumer|
52
+ consumer << row
53
+ end
54
+ end
55
+
63
56
  # process rows in parallel
64
- def pprocess_rows(providers, tweakers, consumers)
65
- threads = providers.map do |provider|
66
- Thread.new(provider) do |p|
67
- p.each do |row|
68
- tweakers.each do |tweaker|
69
- row = tweaker.tweak row
70
- break unless row
71
- end
72
- next unless row
73
- consumers.each do |consumer|
74
- consumer << row
75
- end
57
+ def pprocess_rows
58
+ thread_number = [@context.providers.size,
59
+ @context.options[:n_threads]].min
60
+ queue = Queue.new
61
+ @context.providers.each { |p| queue << p }
62
+ threads = thread_number.times.map do |tn|
63
+ Thread.new do
64
+ until queue.empty?
65
+ puts ">> thread #{tn} working..."
66
+ p = queue.pop(true)
67
+ p.each { |row| process row }
76
68
  end
77
69
  end
78
70
  end
@@ -1,3 +1,3 @@
1
1
  module Datacraft
2
- VERSION = "0.2.0"
2
+ VERSION = '0.3.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacraft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiaoxing Hu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-07-27 00:00:00.000000000 Z
11
+ date: 2015-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: thread
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement