datacraft 0.2.0 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0d5b93cedf58b0affe9e22abc1f1597a3de90c4e
4
- data.tar.gz: d7592ee90f256ad37cd2c6b8b90a5b71cc397581
3
+ metadata.gz: 5f512f220800ff71938c3d2dfbe7c640dab90f13
4
+ data.tar.gz: aad2c68ec6f1663b074dff6c4981d4594588c560
5
5
  SHA512:
6
- metadata.gz: e0eb59fa1bb9d2cb546425393bd07f97c754c343a4d685eeb2e11b3c55061617ff1ec22626a16814bd5a0c0ce29e188205834af64f8a570dd85abf566d2a7116
7
- data.tar.gz: 2cb72b1a6852382c61985f620c8542744af97dab13df5091d443e810cac397af54a46525e86a44169381b00748533a2eaa3c2b168a00f1fe90dd09a60d6596b0
6
+ metadata.gz: f1f18984edf137b33982301aadb15c7a78a268b1cc50bde6ed3ad1939315b6eb9dc456cd23b268be13a443c5eb1e508c3fe3341b9d5ad0bb71ab7e0643a607bf
7
+ data.tar.gz: 9d172bf503e1576535550d60a764114fc33f900f63cf234ef73ce7419d60e37879e4f4a0fa42f9aac601b39e915fb505b09349e29e70e6b8ca639fc54d05d574
@@ -23,6 +23,7 @@ Gem::Specification.new do |spec|
23
23
  spec.require_paths = ['lib']
24
24
 
25
25
  spec.add_dependency 'thor', '~> 0'
26
+ spec.add_dependency 'thread', '~> 0'
26
27
 
27
28
  spec.add_development_dependency 'bundler', '~> 1.10'
28
29
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -1,5 +1,11 @@
1
1
  module Datacraft
2
2
  class Context
3
+ def initialize
4
+ options[:parallel] = false
5
+ options[:benchmark] = false
6
+ options[:n_threads] = 8
7
+ end
8
+
3
9
  def providers
4
10
  @providers ||= ProviderRegistry.new
5
11
  end
@@ -11,7 +11,7 @@ module Datacraft
11
11
  # common registry
12
12
  class Registry
13
13
  extend Forwardable
14
- def_delegators :instances, :each, :map
14
+ def_delegators :instances, :each, :map, :size
15
15
  def initialize
16
16
  @items = []
17
17
  end
@@ -5,33 +5,23 @@ module Datacraft
5
5
  module Runner
6
6
  # run the instruction
7
7
  def run(instruction)
8
- context = instruction.context
8
+ @context = instruction.context
9
9
  measurements = []
10
10
  measurements << Benchmark.measure('pre build:') do
11
- context.pre_hooks.each(&:call)
11
+ @context.pre_hooks.each(&:call)
12
12
  end
13
13
  measurements << Benchmark.measure('process rows:') do
14
- if context.options[:parallel]
15
- pprocess_rows(
16
- context.providers,
17
- context.tweakers,
18
- context.consumers)
19
- else
20
- process_rows(
21
- context.providers,
22
- context.tweakers,
23
- context.consumers)
24
- end
14
+ @context.options[:parallel] ? pprocess_rows : process_rows
25
15
  end
26
16
 
27
17
  measurements << Benchmark.measure('build:') do
28
- build context.consumers
18
+ build @context.consumers
29
19
  end
30
20
 
31
21
  measurements << Benchmark.measure('post build:') do
32
- context.post_hooks.each(&:call)
22
+ @context.post_hooks.each(&:call)
33
23
  end
34
- report measurements if context.options[:benchmark]
24
+ report measurements if @context.options[:benchmark]
35
25
  end
36
26
 
37
27
  # output benchmark results
@@ -44,35 +34,37 @@ module Datacraft
44
34
  end
45
35
 
46
36
  # process rows sequentially
47
- def process_rows(providers, tweakers, consumers)
48
- providers.each do |provider|
37
+ def process_rows
38
+ @context.providers.each do |provider|
49
39
  provider.each do |row|
50
- tweakers.each do |tweaker|
51
- row = tweaker.tweak row
52
- break unless row
53
- end
54
- # nil means to dismiss the row
55
- next unless row
56
- consumers.each do |consumer|
57
- consumer << row
58
- end
40
+ process row
59
41
  end
60
42
  end
61
43
  end
62
44
 
45
+ # tweak & consume one row
46
+ def process(row)
47
+ @context.tweakers.each do |tweaker|
48
+ row = tweaker.tweak row
49
+ return nil unless row
50
+ end
51
+ @context.consumers.each do |consumer|
52
+ consumer << row
53
+ end
54
+ end
55
+
63
56
  # process rows in parallel
64
- def pprocess_rows(providers, tweakers, consumers)
65
- threads = providers.map do |provider|
66
- Thread.new(provider) do |p|
67
- p.each do |row|
68
- tweakers.each do |tweaker|
69
- row = tweaker.tweak row
70
- break unless row
71
- end
72
- next unless row
73
- consumers.each do |consumer|
74
- consumer << row
75
- end
57
+ def pprocess_rows
58
+ thread_number = [@context.providers.size,
59
+ @context.options[:n_threads]].min
60
+ queue = Queue.new
61
+ @context.providers.each { |p| queue << p }
62
+ threads = thread_number.times.map do |tn|
63
+ Thread.new do
64
+ until queue.empty?
65
+ puts ">> thread #{tn} working..."
66
+ p = queue.pop(true)
67
+ p.each { |row| process row }
76
68
  end
77
69
  end
78
70
  end
@@ -1,3 +1,3 @@
1
1
  module Datacraft
2
- VERSION = "0.2.0"
2
+ VERSION = '0.3.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacraft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiaoxing Hu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-07-27 00:00:00.000000000 Z
11
+ date: 2015-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: thread
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement