datacraft 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d74a3c5f9f45ac21db0385ffedc9f3a7bf4b6d30
4
- data.tar.gz: 11519d1e16e3adc961f6d961e3c417a22fee051d
3
+ metadata.gz: 2d9abea0dcb9fc8da790558bc9d848c5420e42e0
4
+ data.tar.gz: 114b10e2d87db5adada974f86b506a1ccd007e52
5
5
  SHA512:
6
- metadata.gz: e359f0949e8ba8bf623c3e005401c8327e69a54d1c87e84722ec42a75a078d69e8968451071d85969578202b85dcf2681979bb6d5a38abce346c7a78b2f0e8e8
7
- data.tar.gz: 5138f7d55179f9a35e54a4fd41f34850d4f78e48db119228eed9a848ceff8ee769359cf7822c980c2723ef8c52b6a35369cb0303f3fe812705cbbe48065d9319
6
+ metadata.gz: 1c812f178e2f5da84826fa70584ac66ca5fdccb0f92de6e505ba3c9253d74ecaa6ac93c25fb34eba8cf770d7418207c235de4a18d56d4b1192bf308c11c54f3e
7
+ data.tar.gz: 274b7e3e6022f75021e52272d9c08d2e980077444a5e1fadc9c42881628c7ba539510dece0e892880a973a8e05c68163424c00ea68c904e864958f9605a19296
@@ -1,6 +1,6 @@
1
1
  require 'datacraft/version'
2
2
  require 'datacraft/exceptions'
3
- require 'datacraft/registry'
3
+ require 'datacraft/definition'
4
4
  require 'datacraft/context'
5
5
  require 'datacraft/instruction'
6
6
  require 'datacraft/runner'
@@ -9,9 +9,15 @@ module Datacraft
9
9
  Datacraft.run instruction
10
10
  end
11
11
 
12
- desc 'check [INSTRUCTION_FILE]', 'evaluate the instruction without running it'
12
+ desc 'check [INSTRUCTION_FILE]',
13
+ 'evaluate the instruction without running it'
13
14
  def check(filename)
14
- puts Instruction.check filename
15
+ begin
16
+ Instruction.from_file filename
17
+ puts 'You are ready to go.'
18
+ rescue InvalidInstruction => e
19
+ puts e
20
+ end
15
21
  end
16
22
  end
17
23
  end
@@ -1,33 +1,38 @@
1
1
  module Datacraft
2
- class Context
3
- def initialize
4
- options[:parallel] = false
5
- options[:benchmark] = false
6
- options[:n_threads] = 8
7
- end
8
-
9
- def providers
10
- @providers ||= ProviderRegistry.new
11
- end
12
-
13
- def consumers
14
- @consumers ||= ConsumerRegistry.new
2
+ # define the context of the instruction
3
+ module Context
4
+ def sources
5
+ @sources ||= []
15
6
  end
16
7
 
17
8
  def tweakers
18
- @tweakers ||= TweakerRegistry.new
9
+ @tweakers ||= []
19
10
  end
20
11
 
21
12
  def pre_hooks
22
- @pre_hooks ||= HookRegistry.new
13
+ @pre_hooks ||= []
23
14
  end
24
15
 
25
16
  def post_hooks
26
- @post_hooks ||= HookRegistry.new
17
+ @post_hooks ||= []
18
+ end
19
+
20
+ def consumers
21
+ @consumers ||= []
27
22
  end
28
23
 
29
24
  def options
30
25
  @options ||= {}
31
26
  end
27
+
28
+ def validate
29
+ fail InvalidInstruction, "Please define data source with keyword 'from'." unless sources.size > 0
30
+ fail InvalidInstruction, "Please define data consumer with keyword 'to'." unless consumers.size > 0
31
+ sources.each(&:validate)
32
+ consumers.each(&:validate)
33
+ tweakers.each(&:validate)
34
+ pre_hooks.each(&:validate)
35
+ post_hooks.each(&:validate)
36
+ end
32
37
  end
33
38
  end
@@ -0,0 +1,108 @@
1
+ require 'forwardable'
2
+
3
+ module Datacraft
4
+
5
+ # for normalizing blocks
6
+ class CompatiableProc < Proc
7
+ # alias_method :run, :call
8
+ # alias_method :process, :call
9
+ alias_method :tweak, :call
10
+ end
11
+
12
+ # method management
13
+ module MethodDef
14
+ extend Forwardable
15
+ attr_reader :mandatory, :optional
16
+
17
+ def def_mandatory(*methods)
18
+ @mandatory = methods
19
+ methods.each do |method|
20
+ def_delegator :instance, method
21
+ end
22
+ end
23
+
24
+ def def_optional(*methods)
25
+ @optional = methods
26
+ methods.each do |method|
27
+ def_delegator :instance, method
28
+ end
29
+ end
30
+
31
+ def validate_methods(klass)
32
+ mandatory.each do |m|
33
+ fail InvalidInstruction, "Class <#{klass}> missing mandatory methods: #{m}." unless klass.method_defined?(m)
34
+ end
35
+ end
36
+ end
37
+
38
+ # definition of component
39
+ module Definition
40
+ extend Forwardable
41
+ def_delegator :instance, :respond_to?
42
+ def self.included(base)
43
+ base.extend(Forwardable)
44
+ base.extend(MethodDef)
45
+ end
46
+
47
+ def initialize(d)
48
+ @d = d
49
+ end
50
+
51
+ def instance
52
+ @instance ||= create_instance
53
+ end
54
+
55
+ private
56
+
57
+ def create_instance
58
+ if @d[:klass]
59
+ @d[:klass].new(*@d[:args])
60
+ elsif @d[:block]
61
+ CompatiableProc.new(&@d[:block])
62
+ end
63
+ end
64
+ end
65
+
66
+ # data source that provides data
67
+ class Source
68
+ include Definition
69
+ def_mandatory :each
70
+
71
+ def validate
72
+ fail InvalidSource, 'Source needs to be a class.' unless @d[:klass]
73
+ self.class.validate_methods @d[:klass]
74
+ end
75
+ end
76
+
77
+ # data consumer that consumes data
78
+ class Consumer
79
+ include Definition
80
+ def_mandatory :<<
81
+ def_optional :build, :close
82
+
83
+ def validate
84
+ fail InvalidConsumer, 'Consumer needs to be a class.' unless @d[:klass]
85
+ self.class.validate_methods @d[:klass]
86
+ end
87
+ end
88
+
89
+ # tweaking data row
90
+ class Tweaker
91
+ include Definition
92
+ def_mandatory :tweak
93
+
94
+ def validate
95
+ self.class.validate_methods @d[:klass] if @d[:klass]
96
+ end
97
+ end
98
+
99
+ # pre/post build hooks
100
+ class Hook
101
+ include Definition
102
+ def_mandatory :call
103
+
104
+ def validate
105
+ fail InvalidHook, 'Hook has to be a block.' unless @d[:block]
106
+ end
107
+ end
108
+ end
@@ -1,3 +1,7 @@
1
1
  module Datacraft
2
2
  class InvalidInstruction < StandardError; end
3
+ class InvalidSource < InvalidInstruction; end
4
+ class InvalidConsumer < InvalidInstruction; end
5
+ class InvalidTweaker < InvalidInstruction; end
6
+ class InvalidHook < InvalidInstruction; end
3
7
  end
@@ -1,52 +1,45 @@
1
1
  module Datacraft
2
2
  # Building Instruction
3
3
  class Instruction
4
+ include Context
4
5
  attr_reader :context
5
6
 
6
- def initialize(context = Context.new)
7
- @context = context
7
+ def initialize
8
+ options[:parallel] = false
9
+ options[:benchmark] = false
10
+ options[:n_threads] = 8
8
11
  end
9
12
 
10
13
  def pre_build(&block)
11
- @context.pre_hooks << { block: block }
14
+ pre_hooks << Hook.new(block: block)
12
15
  end
13
16
 
14
17
  def from(klass, *initialization_params)
15
- @context.providers << { klass: klass, args: initialization_params }
18
+ sources << Source.new(klass: klass, args: initialization_params)
16
19
  end
17
20
 
18
21
  def tweak(klass = nil, *initialization_params, &block)
19
- @context.tweakers << { klass: klass, args: initialization_params, block: block }
22
+ tweakers << Tweaker.new(klass: klass, args: initialization_params, block: block)
20
23
  end
21
24
 
22
25
  def to(klass, *initialization_params)
23
- @context.consumers << { klass: klass, args: initialization_params }
26
+ consumers << Consumer.new(klass: klass, args: initialization_params)
24
27
  end
25
28
 
26
29
  def post_build(&block)
27
- @context.post_hooks << { block: block }
30
+ post_hooks << Hook.new(block: block)
28
31
  end
29
32
 
30
33
  def set(key, value)
31
- @context.options[key.to_sym] = value
34
+ options[key.to_sym] = value
32
35
  end
33
36
 
34
37
  def self.from_file(filename)
35
38
  script_content = IO.read(filename)
36
39
  instruction = Instruction.new
37
40
  instruction.instance_eval(script_content)
41
+ instruction.validate
38
42
  instruction
39
43
  end
40
-
41
- def self.check(filename)
42
- begin
43
- script_content = IO.read(filename)
44
- instruction = Instruction.new
45
- instruction.instance_eval(script_content)
46
- 'Looks good.'
47
- rescue InvalidInstruction => e
48
- e.message
49
- end
50
- end
51
44
  end
52
45
  end
@@ -5,23 +5,33 @@ module Datacraft
5
5
  module Runner
6
6
  # run the instruction
7
7
  def run(instruction)
8
- @context = instruction.context
8
+ @inst = instruction
9
9
  measurements = []
10
- measurements << Benchmark.measure('pre build:') do
11
- @context.pre_hooks.each(&:call)
10
+
11
+ # run pre_build hooks
12
+ if @inst.respond_to? :pre_hooks
13
+ measurements << Benchmark.measure('pre build:') do
14
+ @inst.pre_hooks.each(&:call)
15
+ end
12
16
  end
17
+
18
+ # process the rows
13
19
  measurements << Benchmark.measure('process rows:') do
14
- @context.options[:parallel] ? pprocess_rows : process_rows
20
+ @inst.options[:parallel] ? pprocess_rows : process_rows
15
21
  end
16
22
 
23
+ # build
17
24
  measurements << Benchmark.measure('build:') do
18
- build @context.consumers
25
+ build @inst.consumers
19
26
  end
20
27
 
21
- measurements << Benchmark.measure('post build:') do
22
- @context.post_hooks.each(&:call)
28
+ # run post_build hooks
29
+ if @inst.respond_to? :post_hooks
30
+ measurements << Benchmark.measure('post build:') do
31
+ @inst.post_hooks.each(&:call)
32
+ end
23
33
  end
24
- report measurements if @context.options[:benchmark]
34
+ report measurements if @inst.options[:benchmark]
25
35
  end
26
36
 
27
37
  # output benchmark results
@@ -35,8 +45,8 @@ module Datacraft
35
45
 
36
46
  # process rows sequentially
37
47
  def process_rows
38
- @context.providers.each do |provider|
39
- provider.each do |row|
48
+ @inst.sources.each do |source|
49
+ source.each do |row|
40
50
  process row
41
51
  end
42
52
  end
@@ -44,21 +54,21 @@ module Datacraft
44
54
 
45
55
  # tweak & consume one row
46
56
  def process(row)
47
- @context.tweakers.each do |tweaker|
57
+ @inst.tweakers.each do |tweaker|
48
58
  row = tweaker.tweak row
49
59
  return nil unless row
50
60
  end
51
- @context.consumers.each do |consumer|
61
+ @inst.consumers.each do |consumer|
52
62
  consumer << row
53
63
  end
54
64
  end
55
65
 
56
66
  # process rows in parallel
57
67
  def pprocess_rows
58
- thread_number = [@context.providers.size,
59
- @context.options[:n_threads]].min
68
+ thread_number = [@inst.sources.size,
69
+ @inst.options[:n_threads]].min
60
70
  queue = Queue.new
61
- @context.providers.each { |p| queue << p }
71
+ @inst.sources.each { |p| queue << p }
62
72
  threads = thread_number.times.map do
63
73
  Thread.new do
64
74
  begin
@@ -67,10 +77,6 @@ module Datacraft
67
77
  end
68
78
  rescue ThreadError
69
79
  end
70
- # until queue.empty?
71
- # p = queue.pop(true)
72
- # p.each { |row| process row }
73
- # end
74
80
  end
75
81
  end
76
82
  threads.each(&:join)
@@ -1,3 +1,3 @@
1
1
  module Datacraft
2
- VERSION = '0.3.2'
2
+ VERSION = '0.4.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacraft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiaoxing Hu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-07-30 00:00:00.000000000 Z
11
+ date: 2015-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -119,9 +119,9 @@ files:
119
119
  - lib/datacraft.rb
120
120
  - lib/datacraft/cli.rb
121
121
  - lib/datacraft/context.rb
122
+ - lib/datacraft/definition.rb
122
123
  - lib/datacraft/exceptions.rb
123
124
  - lib/datacraft/instruction.rb
124
- - lib/datacraft/registry.rb
125
125
  - lib/datacraft/runner.rb
126
126
  - lib/datacraft/version.rb
127
127
  homepage: https://github.com/xiaoxinghu/datacraft
@@ -1,74 +0,0 @@
1
- require 'forwardable'
2
-
3
- module Datacraft
4
- # for normalizing blocks
5
- class CompatiableProc < Proc
6
- alias_method :run, :call
7
- alias_method :process, :call
8
- alias_method :tweak, :call
9
- end
10
-
11
- # common registry
12
- class Registry
13
- extend Forwardable
14
- def_delegators :instances, :each, :map, :size
15
- def initialize
16
- @items = []
17
- end
18
-
19
- attr_accessor :mandatory_methods
20
-
21
- def instances
22
- @instances ||= @items.map do |i|
23
- if i[:klass]
24
- i[:klass].new(*i[:args])
25
- elsif i[:block]
26
- CompatiableProc.new(&i[:block])
27
- end
28
- end
29
- end
30
-
31
- def <<(val)
32
- klass = val[:klass]
33
- block = val[:block]
34
- if klass
35
- fail InvalidInstruction, "#{klass.name} needs to implement methods: "\
36
- "#{mandatory_methods}" unless valid? klass
37
- elsif block
38
- else
39
- fail 'registry error'
40
- end
41
- @items << val
42
- end
43
-
44
- def valid?(klass)
45
- mandatory_methods.all? do |m|
46
- klass.method_defined? m
47
- end
48
- end
49
- end
50
-
51
- class ProviderRegistry < Registry
52
- def mandatory_methods
53
- [:each]
54
- end
55
- end
56
-
57
- class ConsumerRegistry < Registry
58
- def mandatory_methods
59
- [:<<]
60
- end
61
- end
62
-
63
- class TweakerRegistry < Registry
64
- def mandatory_methods
65
- [:tweak]
66
- end
67
- end
68
-
69
- class HookRegistry < Registry
70
- def mandatory_methods
71
- [:run]
72
- end
73
- end
74
- end