datacraft 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d74a3c5f9f45ac21db0385ffedc9f3a7bf4b6d30
4
- data.tar.gz: 11519d1e16e3adc961f6d961e3c417a22fee051d
3
+ metadata.gz: 2d9abea0dcb9fc8da790558bc9d848c5420e42e0
4
+ data.tar.gz: 114b10e2d87db5adada974f86b506a1ccd007e52
5
5
  SHA512:
6
- metadata.gz: e359f0949e8ba8bf623c3e005401c8327e69a54d1c87e84722ec42a75a078d69e8968451071d85969578202b85dcf2681979bb6d5a38abce346c7a78b2f0e8e8
7
- data.tar.gz: 5138f7d55179f9a35e54a4fd41f34850d4f78e48db119228eed9a848ceff8ee769359cf7822c980c2723ef8c52b6a35369cb0303f3fe812705cbbe48065d9319
6
+ metadata.gz: 1c812f178e2f5da84826fa70584ac66ca5fdccb0f92de6e505ba3c9253d74ecaa6ac93c25fb34eba8cf770d7418207c235de4a18d56d4b1192bf308c11c54f3e
7
+ data.tar.gz: 274b7e3e6022f75021e52272d9c08d2e980077444a5e1fadc9c42881628c7ba539510dece0e892880a973a8e05c68163424c00ea68c904e864958f9605a19296
@@ -1,6 +1,6 @@
1
1
  require 'datacraft/version'
2
2
  require 'datacraft/exceptions'
3
- require 'datacraft/registry'
3
+ require 'datacraft/definition'
4
4
  require 'datacraft/context'
5
5
  require 'datacraft/instruction'
6
6
  require 'datacraft/runner'
@@ -9,9 +9,15 @@ module Datacraft
9
9
  Datacraft.run instruction
10
10
  end
11
11
 
12
- desc 'check [INSTRUCTION_FILE]', 'evaluate the instruction without running it'
12
+ desc 'check [INSTRUCTION_FILE]',
13
+ 'evaluate the instruction without running it'
13
14
  def check(filename)
14
- puts Instruction.check filename
15
+ begin
16
+ Instruction.from_file filename
17
+ puts 'You are ready to go.'
18
+ rescue InvalidInstruction => e
19
+ puts e
20
+ end
15
21
  end
16
22
  end
17
23
  end
@@ -1,33 +1,38 @@
1
1
  module Datacraft
2
- class Context
3
- def initialize
4
- options[:parallel] = false
5
- options[:benchmark] = false
6
- options[:n_threads] = 8
7
- end
8
-
9
- def providers
10
- @providers ||= ProviderRegistry.new
11
- end
12
-
13
- def consumers
14
- @consumers ||= ConsumerRegistry.new
2
+ # define the context of the instruction
3
+ module Context
4
+ def sources
5
+ @sources ||= []
15
6
  end
16
7
 
17
8
  def tweakers
18
- @tweakers ||= TweakerRegistry.new
9
+ @tweakers ||= []
19
10
  end
20
11
 
21
12
  def pre_hooks
22
- @pre_hooks ||= HookRegistry.new
13
+ @pre_hooks ||= []
23
14
  end
24
15
 
25
16
  def post_hooks
26
- @post_hooks ||= HookRegistry.new
17
+ @post_hooks ||= []
18
+ end
19
+
20
+ def consumers
21
+ @consumers ||= []
27
22
  end
28
23
 
29
24
  def options
30
25
  @options ||= {}
31
26
  end
27
+
28
+ def validate
29
+ fail InvalidInstruction, "Please define data source with keyword 'from'." unless sources.size > 0
30
+ fail InvalidInstruction, "Please define data consumer with keyword 'to'." unless consumers.size > 0
31
+ sources.each(&:validate)
32
+ consumers.each(&:validate)
33
+ tweakers.each(&:validate)
34
+ pre_hooks.each(&:validate)
35
+ post_hooks.each(&:validate)
36
+ end
32
37
  end
33
38
  end
@@ -0,0 +1,108 @@
1
+ require 'forwardable'
2
+
3
+ module Datacraft
4
+
5
+ # for normalizing blocks
6
+ class CompatiableProc < Proc
7
+ # alias_method :run, :call
8
+ # alias_method :process, :call
9
+ alias_method :tweak, :call
10
+ end
11
+
12
+ # method management
13
+ module MethodDef
14
+ extend Forwardable
15
+ attr_reader :mandatory, :optional
16
+
17
+ def def_mandatory(*methods)
18
+ @mandatory = methods
19
+ methods.each do |method|
20
+ def_delegator :instance, method
21
+ end
22
+ end
23
+
24
+ def def_optional(*methods)
25
+ @optional = methods
26
+ methods.each do |method|
27
+ def_delegator :instance, method
28
+ end
29
+ end
30
+
31
+ def validate_methods(klass)
32
+ mandatory.each do |m|
33
+ fail InvalidInstruction, "Class <#{klass}> missing mandatory methods: #{m}." unless klass.method_defined?(m)
34
+ end
35
+ end
36
+ end
37
+
38
+ # definition of component
39
+ module Definition
40
+ extend Forwardable
41
+ def_delegator :instance, :respond_to?
42
+ def self.included(base)
43
+ base.extend(Forwardable)
44
+ base.extend(MethodDef)
45
+ end
46
+
47
+ def initialize(d)
48
+ @d = d
49
+ end
50
+
51
+ def instance
52
+ @instance ||= create_instance
53
+ end
54
+
55
+ private
56
+
57
+ def create_instance
58
+ if @d[:klass]
59
+ @d[:klass].new(*@d[:args])
60
+ elsif @d[:block]
61
+ CompatiableProc.new(&@d[:block])
62
+ end
63
+ end
64
+ end
65
+
66
+ # data source that provide data
67
+ class Source
68
+ include Definition
69
+ def_mandatory :each
70
+
71
+ def validate
72
+ fail InvalidSource, 'Source needs to be a class.' unless @d[:klass]
73
+ self.class.validate_methods @d[:klass]
74
+ end
75
+ end
76
+
77
+ # data consumer that consume data
78
+ class Consumer
79
+ include Definition
80
+ def_mandatory :<<
81
+ def_optional :build, :close
82
+
83
+ def validate
84
+ fail InvalidConsumer, 'Consumer needs to be a class.' unless @d[:klass]
85
+ self.class.validate_methods @d[:klass]
86
+ end
87
+ end
88
+
89
+ # tweaking data row
90
+ class Tweaker
91
+ include Definition
92
+ def_mandatory :tweak
93
+
94
+ def validate
95
+ self.class.validate_methods @d[:klass] if @d[:klass]
96
+ end
97
+ end
98
+
99
+ # pre/post build hooks
100
+ class Hook
101
+ include Definition
102
+ def_mandatory :call
103
+
104
+ def validate
105
+ fail InvalidHook, 'Hook has to be a block.' unless @d[:block]
106
+ end
107
+ end
108
+ end
@@ -1,3 +1,7 @@
1
1
  module Datacraft
2
2
  class InvalidInstruction < StandardError; end
3
+ class InvalidSource < InvalidInstruction; end
4
+ class InvalidConsumer < InvalidInstruction; end
5
+ class InvalidTweaker < InvalidInstruction; end
6
+ class InvalidHook < InvalidInstruction; end
3
7
  end
@@ -1,52 +1,45 @@
1
1
  module Datacraft
2
2
  # Building Instruction
3
3
  class Instruction
4
+ include Context
4
5
  attr_reader :context
5
6
 
6
- def initialize(context = Context.new)
7
- @context = context
7
+ def initialize
8
+ options[:parallel] = false
9
+ options[:benchmark] = false
10
+ options[:n_threads] = 8
8
11
  end
9
12
 
10
13
  def pre_build(&block)
11
- @context.pre_hooks << { block: block }
14
+ pre_hooks << Hook.new(block: block)
12
15
  end
13
16
 
14
17
  def from(klass, *initialization_params)
15
- @context.providers << { klass: klass, args: initialization_params }
18
+ sources << Source.new(klass: klass, args: initialization_params)
16
19
  end
17
20
 
18
21
  def tweak(klass = nil, *initialization_params, &block)
19
- @context.tweakers << { klass: klass, args: initialization_params, block: block }
22
+ tweakers << Tweaker.new(klass: klass, args: initialization_params, block: block)
20
23
  end
21
24
 
22
25
  def to(klass, *initialization_params)
23
- @context.consumers << { klass: klass, args: initialization_params }
26
+ consumers << Consumer.new(klass: klass, args: initialization_params)
24
27
  end
25
28
 
26
29
  def post_build(&block)
27
- @context.post_hooks << { block: block }
30
+ post_hooks << Hook.new(block: block)
28
31
  end
29
32
 
30
33
  def set(key, value)
31
- @context.options[key.to_sym] = value
34
+ options[key.to_sym] = value
32
35
  end
33
36
 
34
37
  def self.from_file(filename)
35
38
  script_content = IO.read(filename)
36
39
  instruction = Instruction.new
37
40
  instruction.instance_eval(script_content)
41
+ instruction.validate
38
42
  instruction
39
43
  end
40
-
41
- def self.check(filename)
42
- begin
43
- script_content = IO.read(filename)
44
- instruction = Instruction.new
45
- instruction.instance_eval(script_content)
46
- 'Looks good.'
47
- rescue InvalidInstruction => e
48
- e.message
49
- end
50
- end
51
44
  end
52
45
  end
@@ -5,23 +5,33 @@ module Datacraft
5
5
  module Runner
6
6
  # run the instruction
7
7
  def run(instruction)
8
- @context = instruction.context
8
+ @inst = instruction
9
9
  measurements = []
10
- measurements << Benchmark.measure('pre build:') do
11
- @context.pre_hooks.each(&:call)
10
+
11
+ # run pre_build hooks
12
+ if @inst.respond_to? :pre_hooks
13
+ measurements << Benchmark.measure('pre build:') do
14
+ @inst.pre_hooks.each(&:call)
15
+ end
12
16
  end
17
+
18
+ # process the rows
13
19
  measurements << Benchmark.measure('process rows:') do
14
- @context.options[:parallel] ? pprocess_rows : process_rows
20
+ @inst.options[:parallel] ? pprocess_rows : process_rows
15
21
  end
16
22
 
23
+ # build
17
24
  measurements << Benchmark.measure('build:') do
18
- build @context.consumers
25
+ build @inst.consumers
19
26
  end
20
27
 
21
- measurements << Benchmark.measure('post build:') do
22
- @context.post_hooks.each(&:call)
28
+ # run post_build hooks
29
+ if @inst.respond_to? :post_hooks
30
+ measurements << Benchmark.measure('post build:') do
31
+ @inst.post_hooks.each(&:call)
32
+ end
23
33
  end
24
- report measurements if @context.options[:benchmark]
34
+ report measurements if @inst.options[:benchmark]
25
35
  end
26
36
 
27
37
  # output benchmark results
@@ -35,8 +45,8 @@ module Datacraft
35
45
 
36
46
  # process rows sequentially
37
47
  def process_rows
38
- @context.providers.each do |provider|
39
- provider.each do |row|
48
+ @inst.sources.each do |source|
49
+ source.each do |row|
40
50
  process row
41
51
  end
42
52
  end
@@ -44,21 +54,21 @@ module Datacraft
44
54
 
45
55
  # tweak & consume one row
46
56
  def process(row)
47
- @context.tweakers.each do |tweaker|
57
+ @inst.tweakers.each do |tweaker|
48
58
  row = tweaker.tweak row
49
59
  return nil unless row
50
60
  end
51
- @context.consumers.each do |consumer|
61
+ @inst.consumers.each do |consumer|
52
62
  consumer << row
53
63
  end
54
64
  end
55
65
 
56
66
  # process rows in parallel
57
67
  def pprocess_rows
58
- thread_number = [@context.providers.size,
59
- @context.options[:n_threads]].min
68
+ thread_number = [@inst.sources.size,
69
+ @inst.options[:n_threads]].min
60
70
  queue = Queue.new
61
- @context.providers.each { |p| queue << p }
71
+ @inst.sources.each { |p| queue << p }
62
72
  threads = thread_number.times.map do
63
73
  Thread.new do
64
74
  begin
@@ -67,10 +77,6 @@ module Datacraft
67
77
  end
68
78
  rescue ThreadError
69
79
  end
70
- # until queue.empty?
71
- # p = queue.pop(true)
72
- # p.each { |row| process row }
73
- # end
74
80
  end
75
81
  end
76
82
  threads.each(&:join)
@@ -1,3 +1,3 @@
1
1
  module Datacraft
2
- VERSION = '0.3.2'
2
+ VERSION = '0.4.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datacraft
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xiaoxing Hu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-07-30 00:00:00.000000000 Z
11
+ date: 2015-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -119,9 +119,9 @@ files:
119
119
  - lib/datacraft.rb
120
120
  - lib/datacraft/cli.rb
121
121
  - lib/datacraft/context.rb
122
+ - lib/datacraft/definition.rb
122
123
  - lib/datacraft/exceptions.rb
123
124
  - lib/datacraft/instruction.rb
124
- - lib/datacraft/registry.rb
125
125
  - lib/datacraft/runner.rb
126
126
  - lib/datacraft/version.rb
127
127
  homepage: https://github.com/xiaoxinghu/datacraft
@@ -1,74 +0,0 @@
1
- require 'forwardable'
2
-
3
- module Datacraft
4
- # for normalizing blocks
5
- class CompatiableProc < Proc
6
- alias_method :run, :call
7
- alias_method :process, :call
8
- alias_method :tweak, :call
9
- end
10
-
11
- # common registry
12
- class Registry
13
- extend Forwardable
14
- def_delegators :instances, :each, :map, :size
15
- def initialize
16
- @items = []
17
- end
18
-
19
- attr_accessor :mandatory_methods
20
-
21
- def instances
22
- @instances ||= @items.map do |i|
23
- if i[:klass]
24
- i[:klass].new(*i[:args])
25
- elsif i[:block]
26
- CompatiableProc.new(&i[:block])
27
- end
28
- end
29
- end
30
-
31
- def <<(val)
32
- klass = val[:klass]
33
- block = val[:block]
34
- if klass
35
- fail InvalidInstruction, "#{klass.name} needs to implement methods: "\
36
- "#{mandatory_methods}" unless valid? klass
37
- elsif block
38
- else
39
- fail 'registry error'
40
- end
41
- @items << val
42
- end
43
-
44
- def valid?(klass)
45
- mandatory_methods.all? do |m|
46
- klass.method_defined? m
47
- end
48
- end
49
- end
50
-
51
- class ProviderRegistry < Registry
52
- def mandatory_methods
53
- [:each]
54
- end
55
- end
56
-
57
- class ConsumerRegistry < Registry
58
- def mandatory_methods
59
- [:<<]
60
- end
61
- end
62
-
63
- class TweakerRegistry < Registry
64
- def mandatory_methods
65
- [:tweak]
66
- end
67
- end
68
-
69
- class HookRegistry < Registry
70
- def mandatory_methods
71
- [:run]
72
- end
73
- end
74
- end