datacraft 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datacraft.rb +1 -1
- data/lib/datacraft/cli.rb +8 -2
- data/lib/datacraft/context.rb +21 -16
- data/lib/datacraft/definition.rb +108 -0
- data/lib/datacraft/exceptions.rb +4 -0
- data/lib/datacraft/instruction.rb +12 -19
- data/lib/datacraft/runner.rb +25 -19
- data/lib/datacraft/version.rb +1 -1
- metadata +3 -3
- data/lib/datacraft/registry.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d9abea0dcb9fc8da790558bc9d848c5420e42e0
|
4
|
+
data.tar.gz: 114b10e2d87db5adada974f86b506a1ccd007e52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c812f178e2f5da84826fa70584ac66ca5fdccb0f92de6e505ba3c9253d74ecaa6ac93c25fb34eba8cf770d7418207c235de4a18d56d4b1192bf308c11c54f3e
|
7
|
+
data.tar.gz: 274b7e3e6022f75021e52272d9c08d2e980077444a5e1fadc9c42881628c7ba539510dece0e892880a973a8e05c68163424c00ea68c904e864958f9605a19296
|
data/lib/datacraft.rb
CHANGED
data/lib/datacraft/cli.rb
CHANGED
@@ -9,9 +9,15 @@ module Datacraft
|
|
9
9
|
Datacraft.run instruction
|
10
10
|
end
|
11
11
|
|
12
|
-
desc 'check [INSTRUCTION_FILE]',
|
12
|
+
desc 'check [INSTRUCTION_FILE]',
|
13
|
+
'evaluate the instruction without running it'
|
13
14
|
def check(filename)
|
14
|
-
|
15
|
+
begin
|
16
|
+
Instruction.from_file filename
|
17
|
+
puts 'You are ready to go.'
|
18
|
+
rescue InvalidInstruction => e
|
19
|
+
puts e
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
23
|
end
|
data/lib/datacraft/context.rb
CHANGED
@@ -1,33 +1,38 @@
|
|
1
1
|
module Datacraft
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
options[:n_threads] = 8
|
7
|
-
end
|
8
|
-
|
9
|
-
def providers
|
10
|
-
@providers ||= ProviderRegistry.new
|
11
|
-
end
|
12
|
-
|
13
|
-
def consumers
|
14
|
-
@consumers ||= ConsumerRegistry.new
|
2
|
+
# define the context of the instruction
|
3
|
+
module Context
|
4
|
+
def sources
|
5
|
+
@sources ||= []
|
15
6
|
end
|
16
7
|
|
17
8
|
def tweakers
|
18
|
-
@tweakers ||=
|
9
|
+
@tweakers ||= []
|
19
10
|
end
|
20
11
|
|
21
12
|
def pre_hooks
|
22
|
-
@pre_hooks ||=
|
13
|
+
@pre_hooks ||= []
|
23
14
|
end
|
24
15
|
|
25
16
|
def post_hooks
|
26
|
-
@post_hooks ||=
|
17
|
+
@post_hooks ||= []
|
18
|
+
end
|
19
|
+
|
20
|
+
def consumers
|
21
|
+
@consumers ||= []
|
27
22
|
end
|
28
23
|
|
29
24
|
def options
|
30
25
|
@options ||= {}
|
31
26
|
end
|
27
|
+
|
28
|
+
def validate
|
29
|
+
fail InvalidInstruction, "Please define data source with keyword 'from'." unless sources.size > 0
|
30
|
+
fail InvalidInstruction, "Please define data consumer with keyword 'to'." unless consumers.size > 0
|
31
|
+
sources.each(&:validate)
|
32
|
+
consumers.each(&:validate)
|
33
|
+
tweakers.each(&:validate)
|
34
|
+
pre_hooks.each(&:validate)
|
35
|
+
post_hooks.each(&:validate)
|
36
|
+
end
|
32
37
|
end
|
33
38
|
end
|
data/lib/datacraft/definition.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Datacraft
|
4
|
+
|
5
|
+
# for normalizing blocks
|
6
|
+
class CompatiableProc < Proc
|
7
|
+
# alias_method :run, :call
|
8
|
+
# alias_method :process, :call
|
9
|
+
alias_method :tweak, :call
|
10
|
+
end
|
11
|
+
|
12
|
+
# method management
|
13
|
+
module MethodDef
|
14
|
+
extend Forwardable
|
15
|
+
attr_reader :mandatory, :optional
|
16
|
+
|
17
|
+
def def_mandatory(*methods)
|
18
|
+
@mandatory = methods
|
19
|
+
methods.each do |method|
|
20
|
+
def_delegator :instance, method
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def def_optional(*methods)
|
25
|
+
@optional = methods
|
26
|
+
methods.each do |method|
|
27
|
+
def_delegator :instance, method
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def validate_methods(klass)
|
32
|
+
mandatory.each do |m|
|
33
|
+
fail InvalidInstruction, "Class <#{klass}> missing mandatory methods: #{m}." unless klass.method_defined?(m)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# definition of component
|
39
|
+
module Definition
|
40
|
+
extend Forwardable
|
41
|
+
def_delegator :instance, :respond_to?
|
42
|
+
def self.included(base)
|
43
|
+
base.extend(Forwardable)
|
44
|
+
base.extend(MethodDef)
|
45
|
+
end
|
46
|
+
|
47
|
+
def initialize(d)
|
48
|
+
@d = d
|
49
|
+
end
|
50
|
+
|
51
|
+
def instance
|
52
|
+
@instance ||= create_instance
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def create_instance
|
58
|
+
if @d[:klass]
|
59
|
+
@d[:klass].new(*@d[:args])
|
60
|
+
elsif @d[:block]
|
61
|
+
CompatiableProc.new(&@d[:block])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# data source that provide data
|
67
|
+
class Source
|
68
|
+
include Definition
|
69
|
+
def_mandatory :each
|
70
|
+
|
71
|
+
def validate
|
72
|
+
fail InvalidSource, 'Source needs to be a class.' unless @d[:klass]
|
73
|
+
self.class.validate_methods @d[:klass]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# data consumer that consume data
|
78
|
+
class Consumer
|
79
|
+
include Definition
|
80
|
+
def_mandatory :<<
|
81
|
+
def_optional :build, :close
|
82
|
+
|
83
|
+
def validate
|
84
|
+
fail InvalidConsumer, 'Consumer needs to be a class.' unless @d[:klass]
|
85
|
+
self.class.validate_methods @d[:klass]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# tweaking data row
|
90
|
+
class Tweaker
|
91
|
+
include Definition
|
92
|
+
def_mandatory :tweak
|
93
|
+
|
94
|
+
def validate
|
95
|
+
self.class.validate_methods @d[:klass] if @d[:klass]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# pre/post build hooks
|
100
|
+
class Hook
|
101
|
+
include Definition
|
102
|
+
def_mandatory :call
|
103
|
+
|
104
|
+
def validate
|
105
|
+
fail InvalidHook, 'Hook has to be a block.' unless @d[:block]
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/lib/datacraft/exceptions.rb
CHANGED
data/lib/datacraft/instruction.rb
CHANGED
@@ -1,52 +1,45 @@
|
|
1
1
|
module Datacraft
|
2
2
|
# Building Instruction
|
3
3
|
class Instruction
|
4
|
+
include Context
|
4
5
|
attr_reader :context
|
5
6
|
|
6
|
-
def initialize
|
7
|
-
|
7
|
+
def initialize
|
8
|
+
options[:parallel] = false
|
9
|
+
options[:benchmark] = false
|
10
|
+
options[:n_threads] = 8
|
8
11
|
end
|
9
12
|
|
10
13
|
def pre_build(&block)
|
11
|
-
|
14
|
+
pre_hooks << Hook.new(block: block)
|
12
15
|
end
|
13
16
|
|
14
17
|
def from(klass, *initialization_params)
|
15
|
-
|
18
|
+
sources << Source.new(klass: klass, args: initialization_params)
|
16
19
|
end
|
17
20
|
|
18
21
|
def tweak(klass = nil, *initialization_params, &block)
|
19
|
-
|
22
|
+
tweakers << Tweaker.new(klass: klass, args: initialization_params, block: block)
|
20
23
|
end
|
21
24
|
|
22
25
|
def to(klass, *initialization_params)
|
23
|
-
|
26
|
+
consumers << Consumer.new(klass: klass, args: initialization_params)
|
24
27
|
end
|
25
28
|
|
26
29
|
def post_build(&block)
|
27
|
-
|
30
|
+
post_hooks << Hook.new(block: block)
|
28
31
|
end
|
29
32
|
|
30
33
|
def set(key, value)
|
31
|
-
|
34
|
+
options[key.to_sym] = value
|
32
35
|
end
|
33
36
|
|
34
37
|
def self.from_file(filename)
|
35
38
|
script_content = IO.read(filename)
|
36
39
|
instruction = Instruction.new
|
37
40
|
instruction.instance_eval(script_content)
|
41
|
+
instruction.validate
|
38
42
|
instruction
|
39
43
|
end
|
40
|
-
|
41
|
-
def self.check(filename)
|
42
|
-
begin
|
43
|
-
script_content = IO.read(filename)
|
44
|
-
instruction = Instruction.new
|
45
|
-
instruction.instance_eval(script_content)
|
46
|
-
'Looks good.'
|
47
|
-
rescue InvalidInstruction => e
|
48
|
-
e.message
|
49
|
-
end
|
50
|
-
end
|
51
44
|
end
|
52
45
|
end
|
data/lib/datacraft/runner.rb
CHANGED
@@ -5,23 +5,33 @@ module Datacraft
|
|
5
5
|
module Runner
|
6
6
|
# run the instruction
|
7
7
|
def run(instruction)
|
8
|
-
@
|
8
|
+
@inst = instruction
|
9
9
|
measurements = []
|
10
|
-
|
11
|
-
|
10
|
+
|
11
|
+
# run pre_build hooks
|
12
|
+
if @inst.respond_to? :pre_hooks
|
13
|
+
measurements << Benchmark.measure('pre build:') do
|
14
|
+
@inst.pre_hooks.each(&:call)
|
15
|
+
end
|
12
16
|
end
|
17
|
+
|
18
|
+
# process the rows
|
13
19
|
measurements << Benchmark.measure('process rows:') do
|
14
|
-
@
|
20
|
+
@inst.options[:parallel] ? pprocess_rows : process_rows
|
15
21
|
end
|
16
22
|
|
23
|
+
# build
|
17
24
|
measurements << Benchmark.measure('build:') do
|
18
|
-
build @
|
25
|
+
build @inst.consumers
|
19
26
|
end
|
20
27
|
|
21
|
-
|
22
|
-
|
28
|
+
# run post_build hooks
|
29
|
+
if @inst.respond_to? :post_hooks
|
30
|
+
measurements << Benchmark.measure('post build:') do
|
31
|
+
@inst.post_hooks.each(&:call)
|
32
|
+
end
|
23
33
|
end
|
24
|
-
report measurements if @
|
34
|
+
report measurements if @inst.options[:benchmark]
|
25
35
|
end
|
26
36
|
|
27
37
|
# output benchmark results
|
@@ -35,8 +45,8 @@ module Datacraft
|
|
35
45
|
|
36
46
|
# process rows sequentially
|
37
47
|
def process_rows
|
38
|
-
@
|
39
|
-
|
48
|
+
@inst.sources.each do |source|
|
49
|
+
source.each do |row|
|
40
50
|
process row
|
41
51
|
end
|
42
52
|
end
|
@@ -44,21 +54,21 @@ module Datacraft
|
|
44
54
|
|
45
55
|
# tweak & consume one row
|
46
56
|
def process(row)
|
47
|
-
@
|
57
|
+
@inst.tweakers.each do |tweaker|
|
48
58
|
row = tweaker.tweak row
|
49
59
|
return nil unless row
|
50
60
|
end
|
51
|
-
@
|
61
|
+
@inst.consumers.each do |consumer|
|
52
62
|
consumer << row
|
53
63
|
end
|
54
64
|
end
|
55
65
|
|
56
66
|
# process rows in parallel
|
57
67
|
def pprocess_rows
|
58
|
-
thread_number = [@
|
59
|
-
@
|
68
|
+
thread_number = [@inst.sources.size,
|
69
|
+
@inst.options[:n_threads]].min
|
60
70
|
queue = Queue.new
|
61
|
-
@
|
71
|
+
@inst.sources.each { |p| queue << p }
|
62
72
|
threads = thread_number.times.map do
|
63
73
|
Thread.new do
|
64
74
|
begin
|
@@ -67,10 +77,6 @@ module Datacraft
|
|
67
77
|
end
|
68
78
|
rescue ThreadError
|
69
79
|
end
|
70
|
-
# until queue.empty?
|
71
|
-
# p = queue.pop(true)
|
72
|
-
# p.each { |row| process row }
|
73
|
-
# end
|
74
80
|
end
|
75
81
|
end
|
76
82
|
threads.each(&:join)
|
data/lib/datacraft/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacraft
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiaoxing Hu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -119,9 +119,9 @@ files:
|
|
119
119
|
- lib/datacraft.rb
|
120
120
|
- lib/datacraft/cli.rb
|
121
121
|
- lib/datacraft/context.rb
|
122
|
+
- lib/datacraft/definition.rb
|
122
123
|
- lib/datacraft/exceptions.rb
|
123
124
|
- lib/datacraft/instruction.rb
|
124
|
-
- lib/datacraft/registry.rb
|
125
125
|
- lib/datacraft/runner.rb
|
126
126
|
- lib/datacraft/version.rb
|
127
127
|
homepage: https://github.com/xiaoxinghu/datacraft
|
data/lib/datacraft/registry.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
require 'forwardable'
|
2
|
-
|
3
|
-
module Datacraft
|
4
|
-
# for normalizing blocks
|
5
|
-
class CompatiableProc < Proc
|
6
|
-
alias_method :run, :call
|
7
|
-
alias_method :process, :call
|
8
|
-
alias_method :tweak, :call
|
9
|
-
end
|
10
|
-
|
11
|
-
# common registry
|
12
|
-
class Registry
|
13
|
-
extend Forwardable
|
14
|
-
def_delegators :instances, :each, :map, :size
|
15
|
-
def initialize
|
16
|
-
@items = []
|
17
|
-
end
|
18
|
-
|
19
|
-
attr_accessor :mandatory_methods
|
20
|
-
|
21
|
-
def instances
|
22
|
-
@instances ||= @items.map do |i|
|
23
|
-
if i[:klass]
|
24
|
-
i[:klass].new(*i[:args])
|
25
|
-
elsif i[:block]
|
26
|
-
CompatiableProc.new(&i[:block])
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def <<(val)
|
32
|
-
klass = val[:klass]
|
33
|
-
block = val[:block]
|
34
|
-
if klass
|
35
|
-
fail InvalidInstruction, "#{klass.name} needs to implement methods: "\
|
36
|
-
"#{mandatory_methods}" unless valid? klass
|
37
|
-
elsif block
|
38
|
-
else
|
39
|
-
fail 'registry error'
|
40
|
-
end
|
41
|
-
@items << val
|
42
|
-
end
|
43
|
-
|
44
|
-
def valid?(klass)
|
45
|
-
mandatory_methods.all? do |m|
|
46
|
-
klass.method_defined? m
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class ProviderRegistry < Registry
|
52
|
-
def mandatory_methods
|
53
|
-
[:each]
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
class ConsumerRegistry < Registry
|
58
|
-
def mandatory_methods
|
59
|
-
[:<<]
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
class TweakerRegistry < Registry
|
64
|
-
def mandatory_methods
|
65
|
-
[:tweak]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
class HookRegistry < Registry
|
70
|
-
def mandatory_methods
|
71
|
-
[:run]
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|