datacraft 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datacraft.rb +1 -1
- data/lib/datacraft/cli.rb +8 -2
- data/lib/datacraft/context.rb +21 -16
- data/lib/datacraft/definition.rb +108 -0
- data/lib/datacraft/exceptions.rb +4 -0
- data/lib/datacraft/instruction.rb +12 -19
- data/lib/datacraft/runner.rb +25 -19
- data/lib/datacraft/version.rb +1 -1
- metadata +3 -3
- data/lib/datacraft/registry.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d9abea0dcb9fc8da790558bc9d848c5420e42e0
|
4
|
+
data.tar.gz: 114b10e2d87db5adada974f86b506a1ccd007e52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c812f178e2f5da84826fa70584ac66ca5fdccb0f92de6e505ba3c9253d74ecaa6ac93c25fb34eba8cf770d7418207c235de4a18d56d4b1192bf308c11c54f3e
|
7
|
+
data.tar.gz: 274b7e3e6022f75021e52272d9c08d2e980077444a5e1fadc9c42881628c7ba539510dece0e892880a973a8e05c68163424c00ea68c904e864958f9605a19296
|
data/lib/datacraft.rb
CHANGED
data/lib/datacraft/cli.rb
CHANGED
@@ -9,9 +9,15 @@ module Datacraft
|
|
9
9
|
Datacraft.run instruction
|
10
10
|
end
|
11
11
|
|
12
|
-
desc 'check [INSTRUCTION_FILE]',
|
12
|
+
desc 'check [INSTRUCTION_FILE]',
|
13
|
+
'evaluate the instruction without running it'
|
13
14
|
def check(filename)
|
14
|
-
|
15
|
+
begin
|
16
|
+
Instruction.from_file filename
|
17
|
+
puts 'You are ready to go.'
|
18
|
+
rescue InvalidInstruction => e
|
19
|
+
puts e
|
20
|
+
end
|
15
21
|
end
|
16
22
|
end
|
17
23
|
end
|
data/lib/datacraft/context.rb
CHANGED
@@ -1,33 +1,38 @@
|
|
1
1
|
module Datacraft
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
options[:n_threads] = 8
|
7
|
-
end
|
8
|
-
|
9
|
-
def providers
|
10
|
-
@providers ||= ProviderRegistry.new
|
11
|
-
end
|
12
|
-
|
13
|
-
def consumers
|
14
|
-
@consumers ||= ConsumerRegistry.new
|
2
|
+
# define the context of the instruction
|
3
|
+
module Context
|
4
|
+
def sources
|
5
|
+
@sources ||= []
|
15
6
|
end
|
16
7
|
|
17
8
|
def tweakers
|
18
|
-
@tweakers ||=
|
9
|
+
@tweakers ||= []
|
19
10
|
end
|
20
11
|
|
21
12
|
def pre_hooks
|
22
|
-
@pre_hooks ||=
|
13
|
+
@pre_hooks ||= []
|
23
14
|
end
|
24
15
|
|
25
16
|
def post_hooks
|
26
|
-
@post_hooks ||=
|
17
|
+
@post_hooks ||= []
|
18
|
+
end
|
19
|
+
|
20
|
+
def consumers
|
21
|
+
@consumers ||= []
|
27
22
|
end
|
28
23
|
|
29
24
|
def options
|
30
25
|
@options ||= {}
|
31
26
|
end
|
27
|
+
|
28
|
+
def validate
|
29
|
+
fail InvalidInstruction, "Please define data source with keyword 'from'." unless sources.size > 0
|
30
|
+
fail InvalidInstruction, "Please define data consumer with keyword 'to'." unless consumers.size > 0
|
31
|
+
sources.each(&:validate)
|
32
|
+
consumers.each(&:validate)
|
33
|
+
tweakers.each(&:validate)
|
34
|
+
pre_hooks.each(&:validate)
|
35
|
+
post_hooks.each(&:validate)
|
36
|
+
end
|
32
37
|
end
|
33
38
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Datacraft
|
4
|
+
|
5
|
+
# for normalizing blocks
|
6
|
+
class CompatiableProc < Proc
|
7
|
+
# alias_method :run, :call
|
8
|
+
# alias_method :process, :call
|
9
|
+
alias_method :tweak, :call
|
10
|
+
end
|
11
|
+
|
12
|
+
# method management
|
13
|
+
module MethodDef
|
14
|
+
extend Forwardable
|
15
|
+
attr_reader :mandatory, :optional
|
16
|
+
|
17
|
+
def def_mandatory(*methods)
|
18
|
+
@mandatory = methods
|
19
|
+
methods.each do |method|
|
20
|
+
def_delegator :instance, method
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def def_optional(*methods)
|
25
|
+
@optional = methods
|
26
|
+
methods.each do |method|
|
27
|
+
def_delegator :instance, method
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def validate_methods(klass)
|
32
|
+
mandatory.each do |m|
|
33
|
+
fail InvalidInstruction, "Class <#{klass}> missing mandatory methods: #{m}." unless klass.method_defined?(m)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# definition of component
|
39
|
+
module Definition
|
40
|
+
extend Forwardable
|
41
|
+
def_delegator :instance, :respond_to?
|
42
|
+
def self.included(base)
|
43
|
+
base.extend(Forwardable)
|
44
|
+
base.extend(MethodDef)
|
45
|
+
end
|
46
|
+
|
47
|
+
def initialize(d)
|
48
|
+
@d = d
|
49
|
+
end
|
50
|
+
|
51
|
+
def instance
|
52
|
+
@instance ||= create_instance
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def create_instance
|
58
|
+
if @d[:klass]
|
59
|
+
@d[:klass].new(*@d[:args])
|
60
|
+
elsif @d[:block]
|
61
|
+
CompatiableProc.new(&@d[:block])
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# data source that provide data
|
67
|
+
class Source
|
68
|
+
include Definition
|
69
|
+
def_mandatory :each
|
70
|
+
|
71
|
+
def validate
|
72
|
+
fail InvalidSource, 'Source needs to be a class.' unless @d[:klass]
|
73
|
+
self.class.validate_methods @d[:klass]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# data consumer that consume data
|
78
|
+
class Consumer
|
79
|
+
include Definition
|
80
|
+
def_mandatory :<<
|
81
|
+
def_optional :build, :close
|
82
|
+
|
83
|
+
def validate
|
84
|
+
fail InvalidConsumer, 'Consumer needs to be a class.' unless @d[:klass]
|
85
|
+
self.class.validate_methods @d[:klass]
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# tweaking data row
|
90
|
+
class Tweaker
|
91
|
+
include Definition
|
92
|
+
def_mandatory :tweak
|
93
|
+
|
94
|
+
def validate
|
95
|
+
self.class.validate_methods @d[:klass] if @d[:klass]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# pre/post build hooks
|
100
|
+
class Hook
|
101
|
+
include Definition
|
102
|
+
def_mandatory :call
|
103
|
+
|
104
|
+
def validate
|
105
|
+
fail InvalidHook, 'Hook has to be a block.' unless @d[:block]
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/lib/datacraft/exceptions.rb
CHANGED
@@ -1,52 +1,45 @@
|
|
1
1
|
module Datacraft
|
2
2
|
# Building Instruction
|
3
3
|
class Instruction
|
4
|
+
include Context
|
4
5
|
attr_reader :context
|
5
6
|
|
6
|
-
def initialize
|
7
|
-
|
7
|
+
def initialize
|
8
|
+
options[:parallel] = false
|
9
|
+
options[:benchmark] = false
|
10
|
+
options[:n_threads] = 8
|
8
11
|
end
|
9
12
|
|
10
13
|
def pre_build(&block)
|
11
|
-
|
14
|
+
pre_hooks << Hook.new(block: block)
|
12
15
|
end
|
13
16
|
|
14
17
|
def from(klass, *initialization_params)
|
15
|
-
|
18
|
+
sources << Source.new(klass: klass, args: initialization_params)
|
16
19
|
end
|
17
20
|
|
18
21
|
def tweak(klass = nil, *initialization_params, &block)
|
19
|
-
|
22
|
+
tweakers << Tweaker.new(klass: klass, args: initialization_params, block: block)
|
20
23
|
end
|
21
24
|
|
22
25
|
def to(klass, *initialization_params)
|
23
|
-
|
26
|
+
consumers << Consumer.new(klass: klass, args: initialization_params)
|
24
27
|
end
|
25
28
|
|
26
29
|
def post_build(&block)
|
27
|
-
|
30
|
+
post_hooks << Hook.new(block: block)
|
28
31
|
end
|
29
32
|
|
30
33
|
def set(key, value)
|
31
|
-
|
34
|
+
options[key.to_sym] = value
|
32
35
|
end
|
33
36
|
|
34
37
|
def self.from_file(filename)
|
35
38
|
script_content = IO.read(filename)
|
36
39
|
instruction = Instruction.new
|
37
40
|
instruction.instance_eval(script_content)
|
41
|
+
instruction.validate
|
38
42
|
instruction
|
39
43
|
end
|
40
|
-
|
41
|
-
def self.check(filename)
|
42
|
-
begin
|
43
|
-
script_content = IO.read(filename)
|
44
|
-
instruction = Instruction.new
|
45
|
-
instruction.instance_eval(script_content)
|
46
|
-
'Looks good.'
|
47
|
-
rescue InvalidInstruction => e
|
48
|
-
e.message
|
49
|
-
end
|
50
|
-
end
|
51
44
|
end
|
52
45
|
end
|
data/lib/datacraft/runner.rb
CHANGED
@@ -5,23 +5,33 @@ module Datacraft
|
|
5
5
|
module Runner
|
6
6
|
# run the instruction
|
7
7
|
def run(instruction)
|
8
|
-
@
|
8
|
+
@inst = instruction
|
9
9
|
measurements = []
|
10
|
-
|
11
|
-
|
10
|
+
|
11
|
+
# run pre_build hooks
|
12
|
+
if @inst.respond_to? :pre_hooks
|
13
|
+
measurements << Benchmark.measure('pre build:') do
|
14
|
+
@inst.pre_hooks.each(&:call)
|
15
|
+
end
|
12
16
|
end
|
17
|
+
|
18
|
+
# process the rows
|
13
19
|
measurements << Benchmark.measure('process rows:') do
|
14
|
-
@
|
20
|
+
@inst.options[:parallel] ? pprocess_rows : process_rows
|
15
21
|
end
|
16
22
|
|
23
|
+
# build
|
17
24
|
measurements << Benchmark.measure('build:') do
|
18
|
-
build @
|
25
|
+
build @inst.consumers
|
19
26
|
end
|
20
27
|
|
21
|
-
|
22
|
-
|
28
|
+
# run post_build hooks
|
29
|
+
if @inst.respond_to? :post_hooks
|
30
|
+
measurements << Benchmark.measure('post build:') do
|
31
|
+
@inst.post_hooks.each(&:call)
|
32
|
+
end
|
23
33
|
end
|
24
|
-
report measurements if @
|
34
|
+
report measurements if @inst.options[:benchmark]
|
25
35
|
end
|
26
36
|
|
27
37
|
# output benchmark results
|
@@ -35,8 +45,8 @@ module Datacraft
|
|
35
45
|
|
36
46
|
# process rows sequentially
|
37
47
|
def process_rows
|
38
|
-
@
|
39
|
-
|
48
|
+
@inst.sources.each do |source|
|
49
|
+
source.each do |row|
|
40
50
|
process row
|
41
51
|
end
|
42
52
|
end
|
@@ -44,21 +54,21 @@ module Datacraft
|
|
44
54
|
|
45
55
|
# tweak & consume one row
|
46
56
|
def process(row)
|
47
|
-
@
|
57
|
+
@inst.tweakers.each do |tweaker|
|
48
58
|
row = tweaker.tweak row
|
49
59
|
return nil unless row
|
50
60
|
end
|
51
|
-
@
|
61
|
+
@inst.consumers.each do |consumer|
|
52
62
|
consumer << row
|
53
63
|
end
|
54
64
|
end
|
55
65
|
|
56
66
|
# process rows in parallel
|
57
67
|
def pprocess_rows
|
58
|
-
thread_number = [@
|
59
|
-
@
|
68
|
+
thread_number = [@inst.sources.size,
|
69
|
+
@inst.options[:n_threads]].min
|
60
70
|
queue = Queue.new
|
61
|
-
@
|
71
|
+
@inst.sources.each { |p| queue << p }
|
62
72
|
threads = thread_number.times.map do
|
63
73
|
Thread.new do
|
64
74
|
begin
|
@@ -67,10 +77,6 @@ module Datacraft
|
|
67
77
|
end
|
68
78
|
rescue ThreadError
|
69
79
|
end
|
70
|
-
# until queue.empty?
|
71
|
-
# p = queue.pop(true)
|
72
|
-
# p.each { |row| process row }
|
73
|
-
# end
|
74
80
|
end
|
75
81
|
end
|
76
82
|
threads.each(&:join)
|
data/lib/datacraft/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datacraft
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xiaoxing Hu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -119,9 +119,9 @@ files:
|
|
119
119
|
- lib/datacraft.rb
|
120
120
|
- lib/datacraft/cli.rb
|
121
121
|
- lib/datacraft/context.rb
|
122
|
+
- lib/datacraft/definition.rb
|
122
123
|
- lib/datacraft/exceptions.rb
|
123
124
|
- lib/datacraft/instruction.rb
|
124
|
-
- lib/datacraft/registry.rb
|
125
125
|
- lib/datacraft/runner.rb
|
126
126
|
- lib/datacraft/version.rb
|
127
127
|
homepage: https://github.com/xiaoxinghu/datacraft
|
data/lib/datacraft/registry.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
require 'forwardable'
|
2
|
-
|
3
|
-
module Datacraft
|
4
|
-
# for normalizing blocks
|
5
|
-
class CompatiableProc < Proc
|
6
|
-
alias_method :run, :call
|
7
|
-
alias_method :process, :call
|
8
|
-
alias_method :tweak, :call
|
9
|
-
end
|
10
|
-
|
11
|
-
# common registry
|
12
|
-
class Registry
|
13
|
-
extend Forwardable
|
14
|
-
def_delegators :instances, :each, :map, :size
|
15
|
-
def initialize
|
16
|
-
@items = []
|
17
|
-
end
|
18
|
-
|
19
|
-
attr_accessor :mandatory_methods
|
20
|
-
|
21
|
-
def instances
|
22
|
-
@instances ||= @items.map do |i|
|
23
|
-
if i[:klass]
|
24
|
-
i[:klass].new(*i[:args])
|
25
|
-
elsif i[:block]
|
26
|
-
CompatiableProc.new(&i[:block])
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def <<(val)
|
32
|
-
klass = val[:klass]
|
33
|
-
block = val[:block]
|
34
|
-
if klass
|
35
|
-
fail InvalidInstruction, "#{klass.name} needs to implement methods: "\
|
36
|
-
"#{mandatory_methods}" unless valid? klass
|
37
|
-
elsif block
|
38
|
-
else
|
39
|
-
fail 'registry error'
|
40
|
-
end
|
41
|
-
@items << val
|
42
|
-
end
|
43
|
-
|
44
|
-
def valid?(klass)
|
45
|
-
mandatory_methods.all? do |m|
|
46
|
-
klass.method_defined? m
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class ProviderRegistry < Registry
|
52
|
-
def mandatory_methods
|
53
|
-
[:each]
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
class ConsumerRegistry < Registry
|
58
|
-
def mandatory_methods
|
59
|
-
[:<<]
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
class TweakerRegistry < Registry
|
64
|
-
def mandatory_methods
|
65
|
-
[:tweak]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
class HookRegistry < Registry
|
70
|
-
def mandatory_methods
|
71
|
-
[:run]
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|