data_miner 0.5.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +8 -0
- data/CHANGELOG +7 -0
- data/Gemfile +4 -0
- data/LICENSE +1 -1
- data/Rakefile +23 -0
- data/data_miner.gemspec +35 -0
- data/lib/data_miner.rb +55 -96
- data/lib/data_miner/active_record_extensions.rb +38 -0
- data/lib/data_miner/attribute.rb +63 -58
- data/lib/data_miner/config.rb +184 -0
- data/lib/data_miner/dictionary.rb +25 -12
- data/lib/data_miner/import.rb +59 -50
- data/lib/data_miner/process.rb +24 -19
- data/lib/data_miner/run.rb +3 -3
- data/lib/data_miner/schema.rb +50 -53
- data/lib/data_miner/tap.rb +24 -24
- data/lib/data_miner/verify.rb +17 -24
- data/lib/data_miner/version.rb +3 -0
- data/test/{test_helper.rb → helper.rb} +20 -3
- data/test/{data_miner/attribute_test.rb → test_attribute.rb} +2 -2
- data/test/{data_miner_test.rb → test_old_syntax.rb} +28 -32
- data/test/{data_miner/verify_test.rb → test_verify.rb} +4 -4
- metadata +80 -101
- data/lib/data_miner/base.rb +0 -204
@@ -0,0 +1,184 @@
|
|
1
|
+
require 'blockenspiel'
|
2
|
+
|
3
|
+
class DataMiner
  # Collects the ordered list of steps (schema, process, tap, import, verify)
  # declared for one resource and runs them in sequence.
  #
  # Includes Blockenspiel::DSL, so the public methods below double as the
  # DSL vocabulary available inside a configuration block.
  class Config
    include ::Blockenspiel::DSL

    # The class being mined. The code below calls name, table_name,
    # column_names, table_exists?, delete_all and reset_column_information
    # on it (presumably an ActiveRecord model -- confirm against callers).
    attr_reader :resource

    def initialize(resource)
      @resource = resource
    end

    # Steps in the order they were declared.
    def steps
      @steps ||= []
    end

    # def attributes
    #   @attributes ||= {}
    # end

    # Queue a Schema step; the block is evaluated against the step via
    # Blockenspiel.
    def schema(create_table_options = {}, &blk)
      step = Schema.new self, create_table_options
      ::Blockenspiel.invoke blk, step
      steps.push step
    end

    # Queue a Process step. The single argument is a method id when no block
    # is given, or a description of the block when one is.
    def process(method_id_or_block_description, &blk)
      step = Process.new self, method_id_or_block_description, &blk
      steps.push step
    end

    # Queue a Tap step built from the given description, source and options.
    def tap(description, source, options = {})
      step = Tap.new self, description, source, options
      steps.push step
    end

    # Queue an Import step.
    #
    #   import options, &blk
    #   import description, options, &blk
    #
    # With a single argument the description defaults to '(no description)'.
    # The last argument is always treated as the options.
    def import(*args, &blk)
      if args.length == 1
        description = '(no description)'
      else
        description = args[0]
      end
      options = args.last

      step = Import.new self, description, options
      ::Blockenspiel.invoke blk, step
      steps.push step
    end

    # Queue a Verify step.
    def verify(description = '(no description)', &blk)
      step = Verify.new self, description, &blk
      steps.push step
    end

    # Mine data for this class.
    #
    # Recognized options (keys are stringified, so symbols work too):
    # 'from_scratch':: delete all existing rows of the resource first
    # 'preserve_call_stack_between_runs':: do not clear the shared call stack
    #                                      when the outermost run finishes
    #
    # Returns nil. Returns immediately if this resource is already on the
    # call stack.
    def run(options = {})
      options = options.dup
      options.stringify_keys!

      return if ::DataMiner.instance.call_stack.include? resource.name
      ::DataMiner.instance.call_stack.push resource.name

      finished = false
      skipped = false
      if Run.table_exists?
        # Recorded as killed up front; the ensure block below flips it back
        # once control actually returns here.
        run = Run.create! :started_at => ::Time.now, :resource_name => resource.name, :killed => true
      else
        run = nil
        ::DataMiner.logger.info "Not logging individual runs. Please run DataMiner::Run.create_tables if you want to enable this."
      end
      resource.delete_all if options['from_scratch']
      begin
        steps.each do |step|
          step.run
          resource.reset_column_information
        end
        finished = true
      rescue Finish
        finished = true
      rescue Skip
        skipped = true
      ensure
        # Guard on the record itself: +run+ is nil whenever the runs table
        # was missing at the start, even if the table exists by now.
        if run && Run.table_exists?
          run.update_attributes! :terminated_at => ::Time.now, :finished => finished, :skipped => skipped, :killed => false
        end
        if ::DataMiner.instance.call_stack.first == resource.name && !options['preserve_call_stack_between_runs']
          ::DataMiner.instance.call_stack.clear
        end
      end
      nil
    end

    # Only the Import steps, in declaration order.
    def import_steps
      steps.select { |step| step.is_a? Import }
    end

    # Intentionally empty. Presumably a hook called before the configuration
    # block is evaluated -- confirm against the caller.
    def before_invoke

    end

    # Sanity-checks the declared import attributes against the resource's
    # table. Does nothing when the table does not exist yet.
    def after_invoke
      return unless resource.table_exists?
      make_sure_unit_definitions_make_sense
      suggest_missing_column_migrations
    end

    # Each entry is one combination of attribute options that counts as a
    # complete unit definition.
    COMPLETE_UNIT_DEFINITIONS = [
      %w{units},
      %w{from_units to_units},
      %w{units_field_name},
      %w{units_field_name to_units},
      %w{units_field_number},
      %w{units_field_number to_units}
    ]

    # Raises if any import attribute mentions units in its options without
    # satisfying at least one of COMPLETE_UNIT_DEFINITIONS.
    def make_sure_unit_definitions_make_sense
      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          next unless attribute.options.any? { |k, _| k.to_s =~ /unit/ }
          next if COMPLETE_UNIT_DEFINITIONS.any? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
          raise %{

================================

You don't have a valid unit definition for #{resource.name}##{attribute.name}.

You supplied #{attribute.options.keys.select { |k| k.to_s =~ /unit/ }.inspect }.

You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}.

================================
          }
        end
      end
    end

    # Logs (at debug level) a ready-made migration for every import attribute
    # column, and every "<attribute>_units" column, missing from the
    # resource's table.
    #
    # Raises if an attribute itself is named with the reserved _units suffix.
    def suggest_missing_column_migrations
      missing_columns = []

      import_steps.each do |step|
        step.attributes.each do |_, attribute|
          raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.end_with? '_units'
          unless resource.column_names.include? attribute.name
            missing_columns << attribute.name
          end
          if attribute.wants_units?
            units_column = "#{attribute.name}_units"
            missing_columns << units_column unless resource.column_names.include? units_column
          end
        end
      end
      missing_columns.uniq!
      if missing_columns.any?
        ::DataMiner.logger.debug %{

================================

On #{resource}, it looks like you're missing some columns...

Please run this...

  ./script/generate migration AddMissingColumnsTo#{resource.name}

and **replace** the resulting file with this:

  class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
    def self.up
#{missing_columns.map { |column_name| "      add_column :#{resource.table_name}, :#{column_name}, :#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
    end

    def self.down
#{missing_columns.map { |column_name| "      remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
    end
  end

On the other hand, if you're working directly with create_table, this might be helpful:

#{missing_columns.map { |column_name| "t.#{column_name.end_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }

================================
        }
      end
    end
  end
end
|
@@ -1,20 +1,33 @@
|
|
1
|
-
|
1
|
+
class DataMiner
|
2
2
|
class Dictionary
|
3
|
-
|
4
|
-
|
3
|
+
attr_reader :options
|
5
4
|
# Keeps a private copy of the options with every key converted to a String,
# so the caller's hash is never modified.
def initialize(options = {})
  normalized = options.dup
  normalized.stringify_keys!
  @options = normalized
end
|
8
|
+
|
9
|
+
# The 'input' option; #lookup passes it to #find as the column to match on.
def key_name
  options['input']
end
|
12
|
+
|
13
|
+
# The 'output' option; #lookup passes it to #find as the column to return.
def value_name
  options['output']
end
|
16
|
+
|
17
|
+
# The format string from the 'sprintf' option, or '%s' when none was given.
# Note: within this class the name shadows Kernel#sprintf.
def sprintf
  options['sprintf'] || '%s'
end
|
20
|
+
|
21
|
+
# The RemoteTable built from the 'url' option. Created on first use and
# memoized afterwards.
def table
  @table ||= ::RemoteTable.new(options['url'])
end
|
11
24
|
|
12
25
|
# Looks +key+ up using this dictionary's configured input column, output
# column and format string. Returns whatever #find returns.
def lookup(key)
  find(key_name, key, value_name, 'sprintf' => sprintf)
end
|
15
28
|
|
16
29
|
# Returns, as a String, the +value_name+ entry of the first table row whose
# +key_name+ entry equals +key+ after both sides have been passed through
# normalize_for_comparison. Returns nil when no row matches.
#
# +options+ is handed unchanged to normalize_for_comparison.
def find(key_name, key, value_name, options = {})
  # The search key does not change from row to row, so normalize it once
  # instead of once per row.
  wanted = normalize_for_comparison(key, options)
  match = table.detect { |row| wanted == normalize_for_comparison(row[key_name], options) }
  match[value_name].to_s if match
end
|
@@ -22,10 +35,10 @@ module DataMiner
|
|
22
35
|
private
|
23
36
|
|
24
37
|
def normalize_for_comparison(string, options = {})
|
25
|
-
if options[
|
26
|
-
if /\%[0-9\.]*f/.match
|
38
|
+
if options['sprintf']
|
39
|
+
if /\%[0-9\.]*f/.match options['sprintf']
|
27
40
|
string = string.to_f
|
28
|
-
elsif /\%[0-9\.]*d/.match
|
41
|
+
elsif /\%[0-9\.]*d/.match options['sprintf']
|
29
42
|
string = string.to_i
|
30
43
|
end
|
31
44
|
string = sprintf % string
|
data/lib/data_miner/import.rb
CHANGED
@@ -1,79 +1,88 @@
|
|
1
|
-
|
1
|
+
require 'blockenspiel'
|
2
|
+
class DataMiner
|
2
3
|
# A step that reads rows from a remote table and writes them into the
# resource, one record per row.
#
# Includes Blockenspiel::DSL, so #key and #store are available inside the
# block given to the import declaration.
class Import
  include ::Blockenspiel::DSL

  attr_reader :config
  attr_reader :options
  attr_reader :description

  # config::      the owning configuration; supplies the resource
  # description:: free-form text, used by #inspect
  # options::     copied and key-stringified; later passed to RemoteTable.new
  #
  # Two legacy option forms are converted here, with a logged warning:
  # a 'table' instance is reduced to its url, and a non-Hash 'errata' is
  # reduced to a hash holding only its url.
  def initialize(config, description, options = {})
    @config = config
    @description = description
    @options = options.dup
    @options.stringify_keys!
    # legacy
    if @options.has_key? 'table'
      ::DataMiner.logger.info "Warning: 'table' is no longer an allowed option, taking the url from it and ignoring the rest"
      table_instance = @options.delete 'table'
      @options['url'] = table_instance.url
    end
    # legacy
    if @options.has_key?('errata') and not @options['errata'].is_a?(::Hash)
      ::DataMiner.logger.info "Warning: 'errata' must be a hash of Errata options. taking the URL from the Errata instance you provided and ignoring everything else"
      errata_instance = @options.delete 'errata'
      @options['errata'] = { 'url' => errata_instance.options['url'] }
    end
  end

  # Attribute definitions keyed by attribute name, in declaration order.
  def attributes
    @attributes ||= ::ActiveSupport::OrderedHash.new
  end

  # The class being imported into.
  def resource
    config.resource
  end

  def inspect
    %{#<DataMiner::Import(#{resource}) (#{description})>}
  end

  # Declare an attribute to be stored from each row.
  # Raises if the attribute was already declared with #store or #key.
  def store(attr_name, attr_options = {})
    raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
    attributes[attr_name] = Attribute.new self, attr_name, attr_options
  end

  # Declare the attribute used to find or initialize a record for each row.
  # It is stored like any other attribute.
  def key(attr_name, attr_options = {})
    # Checked here as well as in #store so that @_key is never changed by a
    # call that is about to be rejected.
    raise "You should only call store or key once for #{resource.name}##{attr_name}" if attributes.has_key? attr_name
    @_key = attr_name
    store attr_name, attr_options
  end

  def primary_key
    resource.primary_key
  end

  # The RemoteTable for this import, built on first use from a copy of the
  # options. When errata options are present, 'responder' defaults to the
  # resource.
  def table
    return @table if @table.is_a? ::RemoteTable
    # don't mess with the originals
    table_options = @options.dup
    if table_options['errata']
      errata_options = table_options['errata'].dup
      errata_options.stringify_keys!
      errata_options['responder'] ||= resource
      table_options['errata'] = errata_options
    end
    @table = ::RemoteTable.new table_options
  end

  # Drops the memoized table and clears each attribute's @dictionary so
  # they are rebuilt on next use.
  def expire_remote_data
    @table = nil
    attributes.each { |_, attr| attr.instance_variable_set :@dictionary, nil }
  end

  # Imports every row of the table. Rows whose record ends up without a
  # primary key value are logged and skipped rather than saved.
  #
  # Raises if no key attribute was declared. Returns nil.
  def run
    # Without a key there is nothing to look records up by; fail with a
    # readable message instead of a NoMethodError on nil further down.
    raise "You must call key once for #{resource.name} before running an import" if @_key.nil?
    expire_remote_data
    table.each do |row|
      record = resource.send "find_or_initialize_by_#{@_key}", attributes[@_key].value_from_row(row)
      attributes.each { |_, attr| attr.set_record_from_row record, row }
      if record.send(primary_key).present?
        record.save!
      else
        ::DataMiner.logger.debug "Skipping #{row} because there's no primary key"
      end
    end
    nil
  end
end
|
data/lib/data_miner/process.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
-
|
1
|
+
class DataMiner
  # A step that runs arbitrary code: either a block, or a method called on
  # the resource.
  class Process
    attr_reader :config
    attr_reader :method_id
    attr_reader :block_description
    attr_reader :blk

    # With a block, the second argument is stored as the block's description.
    # Without one, it is stored as the id of the method to call on the
    # resource.
    def initialize(config, method_id_or_block_description, &blk)
      @config = config
      if block_given?
        @block_description = method_id_or_block_description
        @blk = blk
      else
        @method_id = method_id_or_block_description
      end
    end

    # The class this step operates on.
    def resource
      config.resource
    end

    def inspect
      str = %{<#Process(#{resource})}
      # The stored block is exposed as +blk+; there is no +block+ method.
      if blk
        str << %{ called block "#{block_description}"}
      else
        str << %{ called method :#{method_id}}
      end
      str << ">"
      str
    end

    # Calls the block if one was given, otherwise sends method_id to the
    # resource. Always returns nil.
    def run
      if blk
        blk.call
      else
        resource.send method_id
      end
      nil
    end
  end
end
|
data/lib/data_miner/run.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
class Run < ActiveRecord::Base
|
1
|
+
class DataMiner
|
2
|
+
class Run < ::ActiveRecord::Base
|
3
3
|
set_table_name 'data_miner_runs'
|
4
4
|
|
5
5
|
def resource
|
@@ -8,7 +8,7 @@ module DataMiner
|
|
8
8
|
|
9
9
|
class << self
|
10
10
|
def create_tables
|
11
|
-
return if table_exists?
|
11
|
+
return if table_exists?
|
12
12
|
connection.create_table 'data_miner_runs', :force => true do |t|
|
13
13
|
t.string 'resource_name'
|
14
14
|
t.boolean 'killed'
|