data_miner 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -7
- data/CHANGELOG +13 -0
- data/LICENSE +1 -1
- data/README.markdown +112 -0
- data/data_miner.gemspec +2 -2
- data/lib/data_miner.rb +26 -12
- data/lib/data_miner/active_record_class_methods.rb +108 -0
- data/lib/data_miner/attribute.rb +150 -76
- data/lib/data_miner/dictionary.rb +40 -18
- data/lib/data_miner/run.rb +35 -0
- data/lib/data_miner/script.rb +123 -2
- data/lib/data_miner/step.rb +11 -3
- data/lib/data_miner/step/import.rb +100 -64
- data/lib/data_miner/step/process.rb +46 -28
- data/lib/data_miner/step/tap.rb +156 -123
- data/lib/data_miner/version.rb +1 -1
- data/test/test_safety.rb +61 -25
- metadata +8 -6
- data/README.rdoc +0 -289
- data/lib/data_miner/active_record_extensions.rb +0 -38
@@ -1,34 +1,52 @@
|
|
1
|
-
class DataMiner
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
class DataMiner
|
2
|
+
class Step
|
3
|
+
# A step that executes a single class method on the model or an arbitrary code block.
|
4
|
+
#
|
5
|
+
# Create these by calling +process+ inside a +data_miner+ block.
|
6
|
+
#
|
7
|
+
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
8
|
+
# @see DataMiner::Script#process
|
9
|
+
class Process < Step
|
10
|
+
# @private
|
11
|
+
attr_reader :script
|
6
12
|
|
7
|
-
|
13
|
+
# The method to be called on the model class.
|
14
|
+
# @return [Symbol]
|
15
|
+
attr_reader :method_id
|
8
16
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
17
|
+
# A description of what the block does. When a single class method is specified using a Symbol instead of a block, this holds that same Symbol (the method name).
|
18
|
+
# @return [String]
|
19
|
+
attr_reader :description
|
20
|
+
|
21
|
+
# The block of arbitrary code to be run.
|
22
|
+
# @return [Proc]
|
23
|
+
attr_reader :blk
|
24
|
+
|
25
|
+
alias :block_description :description
|
26
|
+
|
27
|
+
# @private
|
28
|
+
def initialize(script, method_id_or_description, ignored_options = {}, &blk)
|
29
|
+
@script = script
|
30
|
+
if block_given?
|
31
|
+
@description = method_id_or_description
|
32
|
+
@blk = blk
|
33
|
+
else
|
34
|
+
@description = method_id_or_description
|
35
|
+
@method_id = method_id_or_description
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# @private
|
40
|
+
def perform
|
41
|
+
DataMiner::Script.uniq do
|
42
|
+
if blk
|
43
|
+
model.instance_eval(&blk)
|
44
|
+
else
|
45
|
+
model.send method_id
|
46
|
+
end
|
47
|
+
end
|
48
|
+
nil
|
30
49
|
end
|
31
50
|
end
|
32
|
-
nil
|
33
51
|
end
|
34
52
|
end
|
data/lib/data_miner/step/tap.rb
CHANGED
@@ -1,134 +1,167 @@
|
|
1
1
|
require 'uri'
|
2
|
-
# Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
|
3
|
-
#
|
4
|
-
# This class automatically detects if you have Bundler installed, and if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
|
5
|
-
class DataMiner::Step::Tap
|
6
|
-
DEFAULT_PORTS = {
|
7
|
-
:mysql => 3306,
|
8
|
-
:mysql2 => 3306,
|
9
|
-
:postgres => 5432
|
10
|
-
}
|
11
|
-
|
12
|
-
DEFAULT_USERNAMES = {
|
13
|
-
:mysql => 'root',
|
14
|
-
:mysql2 => 'root',
|
15
|
-
:postgres => ''
|
16
|
-
}
|
17
|
-
|
18
|
-
DEFAULT_PASSWORDS = {}
|
19
|
-
DEFAULT_PASSWORDS.default = ''
|
20
|
-
|
21
|
-
DEFAULT_HOSTS = {}
|
22
|
-
DEFAULT_HOSTS.default = '127.0.0.1'
|
23
2
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
3
|
+
class DataMiner
|
4
|
+
class Step
|
5
|
+
# A step that uses https://github.com/ricardochimal/taps to import table structure and data.
|
6
|
+
#
|
7
|
+
# Create these by calling +tap+ inside a +data_miner+ block.
|
8
|
+
#
|
9
|
+
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
10
|
+
# @see DataMiner::Script#tap
|
11
|
+
class Tap < Step
|
12
|
+
DEFAULT_PORTS = {
|
13
|
+
:mysql => 3306,
|
14
|
+
:mysql2 => 3306,
|
15
|
+
:postgres => 5432
|
16
|
+
}
|
17
|
+
|
18
|
+
DEFAULT_USERNAMES = {
|
19
|
+
:mysql => 'root',
|
20
|
+
:mysql2 => 'root',
|
21
|
+
:postgres => ''
|
22
|
+
}
|
23
|
+
|
24
|
+
DEFAULT_PASSWORDS = {}
|
25
|
+
DEFAULT_PASSWORDS.default = ''
|
26
|
+
|
27
|
+
DEFAULT_HOSTS = {}
|
28
|
+
DEFAULT_HOSTS.default = '127.0.0.1'
|
29
29
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
30
|
+
# @private
|
31
|
+
attr_reader :script
|
32
|
+
|
33
|
+
# A description of the tapped data source.
|
34
|
+
# @return [String]
|
35
|
+
attr_reader :description
|
36
|
+
|
37
|
+
# The URL of the tapped data source, including username, password, domain, and port number.
|
38
|
+
# @return [String]
|
39
|
+
attr_reader :source
|
40
|
+
|
41
|
+
# Connection options that will be passed to the <tt>taps pull</tt> command. Defaults to the ActiveRecord connection config, if available.
|
42
|
+
# @return [Hash]
|
43
|
+
attr_reader :database_options
|
44
|
+
|
45
|
+
# Source table name. Defaults to the table name of the model.
|
46
|
+
# @return [String]
|
47
|
+
attr_reader :source_table_name
|
48
|
+
|
49
|
+
# @private
|
50
|
+
def initialize(script, description, source, options = {})
|
51
|
+
options = options.symbolize_keys
|
52
|
+
@script = script
|
53
|
+
@description = description
|
54
|
+
@source = source
|
55
|
+
@source_table_name = options.delete(:source_table_name) || model.table_name
|
56
|
+
@database_options = options.reverse_merge script.model.connection.instance_variable_get(:@config).symbolize_keys
|
57
|
+
end
|
58
|
+
|
59
|
+
# @private
|
60
|
+
def perform
|
61
|
+
[ source_table_name, model.table_name ].each do |possible_obstacle|
|
62
|
+
if connection.table_exists? possible_obstacle
|
63
|
+
connection.drop_table possible_obstacle
|
64
|
+
end
|
65
|
+
end
|
66
|
+
taps_pull
|
67
|
+
if needs_table_rename?
|
68
|
+
connection.rename_table source_table_name, model.table_name
|
69
|
+
end
|
70
|
+
nil
|
71
|
+
end
|
72
|
+
|
73
|
+
# @return [String] The name of the current database.
|
74
|
+
def database
|
75
|
+
unless database = database_options[:database]
|
76
|
+
raise ::ArgumentError, %{[data_miner] Can't infer database name from options or ActiveRecord config.}
|
77
|
+
end
|
78
|
+
database
|
79
|
+
end
|
80
|
+
|
81
|
+
# @return [String] The database username.
|
82
|
+
def username
|
83
|
+
database_options[:username] || DEFAULT_USERNAMES[adapter.to_sym]
|
47
84
|
end
|
48
|
-
end
|
49
|
-
taps_pull
|
50
|
-
if needs_table_rename?
|
51
|
-
connection.rename_table source_table_name, model.table_name
|
52
|
-
end
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
|
56
|
-
# sabshere 1/25/11 what if there were multiple connections
|
57
|
-
# blockenspiel doesn't like to delegate this to #model
|
58
|
-
def connection
|
59
|
-
::ActiveRecord::Base.connection
|
60
|
-
end
|
61
|
-
|
62
|
-
def needs_table_rename?
|
63
|
-
source_table_name != model.table_name
|
64
|
-
end
|
65
|
-
|
66
|
-
def adapter
|
67
|
-
case connection.adapter_name
|
68
|
-
when /mysql2/i
|
69
|
-
'mysql2'
|
70
|
-
when /mysql/i
|
71
|
-
'mysql'
|
72
|
-
when /postgres/i
|
73
|
-
'postgres'
|
74
|
-
when /sqlite/i
|
75
|
-
'sqlite'
|
76
|
-
end
|
77
|
-
end
|
78
85
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
end
|
83
|
-
|
84
|
-
%w{ username password port host }.each do |x|
|
85
|
-
module_eval %{
|
86
|
-
def #{x}
|
87
|
-
database_options[:#{x}] || DEFAULT_#{x.upcase}S[adapter.to_sym]
|
86
|
+
# @return [String] The database password.
|
87
|
+
def password
|
88
|
+
database_options[:password] || DEFAULT_PASSWORDS[adapter.to_sym]
|
88
89
|
end
|
89
|
-
}
|
90
|
-
end
|
91
|
-
|
92
|
-
# "user:pass"
|
93
|
-
# "user"
|
94
|
-
# nil
|
95
|
-
def userinfo
|
96
|
-
if username.present?
|
97
|
-
[username, password].select(&:present?).join(':')
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
def db_url
|
102
|
-
case adapter
|
103
|
-
when 'sqlite'
|
104
|
-
"sqlite://#{database}"
|
105
|
-
else
|
106
|
-
::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
|
107
|
-
end
|
108
|
-
end
|
109
90
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
91
|
+
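# @return [Integer, String, nil] The database port number; falls back to the adapter's entry in DEFAULT_PORTS (an Integer) when not configured, or nil if the adapter has no default.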
|
92
|
+
def port
|
93
|
+
database_options[:port] || DEFAULT_PORTS[adapter.to_sym]
|
94
|
+
end
|
95
|
+
|
96
|
+
# @return [String] The database hostname.
|
97
|
+
def host
|
98
|
+
database_options[:host] || DEFAULT_HOSTS[adapter.to_sym]
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
def connection
|
104
|
+
model.connection
|
105
|
+
end
|
106
|
+
|
107
|
+
def needs_table_rename?
|
108
|
+
source_table_name != model.table_name
|
109
|
+
end
|
110
|
+
|
111
|
+
def adapter
|
112
|
+
case connection.adapter_name
|
113
|
+
when /mysql2/i
|
114
|
+
'mysql2'
|
115
|
+
when /mysql/i
|
116
|
+
'mysql'
|
117
|
+
when /postgres/i
|
118
|
+
'postgres'
|
119
|
+
when /sqlite/i
|
120
|
+
'sqlite'
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
# "user:pass"
|
125
|
+
# "user"
|
126
|
+
# nil
|
127
|
+
def userinfo
|
128
|
+
if username.present?
|
129
|
+
[username, password].select(&:present?).join(':')
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def db_url
|
134
|
+
case adapter
|
135
|
+
when 'sqlite'
|
136
|
+
"sqlite://#{database}"
|
137
|
+
else
|
138
|
+
::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
|
143
|
+
#
|
144
|
+
# This class automatically detects if you have Bundler installed, and if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
|
145
|
+
def taps_pull
|
146
|
+
args = [
|
147
|
+
'taps',
|
148
|
+
'pull',
|
149
|
+
db_url,
|
150
|
+
source,
|
151
|
+
'--indexes-first',
|
152
|
+
'--tables',
|
153
|
+
source_table_name
|
154
|
+
]
|
155
|
+
|
156
|
+
# https://github.com/carlhuda/bundler/issues/1579
|
157
|
+
if defined?(::Bundler)
|
158
|
+
::Bundler.with_clean_env do
|
159
|
+
::Kernel.system args.join(' ')
|
160
|
+
end
|
161
|
+
else
|
162
|
+
::Kernel.system args.join(' ')
|
163
|
+
end
|
129
164
|
end
|
130
|
-
else
|
131
|
-
::Kernel.system args.join(' ')
|
132
165
|
end
|
133
166
|
end
|
134
167
|
end
|
data/lib/data_miner/version.rb
CHANGED
data/test/test_safety.rb
CHANGED
@@ -7,36 +7,72 @@ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
|
|
7
7
|
|
8
8
|
describe DataMiner do
|
9
9
|
describe "when being run in a multi-threaded environment" do
|
10
|
+
before do
|
11
|
+
@old_thread_abort_on_exception = Thread.abort_on_exception
|
12
|
+
Thread.abort_on_exception = false
|
13
|
+
end
|
14
|
+
|
15
|
+
after do
|
16
|
+
Thread.abort_on_exception = @old_thread_abort_on_exception
|
17
|
+
end
|
18
|
+
|
10
19
|
it "tries not to duplicate data" do
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
20
|
+
Breed.delete_all
|
21
|
+
Breed.run_data_miner!
|
22
|
+
reference_count = Breed.count
|
23
|
+
Breed.delete_all
|
24
|
+
threads = (0..2).map do |i|
|
25
|
+
Thread.new do
|
26
|
+
# $stderr.write "Thread #{i} starting\n"
|
27
|
+
Breed.run_data_miner!
|
28
|
+
# $stderr.write "Thread #{i} done\n"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
exceptions = []
|
32
|
+
threads.each do |t|
|
33
|
+
begin
|
34
|
+
t.join
|
35
|
+
rescue
|
36
|
+
exceptions << $!
|
24
37
|
end
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
38
|
+
end
|
39
|
+
exceptions.length.must_equal 2
|
40
|
+
exceptions.each do |exception|
|
41
|
+
exception.must_be_kind_of LockMethod::Locked
|
42
|
+
end
|
43
|
+
Breed.count.must_equal reference_count
|
44
|
+
end
|
45
|
+
|
46
|
+
it "allows you to clear locks if necessary" do
|
47
|
+
threads = (0..2).map do |i|
|
48
|
+
Thread.new do
|
49
|
+
# $stderr.write "Thread #{i} starting\n"
|
50
|
+
case i
|
51
|
+
when 0
|
52
|
+
Breed.run_data_miner!
|
53
|
+
when 1
|
54
|
+
sleep 0.3
|
55
|
+
DataMiner::Run.clear_locks
|
56
|
+
Breed.run_data_miner!
|
57
|
+
when 2
|
58
|
+
# I will hit a lock!
|
59
|
+
sleep 0.6
|
60
|
+
Breed.run_data_miner!
|
31
61
|
end
|
62
|
+
# $stderr.write "Thread #{i} done\n"
|
32
63
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
64
|
+
end
|
65
|
+
exceptions = []
|
66
|
+
threads.each do |t|
|
67
|
+
begin
|
68
|
+
t.join
|
69
|
+
rescue
|
70
|
+
exceptions << $!
|
36
71
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
72
|
+
end
|
73
|
+
exceptions.length.must_equal 1
|
74
|
+
exceptions.each do |exception|
|
75
|
+
exception.must_be_kind_of LockMethod::Locked
|
40
76
|
end
|
41
77
|
end
|
42
78
|
end
|