data_miner 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,34 +1,52 @@
1
- class DataMiner::Step::Process
2
- attr_reader :script
3
- attr_reader :method_id
4
- attr_reader :description
5
- attr_reader :blk
1
+ class DataMiner
2
+ class Step
3
+ # A step that executes a single class method on the model or an arbitrary code block.
4
+ #
5
+ # Create these by calling +process+ inside a +data_miner+ block.
6
+ #
7
+ # @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
8
+ # @see DataMiner::Script#process
9
+ class Process < Step
10
+ # @private
11
+ attr_reader :script
6
12
 
7
- alias :block_description :description
13
+ # The method to be called on the model class.
14
+ # @return [Symbol]
15
+ attr_reader :method_id
8
16
 
9
- def initialize(script, method_id_or_description, ignored_options = {}, &blk)
10
- @script = script
11
- if block_given?
12
- @description = method_id_or_description
13
- @blk = blk
14
- else
15
- @description = method_id_or_description
16
- @method_id = method_id_or_description
17
- end
18
- end
19
-
20
- def model
21
- script.model
22
- end
23
-
24
- def perform
25
- DataMiner::Script.uniq do
26
- if blk
27
- model.instance_eval(&blk)
28
- else
29
- model.send method_id
17
+ # A description of what the block does. When a single class method is specified using a Symbol, this holds that Symbol (the same value as +method_id+).
18
+ # @return [String, Symbol]
19
+ attr_reader :description
20
+
21
+ # The block of arbitrary code to be run.
22
+ # @return [Proc]
23
+ attr_reader :blk
24
+
25
+ alias :block_description :description
26
+
27
+ # @private
28
+ def initialize(script, method_id_or_description, ignored_options = {}, &blk)
29
+ @script = script
30
+ if block_given?
31
+ @description = method_id_or_description
32
+ @blk = blk
33
+ else
34
+ @description = method_id_or_description
35
+ @method_id = method_id_or_description
36
+ end
37
+ end
38
+
39
+ # @private
40
+ def perform
41
+ DataMiner::Script.uniq do
42
+ if blk
43
+ model.instance_eval(&blk)
44
+ else
45
+ model.send method_id
46
+ end
47
+ end
48
+ nil
30
49
  end
31
50
  end
32
- nil
33
51
  end
34
52
  end
@@ -1,134 +1,167 @@
1
1
  require 'uri'
2
- # Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
3
- #
4
- # This class automatically detects if you have Bundler installed, and if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
5
- class DataMiner::Step::Tap
6
- DEFAULT_PORTS = {
7
- :mysql => 3306,
8
- :mysql2 => 3306,
9
- :postgres => 5432
10
- }
11
-
12
- DEFAULT_USERNAMES = {
13
- :mysql => 'root',
14
- :mysql2 => 'root',
15
- :postgres => ''
16
- }
17
-
18
- DEFAULT_PASSWORDS = {}
19
- DEFAULT_PASSWORDS.default = ''
20
-
21
- DEFAULT_HOSTS = {}
22
- DEFAULT_HOSTS.default = '127.0.0.1'
23
2
 
24
- attr_reader :script
25
- attr_reader :description
26
- attr_reader :source
27
- attr_reader :database_options
28
- attr_reader :source_table_name
3
+ class DataMiner
4
+ class Step
5
+ # A step that uses https://github.com/ricardochimal/taps to import table structure and data.
6
+ #
7
+ # Create these by calling +tap+ inside a +data_miner+ block.
8
+ #
9
+ # @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
10
+ # @see DataMiner::Script#tap
11
+ class Tap < Step
12
+ DEFAULT_PORTS = {
13
+ :mysql => 3306,
14
+ :mysql2 => 3306,
15
+ :postgres => 5432
16
+ }
17
+
18
+ DEFAULT_USERNAMES = {
19
+ :mysql => 'root',
20
+ :mysql2 => 'root',
21
+ :postgres => ''
22
+ }
23
+
24
+ DEFAULT_PASSWORDS = {}
25
+ DEFAULT_PASSWORDS.default = ''
26
+
27
+ DEFAULT_HOSTS = {}
28
+ DEFAULT_HOSTS.default = '127.0.0.1'
29
29
 
30
- def initialize(script, description, source, options = {})
31
- options = options.symbolize_keys
32
- @script = script
33
- @description = description
34
- @source = source
35
- @database_options = options.except(:source_table_name).reverse_merge(active_record_config)
36
- @source_table_name = options.fetch :source_table_name, model.table_name
37
- end
38
-
39
- def model
40
- script.model
41
- end
42
-
43
- def perform
44
- [ source_table_name, model.table_name ].each do |possible_obstacle|
45
- if connection.table_exists? possible_obstacle
46
- connection.drop_table possible_obstacle
30
+ # @private
31
+ attr_reader :script
32
+
33
+ # A description of the tapped data source.
34
+ # @return [String]
35
+ attr_reader :description
36
+
37
+ # The URL of the tapped data source, including username, password, domain, and port number.
38
+ # @return [String]
39
+ attr_reader :source
40
+
41
+ # Connection options that will be passed to the +taps pull command+. Defaults to the ActiveRecord connection config, if available.
42
+ # @return [Hash]
43
+ attr_reader :database_options
44
+
45
+ # Source table name. Defaults to the table name of the model.
46
+ # @return [String]
47
+ attr_reader :source_table_name
48
+
49
+ # @private
50
+ def initialize(script, description, source, options = {})
51
+ options = options.symbolize_keys
52
+ @script = script
53
+ @description = description
54
+ @source = source
55
+ @source_table_name = options.delete(:source_table_name) || model.table_name
56
+ @database_options = options.reverse_merge script.model.connection.instance_variable_get(:@config).symbolize_keys
57
+ end
58
+
59
+ # @private
60
+ def perform
61
+ [ source_table_name, model.table_name ].each do |possible_obstacle|
62
+ if connection.table_exists? possible_obstacle
63
+ connection.drop_table possible_obstacle
64
+ end
65
+ end
66
+ taps_pull
67
+ if needs_table_rename?
68
+ connection.rename_table source_table_name, model.table_name
69
+ end
70
+ nil
71
+ end
72
+
73
+ # @return [String] The name of the current database.
74
+ def database
75
+ unless database = database_options[:database]
76
+ raise ::ArgumentError, %{[data_miner] Can't infer database name from options or ActiveRecord config.}
77
+ end
78
+ database
79
+ end
80
+
81
+ # @return [String] The database username.
82
+ def username
83
+ database_options[:username] || DEFAULT_USERNAMES[adapter.to_sym]
47
84
  end
48
- end
49
- taps_pull
50
- if needs_table_rename?
51
- connection.rename_table source_table_name, model.table_name
52
- end
53
- nil
54
- end
55
-
56
- # sabshere 1/25/11 what if there were multiple connections
57
- # blockenspiel doesn't like to delegate this to #model
58
- def connection
59
- ::ActiveRecord::Base.connection
60
- end
61
-
62
- def needs_table_rename?
63
- source_table_name != model.table_name
64
- end
65
-
66
- def adapter
67
- case connection.adapter_name
68
- when /mysql2/i
69
- 'mysql2'
70
- when /mysql/i
71
- 'mysql'
72
- when /postgres/i
73
- 'postgres'
74
- when /sqlite/i
75
- 'sqlite'
76
- end
77
- end
78
85
 
79
- # never optional
80
- def database
81
- database_options[:database]
82
- end
83
-
84
- %w{ username password port host }.each do |x|
85
- module_eval %{
86
- def #{x}
87
- database_options[:#{x}] || DEFAULT_#{x.upcase}S[adapter.to_sym]
86
+ # @return [String] The database password.
87
+ def password
88
+ database_options[:password] || DEFAULT_PASSWORDS[adapter.to_sym]
88
89
  end
89
- }
90
- end
91
-
92
- # "user:pass"
93
- # "user"
94
- # nil
95
- def userinfo
96
- if username.present?
97
- [username, password].select(&:present?).join(':')
98
- end
99
- end
100
-
101
- def db_url
102
- case adapter
103
- when 'sqlite'
104
- "sqlite://#{database}"
105
- else
106
- ::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
107
- end
108
- end
109
90
 
110
- def active_record_config
111
- connection.instance_variable_get(:@config).symbolize_keys
112
- end
113
-
114
- def taps_pull
115
- args = [
116
- 'taps',
117
- 'pull',
118
- db_url,
119
- source,
120
- '--indexes-first',
121
- '--tables',
122
- source_table_name
123
- ]
124
-
125
- # https://github.com/carlhuda/bundler/issues/1579
126
- if defined?(::Bundler)
127
- ::Bundler.with_clean_env do
128
- ::Kernel.system args.join(' ')
91
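+ # @return [Integer, nil] The database port number, taken from the options or else from DEFAULT_PORTS for the adapter (nil if neither provides one).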
92
+ def port
93
+ database_options[:port] || DEFAULT_PORTS[adapter.to_sym]
94
+ end
95
+
96
+ # @return [String] The database hostname.
97
+ def host
98
+ database_options[:host] || DEFAULT_HOSTS[adapter.to_sym]
99
+ end
100
+
101
+ private
102
+
103
+ def connection
104
+ model.connection
105
+ end
106
+
107
+ def needs_table_rename?
108
+ source_table_name != model.table_name
109
+ end
110
+
111
+ def adapter
112
+ case connection.adapter_name
113
+ when /mysql2/i
114
+ 'mysql2'
115
+ when /mysql/i
116
+ 'mysql'
117
+ when /postgres/i
118
+ 'postgres'
119
+ when /sqlite/i
120
+ 'sqlite'
121
+ end
122
+ end
123
+
124
+ # "user:pass"
125
+ # "user"
126
+ # nil
127
+ def userinfo
128
+ if username.present?
129
+ [username, password].select(&:present?).join(':')
130
+ end
131
+ end
132
+
133
+ def db_url
134
+ case adapter
135
+ when 'sqlite'
136
+ "sqlite://#{database}"
137
+ else
138
+ ::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
139
+ end
140
+ end
141
+
142
+ # Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
143
+ #
144
+ # This method detects whether Bundler is loaded and, if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
145
+ def taps_pull
146
+ args = [
147
+ 'taps',
148
+ 'pull',
149
+ db_url,
150
+ source,
151
+ '--indexes-first',
152
+ '--tables',
153
+ source_table_name
154
+ ]
155
+
156
+ # https://github.com/carlhuda/bundler/issues/1579
157
+ if defined?(::Bundler)
158
+ ::Bundler.with_clean_env do
159
+ ::Kernel.system args.join(' ')
160
+ end
161
+ else
162
+ ::Kernel.system args.join(' ')
163
+ end
129
164
  end
130
- else
131
- ::Kernel.system args.join(' ')
132
165
  end
133
166
  end
134
167
  end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.0.1'
2
+ VERSION = '2.0.2'
3
3
  end
@@ -7,36 +7,72 @@ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
7
 
8
8
  describe DataMiner do
9
9
  describe "when being run in a multi-threaded environment" do
10
+ before do
11
+ @old_thread_abort_on_exception = Thread.abort_on_exception
12
+ Thread.abort_on_exception = false
13
+ end
14
+
15
+ after do
16
+ Thread.abort_on_exception = @old_thread_abort_on_exception
17
+ end
18
+
10
19
  it "tries not to duplicate data" do
11
- begin
12
- old_thread_abort_on_exception = Thread.abort_on_exception
13
- Thread.abort_on_exception = false
14
- Breed.delete_all
15
- Breed.run_data_miner!
16
- reference_count = Breed.count
17
- Breed.delete_all
18
- threads = (0..2).map do |i|
19
- Thread.new do
20
- $stderr.write "Thread #{i} starting\n"
21
- Breed.run_data_miner!
22
- $stderr.write "Thread #{i} done\n"
23
- end
20
+ Breed.delete_all
21
+ Breed.run_data_miner!
22
+ reference_count = Breed.count
23
+ Breed.delete_all
24
+ threads = (0..2).map do |i|
25
+ Thread.new do
26
+ # $stderr.write "Thread #{i} starting\n"
27
+ Breed.run_data_miner!
28
+ # $stderr.write "Thread #{i} done\n"
29
+ end
30
+ end
31
+ exceptions = []
32
+ threads.each do |t|
33
+ begin
34
+ t.join
35
+ rescue
36
+ exceptions << $!
24
37
  end
25
- exceptions = []
26
- threads.each do |t|
27
- begin
28
- t.join
29
- rescue
30
- exceptions << $!
38
+ end
39
+ exceptions.length.must_equal 2
40
+ exceptions.each do |exception|
41
+ exception.must_be_kind_of LockMethod::Locked
42
+ end
43
+ Breed.count.must_equal reference_count
44
+ end
45
+
46
+ it "allows you to clear locks if necessary" do
47
+ threads = (0..2).map do |i|
48
+ Thread.new do
49
+ # $stderr.write "Thread #{i} starting\n"
50
+ case i
51
+ when 0
52
+ Breed.run_data_miner!
53
+ when 1
54
+ sleep 0.3
55
+ DataMiner::Run.clear_locks
56
+ Breed.run_data_miner!
57
+ when 2
58
+ # i will hit a lock!
59
+ sleep 0.6
60
+ Breed.run_data_miner!
31
61
  end
62
+ # $stderr.write "Thread #{i} done\n"
32
63
  end
33
- exceptions.length.must_equal 2
34
- exceptions.each do |exception|
35
- exception.must_be_kind_of LockMethod::Locked
64
+ end
65
+ exceptions = []
66
+ threads.each do |t|
67
+ begin
68
+ t.join
69
+ rescue
70
+ exceptions << $!
36
71
  end
37
- Breed.count.must_equal reference_count
38
- ensure
39
- Thread.abort_on_exception = old_thread_abort_on_exception
72
+ end
73
+ exceptions.length.must_equal 1
74
+ exceptions.each do |exception|
75
+ exception.must_be_kind_of LockMethod::Locked
40
76
  end
41
77
  end
42
78
  end