data_miner 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,34 +1,52 @@
1
- class DataMiner::Step::Process
2
- attr_reader :script
3
- attr_reader :method_id
4
- attr_reader :description
5
- attr_reader :blk
1
+ class DataMiner
2
+ class Step
3
+ # A step that executes a single class method on the model or an arbitrary code block.
4
+ #
5
+ # Create these by calling +process+ inside a +data_miner+ block.
6
+ #
7
+ # @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
8
+ # @see DataMiner::Script#process
9
+ class Process < Step
10
+ # @private
11
+ attr_reader :script
6
12
 
7
- alias :block_description :description
13
+ # The method to be called on the model class.
14
+ # @return [Symbol]
15
+ attr_reader :method_id
8
16
 
9
- def initialize(script, method_id_or_description, ignored_options = {}, &blk)
10
- @script = script
11
- if block_given?
12
- @description = method_id_or_description
13
- @blk = blk
14
- else
15
- @description = method_id_or_description
16
- @method_id = method_id_or_description
17
- end
18
- end
19
-
20
- def model
21
- script.model
22
- end
23
-
24
- def perform
25
- DataMiner::Script.uniq do
26
- if blk
27
- model.instance_eval(&blk)
28
- else
29
- model.send method_id
17
+ # A description of what the block does. When no block is given, this is set to the same value as +method_id+.
18
+ # @return [String]
19
+ attr_reader :description
20
+
21
+ # The block of arbitrary code to be run.
22
+ # @return [Proc]
23
+ attr_reader :blk
24
+
25
+ alias :block_description :description
26
+
27
+ # @private
28
+ def initialize(script, method_id_or_description, ignored_options = {}, &blk)
29
+ @script = script
30
+ if block_given?
31
+ @description = method_id_or_description
32
+ @blk = blk
33
+ else
34
+ @description = method_id_or_description
35
+ @method_id = method_id_or_description
36
+ end
37
+ end
38
+
39
+ # @private
40
+ def perform
41
+ DataMiner::Script.uniq do
42
+ if blk
43
+ model.instance_eval(&blk)
44
+ else
45
+ model.send method_id
46
+ end
47
+ end
48
+ nil
30
49
  end
31
50
  end
32
- nil
33
51
  end
34
52
  end
@@ -1,134 +1,167 @@
1
1
  require 'uri'
2
- # Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
3
- #
4
- # This class automatically detects if you have Bundler installed, and if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
5
- class DataMiner::Step::Tap
6
- DEFAULT_PORTS = {
7
- :mysql => 3306,
8
- :mysql2 => 3306,
9
- :postgres => 5432
10
- }
11
-
12
- DEFAULT_USERNAMES = {
13
- :mysql => 'root',
14
- :mysql2 => 'root',
15
- :postgres => ''
16
- }
17
-
18
- DEFAULT_PASSWORDS = {}
19
- DEFAULT_PASSWORDS.default = ''
20
-
21
- DEFAULT_HOSTS = {}
22
- DEFAULT_HOSTS.default = '127.0.0.1'
23
2
 
24
- attr_reader :script
25
- attr_reader :description
26
- attr_reader :source
27
- attr_reader :database_options
28
- attr_reader :source_table_name
3
+ class DataMiner
4
+ class Step
5
+ # A step that uses https://github.com/ricardochimal/taps to import table structure and data.
6
+ #
7
+ # Create these by calling +tap+ inside a +data_miner+ block.
8
+ #
9
+ # @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
10
+ # @see DataMiner::Script#tap
11
+ class Tap < Step
12
+ DEFAULT_PORTS = {
13
+ :mysql => 3306,
14
+ :mysql2 => 3306,
15
+ :postgres => 5432
16
+ }
17
+
18
+ DEFAULT_USERNAMES = {
19
+ :mysql => 'root',
20
+ :mysql2 => 'root',
21
+ :postgres => ''
22
+ }
23
+
24
+ DEFAULT_PASSWORDS = {}
25
+ DEFAULT_PASSWORDS.default = ''
26
+
27
+ DEFAULT_HOSTS = {}
28
+ DEFAULT_HOSTS.default = '127.0.0.1'
29
29
 
30
- def initialize(script, description, source, options = {})
31
- options = options.symbolize_keys
32
- @script = script
33
- @description = description
34
- @source = source
35
- @database_options = options.except(:source_table_name).reverse_merge(active_record_config)
36
- @source_table_name = options.fetch :source_table_name, model.table_name
37
- end
38
-
39
- def model
40
- script.model
41
- end
42
-
43
- def perform
44
- [ source_table_name, model.table_name ].each do |possible_obstacle|
45
- if connection.table_exists? possible_obstacle
46
- connection.drop_table possible_obstacle
30
+ # @private
31
+ attr_reader :script
32
+
33
+ # A description of the tapped data source.
34
+ # @return [String]
35
+ attr_reader :description
36
+
37
+ # The URL of the tapped data source, including username, password, domain, and port number.
38
+ # @return [String]
39
+ attr_reader :source
40
+
41
+ # Connection options that will be passed to the `taps pull` command. Defaults to the ActiveRecord connection config, if available.
42
+ # @return [Hash]
43
+ attr_reader :database_options
44
+
45
+ # Source table name. Defaults to the table name of the model.
46
+ # @return [String]
47
+ attr_reader :source_table_name
48
+
49
+ # @private
50
+ def initialize(script, description, source, options = {})
51
+ options = options.symbolize_keys
52
+ @script = script
53
+ @description = description
54
+ @source = source
55
+ @source_table_name = options.delete(:source_table_name) || model.table_name
56
+ @database_options = options.reverse_merge script.model.connection.instance_variable_get(:@config).symbolize_keys
57
+ end
58
+
59
+ # @private
60
+ def perform
61
+ [ source_table_name, model.table_name ].each do |possible_obstacle|
62
+ if connection.table_exists? possible_obstacle
63
+ connection.drop_table possible_obstacle
64
+ end
65
+ end
66
+ taps_pull
67
+ if needs_table_rename?
68
+ connection.rename_table source_table_name, model.table_name
69
+ end
70
+ nil
71
+ end
72
+
73
+ # @return [String] The name of the current database.
74
+ def database
75
+ unless database = database_options[:database]
76
+ raise ::ArgumentError, %{[data_miner] Can't infer database name from options or ActiveRecord config.}
77
+ end
78
+ database
79
+ end
80
+
81
+ # @return [String] The database username.
82
+ def username
83
+ database_options[:username] || DEFAULT_USERNAMES[adapter.to_sym]
47
84
  end
48
- end
49
- taps_pull
50
- if needs_table_rename?
51
- connection.rename_table source_table_name, model.table_name
52
- end
53
- nil
54
- end
55
-
56
- # sabshere 1/25/11 what if there were multiple connections
57
- # blockenspiel doesn't like to delegate this to #model
58
- def connection
59
- ::ActiveRecord::Base.connection
60
- end
61
-
62
- def needs_table_rename?
63
- source_table_name != model.table_name
64
- end
65
-
66
- def adapter
67
- case connection.adapter_name
68
- when /mysql2/i
69
- 'mysql2'
70
- when /mysql/i
71
- 'mysql'
72
- when /postgres/i
73
- 'postgres'
74
- when /sqlite/i
75
- 'sqlite'
76
- end
77
- end
78
85
 
79
- # never optional
80
- def database
81
- database_options[:database]
82
- end
83
-
84
- %w{ username password port host }.each do |x|
85
- module_eval %{
86
- def #{x}
87
- database_options[:#{x}] || DEFAULT_#{x.upcase}S[adapter.to_sym]
86
+ # @return [String] The database password.
87
+ def password
88
+ database_options[:password] || DEFAULT_PASSWORDS[adapter.to_sym]
88
89
  end
89
- }
90
- end
91
-
92
- # "user:pass"
93
- # "user"
94
- # nil
95
- def userinfo
96
- if username.present?
97
- [username, password].select(&:present?).join(':')
98
- end
99
- end
100
-
101
- def db_url
102
- case adapter
103
- when 'sqlite'
104
- "sqlite://#{database}"
105
- else
106
- ::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
107
- end
108
- end
109
90
 
110
- def active_record_config
111
- connection.instance_variable_get(:@config).symbolize_keys
112
- end
113
-
114
- def taps_pull
115
- args = [
116
- 'taps',
117
- 'pull',
118
- db_url,
119
- source,
120
- '--indexes-first',
121
- '--tables',
122
- source_table_name
123
- ]
124
-
125
- # https://github.com/carlhuda/bundler/issues/1579
126
- if defined?(::Bundler)
127
- ::Bundler.with_clean_env do
128
- ::Kernel.system args.join(' ')
91
+ # @return [Integer, nil] The database port number. Falls back to the adapter's entry in DEFAULT_PORTS, which has none for sqlite.
92
+ def port
93
+ database_options[:port] || DEFAULT_PORTS[adapter.to_sym]
94
+ end
95
+
96
+ # @return [String] The database hostname.
97
+ def host
98
+ database_options[:host] || DEFAULT_HOSTS[adapter.to_sym]
99
+ end
100
+
101
+ private
102
+
103
+ def connection
104
+ model.connection
105
+ end
106
+
107
+ def needs_table_rename?
108
+ source_table_name != model.table_name
109
+ end
110
+
111
+ def adapter
112
+ case connection.adapter_name
113
+ when /mysql2/i
114
+ 'mysql2'
115
+ when /mysql/i
116
+ 'mysql'
117
+ when /postgres/i
118
+ 'postgres'
119
+ when /sqlite/i
120
+ 'sqlite'
121
+ end
122
+ end
123
+
124
+ # "user:pass"
125
+ # "user"
126
+ # nil
127
+ def userinfo
128
+ if username.present?
129
+ [username, password].select(&:present?).join(':')
130
+ end
131
+ end
132
+
133
+ def db_url
134
+ case adapter
135
+ when 'sqlite'
136
+ "sqlite://#{database}"
137
+ else
138
+ ::URI::Generic.new(adapter, userinfo, host, port, nil, "/#{database}", nil, nil, nil).to_s
139
+ end
140
+ end
141
+
142
+ # Note that you probably shouldn't put taps into your Gemfile, because it depends on sequel and other gems that may not compile on Heroku (etc.)
143
+ #
144
+ # This method automatically detects if you have Bundler installed, and if so, executes the `taps` binary with a "clean" environment (i.e. one that will not pay attention to the fact that taps is not in your Gemfile)
145
+ def taps_pull
146
+ args = [
147
+ 'taps',
148
+ 'pull',
149
+ db_url,
150
+ source,
151
+ '--indexes-first',
152
+ '--tables',
153
+ source_table_name
154
+ ]
155
+
156
+ # https://github.com/carlhuda/bundler/issues/1579
157
+ if defined?(::Bundler)
158
+ ::Bundler.with_clean_env do
159
+ ::Kernel.system args.join(' ')
160
+ end
161
+ else
162
+ ::Kernel.system args.join(' ')
163
+ end
129
164
  end
130
- else
131
- ::Kernel.system args.join(' ')
132
165
  end
133
166
  end
134
167
  end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.0.1'
2
+ VERSION = '2.0.2'
3
3
  end
@@ -7,36 +7,72 @@ Earth.init :locality, :pet, :load_data_miner => true, :apply_schemas => true
7
7
 
8
8
  describe DataMiner do
9
9
  describe "when being run in a multi-threaded environment" do
10
+ before do
11
+ @old_thread_abort_on_exception = Thread.abort_on_exception
12
+ Thread.abort_on_exception = false
13
+ end
14
+
15
+ after do
16
+ Thread.abort_on_exception = @old_thread_abort_on_exception
17
+ end
18
+
10
19
  it "tries not to duplicate data" do
11
- begin
12
- old_thread_abort_on_exception = Thread.abort_on_exception
13
- Thread.abort_on_exception = false
14
- Breed.delete_all
15
- Breed.run_data_miner!
16
- reference_count = Breed.count
17
- Breed.delete_all
18
- threads = (0..2).map do |i|
19
- Thread.new do
20
- $stderr.write "Thread #{i} starting\n"
21
- Breed.run_data_miner!
22
- $stderr.write "Thread #{i} done\n"
23
- end
20
+ Breed.delete_all
21
+ Breed.run_data_miner!
22
+ reference_count = Breed.count
23
+ Breed.delete_all
24
+ threads = (0..2).map do |i|
25
+ Thread.new do
26
+ # $stderr.write "Thread #{i} starting\n"
27
+ Breed.run_data_miner!
28
+ # $stderr.write "Thread #{i} done\n"
29
+ end
30
+ end
31
+ exceptions = []
32
+ threads.each do |t|
33
+ begin
34
+ t.join
35
+ rescue
36
+ exceptions << $!
24
37
  end
25
- exceptions = []
26
- threads.each do |t|
27
- begin
28
- t.join
29
- rescue
30
- exceptions << $!
38
+ end
39
+ exceptions.length.must_equal 2
40
+ exceptions.each do |exception|
41
+ exception.must_be_kind_of LockMethod::Locked
42
+ end
43
+ Breed.count.must_equal reference_count
44
+ end
45
+
46
+ it "allows you to clear locks if necessary" do
47
+ threads = (0..2).map do |i|
48
+ Thread.new do
49
+ # $stderr.write "Thread #{i} starting\n"
50
+ case i
51
+ when 0
52
+ Breed.run_data_miner!
53
+ when 1
54
+ sleep 0.3
55
+ DataMiner::Run.clear_locks
56
+ Breed.run_data_miner!
57
+ when 2
58
+ # i will hit a lock!
59
+ sleep 0.6
60
+ Breed.run_data_miner!
31
61
  end
62
+ # $stderr.write "Thread #{i} done\n"
32
63
  end
33
- exceptions.length.must_equal 2
34
- exceptions.each do |exception|
35
- exception.must_be_kind_of LockMethod::Locked
64
+ end
65
+ exceptions = []
66
+ threads.each do |t|
67
+ begin
68
+ t.join
69
+ rescue
70
+ exceptions << $!
36
71
  end
37
- Breed.count.must_equal reference_count
38
- ensure
39
- Thread.abort_on_exception = old_thread_abort_on_exception
72
+ end
73
+ exceptions.length.must_equal 1
74
+ exceptions.each do |exception|
75
+ exception.must_be_kind_of LockMethod::Locked
40
76
  end
41
77
  end
42
78
  end