tango-etl 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35dc1dc259634c71c5549cd9f44c03632bd446d6
4
- data.tar.gz: 4fb85e9a4017c02480b6194686a16ee6040791f8
3
+ metadata.gz: 4612c4976d0f3ebed10de7d9a4fdbf8178ab8072
4
+ data.tar.gz: 47afa4fffe497491fbcecd49ee8b001b88a4f9b6
5
5
  SHA512:
6
- metadata.gz: c945d4126a292c411ddc7a741363737181a1722849c2b43a50d0526773fe7fd989abc56075174f9b63df2452cb5da0268e808274ff39f597cc6608e4e866410b
7
- data.tar.gz: a4ce34f890c40498e894ed8dcf1429daae1597e4bfd05671c9a12182dc589d1d24a1574b5c05b5b02aac029361b99044dc540a53381ae7543db191887414f0df
6
+ metadata.gz: b487d74cd0dc5e319ef758d52e969727828f240496c345b60731d78663e015c2f5891f2bc405d9da2a9670b0bd0a56688c413c4f183a8c639ad656dd319edcd1
7
+ data.tar.gz: e9edf5573f63b9723d3b89e91f99390fd38d865f92dddeebc66e496812a3a009515e0a3ef3cd29d0e1213d55ed9f50540540132a36ad3b2f7f67dadd0b27e3b3
data/CHANGELOG.md CHANGED
@@ -8,4 +8,7 @@
8
8
  - Added full API documentation
9
9
 
10
10
  ###0.1.2 - 30/06/2014
11
- - Update of gemfile
11
+ - Update of gemfile
12
+
13
+ ###0.1.3 - 29/07/2014
14
+ - Changed the way Tango application dependencies are registered
data/Gemfile CHANGED
@@ -11,6 +11,7 @@ group :development do
11
11
  end
12
12
 
13
13
  group :test do
14
+ gem 'minitest', '~> 5.3.5'
14
15
  gem 'shoulda-context', '~> 1.2.1'
15
16
  gem "mocha", "~> 1.1.0"
16
17
  gem 'activerecord-nulldb-adapter', '~> 0.3.1'
data/lib/tango/app.rb CHANGED
@@ -34,6 +34,9 @@ module Tango
34
34
  @parser = parser || Nokogiri::HTML
35
35
  @db_locker = db_locker || DatabaseLocker.new( Multidb.databases )
36
36
  @logger = logger || Logger.new( STDOUT )
37
+
38
+ @models = []
39
+ @operators = []
37
40
 
38
41
  end
39
42
 
@@ -44,46 +47,40 @@ module Tango
44
47
  # Filter run after Tango execution
45
48
  def after
46
49
  end
47
-
48
- # Register new resource model
50
+
51
+ # Register a new model
49
52
  #
50
- # @param symbol [Symbol]
51
- # @param model [Class]
52
- def register_model( symbol, model )
53
-
54
- @models[symbol] = model
55
-
56
- # Truncate table of non persistent model
57
- unless model.persistent?
58
- ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
59
- end
60
-
53
+ # @param model [Class]
54
+ # @return [Array]
55
+ def register_model( model )
56
+ @models << model
61
57
  end
62
58
 
63
- # Register new resource operator
59
+ # Register a new resource operator
64
60
  #
65
- # @param symbol [Symbol]
66
- # @param operator [Class]
67
- def register_operator( symbol, operator )
68
-
69
- @operators[symbol] = operator
70
-
71
- # Register operator with resource cache system
72
- @cache.register( symbol ) do |resource|
73
- operator.load( resource )
74
- end
75
-
61
+ # @param operator [Tango::ETL::OperatorInterface]
62
+ # @return [Array]
63
+ def register_operator( operator )
64
+ @operators << operator
65
+ end
66
+
67
+ # Register a new handler with the dispatcher
68
+ #
69
+ # @param handler [Tango::ETL::HandlerInterface]
70
+ # @return [Array]
71
+ def register_handler( handler )
72
+ @dispatcher.register( handler )
76
73
  end
77
74
 
78
75
  # Run ETL process
79
76
  #
80
- # @param link_stack [Tango::LinkStack]
81
- # @param dispatcher [Tango::Etl::Dispatcher]
82
- # @param cache [Tango::Resources::Cache]
83
- # @param http_client [Object] Must implement get method
84
- # @param parser [Object] Must implement parse method
85
- # @param logger [Logger]
86
- # @return [Integer]
77
+ # @param link_stack [Tango::LinkStack]
78
+ # @param dispatcher [Tango::Etl::Dispatcher]
79
+ # @param cache [Tango::Resources::Cache]
80
+ # @param http_client [Object] Must implement get method
81
+ # @param parser [Object] Must implement parse method
82
+ # @param logger [Logger]
83
+ # @return [Nil]
87
84
  def run
88
85
 
89
86
  # Save beginning time
@@ -93,13 +90,17 @@ module Tango
93
90
  @logger.info "Target: #{@link_stack.host}."
94
91
 
95
92
  # Use next unlocked database
96
- Multidb.use( @db_locker.unlocked )
93
+ pick_database( @db_locker.unlocked )
97
94
  @logger.info "Using database '#{@db_locker.unlocked}'."
95
+
96
+ @logger.info "Truncating non persistent models ..."
97
+ truncate_tables( non_persistent_models )
98
98
 
99
- # Run before filter
99
+ # Load cache for persistent models
100
100
  @logger.info "Loading cache ..."
101
- load_cache
102
-
101
+ setup_cache( @operators )
102
+ load_cache( persistent_models )
103
+
103
104
  # Run before filter
104
105
  @logger.info "Running before callback ..."
105
106
  before
@@ -124,13 +125,14 @@ module Tango
124
125
  begin
125
126
  response = @http_client.get( @link_stack.host + link )
126
127
  rescue StandardError => e
127
- @logger.error "Could not download contents of #{@link_stack.host + link} link."; @logger.error e.message
128
+ @logger.error "Could not download contents of #{@link_stack.host + link} link."
129
+ @logger.error e.message
128
130
  next
129
131
  end
130
132
 
131
133
  # Continue only when response has code 200 or 201
132
134
  if ! [ 200, 201 ].include?( response.code )
133
- @logger.error "Response code for link #{link} is #{response.code}. Only code 200 is accepted."
135
+ @logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
134
136
  next
135
137
  end
136
138
 
@@ -162,14 +164,14 @@ module Tango
162
164
 
163
165
  # Release buffers
164
166
  @logger.info "Releasing buffers ..."
165
- @cache.buffer.release_all()
167
+ release_buffer( @cache.buffer )
166
168
 
167
169
  # Run after filter
168
170
  @logger.info "Running after callback ..."
169
171
  after
170
172
 
171
173
  # Lock database used in this Tango iteration
172
- @db_locker.lock( @db_locker.unlocked )
174
+ lock_database( @db_locker.unlocked )
173
175
 
174
176
  # Get time of script execution ending
175
177
  end_time = Time.now
@@ -183,15 +185,76 @@ module Tango
183
185
  end
184
186
 
185
187
  private
188
+
189
+ # Pick database ( e.g. to be used in this Tango run )
190
+ #
191
+ # @param database [String|Symbol]
192
+ # @return [String|Symbol]
193
+ def pick_database( database )
194
+ Multidb.use( database )
195
+ end
196
+
197
+ # Lock database ( e.g. used in this Tango run )
198
+ #
199
+ # @param database [String|Symbol]
200
+ # @return [String|Symbol]
201
+ def lock_database( database )
202
+ @db_locker.lock( database )
203
+ end
204
+
205
+ # Fetch list of persistent models registered with the application
206
+ #
207
+ # @return [Array]
208
+ def persistent_models
209
+ @models.select { |m| m.persistent? }
210
+ end
211
+
212
+ # Fetch list of non-persistent models registered with the application
213
+ #
214
+ # @return [Array]
215
+ def non_persistent_models
216
+ @models.reject { |m| m.persistent? }
217
+ end
218
+
219
+ # Truncate tables of the given models
220
+ #
221
+ # @return [Nil]
222
+ def truncate_tables( models )
223
+ models.each do |model|
224
+ ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
225
+ end
226
+ end
186
227
 
187
- def load_cache
188
-
189
- @models.each do |symbol, model|
190
- model.all.each do |m|
191
- @cache.set( symbol, m )
192
- end if model.persistent?
228
+ # Register resource operators with the cache
229
+ #
230
+ # @param operators [Array]
231
+ # @return [Array]
232
+ def setup_cache( operators )
233
+ operators.each do |operator|
234
+ @cache.register( Tango::Kernel.symbolize( operator ) ) do |resource|
235
+ operator.load( resource )
236
+ end
193
237
  end
194
-
238
+ end
239
+
240
+ # Load cache for given models
241
+ #
242
+ # @return [Nil]
243
+ def load_cache( models )
244
+ models.each do |model|
245
+ symbol = Tango::Kernel.symbolize( model )
246
+ model.all.each do |record|
247
+ @cache.set( symbol, record )
248
+ end
249
+ end
250
+ end
251
+
252
+ # Release given buffer
253
+ #
254
+ # @param buffer [Tango::Resource::Buffer]
255
+ # @return [Nil]
256
+ def release_buffer( buffer )
257
+ buffer.release_all
195
258
  end
196
259
 
197
260
  end
@@ -11,7 +11,10 @@ module Tango
11
11
 
12
12
  # Constructor of Tango's handler
13
13
  #
14
- # @param url [String]
14
+ # @param url [String]
15
+ # @param document [Nokogiri::XML]
16
+ # @param cache [Tango::Resource::Cache]
17
+ # @return [Tango::ETL::HandlerInterface]
15
18
  def initialize( url, document, cache = nil )
16
19
  @url = url
17
20
  @document = document
data/lib/tango/kernel.rb CHANGED
@@ -1,25 +1,19 @@
1
1
  module Tango
2
2
  module Kernel
3
3
 
4
- # Convert file path to class name
5
- # @param file_path [String]
6
- # @return [String]
7
- def self.classify( file_path )
8
- File.basename( file_path, ".*" ).split( "_" ).map { |w| w.capitalize }.join
9
- end
10
4
 
11
- # Load class from a file
12
- #
13
- # @param file [String]
14
- # @param module_prefix [String]
15
- # @return [Class]
16
- def self.load( file, module_prefix = "" )
17
-
18
- require file
19
-
20
- class_name = Kernel.classify( file )
21
- Kernel.const_get( "#{module_prefix}#{class_name}" )
22
-
5
+ # Fetch list of first level classes in module
6
+ #
7
+ # @param mod [Module]
8
+ # @return [Array]
9
+ def self.module_classes( mod )
10
+
11
+ mod.constants.map { |s|
12
+ mod.const_get( s )
13
+ }.select { |c|
14
+ Class === c
15
+ }
16
+
23
17
  end
24
18
 
25
19
  # Obtain symbol of a class
@@ -42,12 +42,11 @@ module Tango
42
42
  def fill( type, resource )
43
43
 
44
44
  raise ArgumentError, "Trying to fill object with unregistered type" unless @resources.keys.include?( type )
45
-
45
+
46
46
  # Append resource to the buffer
47
47
  @resources[type] << resource
48
48
  # Release the buffer if buffer size exceeded
49
- release( type ) if @resources[type].count >= @size
50
-
49
+ release( type ) if @resources[type].count >= @size
51
50
  end
52
51
 
53
52
  # Release all registered buffers
@@ -45,6 +45,7 @@ module Tango
45
45
  # Get resource from cache
46
46
  cached_resource = get( type, resource )
47
47
 
48
+
48
49
  unless cached_resource
49
50
 
50
51
  raise ArgumentError, "No resource callback given" unless block_given?
data/lib/tango/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Tango
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
File without changes
File without changes
@@ -0,0 +1,25 @@
1
+ module Foo
2
+
3
+ class A
4
+ end
5
+
6
+ class B
7
+ end
8
+
9
+ class C
10
+ end
11
+
12
+ module Bar
13
+
14
+ class A
15
+ end
16
+
17
+ class B
18
+ end
19
+
20
+ class C
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -1,4 +1,4 @@
1
- require_relative '../../test_helper'
1
+ require_relative '../../support/helper'
2
2
  require_relative '../../support/lib/simple_handler.rb'
3
3
 
4
4
  require 'tango/etl/dispatcher'
@@ -1,4 +1,4 @@
1
- require_relative '../../test_helper'
1
+ require_relative '../../support/helper'
2
2
 
3
3
  require 'tango/resource/buffer'
4
4
 
@@ -1,5 +1,5 @@
1
- require_relative '../../test_helper'
2
- require_relative '../../support/lib/simple_buffer.rb'
1
+ require_relative '../../support/helper'
2
+ require_relative '../../support/lib/simple_buffer'
3
3
 
4
4
  require 'tango/resource/cache'
5
5
 
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper.rb'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'active_record'
4
4
  require 'activerecord-nulldb-adapter'
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper.rb'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'tango/database_locker'
4
4
 
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'tango/kernel'
4
4
  require 'tango/abstract_model.rb'
@@ -8,21 +8,17 @@ class TestKernel < Test::Unit::TestCase
8
8
  context "a kernel" do
9
9
 
10
10
  setup do
11
+
11
12
  @lib_path = File.join( File.expand_path( '../../', __FILE__ ), 'support', 'lib' )
13
+
14
+ Dir.glob( File.join( @lib_path, '*.rb' ) ) do |f|
15
+ require f
16
+ end
17
+
12
18
  end
13
19
 
14
- should "transform file path to name of a class" do
15
- assert_equal "FooBar", Tango::Kernel.classify( File.join( @lib_path, 'foo_bar.rb' ) )
16
- end
17
-
18
- should "load a class from a file" do
19
- klass = Tango::Kernel.load( File.join( @lib_path, 'simple_buffer.rb' ) )
20
- assert_equal SimpleBuffer, klass
21
- end
22
-
23
- should "load a class in a module from a file" do
24
- klass = Tango::Kernel.load( File.join( @lib_path, 'user.rb' ), 'Model::' )
25
- assert_equal Model::User, klass
20
+ should "load classes in module" do
21
+ assert_equal [Foo::A, Foo::B, Foo::C], Tango::Kernel.module_classes( Foo )
26
22
  end
27
23
 
28
24
  should "obtain symbol from a class" do
@@ -1,5 +1,4 @@
1
- require_relative '../test_helper.rb'
2
-
1
+ require_relative '../support/helper'
3
2
  require 'tango/link_stack'
4
3
 
5
4
  class TestLinkStack < Test::Unit::TestCase
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tango-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Komorowski
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -164,12 +164,13 @@ files:
164
164
  - lib/tango/resource/buffer.rb
165
165
  - lib/tango/resource/cache.rb
166
166
  - lib/tango/version.rb
167
- - tango.gemspec
167
+ - tango-etl.gemspec
168
168
  - test/support/db/schema.rb
169
+ - test/support/helper.rb
170
+ - test/support/lib/classes.rb
169
171
  - test/support/lib/simple_buffer.rb
170
172
  - test/support/lib/simple_handler.rb
171
173
  - test/support/lib/user.rb
172
- - test/test_helper.rb
173
174
  - test/unit/etl/test_dispatcher.rb
174
175
  - test/unit/resource/test_buffer.rb
175
176
  - test/unit/resource/test_cache.rb
@@ -201,6 +202,5 @@ rubygems_version: 2.2.2
201
202
  signing_key:
202
203
  specification_version: 4
203
204
  summary: ETL framework
204
- test_files:
205
- - test/test_helper.rb
205
+ test_files: []
206
206
  has_rdoc: