tango-etl 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35dc1dc259634c71c5549cd9f44c03632bd446d6
4
- data.tar.gz: 4fb85e9a4017c02480b6194686a16ee6040791f8
3
+ metadata.gz: 4612c4976d0f3ebed10de7d9a4fdbf8178ab8072
4
+ data.tar.gz: 47afa4fffe497491fbcecd49ee8b001b88a4f9b6
5
5
  SHA512:
6
- metadata.gz: c945d4126a292c411ddc7a741363737181a1722849c2b43a50d0526773fe7fd989abc56075174f9b63df2452cb5da0268e808274ff39f597cc6608e4e866410b
7
- data.tar.gz: a4ce34f890c40498e894ed8dcf1429daae1597e4bfd05671c9a12182dc589d1d24a1574b5c05b5b02aac029361b99044dc540a53381ae7543db191887414f0df
6
+ metadata.gz: b487d74cd0dc5e319ef758d52e969727828f240496c345b60731d78663e015c2f5891f2bc405d9da2a9670b0bd0a56688c413c4f183a8c639ad656dd319edcd1
7
+ data.tar.gz: e9edf5573f63b9723d3b89e91f99390fd38d865f92dddeebc66e496812a3a009515e0a3ef3cd29d0e1213d55ed9f50540540132a36ad3b2f7f67dadd0b27e3b3
data/CHANGELOG.md CHANGED
@@ -8,4 +8,7 @@
8
8
  - Added full API documentation
9
9
 
10
10
  ###0.1.2 - 30/06/2014
11
- - Update of gemfile
11
+ - Update of gemfile
12
+
13
+ ###0.1.3 - 29/07/2014
14
+ - Change way of registering tango application dependencies
data/Gemfile CHANGED
@@ -11,6 +11,7 @@ group :development do
11
11
  end
12
12
 
13
13
  group :test do
14
+ gem 'minitest', '~> 5.3.5'
14
15
  gem 'shoulda-context', '~> 1.2.1'
15
16
  gem "mocha", "~> 1.1.0"
16
17
  gem 'activerecord-nulldb-adapter', '~> 0.3.1'
data/lib/tango/app.rb CHANGED
@@ -34,6 +34,9 @@ module Tango
34
34
  @parser = parser || Nokogiri::HTML
35
35
  @db_locker = db_locker || DatabaseLocker.new( Multidb.databases )
36
36
  @logger = logger || Logger.new( STDOUT )
37
+
38
+ @models = []
39
+ @operators = []
37
40
 
38
41
  end
39
42
 
@@ -44,46 +47,40 @@ module Tango
44
47
  # Filter run after Tango execution
45
48
  def after
46
49
  end
47
-
48
- # Register new resource model
50
+
51
+ # Register a new model
49
52
  #
50
- # @param symbol [Symbol]
51
- # @param model [Class]
52
- def register_model( symbol, model )
53
-
54
- @models[symbol] = model
55
-
56
- # Truncate table of non persistent model
57
- unless model.persistent?
58
- ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
59
- end
60
-
53
+ # @param model [Class]
54
+ # @return [Array]
55
+ def register_model( model )
56
+ @models << model
61
57
  end
62
58
 
63
- # Register new resource operator
59
+ # Register a new resource operator
64
60
  #
65
- # @param symbol [Symbol]
66
- # @param operator [Class]
67
- def register_operator( symbol, operator )
68
-
69
- @operators[symbol] = operator
70
-
71
- # Register operator with resource cache system
72
- @cache.register( symbol ) do |resource|
73
- operator.load( resource )
74
- end
75
-
61
+ # @param operator [Tango::ETL::OperatorInterface]
62
+ # @return [Array]
63
+ def register_operator( operator )
64
+ @operators << operator
65
+ end
66
+
67
+ # Register a new handler with the dispatcher
68
+ #
69
+ # @param handler [Tango::ETL::HandlerInterface]
70
+ # @return [Array]
71
+ def register_handler( handler )
72
+ @dispatcher.register( handler )
76
73
  end
77
74
 
78
75
  # Run ETL process
79
76
  #
80
- # @param link_stack [Tango::LinkStack]
81
- # @param dispatcher [Tango::Etl::Dispatcher]
82
- # @param cache [Tango::Resources::Cache]
83
- # @param http_client [Object] Must implement get method
84
- # @param parser [Object] Must implement parse method
85
- # @param logger [Logger]
86
- # @return [Integer]
77
+ # @param link_stack [Tango::LinkStack]
78
+ # @param dispatcher [Tango::Etl::Dispatcher]
79
+ # @param cache [Tango::Resources::Cache]
80
+ # @param http_client [Object] Must implement get method
81
+ # @param parser [Object] Must implement parse method
82
+ # @param logger [Logger]
83
+ # @return [Nil]
87
84
  def run
88
85
 
89
86
  # Save beginning time
@@ -93,13 +90,17 @@ module Tango
93
90
  @logger.info "Target: #{@link_stack.host}."
94
91
 
95
92
  # Use next unlocked database
96
- Multidb.use( @db_locker.unlocked )
93
+ pick_database( @db_locker.unlocked )
97
94
  @logger.info "Using database '#{@db_locker.unlocked}'."
95
+
96
+ @logger.info "Truncating non persistent models ..."
97
+ truncate_tables( non_persistent_models )
98
98
 
99
- # Run before filter
99
+ # Load cache for persistent models
100
100
  @logger.info "Loading cache ..."
101
- load_cache
102
-
101
+ setup_cache( @operators )
102
+ load_cache( persistent_models )
103
+
103
104
  # Run before filter
104
105
  @logger.info "Running before callback ..."
105
106
  before
@@ -124,13 +125,14 @@ module Tango
124
125
  begin
125
126
  response = @http_client.get( @link_stack.host + link )
126
127
  rescue StandardError => e
127
- @logger.error "Could not download contents of #{@link_stack.host + link} link."; @logger.error e.message
128
+ @logger.error "Could not download contents of #{@link_stack.host + link} link."
129
+ @logger.error e.message
128
130
  next
129
131
  end
130
132
 
131
133
  # Continue only when response has code 200 or 201
132
134
  if ! [ 200, 201 ].include?( response.code )
133
- @logger.error "Response code for link #{link} is #{response.code}. Only code 200 is accepted."
135
+ @logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
134
136
  next
135
137
  end
136
138
 
@@ -162,14 +164,14 @@ module Tango
162
164
 
163
165
  # Release buffers
164
166
  @logger.info "Releasing buffers ..."
165
- @cache.buffer.release_all()
167
+ release_buffer( @cache.buffer )
166
168
 
167
169
  # Run after filter
168
170
  @logger.info "Running after callback ..."
169
171
  after
170
172
 
171
173
  # Lock database used in this Tango iteration
172
- @db_locker.lock( @db_locker.unlocked )
174
+ lock_database( @db_locker.unlocked )
173
175
 
174
176
  # Get time of script execution ending
175
177
  end_time = Time.now
@@ -183,15 +185,76 @@ module Tango
183
185
  end
184
186
 
185
187
  private
188
+
189
+ # Pick database ( e.g. to be used in this Tango run )
190
+ #
191
+ # @param database [String|Symbol]
192
+ # @return [String|Symbol]
193
+ def pick_database( database )
194
+ Multidb.use( database )
195
+ end
196
+
197
+ # Lock database ( e.g. used in this Tango run )
198
+ #
199
+ # @param database [String|Symbol]
200
+ # @return [String|Symbol]
201
+ def lock_database( database )
202
+ @db_locker.lock( database )
203
+ end
204
+
205
+ # Fetch list of persistent models registered with application
206
+ #
207
+ # @return [Array]
208
+ def persistent_models
209
+ @models.select { |m| m.persistent? }
210
+ end
211
+
212
+ # Fetch list of non-persistent models registered with application
213
+ #
214
+ # @return [Array]
215
+ def non_persistent_models
216
+ @models.reject { |m| m.persistent? }
217
+ end
218
+
219
+ # Truncate tables of given models
220
+ #
221
+ # @return [Nil]
222
+ def truncate_tables( models )
223
+ models.each do |model|
224
+ ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
225
+ end
226
+ end
186
227
 
187
- def load_cache
188
-
189
- @models.each do |symbol, model|
190
- model.all.each do |m|
191
- @cache.set( symbol, m )
192
- end if model.persistent?
228
+ # Register resource operators with the cache
229
+ #
230
+ # @param operators [Array]
231
+ # @return [Array]
232
+ def setup_cache( operators )
233
+ operators.each do |operator|
234
+ @cache.register( Tango::Kernel.symbolize( operator ) ) do |resource|
235
+ operator.load( resource )
236
+ end
193
237
  end
194
-
238
+ end
239
+
240
+ # Load cache for given models
241
+ #
242
+ # @return [Nil]
243
+ def load_cache( models )
244
+ models.each do |model|
245
+ symbol = Tango::Kernel.symbolize( model )
246
+ model.all.each do |record|
247
+ @cache.set( symbol, record )
248
+ end
249
+ end
250
+ end
251
+
252
+ # Release given buffer
253
+ #
254
+ # @param buffer [Tango::Resource::Buffer]
255
+ # @return [Nil]
256
+ def release_buffer( buffer )
257
+ buffer.release_all
195
258
  end
196
259
 
197
260
  end
@@ -11,7 +11,10 @@ module Tango
11
11
 
12
12
  # Constructor of Tango's handler
13
13
  #
14
- # @param url [String]
14
+ # @param url [String]
15
+ # @param document [Nokogiri::XML]
16
+ # @param cache [Tango::Resource::Cache]
17
+ # @return [Tango::ETL::HandlerInterface]
15
18
  def initialize( url, document, cache = nil )
16
19
  @url = url
17
20
  @document = document
data/lib/tango/kernel.rb CHANGED
@@ -1,25 +1,19 @@
1
1
  module Tango
2
2
  module Kernel
3
3
 
4
- # Convert file path to class name
5
- # @param file_path [String]
6
- # @return [String]
7
- def self.classify( file_path )
8
- File.basename( file_path, ".*" ).split( "_" ).map { |w| w.capitalize }.join
9
- end
10
4
 
11
- # Load class from a file
12
- #
13
- # @param file [String]
14
- # @param module_prefix [String]
15
- # @return [Class]
16
- def self.load( file, module_prefix = "" )
17
-
18
- require file
19
-
20
- class_name = Kernel.classify( file )
21
- Kernel.const_get( "#{module_prefix}#{class_name}" )
22
-
5
+ # Fetch list of first level classes in module
6
+ #
7
+ # @param mod [Module]
8
+ # @return [Array]
9
+ def self.module_classes( mod )
10
+
11
+ mod.constants.map { |s|
12
+ mod.const_get( s )
13
+ }.select { |c|
14
+ Class === c
15
+ }
16
+
23
17
  end
24
18
 
25
19
  # Obtain symbol of a class
@@ -42,12 +42,11 @@ module Tango
42
42
  def fill( type, resource )
43
43
 
44
44
  raise ArgumentError, "Trying to fill object with unregistered type" unless @resources.keys.include?( type )
45
-
45
+
46
46
  # Append resource to the buffer
47
47
  @resources[type] << resource
48
48
  # Release the buffer if buffer size exceeded
49
- release( type ) if @resources[type].count >= @size
50
-
49
+ release( type ) if @resources[type].count >= @size
51
50
  end
52
51
 
53
52
  # Release all registered buffers
@@ -45,6 +45,7 @@ module Tango
45
45
  # Get resource from cache
46
46
  cached_resource = get( type, resource )
47
47
 
48
+
48
49
  unless cached_resource
49
50
 
50
51
  raise ArgumentError, "No resource callback given" unless block_given?
data/lib/tango/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Tango
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
File without changes
File without changes
@@ -0,0 +1,25 @@
1
+ module Foo
2
+
3
+ class A
4
+ end
5
+
6
+ class B
7
+ end
8
+
9
+ class C
10
+ end
11
+
12
+ module Bar
13
+
14
+ class A
15
+ end
16
+
17
+ class B
18
+ end
19
+
20
+ class C
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -1,4 +1,4 @@
1
- require_relative '../../test_helper'
1
+ require_relative '../../support/helper'
2
2
  require_relative '../../support/lib/simple_handler.rb'
3
3
 
4
4
  require 'tango/etl/dispatcher'
@@ -1,4 +1,4 @@
1
- require_relative '../../test_helper'
1
+ require_relative '../../support/helper'
2
2
 
3
3
  require 'tango/resource/buffer'
4
4
 
@@ -1,5 +1,5 @@
1
- require_relative '../../test_helper'
2
- require_relative '../../support/lib/simple_buffer.rb'
1
+ require_relative '../../support/helper'
2
+ require_relative '../../support/lib/simple_buffer'
3
3
 
4
4
  require 'tango/resource/cache'
5
5
 
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper.rb'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'active_record'
4
4
  require 'activerecord-nulldb-adapter'
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper.rb'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'tango/database_locker'
4
4
 
@@ -1,4 +1,4 @@
1
- require_relative '../test_helper'
1
+ require_relative '../support/helper'
2
2
 
3
3
  require 'tango/kernel'
4
4
  require 'tango/abstract_model.rb'
@@ -8,21 +8,17 @@ class TestKernel < Test::Unit::TestCase
8
8
  context "a kernel" do
9
9
 
10
10
  setup do
11
+
11
12
  @lib_path = File.join( File.expand_path( '../../', __FILE__ ), 'support', 'lib' )
13
+
14
+ Dir.glob( File.join( @lib_path, '*.rb' ) ) do |f|
15
+ require f
16
+ end
17
+
12
18
  end
13
19
 
14
- should "transform file path to name of a class" do
15
- assert_equal "FooBar", Tango::Kernel.classify( File.join( @lib_path, 'foo_bar.rb' ) )
16
- end
17
-
18
- should "load a class from a file" do
19
- klass = Tango::Kernel.load( File.join( @lib_path, 'simple_buffer.rb' ) )
20
- assert_equal SimpleBuffer, klass
21
- end
22
-
23
- should "load a class in a module from a file" do
24
- klass = Tango::Kernel.load( File.join( @lib_path, 'user.rb' ), 'Model::' )
25
- assert_equal Model::User, klass
20
+ should "load classes in module" do
21
+ assert_equal [Foo::A, Foo::B, Foo::C], Tango::Kernel.module_classes( Foo )
26
22
  end
27
23
 
28
24
  should "obtain symbol from a class" do
@@ -1,5 +1,4 @@
1
- require_relative '../test_helper.rb'
2
-
1
+ require_relative '../support/helper'
3
2
  require 'tango/link_stack'
4
3
 
5
4
  class TestLinkStack < Test::Unit::TestCase
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tango-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Komorowski
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -164,12 +164,13 @@ files:
164
164
  - lib/tango/resource/buffer.rb
165
165
  - lib/tango/resource/cache.rb
166
166
  - lib/tango/version.rb
167
- - tango.gemspec
167
+ - tango-etl.gemspec
168
168
  - test/support/db/schema.rb
169
+ - test/support/helper.rb
170
+ - test/support/lib/classes.rb
169
171
  - test/support/lib/simple_buffer.rb
170
172
  - test/support/lib/simple_handler.rb
171
173
  - test/support/lib/user.rb
172
- - test/test_helper.rb
173
174
  - test/unit/etl/test_dispatcher.rb
174
175
  - test/unit/resource/test_buffer.rb
175
176
  - test/unit/resource/test_cache.rb
@@ -201,6 +202,5 @@ rubygems_version: 2.2.2
201
202
  signing_key:
202
203
  specification_version: 4
203
204
  summary: ETL framework
204
- test_files:
205
- - test/test_helper.rb
205
+ test_files: []
206
206
  has_rdoc: