tango-etl 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/Gemfile +1 -0
- data/lib/tango/app.rb +110 -47
- data/lib/tango/etl/handler_interface.rb +4 -1
- data/lib/tango/kernel.rb +12 -18
- data/lib/tango/resource/buffer.rb +2 -3
- data/lib/tango/resource/cache.rb +1 -0
- data/lib/tango/version.rb +1 -1
- data/{tango.gemspec → tango-etl.gemspec} +0 -0
- data/test/{test_helper.rb → support/helper.rb} +0 -0
- data/test/support/lib/classes.rb +25 -0
- data/test/unit/etl/test_dispatcher.rb +1 -1
- data/test/unit/resource/test_buffer.rb +1 -1
- data/test/unit/resource/test_cache.rb +2 -2
- data/test/unit/test_abstract_model.rb +1 -1
- data/test/unit/test_database_locker.rb +1 -1
- data/test/unit/test_kernel.rb +9 -13
- data/test/unit/test_link_stack.rb +1 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4612c4976d0f3ebed10de7d9a4fdbf8178ab8072
|
4
|
+
data.tar.gz: 47afa4fffe497491fbcecd49ee8b001b88a4f9b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b487d74cd0dc5e319ef758d52e969727828f240496c345b60731d78663e015c2f5891f2bc405d9da2a9670b0bd0a56688c413c4f183a8c639ad656dd319edcd1
|
7
|
+
data.tar.gz: e9edf5573f63b9723d3b89e91f99390fd38d865f92dddeebc66e496812a3a009515e0a3ef3cd29d0e1213d55ed9f50540540132a36ad3b2f7f67dadd0b27e3b3
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/lib/tango/app.rb
CHANGED
@@ -34,6 +34,9 @@ module Tango
|
|
34
34
|
@parser = parser || Nokogiri::HTML
|
35
35
|
@db_locker = db_locker || DatabaseLocker.new( Multidb.databases )
|
36
36
|
@logger = logger || Logger.new( STDOUT )
|
37
|
+
|
38
|
+
@models = []
|
39
|
+
@operators = []
|
37
40
|
|
38
41
|
end
|
39
42
|
|
@@ -44,46 +47,40 @@ module Tango
|
|
44
47
|
# Filter run after Tango execution
|
45
48
|
def after
|
46
49
|
end
|
47
|
-
|
48
|
-
# Register new
|
50
|
+
|
51
|
+
# Register a new model
|
49
52
|
#
|
50
|
-
# @param
|
51
|
-
# @
|
52
|
-
def register_model(
|
53
|
-
|
54
|
-
@models[symbol] = model
|
55
|
-
|
56
|
-
# Truncate table of non persistent model
|
57
|
-
unless model.persistent?
|
58
|
-
ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
|
59
|
-
end
|
60
|
-
|
53
|
+
# @param model [Class] Model class; must respond to persistent?, table_name and all
|
54
|
+
# @return [Array]
|
55
|
+
def register_model( model )
|
56
|
+
@models << model
|
61
57
|
end
|
62
58
|
|
63
|
-
# Register new resource operator
|
59
|
+
# Register a new resource operator
|
64
60
|
#
|
65
|
-
# @param
|
66
|
-
# @
|
67
|
-
def register_operator(
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
61
|
+
# @param operator [Tango::ETL::OperatorInterface]
|
62
|
+
# @return [Array]
|
63
|
+
def register_operator( operator )
|
64
|
+
@operators << operator
|
65
|
+
end
|
66
|
+
|
67
|
+
# Register a new handler with the dispatcher
|
68
|
+
#
|
69
|
+
# @param handler [Tango::ETL::HandlerInterface]
|
70
|
+
# @return [Array]
|
71
|
+
def register_handler( handler )
|
72
|
+
@dispatcher.register( handler )
|
76
73
|
end
|
77
74
|
|
78
75
|
# Run ETL process
|
79
76
|
#
|
80
|
-
# @param
|
81
|
-
# @param
|
82
|
-
# @param
|
83
|
-
# @param
|
84
|
-
# @param
|
85
|
-
# @param
|
86
|
-
# @return
|
77
|
+
# @param link_stack [Tango::LinkStack]
|
78
|
+
# @param dispatcher [Tango::ETL::Dispatcher]
|
79
|
+
# @param cache [Tango::Resource::Cache]
|
80
|
+
# @param http_client [Object] Must implement get method
|
81
|
+
# @param parser [Object] Must implement parse method
|
82
|
+
# @param logger [Logger]
|
83
|
+
# @return [Nil]
|
87
84
|
def run
|
88
85
|
|
89
86
|
# Save beginning time
|
@@ -93,13 +90,17 @@ module Tango
|
|
93
90
|
@logger.info "Target: #{@link_stack.host}."
|
94
91
|
|
95
92
|
# Use next unlocked database
|
96
|
-
|
93
|
+
pick_database( @db_locker.unlocked )
|
97
94
|
@logger.info "Using database '#{@db_locker.unlocked}'."
|
95
|
+
|
96
|
+
@logger.info "Truncating non persistent models ..."
|
97
|
+
truncate_tables( non_persistent_models )
|
98
98
|
|
99
|
-
#
|
99
|
+
# Load cache for persistent models
|
100
100
|
@logger.info "Loading cache ..."
|
101
|
-
|
102
|
-
|
101
|
+
setup_cache( @operators )
|
102
|
+
load_cache( persistent_models )
|
103
|
+
|
103
104
|
# Run before filter
|
104
105
|
@logger.info "Running before callback ..."
|
105
106
|
before
|
@@ -124,13 +125,14 @@ module Tango
|
|
124
125
|
begin
|
125
126
|
response = @http_client.get( @link_stack.host + link )
|
126
127
|
rescue StandardError => e
|
127
|
-
@logger.error "Could not download contents of #{@link_stack.host + link} link."
|
128
|
+
@logger.error "Could not download contents of #{@link_stack.host + link} link."
|
129
|
+
@logger.error e.message
|
128
130
|
next
|
129
131
|
end
|
130
132
|
|
131
133
|
# Continue only when response has code 200 or 201
|
132
134
|
if ! [ 200, 201 ].include?( response.code )
|
133
|
-
@logger.error "Response code for link #{link} is #{response.code}. Only
|
135
|
+
@logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
|
134
136
|
next
|
135
137
|
end
|
136
138
|
|
@@ -162,14 +164,14 @@ module Tango
|
|
162
164
|
|
163
165
|
# Release buffers
|
164
166
|
@logger.info "Releasing buffers ..."
|
165
|
-
@cache.buffer
|
167
|
+
release_buffer( @cache.buffer )
|
166
168
|
|
167
169
|
# Run after filter
|
168
170
|
@logger.info "Running after callback ..."
|
169
171
|
after
|
170
172
|
|
171
173
|
# Lock database used in this Tango iteration
|
172
|
-
|
174
|
+
lock_database( @db_locker.unlocked )
|
173
175
|
|
174
176
|
# Get time of script execution ending
|
175
177
|
end_time = Time.now
|
@@ -183,15 +185,76 @@ module Tango
|
|
183
185
|
end
|
184
186
|
|
185
187
|
private
|
188
|
+
|
189
|
+
# Pick database ( e.g. to be used in this Tango run )
|
190
|
+
#
|
191
|
+
# @param database [String|Symbol]
|
192
|
+
# @return [String|Symbol]
|
193
|
+
def pick_database( database )
|
194
|
+
Multidb.use( database )
|
195
|
+
end
|
196
|
+
|
197
|
+
# Lock database ( e.g. used in this Tango run )
|
198
|
+
#
|
199
|
+
# @param database [String|Symbol]
|
200
|
+
# @return [String|Symbol]
|
201
|
+
def lock_database( database )
|
202
|
+
@db_locker.lock( database )
|
203
|
+
end
|
204
|
+
|
205
|
+
# Fetch the list of persistent models registered with the application
|
206
|
+
#
|
207
|
+
# @return [Array]
|
208
|
+
def persistent_models
|
209
|
+
@models.select { |m| m.persistent? }
|
210
|
+
end
|
211
|
+
|
212
|
+
# Fetch the list of non-persistent models registered with the application
|
213
|
+
#
|
214
|
+
# @return [Array]
|
215
|
+
def non_persistent_models
|
216
|
+
@models.reject { |m| m.persistent? }
|
217
|
+
end
|
218
|
+
|
219
|
+
# Truncate the tables of the given models
|
220
|
+
#
|
221
|
+
# @return [Nil]
|
222
|
+
def truncate_tables( models )
|
223
|
+
models.each do |model|
|
224
|
+
ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
|
225
|
+
end
|
226
|
+
end
|
186
227
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
228
|
+
# Register cache with resource operators
|
229
|
+
#
|
230
|
+
# @param operators [Array]
|
231
|
+
# @return [Array]
|
232
|
+
def setup_cache( operators )
|
233
|
+
operators.each do |operator|
|
234
|
+
@cache.register( Tango::Kernel.symbolize( operator ) ) do |resource|
|
235
|
+
operator.load( resource )
|
236
|
+
end
|
193
237
|
end
|
194
|
-
|
238
|
+
end
|
239
|
+
|
240
|
+
# Load cache for given models
|
241
|
+
#
|
242
|
+
# @return [Nil]
|
243
|
+
def load_cache( models )
|
244
|
+
models.each do |model|
|
245
|
+
symbol = Tango::Kernel.symbolize( model )
|
246
|
+
model.all.each do |record|
|
247
|
+
@cache.set( symbol, record )
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
# Release given buffer
|
253
|
+
#
|
254
|
+
# @param buffer [Tango::Resource::Buffer]
|
255
|
+
# @return [Nil]
|
256
|
+
def release_buffer( buffer )
|
257
|
+
buffer.release_all
|
195
258
|
end
|
196
259
|
|
197
260
|
end
|
@@ -11,7 +11,10 @@ module Tango
|
|
11
11
|
|
12
12
|
# Constructor of Tango's handler
|
13
13
|
#
|
14
|
-
# @param
|
14
|
+
# @param url [String]
|
15
|
+
# @param document [Nokogiri::XML]
|
16
|
+
# @param cache [Tango::Resource::Cache]
|
17
|
+
# @return [Tango::ETL::HandlerInterface]
|
15
18
|
def initialize( url, document, cache = nil )
|
16
19
|
@url = url
|
17
20
|
@document = document
|
data/lib/tango/kernel.rb
CHANGED
@@ -1,25 +1,19 @@
|
|
1
1
|
module Tango
|
2
2
|
module Kernel
|
3
3
|
|
4
|
-
# Convert file path to class name
|
5
|
-
# @param file_path [String]
|
6
|
-
# @return [String]
|
7
|
-
def self.classify( file_path )
|
8
|
-
File.basename( file_path, ".*" ).split( "_" ).map { |w| w.capitalize }.join
|
9
|
-
end
|
10
4
|
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# @param
|
14
|
-
# @
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
5
|
+
# Fetch list of first level classes in module
|
6
|
+
#
|
7
|
+
# @param mod [Module]
|
8
|
+
# @return [Array]
|
9
|
+
def self.module_classes( mod )
|
10
|
+
|
11
|
+
mod.constants.map { |s|
|
12
|
+
mod.const_get( s )
|
13
|
+
}.select { |c|
|
14
|
+
Class === c
|
15
|
+
}
|
16
|
+
|
23
17
|
end
|
24
18
|
|
25
19
|
# Obtain symbol of a class
|
@@ -42,12 +42,11 @@ module Tango
|
|
42
42
|
def fill( type, resource )
|
43
43
|
|
44
44
|
raise ArgumentError, "Trying to fill object with unregistered type" unless @resources.keys.include?( type )
|
45
|
-
|
45
|
+
|
46
46
|
# Append resource to the buffer
|
47
47
|
@resources[type] << resource
|
48
48
|
# Release the buffer if buffer size exceeded
|
49
|
-
release( type ) if @resources[type].count >= @size
|
50
|
-
|
49
|
+
release( type ) if @resources[type].count >= @size
|
51
50
|
end
|
52
51
|
|
53
52
|
# Release all registered buffers
|
data/lib/tango/resource/cache.rb
CHANGED
data/lib/tango/version.rb
CHANGED
File without changes
|
File without changes
|
data/test/unit/test_kernel.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative '../
|
1
|
+
require_relative '../support/helper'
|
2
2
|
|
3
3
|
require 'tango/kernel'
|
4
4
|
require 'tango/abstract_model.rb'
|
@@ -8,21 +8,17 @@ class TestKernel < Test::Unit::TestCase
|
|
8
8
|
context "a kernel" do
|
9
9
|
|
10
10
|
setup do
|
11
|
+
|
11
12
|
@lib_path = File.join( File.expand_path( '../../', __FILE__ ), 'support', 'lib' )
|
13
|
+
|
14
|
+
Dir.glob( File.join( @lib_path, '*.rb' ) ) do |f|
|
15
|
+
require f
|
16
|
+
end
|
17
|
+
|
12
18
|
end
|
13
19
|
|
14
|
-
should "
|
15
|
-
assert_equal
|
16
|
-
end
|
17
|
-
|
18
|
-
should "load a class from a file" do
|
19
|
-
klass = Tango::Kernel.load( File.join( @lib_path, 'simple_buffer.rb' ) )
|
20
|
-
assert_equal SimpleBuffer, klass
|
21
|
-
end
|
22
|
-
|
23
|
-
should "load a class in a module from a file" do
|
24
|
-
klass = Tango::Kernel.load( File.join( @lib_path, 'user.rb' ), 'Model::' )
|
25
|
-
assert_equal Model::User, klass
|
20
|
+
should "load classes in module" do
|
21
|
+
assert_equal [Foo::A, Foo::B, Foo::C], Tango::Kernel.module_classes( Foo )
|
26
22
|
end
|
27
23
|
|
28
24
|
should "obtain symbol from a class" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tango-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Komorowski
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -164,12 +164,13 @@ files:
|
|
164
164
|
- lib/tango/resource/buffer.rb
|
165
165
|
- lib/tango/resource/cache.rb
|
166
166
|
- lib/tango/version.rb
|
167
|
-
- tango.gemspec
|
167
|
+
- tango-etl.gemspec
|
168
168
|
- test/support/db/schema.rb
|
169
|
+
- test/support/helper.rb
|
170
|
+
- test/support/lib/classes.rb
|
169
171
|
- test/support/lib/simple_buffer.rb
|
170
172
|
- test/support/lib/simple_handler.rb
|
171
173
|
- test/support/lib/user.rb
|
172
|
-
- test/test_helper.rb
|
173
174
|
- test/unit/etl/test_dispatcher.rb
|
174
175
|
- test/unit/resource/test_buffer.rb
|
175
176
|
- test/unit/resource/test_cache.rb
|
@@ -201,6 +202,5 @@ rubygems_version: 2.2.2
|
|
201
202
|
signing_key:
|
202
203
|
specification_version: 4
|
203
204
|
summary: ETL framework
|
204
|
-
test_files:
|
205
|
-
- test/test_helper.rb
|
205
|
+
test_files: []
|
206
206
|
has_rdoc:
|