tango-etl 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -1
- data/Gemfile +1 -0
- data/lib/tango/app.rb +110 -47
- data/lib/tango/etl/handler_interface.rb +4 -1
- data/lib/tango/kernel.rb +12 -18
- data/lib/tango/resource/buffer.rb +2 -3
- data/lib/tango/resource/cache.rb +1 -0
- data/lib/tango/version.rb +1 -1
- data/{tango.gemspec → tango-etl.gemspec} +0 -0
- data/test/{test_helper.rb → support/helper.rb} +0 -0
- data/test/support/lib/classes.rb +25 -0
- data/test/unit/etl/test_dispatcher.rb +1 -1
- data/test/unit/resource/test_buffer.rb +1 -1
- data/test/unit/resource/test_cache.rb +2 -2
- data/test/unit/test_abstract_model.rb +1 -1
- data/test/unit/test_database_locker.rb +1 -1
- data/test/unit/test_kernel.rb +9 -13
- data/test/unit/test_link_stack.rb +1 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4612c4976d0f3ebed10de7d9a4fdbf8178ab8072
|
4
|
+
data.tar.gz: 47afa4fffe497491fbcecd49ee8b001b88a4f9b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b487d74cd0dc5e319ef758d52e969727828f240496c345b60731d78663e015c2f5891f2bc405d9da2a9670b0bd0a56688c413c4f183a8c639ad656dd319edcd1
|
7
|
+
data.tar.gz: e9edf5573f63b9723d3b89e91f99390fd38d865f92dddeebc66e496812a3a009515e0a3ef3cd29d0e1213d55ed9f50540540132a36ad3b2f7f67dadd0b27e3b3
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/lib/tango/app.rb
CHANGED
@@ -34,6 +34,9 @@ module Tango
|
|
34
34
|
@parser = parser || Nokogiri::HTML
|
35
35
|
@db_locker = db_locker || DatabaseLocker.new( Multidb.databases )
|
36
36
|
@logger = logger || Logger.new( STDOUT )
|
37
|
+
|
38
|
+
@models = []
|
39
|
+
@operators = []
|
37
40
|
|
38
41
|
end
|
39
42
|
|
@@ -44,46 +47,40 @@ module Tango
|
|
44
47
|
# Filter run after Tango execution
|
45
48
|
def after
|
46
49
|
end
|
47
|
-
|
48
|
-
# Register new
|
50
|
+
|
51
|
+
# Register a new model
|
49
52
|
#
|
50
|
-
# @param
|
51
|
-
# @
|
52
|
-
def register_model(
|
53
|
-
|
54
|
-
@models[symbol] = model
|
55
|
-
|
56
|
-
# Truncate table of non persistent model
|
57
|
-
unless model.persistent?
|
58
|
-
ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
|
59
|
-
end
|
60
|
-
|
53
|
+
# @param model [Class] Model class (must respond to persistent? and table_name)
|
54
|
+
# @return [Array]
|
55
|
+
def register_model( model )
|
56
|
+
@models << model
|
61
57
|
end
|
62
58
|
|
63
|
-
# Register new resource operator
|
59
|
+
# Register a new resource operator
|
64
60
|
#
|
65
|
-
# @param
|
66
|
-
# @
|
67
|
-
def register_operator(
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
61
|
+
# @param operator [Tango::ETL::OperatorInterface]
|
62
|
+
# @return [Array]
|
63
|
+
def register_operator( operator )
|
64
|
+
@operators << operator
|
65
|
+
end
|
66
|
+
|
67
|
+
# Register a new handler with the dispatcher
|
68
|
+
#
|
69
|
+
# @param handler [Tango::ETL::HandlerInterface]
|
70
|
+
# @return [Array]
|
71
|
+
def register_handler( handler )
|
72
|
+
@dispatcher.register( handler )
|
76
73
|
end
|
77
74
|
|
78
75
|
# Run ETL process
|
79
76
|
#
|
80
|
-
# @param
|
81
|
-
# @param
|
82
|
-
# @param
|
83
|
-
# @param
|
84
|
-
# @param
|
85
|
-
# @param
|
86
|
-
# @return
|
77
|
+
# @param link_stack [Tango::LinkStack]
|
78
|
+
# @param dispatcher [Tango::ETL::Dispatcher]
|
79
|
+
# @param cache [Tango::Resource::Cache]
|
80
|
+
# @param http_client [Object] Must implement get method
|
81
|
+
# @param parser [Object] Must implement parse method
|
82
|
+
# @param logger [Logger]
|
83
|
+
# @return [Nil]
|
87
84
|
def run
|
88
85
|
|
89
86
|
# Save beginning time
|
@@ -93,13 +90,17 @@ module Tango
|
|
93
90
|
@logger.info "Target: #{@link_stack.host}."
|
94
91
|
|
95
92
|
# Use next unlocked database
|
96
|
-
|
93
|
+
pick_database( @db_locker.unlocked )
|
97
94
|
@logger.info "Using database '#{@db_locker.unlocked}'."
|
95
|
+
|
96
|
+
@logger.info "Truncating non persistent models ..."
|
97
|
+
truncate_tables( non_persistent_models )
|
98
98
|
|
99
|
-
#
|
99
|
+
# Load cache for persistent models
|
100
100
|
@logger.info "Loading cache ..."
|
101
|
-
|
102
|
-
|
101
|
+
setup_cache( @operators )
|
102
|
+
load_cache( persistent_models )
|
103
|
+
|
103
104
|
# Run before filter
|
104
105
|
@logger.info "Running before callback ..."
|
105
106
|
before
|
@@ -124,13 +125,14 @@ module Tango
|
|
124
125
|
begin
|
125
126
|
response = @http_client.get( @link_stack.host + link )
|
126
127
|
rescue StandardError => e
|
127
|
-
@logger.error "Could not download contents of #{@link_stack.host + link} link."
|
128
|
+
@logger.error "Could not download contents of #{@link_stack.host + link} link."
|
129
|
+
@logger.error e.message
|
128
130
|
next
|
129
131
|
end
|
130
132
|
|
131
133
|
# Continue only when response has code 200 or 201
|
132
134
|
if ! [ 200, 201 ].include?( response.code )
|
133
|
-
@logger.error "Response code for link #{link} is #{response.code}. Only
|
135
|
+
@logger.error "Response code for link #{link} is #{response.code}. Only codes 200 and 201 are accepted."
|
134
136
|
next
|
135
137
|
end
|
136
138
|
|
@@ -162,14 +164,14 @@ module Tango
|
|
162
164
|
|
163
165
|
# Release buffers
|
164
166
|
@logger.info "Releasing buffers ..."
|
165
|
-
@cache.buffer
|
167
|
+
release_buffer( @cache.buffer )
|
166
168
|
|
167
169
|
# Run after filter
|
168
170
|
@logger.info "Running after callback ..."
|
169
171
|
after
|
170
172
|
|
171
173
|
# Lock database used in this Tango iteration
|
172
|
-
|
174
|
+
lock_database( @db_locker.unlocked )
|
173
175
|
|
174
176
|
# Get time of script execution ending
|
175
177
|
end_time = Time.now
|
@@ -183,15 +185,76 @@ module Tango
|
|
183
185
|
end
|
184
186
|
|
185
187
|
private
|
188
|
+
|
189
|
+
# Pick database ( e.g. to be used in this Tango run )
|
190
|
+
#
|
191
|
+
# @param database [String|Symbol]
|
192
|
+
# @return [String|Symbol]
|
193
|
+
def pick_database( database )
|
194
|
+
Multidb.use( database )
|
195
|
+
end
|
196
|
+
|
197
|
+
# Lock database ( e.g. used in this Tango run )
|
198
|
+
#
|
199
|
+
# @param database [String|Symbol]
|
200
|
+
# @return [String|Symbol]
|
201
|
+
def lock_database( database )
|
202
|
+
@db_locker.lock( database )
|
203
|
+
end
|
204
|
+
|
205
|
+
# Fetch the list of persistent models registered with the application
|
206
|
+
#
|
207
|
+
# @return [Array]
|
208
|
+
def persistent_models
|
209
|
+
@models.select { |m| m.persistent? }
|
210
|
+
end
|
211
|
+
|
212
|
+
# Fetch the list of non-persistent models registered with the application
|
213
|
+
#
|
214
|
+
# @return [Array]
|
215
|
+
def non_persistent_models
|
216
|
+
@models.reject { |m| m.persistent? }
|
217
|
+
end
|
218
|
+
|
219
|
+
# Truncate the tables of the given models
|
220
|
+
#
|
221
|
+
# @return [Nil]
|
222
|
+
def truncate_tables( models )
|
223
|
+
models.each do |model|
|
224
|
+
ActiveRecord::Base.connection.execute( "TRUNCATE #{model.table_name}" )
|
225
|
+
end
|
226
|
+
end
|
186
227
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
228
|
+
# Register cache with resource operators
|
229
|
+
#
|
230
|
+
# @param operators [Array]
|
231
|
+
# @return [Array]
|
232
|
+
def setup_cache( operators )
|
233
|
+
operators.each do |operator|
|
234
|
+
@cache.register( Tango::Kernel.symbolize( operator ) ) do |resource|
|
235
|
+
operator.load( resource )
|
236
|
+
end
|
193
237
|
end
|
194
|
-
|
238
|
+
end
|
239
|
+
|
240
|
+
# Load cache for given models
|
241
|
+
#
|
242
|
+
# @return [Nil]
|
243
|
+
def load_cache( models )
|
244
|
+
models.each do |model|
|
245
|
+
symbol = Tango::Kernel.symbolize( model )
|
246
|
+
model.all.each do |record|
|
247
|
+
@cache.set( symbol, record )
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
# Release given buffer
|
253
|
+
#
|
254
|
+
# @param buffer [Tango::Resource::Buffer]
|
255
|
+
# @return [Nil]
|
256
|
+
def release_buffer( buffer )
|
257
|
+
buffer.release_all
|
195
258
|
end
|
196
259
|
|
197
260
|
end
|
@@ -11,7 +11,10 @@ module Tango
|
|
11
11
|
|
12
12
|
# Constructor of Tango's handler
|
13
13
|
#
|
14
|
-
# @param
|
14
|
+
# @param url [String]
|
15
|
+
# @param document [Nokogiri::XML]
|
16
|
+
# @param cache [Tango::Resource::Cache]
|
17
|
+
# @return [Tango::ETL::HandlerInterface]
|
15
18
|
def initialize( url, document, cache = nil )
|
16
19
|
@url = url
|
17
20
|
@document = document
|
data/lib/tango/kernel.rb
CHANGED
@@ -1,25 +1,19 @@
|
|
1
1
|
module Tango
|
2
2
|
module Kernel
|
3
3
|
|
4
|
-
# Convert file path to class name
|
5
|
-
# @param file_path [String]
|
6
|
-
# @return [String]
|
7
|
-
def self.classify( file_path )
|
8
|
-
File.basename( file_path, ".*" ).split( "_" ).map { |w| w.capitalize }.join
|
9
|
-
end
|
10
4
|
|
11
|
-
#
|
12
|
-
#
|
13
|
-
# @param
|
14
|
-
# @
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
5
|
+
# Fetch list of first level classes in module
|
6
|
+
#
|
7
|
+
# @param mod [Module]
|
8
|
+
# @return [Array]
|
9
|
+
def self.module_classes( mod )
|
10
|
+
|
11
|
+
mod.constants.map { |s|
|
12
|
+
mod.const_get( s )
|
13
|
+
}.select { |c|
|
14
|
+
Class === c
|
15
|
+
}
|
16
|
+
|
23
17
|
end
|
24
18
|
|
25
19
|
# Obtain symbol of a class
|
@@ -42,12 +42,11 @@ module Tango
|
|
42
42
|
def fill( type, resource )
|
43
43
|
|
44
44
|
raise ArgumentError, "Trying to fill object with unregistered type" unless @resources.keys.include?( type )
|
45
|
-
|
45
|
+
|
46
46
|
# Append resource to the buffer
|
47
47
|
@resources[type] << resource
|
48
48
|
# Release the buffer if buffer size exceeded
|
49
|
-
release( type ) if @resources[type].count >= @size
|
50
|
-
|
49
|
+
release( type ) if @resources[type].count >= @size
|
51
50
|
end
|
52
51
|
|
53
52
|
# Release all registered buffers
|
data/lib/tango/resource/cache.rb
CHANGED
data/lib/tango/version.rb
CHANGED
File without changes
|
File without changes
|
data/test/unit/test_kernel.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative '../
|
1
|
+
require_relative '../support/helper'
|
2
2
|
|
3
3
|
require 'tango/kernel'
|
4
4
|
require 'tango/abstract_model.rb'
|
@@ -8,21 +8,17 @@ class TestKernel < Test::Unit::TestCase
|
|
8
8
|
context "a kernel" do
|
9
9
|
|
10
10
|
setup do
|
11
|
+
|
11
12
|
@lib_path = File.join( File.expand_path( '../../', __FILE__ ), 'support', 'lib' )
|
13
|
+
|
14
|
+
Dir.glob( File.join( @lib_path, '*.rb' ) ) do |f|
|
15
|
+
require f
|
16
|
+
end
|
17
|
+
|
12
18
|
end
|
13
19
|
|
14
|
-
should "
|
15
|
-
assert_equal
|
16
|
-
end
|
17
|
-
|
18
|
-
should "load a class from a file" do
|
19
|
-
klass = Tango::Kernel.load( File.join( @lib_path, 'simple_buffer.rb' ) )
|
20
|
-
assert_equal SimpleBuffer, klass
|
21
|
-
end
|
22
|
-
|
23
|
-
should "load a class in a module from a file" do
|
24
|
-
klass = Tango::Kernel.load( File.join( @lib_path, 'user.rb' ), 'Model::' )
|
25
|
-
assert_equal Model::User, klass
|
20
|
+
should "load classes in module" do
|
21
|
+
assert_equal [Foo::A, Foo::B, Foo::C], Tango::Kernel.module_classes( Foo )
|
26
22
|
end
|
27
23
|
|
28
24
|
should "obtain symbol from a class" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tango-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Komorowski
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -164,12 +164,13 @@ files:
|
|
164
164
|
- lib/tango/resource/buffer.rb
|
165
165
|
- lib/tango/resource/cache.rb
|
166
166
|
- lib/tango/version.rb
|
167
|
-
- tango.gemspec
|
167
|
+
- tango-etl.gemspec
|
168
168
|
- test/support/db/schema.rb
|
169
|
+
- test/support/helper.rb
|
170
|
+
- test/support/lib/classes.rb
|
169
171
|
- test/support/lib/simple_buffer.rb
|
170
172
|
- test/support/lib/simple_handler.rb
|
171
173
|
- test/support/lib/user.rb
|
172
|
-
- test/test_helper.rb
|
173
174
|
- test/unit/etl/test_dispatcher.rb
|
174
175
|
- test/unit/resource/test_buffer.rb
|
175
176
|
- test/unit/resource/test_cache.rb
|
@@ -201,6 +202,5 @@ rubygems_version: 2.2.2
|
|
201
202
|
signing_key:
|
202
203
|
specification_version: 4
|
203
204
|
summary: ETL framework
|
204
|
-
test_files:
|
205
|
-
- test/test_helper.rb
|
205
|
+
test_files: []
|
206
206
|
has_rdoc:
|