tango-etl 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +19 -0
- data/Rakefile +12 -0
- data/changelog.md +4 -0
- data/config/app.yml.sample +6 -0
- data/config/database.yml.sample +27 -0
- data/lib/tango/abstract_model.rb +53 -0
- data/lib/tango/app.rb +202 -0
- data/lib/tango/database_locker.rb +49 -0
- data/lib/tango/etl/dispatcher.rb +49 -0
- data/lib/tango/etl/handler_interface.rb +40 -0
- data/lib/tango/etl/operator_interface.rb +36 -0
- data/lib/tango/etl.rb +3 -0
- data/lib/tango/kernel.rb +36 -0
- data/lib/tango/link_stack.rb +61 -0
- data/lib/tango/multidb.rb +11 -0
- data/lib/tango/resource/buffer.rb +74 -0
- data/lib/tango/resource/cache.rb +81 -0
- data/lib/tango/resource.rb +2 -0
- data/lib/tango/version.rb +3 -0
- data/lib/tango.rb +18 -0
- data/readme.md +3 -0
- data/tango.gemspec +20 -0
- data/test/support/db/schema.rb +6 -0
- data/test/support/lib/model/user.rb +11 -0
- data/test/support/lib/simple_buffer.rb +18 -0
- data/test/support/lib/simple_handler.rb +18 -0
- data/test/unit/etl/test_dispatcher.rb +22 -0
- data/test/unit/resource/test_buffer.rb +51 -0
- data/test/unit/resource/test_cache.rb +120 -0
- data/test/unit/test_abstract_model.rb +43 -0
- data/test/unit/test_database_locker.rb +32 -0
- data/test/unit/test_kernel.rb +35 -0
- data/test/unit/test_link_stack.rb +49 -0
- metadata +177 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module Tango
  module Resource

    # Key-value caching system for resources.
    #
    # Resources are grouped by a registered type and, within a type, indexed
    # by the value returned from their +cache_key+ method. Every resource
    # that is cached through {#load} is also handed to the buffer.
    #
    # @author Mckomo
    class Cache

      attr_reader :buffer

      # Constructor of the cache
      #
      # @param buffer [#register, #fill] collaborator notified about registered
      #   types and newly cached resources; a new Buffer is created when omitted
      # @return [Tango::Resource::Cache]
      def initialize( buffer = nil )
        # Set dependencies
        @buffer = buffer || Buffer.new
        # Container for resources cache: { type => { cache_key => resource } }
        @storage = {}
      end

      # Register new type of resource to be cached.
      #
      # Registering a type that is already registered replaces its cache
      # with an empty one.
      #
      # @param type [Symbol]
      # @param release_callback [Proc] forwarded unchanged to the buffer
      # @return [Object] whatever the buffer's +register+ returns
      def register( type, &release_callback )
        # Create container for cache of new resource
        @storage[type] = {}
        # Also register new type with buffer
        @buffer.register( type, &release_callback )
      end

      # Get a cached resource, or use the given block to build, cache and
      # buffer it.
      #
      # The lookup uses +resource.cache_key+; the value returned by the block
      # is stored under its own +cache_key+.
      #
      # @param type [Symbol]
      # @param resource [#cache_key]
      # @yieldparam resource [#cache_key] the resource passed to this method
      # @yieldreturn [#cache_key] resource to be cached
      # @return [Object] the cached resource
      # @raise [ArgumentError] when the type is not registered, or when the
      #   resource is not cached yet and no block was given
      def load( type, resource )
        cached_resource = get( type, resource )
        return cached_resource if cached_resource

        raise ArgumentError, "No resource callback given" unless block_given?

        # Not found: let the caller transform the resource, then cache it
        # and fill the buffer with it
        cached_resource = yield( resource )
        set( type, cached_resource )
        @buffer.fill( type, cached_resource )

        cached_resource
      end

      # Setter for the cache storage
      #
      # @param type [Symbol]
      # @param resource [#cache_key]
      # @return [Object] the stored resource
      # @raise [ArgumentError] when the type is not registered
      def set( type, resource )
        raise ArgumentError, "Trying to set resource with unregistered type" unless @storage.key?( type )
        @storage[type][resource.cache_key] = resource
      end

      # Getter for the cache storage
      #
      # @param type [Symbol]
      # @param resource [#cache_key] object whose +cache_key+ is looked up
      # @return [Object, nil] the cached resource, or nil when nothing is cached
      #   under that key
      # @raise [ArgumentError] when the type is not registered
      def get( type, resource )
        raise ArgumentError, "Trying to get resource with unregistered type" unless @storage.key?( type )
        @storage[type][resource.cache_key]
      end

    end
  end
end
|
data/lib/tango.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Tango == ETL => true
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'httparty'
|
4
|
+
require 'active_record'
|
5
|
+
require 'activerecord-import'
|
6
|
+
require 'ar-multidb'
|
7
|
+
|
8
|
+
require 'yaml'
|
9
|
+
require 'logger'
|
10
|
+
|
11
|
+
require 'tango/app'
|
12
|
+
require 'tango/kernel'
|
13
|
+
require 'tango/multidb'
|
14
|
+
require 'tango/abstract_model'
|
15
|
+
require 'tango/link_stack'
|
16
|
+
require 'tango/database_locker'
|
17
|
+
require 'tango/etl'
|
18
|
+
require 'tango/resource'
|
data/readme.md
ADDED
data/tango.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require './lib/tango/version'
|
2
|
+
|
3
|
+
# Gem specification for tango-etl.
Gem::Specification.new do |s|
  s.name        = 'tango-etl'
  s.version     = Tango::VERSION
  s.license     = 'MIT'
  s.summary     = 'ETL framework'
  s.description = 'Scrapping web content made even easier.'
  s.authors     = ['Maciej Komorowski']
  s.email       = 'mckomo@gmail.com'
  s.homepage    = 'https://github.com/mckomo/Tango'

  # Package everything tracked by git except the ignore file itself.
  s.files       = `git ls-files`.split("\n") - %w[.gitignore]

  # Test files are named test_*.rb and sit at any depth below test/
  # (e.g. test/unit/test_kernel.rb, test/unit/etl/test_dispatcher.rb).
  # The previous pattern only allowed slashes between "test" and "test_",
  # so it never matched files in sub-directories.
  s.test_files  = s.files.grep( %r{\Atest/(?:.*/)?test_[^/]*\.rb\z} )

  s.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.1'
  s.add_dependency 'httparty', '~> 0.13', '>= 0.13.1'
  s.add_dependency 'activerecord', '~> 4.1', '>= 4.1.0'
  s.add_dependency 'activerecord-import', '~> 0.5', '>= 0.5.0'
  s.add_dependency 'ar-multidb', '~> 0.1', '>= 0.1.12'
end
|
20
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Minimal buffer double used by the tests: instead of buffering anything it
# only counts how many times +register+ and +fill+ were called.
class SimpleBuffer

  attr_reader :register_counter, :fill_couter

  # Correctly spelled reader for the fill count; the misspelled +fill_couter+
  # is kept so existing callers continue to work.
  alias_method :fill_counter, :fill_couter

  def initialize
    @register_counter = 0
    @fill_couter = 0
  end

  # Count a registration; the type (and any block passed) is ignored.
  #
  # @param type [Object]
  # @return [Integer] number of registrations so far
  def register( type )
    @register_counter += 1
  end

  # Count a fill; both arguments are ignored.
  #
  # @param type [Object]
  # @param resource [Object]
  # @return [Integer] number of fills so far
  def fill( type, resource )
    @fill_couter += 1
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Test double standing in for a buffer: it tallies calls to +register+ and
# +fill+ and exposes the tallies through readers.
class SimpleBuffer

  attr_reader :register_counter, :fill_couter

  # Both tallies start at zero.
  def initialize
    @register_counter = @fill_couter = 0
  end

  # Bump the registration tally and return its new value.
  #
  # @param type [Object] ignored
  # @return [Integer]
  def register( type )
    @register_counter = @register_counter.succ
  end

  # Bump the fill tally and return its new value.
  #
  # @param type [Object] ignored
  # @param resource [Object] ignored
  # @return [Integer]
  def fill( type, resource )
    @fill_couter = @fill_couter.succ
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/resource/cache'
|
6
|
+
require_relative '../../support/lib/simple_handler.rb'
|
7
|
+
|
8
|
+
# Placeholder test case for the handler dispatcher.
#
# NOTE(review): nothing is exercised yet -- the setup block only references
# @dispatcher without assigning it, and the single example has an empty body.
class TestDispatcher < Test::Unit::TestCase

  context "a handler dispatcher" do

    # TODO: build the dispatcher under test here
    setup { @dispatcher }

    # TODO: add assertions; the body is intentionally left empty for now
    should "throw exception when trying to load unregistered type" do
    end

  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require './lib/tango/resource/buffer.rb'
|
6
|
+
|
7
|
+
# Unit tests for Tango::Resource::Buffer, built with a size of 10.
class TestBuffer < Test::Unit::TestCase

  context "a buffer" do

    setup do
      @buffer_size = 10
      @buffer = Tango::Resource::Buffer.new( @buffer_size )
    end

    # Registering without a release block is expected to be rejected
    should "throw exception when try register type without callback object with unregistered type" do
      assert_raise( ArgumentError ) { @buffer.register( :foo ) }
    end

    should "throw exception when try to fill object with unregistered type" do
      assert_raise( ArgumentError ) { @buffer.fill( :foo, Object.new ) }
    end

    # Passes as long as neither call raises
    should "give ability to register new type of resource" do
      @buffer.register( :foo ) { nil }
      @buffer.fill( :foo, Object.new )
    end

    should "trigger release callback when buffer size exceeded" do
      releases = 0
      @buffer.register( :foo ) { releases += 1 }

      # Five buffer-sizes worth of objects: the callback is expected five times
      fills = @buffer_size * 5
      fills.times { @buffer.fill( :foo, Object.new ) }

      assert_equal 5, releases
    end

  end

end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/resource/cache'
|
6
|
+
require_relative '../../support/lib/simple_buffer.rb'
|
7
|
+
|
8
|
+
# Unit tests for Tango::Resource::Cache, run against a SimpleBuffer double
# and mocha stubs that respond to +cache_key+.
class TestCache < Test::Unit::TestCase

  context "a cache" do

    setup do
      @buffer = SimpleBuffer.new
      @cache = Tango::Resource::Cache.new( @buffer )
      @foo_resource = stub( cache_key: "foo" )
    end

    should "throw exception when trying to load unregistered type" do
      assert_raise ArgumentError do
        @cache.load( :foo, "bar" )
      end
    end

    should "throw exception when trying to load yet not cached resource without callback" do
      @cache.register( :foo ) { nil }

      assert_raise ArgumentError do
        @cache.load( :foo, @foo_resource )
      end
    end

    should "be able to store recourse" do
      @cache.register( :foo ) { nil }

      @cache.set( :foo, @foo_resource )
      assert_equal @foo_resource, @cache.get( :foo, @foo_resource )
    end

    should "be able to load already stored recourse" do
      @cache.register( :foo ) { nil }

      @cache.set( :foo, @foo_resource )
      assert_equal @foo_resource, @cache.load( :foo, @foo_resource )
    end

    should "load yet not cached value using callback" do
      @cache.register( :foo ) { nil }

      loaded_resource = @cache.load( :foo, @foo_resource ) do
        @foo_resource
      end

      assert_equal @foo_resource, loaded_resource
      # Second load needs no block: the resource is cached by now
      assert_equal @foo_resource, @cache.load( :foo, @foo_resource )
    end

    should "also register new types with buffer" do
      10.times do |i|
        @cache.register( i ) { nil }
      end

      assert_equal 10, @buffer.register_counter
    end

    should "fill buffer with loaded resources" do
      @cache.register( :foo ) { nil }

      # Two resources with distinct cache keys: each one reaches the buffer
      2.times do |i|
        resource = stub( cache_key: i )
        @cache.load( :foo, resource ) { resource }
      end

      assert_equal 2, @buffer.fill_couter
    end

    should "fill buffer only once with same resource" do
      @cache.register( :foo ) { nil }

      # Same resource twice: only the first load fills the buffer
      2.times do
        @cache.load( :foo, @foo_resource ) { @foo_resource }
      end

      assert_equal 1, @buffer.fill_couter
    end

  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
require 'active_record'
|
5
|
+
require 'activerecord-nulldb-adapter'
|
6
|
+
|
7
|
+
require 'tango/abstract_model'
|
8
|
+
require_relative '../support/lib/model/user.rb'
|
9
|
+
|
10
|
+
# Unit tests for models built on the abstract model, using Model::User
# against a nulldb connection.
class TestAbstractModel < Test::Unit::TestCase

  context "instance of a model that extends AbstractModel" do

    setup do
      # nulldb adapter: the schema is read from the support schema file
      ActiveRecord::Base.establish_connection(
        adapter: :nulldb,
        schema:  "#{Dir.pwd}/test/support/db/schema.rb"
      )

      @model = Model::User.new
      @model.name = "Maciej"
      @model.age = 22
    end

    should "have cache key" do
      expected_key = ["Maciej", 22]
      assert_equal expected_key, @model.cache_key
    end

    should "have array list with its properties values" do
      # The leading nil is expected for the first value of the unsaved record
      expected_values = [nil, "Maciej", 22]
      assert_equal expected_values, @model.values
    end

  end

  context "model class that extends AbstractModel" do

    should "know what is the next available id" do
      # Each call is expected to hand out the following id
      [1, 2].each do |expected_id|
        assert_equal expected_id, Model::User.next_id
      end
    end

  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/database_locker'
|
6
|
+
|
7
|
+
# Unit tests for Tango::DatabaseLocker, using a lock file under ./tmp.
class TestDatabaseLocker < Test::Unit::TestCase

  context "a database locker" do

    setup do
      @lock_path = './tmp/database_test.lock'
      @locker = Tango::DatabaseLocker.new( ["master", "slave"], @lock_path )
    end

    teardown do
      # Guarded so that a test which failed before the lock file was written
      # reports its own failure instead of Errno::ENOENT from the cleanup
      File.delete( @lock_path ) if File.exist?( @lock_path )
    end

    should "create lock file" do
      @locker.lock( "master" )
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2
      assert File.exist?( @lock_path )
      assert_equal "master", IO.read( @lock_path )
    end

    should "find unlocked database" do
      @locker.lock( "slave" )
      assert_equal "master", @locker.unlocked
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/kernel'
|
6
|
+
|
7
|
+
# Unit tests for the Tango::Kernel helpers: classify, load and symbolize.
class TestKernel < Test::Unit::TestCase

  context "a kernel" do

    should "transform file path to name of a class" do
      class_name = Tango::Kernel.classify( "./../lib/foo_bar.rb" )
      assert_equal "FooBar", class_name
    end

    should "load a class from a file" do
      path = "#{Dir.pwd}/test/support/lib/simple_buffer.rb"
      assert_equal SimpleBuffer, Tango::Kernel.load( path )
    end

    should "load a class in a module from a file" do
      path = "#{Dir.pwd}/test/support/lib/model/user.rb"
      # Second argument is the namespace prefix of the class to be loaded
      assert_equal Model::User, Tango::Kernel.load( path, 'Model::' )
    end

    # NOTE(review): this file does not require the support classes itself, so
    # the two examples below presumably rely on the constants having been
    # loaded by the examples above -- confirm the run order.
    should "obtain symbol from a class" do
      symbol = Tango::Kernel.symbolize( SimpleBuffer )
      assert_equal :simple_buffer, symbol
    end

    should "obtain symbol from a class in a module" do
      symbol = Tango::Kernel.symbolize( Model::User )
      assert_equal :user, symbol
    end

  end

end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/link_stack'
|
6
|
+
|
7
|
+
# Unit tests for Tango::LinkStack, seeded with http://example.com/data?xml.
class TestLinkStack < Test::Unit::TestCase

  context "a link stack" do

    setup do
      @stack = Tango::LinkStack.new( 'http://example.com/data?xml' )
    end

    should "raise error when initialized with incorrect URL" do
      assert_raise( ArgumentError ) { Tango::LinkStack.new( 'ImNotA/Link' ) }
    end

    should "know host of base link" do
      # The host is expected to carry scheme and explicit port
      assert_equal 'http://example.com:80', @stack.host
    end

    should "contain initial link path" do
      assert @stack.has_links?
      assert_equal ['/data?xml'], @stack.links

      # Shifting the only link leaves the stack empty
      assert_equal '/data?xml', @stack.shift
      assert !@stack.has_links?
    end

    should "store appended links" do
      @stack.shift # shift initial path

      # append takes a single link as well as a list of links
      @stack.append( '/data/bids' )
      @stack.append( ['/data/bids/1', '/data/bids/2'] )
      assert_equal 3, @stack.links.count

      # Links come back in the order they were appended
      assert_equal '/data/bids', @stack.shift
      assert_equal '/data/bids/1', @stack.shift
      assert_equal '/data/bids/2', @stack.shift

      assert !@stack.has_links?
    end

  end
end
|