tango-etl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +19 -0
- data/Rakefile +12 -0
- data/changelog.md +4 -0
- data/config/app.yml.sample +6 -0
- data/config/database.yml.sample +27 -0
- data/lib/tango/abstract_model.rb +53 -0
- data/lib/tango/app.rb +202 -0
- data/lib/tango/database_locker.rb +49 -0
- data/lib/tango/etl/dispatcher.rb +49 -0
- data/lib/tango/etl/handler_interface.rb +40 -0
- data/lib/tango/etl/operator_interface.rb +36 -0
- data/lib/tango/etl.rb +3 -0
- data/lib/tango/kernel.rb +36 -0
- data/lib/tango/link_stack.rb +61 -0
- data/lib/tango/multidb.rb +11 -0
- data/lib/tango/resource/buffer.rb +74 -0
- data/lib/tango/resource/cache.rb +81 -0
- data/lib/tango/resource.rb +2 -0
- data/lib/tango/version.rb +3 -0
- data/lib/tango.rb +18 -0
- data/readme.md +3 -0
- data/tango.gemspec +20 -0
- data/test/support/db/schema.rb +6 -0
- data/test/support/lib/model/user.rb +11 -0
- data/test/support/lib/simple_buffer.rb +18 -0
- data/test/support/lib/simple_handler.rb +18 -0
- data/test/unit/etl/test_dispatcher.rb +22 -0
- data/test/unit/resource/test_buffer.rb +51 -0
- data/test/unit/resource/test_cache.rb +120 -0
- data/test/unit/test_abstract_model.rb +43 -0
- data/test/unit/test_database_locker.rb +32 -0
- data/test/unit/test_kernel.rb +35 -0
- data/test/unit/test_link_stack.rb +49 -0
- metadata +177 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module Tango
  module Resource

    # Key-value caching system for resources.
    #
    # Resources are grouped by a registered type and, within a type, keyed by
    # the value the resource returns from +cache_key+. Every resource that is
    # newly cached through {#load} is also handed to the buffer.
    #
    # @author Mckomo
    class Cache

      attr_reader :buffer

      # Constructor of the cache
      #
      # @param buffer [#register, #fill, nil] buffer to notify about registered
      #   types and newly cached resources; a new Buffer is created when nil
      # @return [Tango::Resource::Cache]
      def initialize( buffer = nil )
        # Set dependencies
        @buffer = buffer || Buffer.new
        # Container for resources cache: { type => { cache_key => resource } }
        @storage = {}
      end

      # Register new type of resource to be cached
      #
      # Registering a type again replaces its container with an empty one.
      #
      # @param type [Symbol]
      # @param release_callback [Proc] forwarded untouched to the buffer
      # @return [Object] whatever the buffer's +register+ returns
      def register( type, &release_callback )
        # Create container for cache of new resource
        @storage[type] = {}
        # Also register new type with buffer
        @buffer.register( type, &release_callback )
      end

      # Get a resource from the cache, or use the given block to build it,
      # cache it and pass it to the buffer
      #
      # @param type [Symbol]
      # @param resource [#cache_key] object whose cache key is looked up
      # @yieldparam resource [Object] the resource passed to this method
      # @yieldreturn [#cache_key] resource to cache; it is stored under its
      #   own +cache_key+
      # @return [Object] the cached resource
      # @raise [ArgumentError] when the type is not registered, or when the
      #   resource is not cached yet and no block is given
      def load( type, resource )
        # Cache hit: nothing is yielded and the buffer is not touched
        cached_resource = get( type, resource )
        return cached_resource if cached_resource

        raise ArgumentError, "No resource callback given" unless block_given?

        # Cache miss: execute the block to receive the transformed resource
        cached_resource = yield( resource )
        # Cache new resource
        set( type, cached_resource )
        # Fill buffer with newly cached resource
        @buffer.fill( type, cached_resource )

        cached_resource
      end

      # Setter for the cache storage
      #
      # @param type [Symbol]
      # @param resource [#cache_key]
      # @return [Object] the stored resource
      # @raise [ArgumentError] when the type is not registered
      def set( type, resource )
        raise ArgumentError, "Trying to set resource with unregistered type" unless @storage.key?( type )
        @storage[type][resource.cache_key] = resource
      end

      # Getter for the cache storage
      #
      # @param type [Symbol]
      # @param resource [#cache_key] object whose cache key is looked up
      # @return [Object, nil] the cached resource, or nil when nothing is
      #   stored under that key
      # @raise [ArgumentError] when the type is not registered
      def get( type, resource )
        raise ArgumentError, "Trying to get resource with unregistered type" unless @storage.key?( type )
        @storage[type][resource.cache_key]
      end

    end
  end
end
|
data/lib/tango.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Tango == ETL => true
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'httparty'
|
4
|
+
require 'active_record'
|
5
|
+
require 'activerecord-import'
|
6
|
+
require 'ar-multidb'
|
7
|
+
|
8
|
+
require 'yaml'
|
9
|
+
require 'logger'
|
10
|
+
|
11
|
+
require 'tango/app'
|
12
|
+
require 'tango/kernel'
|
13
|
+
require 'tango/multidb'
|
14
|
+
require 'tango/abstract_model'
|
15
|
+
require 'tango/link_stack'
|
16
|
+
require 'tango/database_locker'
|
17
|
+
require 'tango/etl'
|
18
|
+
require 'tango/resource'
|
data/readme.md
ADDED
data/tango.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem specification for tango-etl.

# Resolve the version file relative to this gemspec so it loads regardless of
# the current working directory.
require_relative 'lib/tango/version'

Gem::Specification.new do |s|
  s.name        = 'tango-etl'
  s.version     = Tango::VERSION
  s.license     = 'MIT'
  s.summary     = 'ETL framework'
  s.description = 'Scrapping web content made even easier.'
  s.authors     = ['Maciej Komorowski']
  s.email       = 'mckomo@gmail.com'
  s.homepage    = 'https://github.com/mckomo/Tango'

  # Package every file tracked by git except .gitignore
  s.files       = `git ls-files`.split("\n") - %w[.gitignore]
  # Test files are test_*.rb anywhere below test/ (e.g. test/unit/test_kernel.rb).
  # The previous pattern only matched test_*.rb directly after "test/" and so
  # missed every file in a subdirectory.
  s.test_files  = s.files.select { |path| path =~ %r{\Atest/(?:.+/)?test_[^/]*\.rb\z} }

  s.add_dependency 'nokogiri', '~> 1.6', '>= 1.6.1'
  s.add_dependency 'httparty', '~> 0.13', '>= 0.13.1'
  s.add_dependency 'activerecord', '~> 4.1', '>= 4.1.0'
  s.add_dependency 'activerecord-import', '~> 0.5', '>= 0.5.0'
  s.add_dependency 'ar-multidb', '~> 0.1', '>= 0.1.12'
end
|
20
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Minimal buffer stand-in that only counts how many times #register and #fill
# have been called.
class SimpleBuffer

  attr_reader :register_counter, :fill_couter

  # Correctly spelled reader; the misspelled +fill_couter+ stays available so
  # existing callers keep working.
  alias_method :fill_counter, :fill_couter

  def initialize
    @register_counter = 0
    @fill_couter = 0
  end

  # Count a registration; the type and the optional block are ignored.
  #
  # @param type [Object]
  # @return [Integer] number of register calls so far
  def register( type, &_release_callback )
    @register_counter += 1
  end

  # Count a fill; both arguments are ignored.
  #
  # @param type [Object]
  # @param resource [Object]
  # @return [Integer] number of fill calls so far
  def fill( type, resource )
    @fill_couter += 1
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Minimal buffer stand-in that only counts how many times #register and #fill
# have been called.
class SimpleBuffer

  attr_reader :register_counter, :fill_couter

  # Correctly spelled reader; the misspelled +fill_couter+ stays available so
  # existing callers keep working.
  alias_method :fill_counter, :fill_couter

  def initialize
    @register_counter = 0
    @fill_couter = 0
  end

  # Count a registration; the type and the optional block are ignored.
  #
  # @param type [Object]
  # @return [Integer] number of register calls so far
  def register( type, &_release_callback )
    @register_counter += 1
  end

  # Count a fill; both arguments are ignored.
  #
  # @param type [Object]
  # @param resource [Object]
  # @return [Integer] number of fill calls so far
  def fill( type, resource )
    @fill_couter += 1
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/resource/cache'
|
6
|
+
require_relative '../../support/lib/simple_handler.rb'
|
7
|
+
|
8
|
+
# Placeholder test case for the handler dispatcher.
#
# NOTE(review): no dispatcher is constructed and the only test has an empty
# body, so this case currently asserts nothing. The test description also
# looks copied from the cache tests - confirm the intended behaviour.
class TestDispatcher < Test::Unit::TestCase

  context 'a handler dispatcher' do

    setup do
      # Bare reference: evaluates to nil, nothing is assigned.
      @dispatcher
    end

    should 'throw exception when trying to load unregistered type' do
      # No assertions: the body is empty.
    end

  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require './lib/tango/resource/buffer.rb'
|
6
|
+
|
7
|
+
# Unit tests for Tango::Resource::Buffer: type registration, filling, and the
# release callback.
class TestBuffer < Test::Unit::TestCase

  context 'a buffer' do

    setup do
      @buffer_size = 10
      @buffer = Tango::Resource::Buffer.new( @buffer_size )
    end

    should 'throw exception when try register type without callback object with unregistered type' do
      assert_raise( ArgumentError ) { @buffer.register( :foo ) }
    end

    should 'throw exception when try to fill object with unregistered type' do
      assert_raise( ArgumentError ) { @buffer.fill( :foo, Object.new ) }
    end

    should 'give ability to register new type of resource' do
      # Passes as long as neither call raises.
      @buffer.register( :foo ) { nil }
      @buffer.fill( :foo, Object.new )
    end

    should 'trigger release callback when buffer size exceeded' do
      releases = 0
      @buffer.register( :foo ) { releases += 1 }

      # Fill the buffer to capacity five times over.
      fills = @buffer_size * 5
      fills.times { @buffer.fill( :foo, Object.new ) }

      assert_equal 5, releases
    end

  end

end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/resource/cache'
|
6
|
+
require_relative '../../support/lib/simple_buffer.rb'
|
7
|
+
|
8
|
+
# Unit tests for Tango::Resource::Cache, using SimpleBuffer to count the calls
# the cache makes to its buffer.
class TestCache < Test::Unit::TestCase

  context 'a cache' do

    setup do
      @buffer = SimpleBuffer.new
      @cache = Tango::Resource::Cache.new( @buffer )
      @foo_resource = stub( cache_key: 'foo' )
    end

    should 'throw exception when trying to load unregistered type' do
      assert_raise( ArgumentError ) { @cache.load( :foo, 'bar' ) }
    end

    should 'throw exception when trying to load yet not cached resource without callback' do
      @cache.register( :foo ) { nil }

      assert_raise( ArgumentError ) { @cache.load( :foo, @foo_resource ) }
    end

    should 'be able to store recourse' do
      @cache.register( :foo ) { nil }
      @cache.set( :foo, @foo_resource )

      assert_equal @foo_resource, @cache.get( :foo, @foo_resource )
    end

    should 'be able to load already stored recourse' do
      @cache.register( :foo ) { nil }
      @cache.set( :foo, @foo_resource )

      # No block is needed because the resource is already cached.
      assert_equal @foo_resource, @cache.load( :foo, @foo_resource )
    end

    should 'load yet not cached value using callback' do
      @cache.register( :foo ) { nil }

      loaded_resource = @cache.load( :foo, @foo_resource ) { @foo_resource }

      assert_equal @foo_resource, loaded_resource
      # Second load without a block must be served from the cache.
      assert_equal @foo_resource, @cache.load( :foo, @foo_resource )
    end

    should 'also register new types with buffer' do
      10.times do |type|
        @cache.register( type ) { nil }
      end

      assert_equal 10, @buffer.register_counter
    end

    should 'fill buffer with loaded resources' do
      @cache.register( :foo ) { nil }

      # Two resources with distinct cache keys: each one is a cache miss.
      2.times do |key|
        resource = stub( cache_key: key )
        @cache.load( :foo, resource ) { resource }
      end

      assert_equal 2, @buffer.fill_couter
    end

    should 'fill buffer only once with same resource' do
      @cache.register( :foo ) { nil }

      # Same resource twice: only the first load is a cache miss.
      2.times do
        @cache.load( :foo, @foo_resource ) { @foo_resource }
      end

      assert_equal 1, @buffer.fill_couter
    end

  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
require 'active_record'
|
5
|
+
require 'activerecord-nulldb-adapter'
|
6
|
+
|
7
|
+
require 'tango/abstract_model'
|
8
|
+
require_relative '../support/lib/model/user.rb'
|
9
|
+
|
10
|
+
# Unit tests for models extending AbstractModel, exercised through the
# Model::User support class.
class TestAbstractModel < Test::Unit::TestCase

  context 'instance of a model that extends AbstractModel' do

    setup do
      # Schema path is resolved from the current working directory.
      schema_path = Dir.pwd + '/test/support/db/schema.rb'
      ActiveRecord::Base.establish_connection( adapter: :nulldb, schema: schema_path )

      @model = Model::User.new
      @model.name = 'Maciej'
      @model.age = 22
    end

    should 'have cache key' do
      assert_equal ['Maciej', 22], @model.cache_key
    end

    should 'have array list with its properties values' do
      assert_equal [nil, 'Maciej', 22], @model.values
    end

  end

  context 'model class that extends AbstractModel' do

    should 'know what is the next available id' do
      # Consecutive calls are expected to hand out consecutive ids.
      first_id = Model::User.next_id
      assert_equal 1, first_id
      second_id = Model::User.next_id
      assert_equal 2, second_id
    end

  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/database_locker'
|
6
|
+
|
7
|
+
# Unit tests for Tango::DatabaseLocker, which is exercised against a lock file
# at ./tmp/database_test.lock.
class TestDatabaseLocker < Test::Unit::TestCase

  context "a database locker" do

    setup do
      @lock_path = './tmp/database_test.lock'
      @locker = Tango::DatabaseLocker.new( ["master", "slave"], @lock_path )
    end

    teardown do
      # Only delete the lock file when it exists, so a test that failed before
      # creating it is not masked by an Errno::ENOENT raised here.
      File.delete( @lock_path ) if File.exist?( @lock_path )
    end

    should "create lock file" do
      @locker.lock( "master" )
      # File.exist? replaces the deprecated File.exists? (removed in Ruby 3.2).
      assert File.exist?( @lock_path )
      assert_equal "master", IO.read( @lock_path )
    end

    should "find unlocked database" do
      @locker.lock( "slave" )
      assert_equal "master", @locker.unlocked
    end

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/kernel'
|
6
|
+
|
7
|
+
# Unit tests for the Tango::Kernel helpers: classify, load and symbolize.
#
# NOTE(review): SimpleBuffer and Model::User are referenced as constants in the
# symbolize tests, but this test case does not require their files itself -
# presumably the load tests define them first; confirm test-order independence.
class TestKernel < Test::Unit::TestCase

  context 'a kernel' do

    should 'transform file path to name of a class' do
      class_name = Tango::Kernel.classify( './../lib/foo_bar.rb' )
      assert_equal 'FooBar', class_name
    end

    should 'load a class from a file' do
      path = Dir.pwd + '/test/support/lib/simple_buffer.rb'
      assert_equal SimpleBuffer, Tango::Kernel.load( path )
    end

    should 'load a class in a module from a file' do
      path = Dir.pwd + '/test/support/lib/model/user.rb'
      assert_equal Model::User, Tango::Kernel.load( path, 'Model::' )
    end

    should 'obtain symbol from a class' do
      assert_equal :simple_buffer, Tango::Kernel.symbolize( SimpleBuffer )
    end

    should 'obtain symbol from a class in a module' do
      assert_equal :user, Tango::Kernel.symbolize( Model::User )
    end

  end

end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'shoulda'
|
3
|
+
require 'mocha/setup'
|
4
|
+
|
5
|
+
require 'tango/link_stack'
|
6
|
+
|
7
|
+
# Unit tests for Tango::LinkStack: URL validation, host detection and the
# order in which stored links are shifted off.
class TestLinkStack < Test::Unit::TestCase

  context 'a link stack' do

    setup do
      @stack = Tango::LinkStack.new( 'http://example.com/data?xml' )
    end

    should 'raise error when initialized with incorrect URL' do
      assert_raise( ArgumentError ) { Tango::LinkStack.new( 'ImNotA/Link' ) }
    end

    should 'know host of base link' do
      assert_equal 'http://example.com:80', @stack.host
    end

    should 'contain initial link path' do
      assert @stack.has_links?
      assert_equal ['/data?xml'], @stack.links
      assert_equal '/data?xml', @stack.shift
      # Shifting the only link leaves the stack empty.
      assert !@stack.has_links?
    end

    should 'store appended links' do
      # Drop the initial path so only appended links remain.
      @stack.shift

      @stack.append( '/data/bids' )
      @stack.append( ['/data/bids/1', '/data/bids/2'] )
      assert_equal 3, @stack.links.count

      # Links come back in the order they were appended.
      assert_equal '/data/bids', @stack.shift
      assert_equal '/data/bids/1', @stack.shift
      assert_equal '/data/bids/2', @stack.shift

      assert !@stack.has_links?
    end

  end
end
|