extraloop-redis-storage 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.rdoc +1 -3
- data/bin/extraloop +3 -16
- data/lib/extraloop/redis-storage/model.rb +5 -2
- data/lib/extraloop/redis-storage/record.rb +7 -4
- data/lib/extraloop/redis-storage/remote_store/fusion_tables.rb +11 -13
- data/lib/extraloop/redis-storage/remote_store.rb +31 -8
- data/lib/extraloop/redis-storage/scraping_session.rb +5 -4
- data/lib/extraloop/redis-storage.rb +16 -2
- data/lib/extraloop/scraper_base.rb +2 -3
- data/lib/extraloop/support.rb +37 -0
- data/spec/dataset_factory_spec.rb +2 -4
- data/spec/fusion_tables_spec.rb +67 -0
- data/spec/model_spec.rb +21 -0
- data/spec/record_spec.rb +21 -3
- data/spec/remote_store_spec.rb +47 -0
- data/spec/scraper_base_spec.rb +3 -5
- data/spec/scraping_session_spec.rb +4 -4
- data/spec/spec_helper.rb +12 -0
- metadata +57 -19
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -76,6 +76,4 @@ Similarly, stored datasets can be uploaded to a remote datastore:
|
|
76
76
|
|
77
77
|
extraloop datastore push 51..48 fusion_tables -c google_username:password
|
78
78
|
|
79
|
-
While Google's Fusion Tables is currently the only one implemented, support for other remote datastores (e.g.
|
80
|
-
[couchDB](http://couchdb.apache.org/), , [cartoDB](http://cartodb.com) ), and [CKAN Webstore](http://wiki.ckan.org/Webstore) will be added soon.
|
81
|
-
|
79
|
+
While Google's Fusion Tables is currently the only one implemented, support for pushing datasets to other remote datastores (e.g. {couchDB}[http://couchdb.apache.org/], {cartoDB}[http://cartodb.com], and {CKAN Webstore}[http://wiki.ckan.org/Webstore]) will be added soon.
|
data/bin/extraloop
CHANGED
@@ -10,15 +10,6 @@ class DataStoreCommand < Thor
|
|
10
10
|
|
11
11
|
ExtraLoop::Storage::autoload_models
|
12
12
|
|
13
|
-
class << self
|
14
|
-
def parse_config
|
15
|
-
config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
|
16
|
-
File.exist?(config_file) && YAML::load_file(config_file) or {}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
@@config = parse_config
|
21
|
-
|
22
13
|
@@sessions = ExtraLoop::Storage::ScrapingSession.all
|
23
14
|
@@redis = Ohm.redis
|
24
15
|
|
@@ -63,18 +54,14 @@ class DataStoreCommand < Thor
|
|
63
54
|
|
64
55
|
desc "push [sessions] [remote_store]", "Uploads one or several datasets to a remote data store"
|
65
56
|
method_option :schema, :type => 'hash', :aliases => "-s"
|
66
|
-
method_option :credentials, :type => '
|
57
|
+
method_option :credentials, :type => 'hash', :aliases => "-a"
|
67
58
|
|
68
59
|
def push(sessions, store_type=:fusion_tables)
|
60
|
+
schema = {:schema => options.fetch('schema', {})}
|
69
61
|
|
70
62
|
filter(sessions).each do |session|
|
71
63
|
store_type = store_type.to_sym
|
72
|
-
|
73
|
-
credentials = options.fetch('credentials', @@config[:datastore] && @@config[:datastore][:credentials] && @@config[:datastore][store_type]).split(':')
|
74
|
-
rescue NoMethodError
|
75
|
-
abort "Cannot find credentials for remote datastore.\nPlease specify them using the --credential switch (e.g. 'andrea:mypassword')"
|
76
|
-
end
|
77
|
-
datastore = ExtraLoop::Storage::RemoteStore::get_transport(store_type, credentials)
|
64
|
+
datastore = ExtraLoop::Storage::RemoteStore::get_transport(store_type, options[:credentials], schema)
|
78
65
|
datastore.push session
|
79
66
|
end
|
80
67
|
end
|
@@ -2,8 +2,11 @@
|
|
2
2
|
# are associated to a Scraping session object.
|
3
3
|
#
|
4
4
|
class ExtraLoop::Storage::Model < Ohm::Model
|
5
|
-
|
6
|
-
|
5
|
+
|
6
|
+
def self.[](id)
|
7
|
+
raise ArgumentError.new "model Id should be capitalized" unless id.to_s[0] =~ /[A-Z]/
|
8
|
+
super(id) || create(:id => id)
|
9
|
+
end
|
7
10
|
|
8
11
|
def to_hash
|
9
12
|
super.merge(attributes.reduce({}) { |memo, attribute|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
class ExtraLoop::Storage::Record < Ohm::Model
|
2
|
+
include Ohm::Callbacks
|
2
3
|
include Ohm::Boundaries
|
4
|
+
include Ohm::Typecast
|
3
5
|
include Ohm::Timestamping
|
4
6
|
|
5
7
|
reference :session, ExtraLoop::Storage::ScrapingSession
|
6
|
-
attribute :extracted_at
|
8
|
+
attribute :extracted_at, Time
|
7
9
|
index :session_id
|
8
10
|
|
9
11
|
def initialize attrs={}
|
10
|
-
self.class.send
|
12
|
+
self.class.send(:_inherit!)
|
11
13
|
super attrs
|
12
14
|
end
|
13
15
|
|
@@ -38,8 +40,9 @@ class ExtraLoop::Storage::Record < Ohm::Model
|
|
38
40
|
klass = self
|
39
41
|
|
40
42
|
while klass != ExtraLoop::Storage::Record
|
41
|
-
attributes.
|
42
|
-
|
43
|
+
%w[attributes indices counters].each do |method|
|
44
|
+
send(method).concat(klass.superclass.send(method)).uniq!
|
45
|
+
end
|
43
46
|
klass = klass.superclass
|
44
47
|
end
|
45
48
|
end
|
@@ -1,13 +1,10 @@
|
|
1
|
-
class ExtraLoop::Storage::FusionTables
|
2
|
-
@@connection = nil
|
3
1
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
@api = connect
|
8
|
-
end
|
2
|
+
|
3
|
+
class ExtraLoop::Storage::FusionTables < ExtraLoop::Storage::RemoteStore
|
4
|
+
@@connection = nil
|
9
5
|
|
10
6
|
def push(session)
|
7
|
+
@api = connect!
|
11
8
|
dataset = session.to_hash
|
12
9
|
records = dataset[:records]
|
13
10
|
title = dataset[:title].gsub(/\sDataset/,'')
|
@@ -20,22 +17,23 @@ class ExtraLoop::Storage::FusionTables
|
|
20
17
|
private
|
21
18
|
def make_schema(record)
|
22
19
|
defaults = {
|
20
|
+
'id' => 'number',
|
23
21
|
'session_id' => 'number'
|
24
22
|
}
|
25
23
|
|
26
|
-
|
27
|
-
|
24
|
+
option_schema = @options.fetch(:schema, {}).stringify_keys
|
25
|
+
schema = defaults.merge option_schema
|
28
26
|
record.keys.
|
29
|
-
reject { |key| schema.keys.include?(key) }.
|
27
|
+
reject { |key| schema.keys.include?(key.to_s) }.
|
30
28
|
map { |key| {:name => key.to_s, :type => 'string'} }.
|
31
29
|
concat(schema.map { |field, type| {:name => field.to_s, :type => type }})
|
32
30
|
end
|
33
31
|
|
34
|
-
def connect
|
32
|
+
def connect!
|
35
33
|
return @@connection if @@connection
|
36
|
-
|
37
34
|
@@connection = GData::Client::FusionTables.new
|
38
|
-
|
35
|
+
@credentials = @credentials.symbolize_keys
|
36
|
+
@@connection.clientlogin(@credentials[:username], @credentials[:password])
|
39
37
|
@@connection
|
40
38
|
end
|
41
39
|
end
|
@@ -1,13 +1,36 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
class ExtraLoop::Storage::RemoteStore
|
2
|
+
@@config = {}
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
#
|
5
|
+
# Instantiates the relevant transport class for the selected datastore
|
6
|
+
#
|
7
|
+
def self.get_transport(datastore, credentials=nil, options={})
|
8
|
+
classname = datastore.to_s.camel_case
|
9
|
+
ExtraLoop::Storage.const_get(classname).new(credentials, options) if ExtraLoop::Storage.const_defined?(classname)
|
10
|
+
end
|
6
11
|
|
12
|
+
def initialize(credentials, options={})
|
13
|
+
datastore = self.class.to_s.snake_case.split('/').last.to_sym
|
14
|
+
load_config
|
15
|
+
@options = options
|
16
|
+
@credentials = credentials || config_for([:datastore, datastore])
|
17
|
+
raise ExtraLoop::Storage::Exceptions::MissingCredentialsError.new "Missing credentials for '#{datastore}' remote store" unless @credentials
|
18
|
+
@api = nil
|
19
|
+
end
|
7
20
|
|
8
|
-
|
9
|
-
def
|
10
|
-
|
11
|
-
|
21
|
+
protected
|
22
|
+
def config_for(keys, context=@@config)
|
23
|
+
key = keys.shift.to_s
|
24
|
+
value = context.stringify_keys[key]
|
25
|
+
value && value.respond_to?(:fetch) && keys.any? ? config_for(keys, value) : value
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_config
|
29
|
+
config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
|
30
|
+
@@config = File.exist?(config_file) && YAML::load_file(config_file) || {}
|
12
31
|
end
|
13
32
|
end
|
33
|
+
|
34
|
+
module ExtraLoop::Storage::Exceptions
|
35
|
+
class MissingCredentialsError < StandardError; end
|
36
|
+
end
|
@@ -6,20 +6,21 @@ class ExtraLoop::Storage::ScrapingSession < Ohm::Model
|
|
6
6
|
|
7
7
|
attribute :title
|
8
8
|
reference :model, ExtraLoop::Storage::Model
|
9
|
+
index :model_id
|
9
10
|
|
10
11
|
def records(params={})
|
11
|
-
klass = if Object.const_defined?(model.
|
12
|
-
Object.const_get(model.
|
12
|
+
klass = if Object.const_defined?(model.id)
|
13
|
+
Object.const_get(model.id)
|
13
14
|
else
|
14
15
|
dynamic_class = Class.new(ExtraLoop::Storage::Record) do
|
15
|
-
# override default to_hash so that it will return the Redis hash
|
16
|
+
# override the default to_hash so that it will return the Redis hash
|
16
17
|
# internally stored by Ohm
|
17
18
|
def to_hash
|
18
19
|
Ohm.redis.hgetall self.key
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
22
|
-
Object.const_set(model.
|
23
|
+
Object.const_set(model.id, dynamic_class)
|
23
24
|
dynamic_class
|
24
25
|
end
|
25
26
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "rubygems"
|
2
|
+
require "bundler/setup"
|
2
3
|
require "json"
|
3
4
|
require "yaml"
|
4
5
|
require "rubygems"
|
@@ -6,6 +7,8 @@ require "redis"
|
|
6
7
|
require "ohm"
|
7
8
|
require "ohm/contrib"
|
8
9
|
require "extraloop"
|
10
|
+
require "fileutils"
|
11
|
+
require "pry"
|
9
12
|
|
10
13
|
begin
|
11
14
|
gem "fusion_tables", "~> 0.3.1"
|
@@ -14,14 +17,23 @@ rescue Gem::LoadError
|
|
14
17
|
end
|
15
18
|
|
16
19
|
|
20
|
+
begin
|
21
|
+
gem "cartodb-rb-client"
|
22
|
+
require "cartodb-rb-client"
|
23
|
+
rescue Gem::LoadError
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
|
17
28
|
base_path = File.realpath(File.dirname(__FILE__))
|
18
29
|
$: << "#{base_path}"
|
19
30
|
|
20
31
|
require "scraper_base"
|
32
|
+
require "support"
|
21
33
|
|
22
34
|
module ExtraLoop
|
23
35
|
module Storage
|
24
|
-
VERSION ||= "0.0.
|
36
|
+
VERSION ||= "0.0.8"
|
25
37
|
|
26
38
|
def self.connect(*args)
|
27
39
|
Ohm.connect(*args)
|
@@ -29,7 +41,7 @@ module ExtraLoop
|
|
29
41
|
|
30
42
|
# Tries to automatically locate the models directory and load all Ruby files within it
|
31
43
|
def self.autoload_models(dirname='models')
|
32
|
-
|
44
|
+
# Dir["**/**#{dirname}/*.rb"].each { |path| require "./#{path}" }
|
33
45
|
end
|
34
46
|
end
|
35
47
|
end
|
@@ -44,4 +56,6 @@ ExtraLoop::Storage.autoload :ScrapingSession, "#{base_path}/scraping_session.rb"
|
|
44
56
|
ExtraLoop::Storage.autoload :Model, "#{base_path}/model.rb"
|
45
57
|
ExtraLoop::Storage.autoload :DatasetFactory, "#{base_path}/dataset_factory.rb"
|
46
58
|
ExtraLoop::Storage.autoload :RemoteStore, "#{base_path}/remote_store.rb"
|
59
|
+
ExtraLoop::Storage.autoload :FusionTables, "#{base_path}/remote_store/fusion_tables.rb"
|
60
|
+
ExtraLoop::Storage.autoload :Cartodb, "#{base_path}/remote_store/cartodb.rb"
|
47
61
|
|
@@ -7,6 +7,7 @@ class ExtraLoop::ScraperBase
|
|
7
7
|
title ||= collection_name
|
8
8
|
|
9
9
|
@model = model_klass = model.respond_to?(:new) && model || ExtraLoop::Storage::DatasetFactory.new(model.to_sym, @extractor_args.map(&:first)).get_class
|
10
|
+
log_session! title
|
10
11
|
|
11
12
|
log_session! title
|
12
13
|
|
@@ -21,9 +22,7 @@ class ExtraLoop::ScraperBase
|
|
21
22
|
def log_session!(title="")
|
22
23
|
if !@session
|
23
24
|
ns = ExtraLoop::Storage
|
24
|
-
|
25
|
-
model = results.any? && results.first || ns::Model.create(:name => @model)
|
26
|
-
@session = ns::ScrapingSession.create :title => title, :model => model
|
25
|
+
@session = ns::ScrapingSession.create :title => title, :model => ExtraLoop::Storage::Model[@model.to_s]
|
27
26
|
end
|
28
27
|
end
|
29
28
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class Hash
|
2
|
+
def stringify_keys
|
3
|
+
_reduce_keys &:to_s
|
4
|
+
end
|
5
|
+
|
6
|
+
def symbolize_keys
|
7
|
+
_reduce_keys &:to_sym
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
def _reduce_keys(&block)
|
12
|
+
self.reduce({}){|memo,(k,v)| memo[yield(k)] = v; memo}
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
class Object
|
17
|
+
def try(method, *args)
|
18
|
+
send method, *args if respond_to? method
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class String
|
23
|
+
def capitalize_first
|
24
|
+
self[0].upcase + self[1, size]
|
25
|
+
end
|
26
|
+
def camel_case
|
27
|
+
self.gsub(/^.|_./) { |chars| chars.split("").last.upcase }
|
28
|
+
end
|
29
|
+
|
30
|
+
def snake_case
|
31
|
+
self.gsub(/::/, '/').
|
32
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
33
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
34
|
+
tr("-", "_").
|
35
|
+
downcase
|
36
|
+
end
|
37
|
+
end
|
@@ -1,8 +1,6 @@
|
|
1
|
-
|
1
|
+
require "spec_helper"
|
2
2
|
|
3
3
|
describe ExtraLoop::Storage::DatasetFactory do
|
4
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
5
|
-
|
6
4
|
describe "#get_class" do
|
7
5
|
context "with invalid input" do
|
8
6
|
before do
|
@@ -25,7 +23,7 @@ describe ExtraLoop::Storage::DatasetFactory do
|
|
25
23
|
context "with valid input" do
|
26
24
|
before do
|
27
25
|
@factory = ExtraLoop::Storage::DatasetFactory.new(:blurb, [:a, :b, :c])
|
28
|
-
@session = ExtraLoop::Storage::ScrapingSession.create
|
26
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => ExtraLoop::Storage::Model[:Blurb]
|
29
27
|
end
|
30
28
|
|
31
29
|
subject { @factory.get_class.new :session => @session }
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'gdata'
|
3
|
+
|
4
|
+
class MyRecord < ExtraLoop::Storage::Record
|
5
|
+
attribute :index
|
6
|
+
end
|
7
|
+
|
8
|
+
describe ExtraLoop::Storage::FusionTables do
|
9
|
+
|
10
|
+
describe "#push" do
|
11
|
+
|
12
|
+
before do
|
13
|
+
@dataset = ExtraLoop::Storage::ScrapingSession.create :title => 'test dataset', :model => ExtraLoop::Storage::Model[:MyRecord]
|
14
|
+
|
15
|
+
5.times do |n|
|
16
|
+
MyRecord.create :index => n, :session => @dataset
|
17
|
+
end
|
18
|
+
|
19
|
+
table=Object.new
|
20
|
+
mock(table).insert(@dataset.to_hash[:records])
|
21
|
+
|
22
|
+
any_instance_of(GData::Client::FusionTables) do |ft|
|
23
|
+
mock(ft).clientlogin(is_a(String), is_a(String)).times(any_times)
|
24
|
+
stub(ft).create_table { |title, fields|
|
25
|
+
@title = title
|
26
|
+
@fields = fields
|
27
|
+
|
28
|
+
table
|
29
|
+
}
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "without a schema definition" do
|
34
|
+
|
35
|
+
before do
|
36
|
+
@fusion_table = ExtraLoop::Storage::FusionTables.new :username => 'username', 'password' => "password"
|
37
|
+
@fusion_table.push(@dataset)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should name the table with a string starting with 'Dataset'" do
|
41
|
+
@title.should match /^Dataset/
|
42
|
+
end
|
43
|
+
|
44
|
+
subject { @fields }
|
45
|
+
|
46
|
+
it { should include :name => 'session_id', :type => 'number' }
|
47
|
+
it { should_not include :name => 'session_id', :type => 'string' }
|
48
|
+
it { should include :name => 'index', :type => 'string' }
|
49
|
+
it { should include :name => 'id', :type => 'number' }
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
context "with a schema definition" do
|
54
|
+
|
55
|
+
before do
|
56
|
+
@fusion_table = ExtraLoop::Storage::FusionTables.new(["username","password"], {:schema => {:session_id => 'string', :index => 'number'}})
|
57
|
+
@fusion_table.push(@dataset)
|
58
|
+
end
|
59
|
+
|
60
|
+
subject { @fields }
|
61
|
+
|
62
|
+
it { should_not include :name => 'session_id', :type => 'number' }
|
63
|
+
it { should include :name => 'session_id', :type => 'string' }
|
64
|
+
it { should include :name => 'index', :type => 'number' }
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ExtraLoop::Storage::Model do
|
4
|
+
describe "::[]()" do
|
5
|
+
before do
|
6
|
+
@model = ExtraLoop::Storage::Model[:My_model]
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should create a record if a model with id 'my_model' does not exist" do
|
10
|
+
@model.should eql(ExtraLoop::Storage::Model[:My_model])
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should throw an argument error if the model id is not capitalized" do
|
14
|
+
lambda { ExtraLoop::Storage::Model[:my_model] }.should raise_error(ArgumentError)
|
15
|
+
end
|
16
|
+
|
17
|
+
after do
|
18
|
+
Ohm.flush
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/spec/record_spec.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
|
1
|
+
require "spec_helper"
|
2
2
|
|
3
3
|
class MyRecord < ExtraLoop::Storage::Record
|
4
4
|
attribute :foo
|
5
5
|
attribute :bar
|
6
|
+
counter :n
|
7
|
+
index :bar
|
6
8
|
end
|
7
9
|
|
8
|
-
|
9
10
|
describe ExtraLoop::Storage::Record do
|
10
11
|
|
11
12
|
before do
|
12
|
-
@session = ExtraLoop::Storage::ScrapingSession.create
|
13
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => ExtraLoop::Storage::Model[:MyRecord]
|
13
14
|
end
|
14
15
|
|
15
16
|
context "record subclasses" do
|
@@ -21,6 +22,11 @@ describe ExtraLoop::Storage::Record do
|
|
21
22
|
it { subject.extracted_at.should be_a_kind_of(Time) }
|
22
23
|
it { subject.session.should eql(@session) }
|
23
24
|
|
25
|
+
it "should correctly increment counters" do
|
26
|
+
3.times { subject.incr :n }
|
27
|
+
subject.n.should eql(3)
|
28
|
+
end
|
29
|
+
|
24
30
|
context "without a session attribute" do
|
25
31
|
subject { MyRecord.new }
|
26
32
|
it { subject.valid?.should_not be_true }
|
@@ -33,6 +39,18 @@ describe ExtraLoop::Storage::Record do
|
|
33
39
|
it { subject.session.should eql(@session) }
|
34
40
|
end
|
35
41
|
|
42
|
+
describe "#find" do
|
43
|
+
before do
|
44
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
45
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
46
|
+
@target = MyRecord.create(:foo => 'foo', :bar => 'foo', :session => @session)
|
47
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
48
|
+
end
|
49
|
+
|
50
|
+
subject { MyRecord.find(:bar => 'foo').first }
|
51
|
+
it { should eql(@target) }
|
52
|
+
end
|
53
|
+
|
36
54
|
describe "Record::last" do
|
37
55
|
before do
|
38
56
|
2.times { MyRecord.create(:session => @session) }
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
|
4
|
+
describe ExtraLoop::Storage::RemoteStore do
|
5
|
+
|
6
|
+
describe "#get_transport" do
|
7
|
+
context "with credentials" do
|
8
|
+
subject { ExtraLoop::Storage::RemoteStore::get_transport(:fusion_tables, ['username', 'password'] ) }
|
9
|
+
|
10
|
+
it { should be_a_kind_of ExtraLoop::Storage::FusionTables }
|
11
|
+
end
|
12
|
+
|
13
|
+
context "without credentials or config file" do
|
14
|
+
it "should raise a MissingCredentials error" do
|
15
|
+
expect { ExtraLoop::Storage::RemoteStore::get_transport(:fusion_tables, nil, {:schema => {:session_id => 'string', :index => 'number'}}) }.to raise_exception(ExtraLoop::Storage::Exceptions::MissingCredentialsError)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
context "with credentials in config file" do
|
20
|
+
before do
|
21
|
+
config={
|
22
|
+
:datastore => {
|
23
|
+
:fusion_tables => {
|
24
|
+
:username => 'test_user',
|
25
|
+
:password => 'password'
|
26
|
+
}
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
|
31
|
+
FileUtils.mv(config_file, config_file + ".diabled") if File.exist? config_file
|
32
|
+
File.open(config_file, 'w') { |f| f.write config.to_yaml }
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should not raise a MissingCredentials error" do
|
36
|
+
expect { ExtraLoop::Storage::get_transport(:fusion_tables, nil, {:schema => {:session_id => 'string', :index => 'number'}}) }.to_not raise_exception(ExtraLoop::Storage::Exceptions::MissingCredentialsError)
|
37
|
+
end
|
38
|
+
|
39
|
+
after do
|
40
|
+
config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
|
41
|
+
FileUtils.rm config_file
|
42
|
+
FileUtils.mv(config_file + ".disabled", config_file) if File.exist? config_file + ".disabled"
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/spec/scraper_base_spec.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
3
|
|
4
4
|
describe ExtraLoop::ScraperBase do
|
5
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
6
|
-
|
7
5
|
before(:each) do
|
8
6
|
@records = records = (1..10).to_a.map { |n| OpenStruct.new :foo => "foo#{n}" }
|
9
7
|
@scraper = ExtraLoop::ScraperBase.new("http://someurl.net").
|
@@ -91,8 +89,8 @@ describe ExtraLoop::ScraperBase do
|
|
91
89
|
end
|
92
90
|
|
93
91
|
it "should persist 10 records" do
|
94
|
-
|
95
|
-
|
92
|
+
@scraper.session.records.should have(10).records
|
93
|
+
@scraper.session.records.map(&:id).reject(&:nil?).should_not be_empty
|
96
94
|
end
|
97
95
|
end
|
98
96
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
$VERBOSE=nil
|
2
|
-
|
2
|
+
require "spec_helper"
|
3
3
|
|
4
4
|
describe ExtraLoop::Storage::ScrapingSession do
|
5
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
6
5
|
|
7
6
|
describe "#records" do
|
8
7
|
before(:each) do
|
9
8
|
my_collection = ExtraLoop::Storage::DatasetFactory.new(:MyCollection).get_class
|
10
|
-
@
|
9
|
+
@model = ExtraLoop::Storage::Model[:MyCollection]
|
10
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => @model
|
11
11
|
5.times do
|
12
12
|
item = my_collection.create(:session => @session)
|
13
13
|
end
|
@@ -15,7 +15,7 @@ describe ExtraLoop::Storage::ScrapingSession do
|
|
15
15
|
|
16
16
|
context "dataset class exists" do
|
17
17
|
context "passing a constant" do
|
18
|
-
subject { @session.records
|
18
|
+
subject { @session.records }
|
19
19
|
it { should have(5).items }
|
20
20
|
it { subject.all? { |record| record.valid? }.should be_true }
|
21
21
|
end
|
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: extraloop-redis-storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03
|
12
|
+
date: 2012-04-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: extraloop
|
16
|
-
requirement: &
|
16
|
+
requirement: &17153680 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.0.3
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *17153680
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ohm
|
27
|
-
requirement: &
|
27
|
+
requirement: &17152940 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.1.3
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *17152940
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ohm-contrib
|
38
|
-
requirement: &
|
38
|
+
requirement: &17151620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *17151620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: thor
|
49
|
-
requirement: &
|
49
|
+
requirement: &17151040 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - =
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.14.6
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *17151040
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rake
|
60
|
-
requirement: &
|
60
|
+
requirement: &17148720 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *17148720
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &17147980 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,21 @@ dependencies:
|
|
76
76
|
version: 2.7.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *17147980
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: guard-rspec
|
82
|
+
requirement: &17147320 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: 0.7.0
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *17147320
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: rr
|
82
|
-
requirement: &
|
93
|
+
requirement: &17146760 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
96
|
- - ~>
|
@@ -87,10 +98,10 @@ dependencies:
|
|
87
98
|
version: 1.0.4
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *17146760
|
91
102
|
- !ruby/object:Gem::Dependency
|
92
103
|
name: pry
|
93
|
-
requirement: &
|
104
|
+
requirement: &17146020 !ruby/object:Gem::Requirement
|
94
105
|
none: false
|
95
106
|
requirements:
|
96
107
|
- - ~>
|
@@ -98,7 +109,29 @@ dependencies:
|
|
98
109
|
version: 0.9.7.4
|
99
110
|
type: :development
|
100
111
|
prerelease: false
|
101
|
-
version_requirements: *
|
112
|
+
version_requirements: *17146020
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: fusion_tables
|
115
|
+
requirement: &17145200 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ~>
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 0.3.1
|
121
|
+
type: :development
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *17145200
|
124
|
+
- !ruby/object:Gem::Dependency
|
125
|
+
name: geocoder
|
126
|
+
requirement: &17144660 !ruby/object:Gem::Requirement
|
127
|
+
none: false
|
128
|
+
requirements:
|
129
|
+
- - ~>
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.1.1
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: *17144660
|
102
135
|
description: Redis-based Persistence layer for the ExtraLoop data extraction toolkit.
|
103
136
|
Includes a convenient command line tool that lets you list, filter, delete, and export
|
104
137
|
harvested datasets
|
@@ -122,10 +155,15 @@ files:
|
|
122
155
|
- lib/extraloop/redis-storage/remote_store/fusion_tables.rb
|
123
156
|
- lib/extraloop/redis-storage/scraping_session.rb
|
124
157
|
- lib/extraloop/scraper_base.rb
|
158
|
+
- lib/extraloop/support.rb
|
125
159
|
- spec/dataset_factory_spec.rb
|
160
|
+
- spec/fusion_tables_spec.rb
|
161
|
+
- spec/model_spec.rb
|
126
162
|
- spec/record_spec.rb
|
163
|
+
- spec/remote_store_spec.rb
|
127
164
|
- spec/scraper_base_spec.rb
|
128
165
|
- spec/scraping_session_spec.rb
|
166
|
+
- spec/spec_helper.rb
|
129
167
|
- bin/extraloop
|
130
168
|
homepage: http://github.com/afiore/extraloop-redis-storage
|
131
169
|
licenses: []
|
@@ -142,7 +180,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
142
180
|
version: '0'
|
143
181
|
segments:
|
144
182
|
- 0
|
145
|
-
hash:
|
183
|
+
hash: 3286007203333335840
|
146
184
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
185
|
none: false
|
148
186
|
requirements:
|