extraloop-redis-storage 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.rdoc +1 -3
- data/bin/extraloop +3 -16
- data/lib/extraloop/redis-storage/model.rb +5 -2
- data/lib/extraloop/redis-storage/record.rb +7 -4
- data/lib/extraloop/redis-storage/remote_store/fusion_tables.rb +11 -13
- data/lib/extraloop/redis-storage/remote_store.rb +31 -8
- data/lib/extraloop/redis-storage/scraping_session.rb +5 -4
- data/lib/extraloop/redis-storage.rb +16 -2
- data/lib/extraloop/scraper_base.rb +2 -3
- data/lib/extraloop/support.rb +37 -0
- data/spec/dataset_factory_spec.rb +2 -4
- data/spec/fusion_tables_spec.rb +67 -0
- data/spec/model_spec.rb +21 -0
- data/spec/record_spec.rb +21 -3
- data/spec/remote_store_spec.rb +47 -0
- data/spec/scraper_base_spec.rb +3 -5
- data/spec/scraping_session_spec.rb +4 -4
- data/spec/spec_helper.rb +12 -0
- metadata +57 -19
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -76,6 +76,4 @@ Similarly, stored datasets can be uploaded to a remote datastore:
|
|
76
76
|
|
77
77
|
extraloop datastore push 51..48 fusion_tables -c google_username:password
|
78
78
|
|
79
|
-
While Google's Fusion Tables is currently the only one implemented, support for other remote datastores (e.g.
|
80
|
-
[couchDB](http://couchdb.apache.org/), , [cartoDB](http://cartodb.com) ), and [CKAN Webstore](http://wiki.ckan.org/Webstore) will be added soon.
|
81
|
-
|
79
|
+
While Google's Fusion Tables is currently the only one implemented, support for pushing datasets to other remote datastores (e.g. {couchDB}[http://couchdb.apache.org/], {cartoDB}[http://cartodb.com], and {CKAN Webstore}[http://wiki.ckan.org/Webstore]) will be added soon.
|
data/bin/extraloop
CHANGED
@@ -10,15 +10,6 @@ class DataStoreCommand < Thor
|
|
10
10
|
|
11
11
|
ExtraLoop::Storage::autoload_models
|
12
12
|
|
13
|
-
class << self
|
14
|
-
def parse_config
|
15
|
-
config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
|
16
|
-
File.exist?(config_file) && YAML::load_file(config_file) or {}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
@@config = parse_config
|
21
|
-
|
22
13
|
@@sessions = ExtraLoop::Storage::ScrapingSession.all
|
23
14
|
@@redis = Ohm.redis
|
24
15
|
|
@@ -63,18 +54,14 @@ class DataStoreCommand < Thor
|
|
63
54
|
|
64
55
|
desc "push [sessions] [remote_store]", "Uploads one or several datasets to a remote data store"
|
65
56
|
method_option :schema, :type => 'hash', :aliases => "-s"
|
66
|
-
method_option :credentials, :type => '
|
57
|
+
method_option :credentials, :type => 'hash', :aliases => "-a"
|
67
58
|
|
68
59
|
def push(sessions, store_type=:fusion_tables)
|
60
|
+
schema = {:schema => options.fetch('schema', {})}
|
69
61
|
|
70
62
|
filter(sessions).each do |session|
|
71
63
|
store_type = store_type.to_sym
|
72
|
-
|
73
|
-
credentials = options.fetch('credentials', @@config[:datastore] && @@config[:datastore][:credentials] && @@config[:datastore][store_type]).split(':')
|
74
|
-
rescue NoMethodError
|
75
|
-
abort "Cannot find credentials for remote datastore.\nPlease specify them using the --credential switch (e.g. 'andrea:mypassword')"
|
76
|
-
end
|
77
|
-
datastore = ExtraLoop::Storage::RemoteStore::get_transport(store_type, credentials)
|
64
|
+
datastore = ExtraLoop::Storage::RemoteStore::get_transport(store_type, options[:credentials], schema)
|
78
65
|
datastore.push session
|
79
66
|
end
|
80
67
|
end
|
@@ -2,8 +2,11 @@
|
|
2
2
|
# are associated to a Scraping session object.
|
3
3
|
#
|
4
4
|
class ExtraLoop::Storage::Model < Ohm::Model
|
5
|
-
|
6
|
-
|
5
|
+
|
6
|
+
def self.[](id)
|
7
|
+
raise ArgumentError.new "model Id should be capitalized" unless id.to_s[0] =~ /[A-Z]/
|
8
|
+
super(id) || create(:id => id)
|
9
|
+
end
|
7
10
|
|
8
11
|
def to_hash
|
9
12
|
super.merge(attributes.reduce({}) { |memo, attribute|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
class ExtraLoop::Storage::Record < Ohm::Model
|
2
|
+
include Ohm::Callbacks
|
2
3
|
include Ohm::Boundaries
|
4
|
+
include Ohm::Typecast
|
3
5
|
include Ohm::Timestamping
|
4
6
|
|
5
7
|
reference :session, ExtraLoop::Storage::ScrapingSession
|
6
|
-
attribute :extracted_at
|
8
|
+
attribute :extracted_at, Time
|
7
9
|
index :session_id
|
8
10
|
|
9
11
|
def initialize attrs={}
|
10
|
-
self.class.send
|
12
|
+
self.class.send(:_inherit!)
|
11
13
|
super attrs
|
12
14
|
end
|
13
15
|
|
@@ -38,8 +40,9 @@ class ExtraLoop::Storage::Record < Ohm::Model
|
|
38
40
|
klass = self
|
39
41
|
|
40
42
|
while klass != ExtraLoop::Storage::Record
|
41
|
-
attributes.
|
42
|
-
|
43
|
+
%w[attributes indices counters].each do |method|
|
44
|
+
send(method).concat(klass.superclass.send(method)).uniq!
|
45
|
+
end
|
43
46
|
klass = klass.superclass
|
44
47
|
end
|
45
48
|
end
|
@@ -1,13 +1,10 @@
|
|
1
|
-
class ExtraLoop::Storage::FusionTables
|
2
|
-
@@connection = nil
|
3
1
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
@api = connect
|
8
|
-
end
|
2
|
+
|
3
|
+
class ExtraLoop::Storage::FusionTables < ExtraLoop::Storage::RemoteStore
|
4
|
+
@@connection = nil
|
9
5
|
|
10
6
|
def push(session)
|
7
|
+
@api = connect!
|
11
8
|
dataset = session.to_hash
|
12
9
|
records = dataset[:records]
|
13
10
|
title = dataset[:title].gsub(/\sDataset/,'')
|
@@ -20,22 +17,23 @@ class ExtraLoop::Storage::FusionTables
|
|
20
17
|
private
|
21
18
|
def make_schema(record)
|
22
19
|
defaults = {
|
20
|
+
'id' => 'number',
|
23
21
|
'session_id' => 'number'
|
24
22
|
}
|
25
23
|
|
26
|
-
|
27
|
-
|
24
|
+
option_schema = @options.fetch(:schema, {}).stringify_keys
|
25
|
+
schema = defaults.merge option_schema
|
28
26
|
record.keys.
|
29
|
-
reject { |key| schema.keys.include?(key) }.
|
27
|
+
reject { |key| schema.keys.include?(key.to_s) }.
|
30
28
|
map { |key| {:name => key.to_s, :type => 'string'} }.
|
31
29
|
concat(schema.map { |field, type| {:name => field.to_s, :type => type }})
|
32
30
|
end
|
33
31
|
|
34
|
-
def connect
|
32
|
+
def connect!
|
35
33
|
return @@connection if @@connection
|
36
|
-
|
37
34
|
@@connection = GData::Client::FusionTables.new
|
38
|
-
|
35
|
+
@credentials = @credentials.symbolize_keys
|
36
|
+
@@connection.clientlogin(@credentials[:username], @credentials[:password])
|
39
37
|
@@connection
|
40
38
|
end
|
41
39
|
end
|
@@ -1,13 +1,36 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# load_config below relies on Etc; require it here so this file does not
# depend on some other file having loaded it first.
require "etc"

#
# Base class for remote datastore transports. It resolves the credentials
# (explicit argument first, then the user's ~/.extraloop.yml) and keeps the
# options handed to the transport.
#
class ExtraLoop::Storage::RemoteStore
  @@config = {}

  #
  # Instantiates the relevant transport class for the selected datastore.
  #
  # datastore   - store name, e.g. :fusion_tables; it is camel-cased and
  #               looked up as a constant under ExtraLoop::Storage.
  # credentials - passed to the transport; when nil the transport falls back
  #               to the configuration file.
  # options     - passed to the transport unchanged.
  #
  # Returns the transport instance, or nil when ExtraLoop::Storage defines no
  # constant with the derived name.
  #
  def self.get_transport(datastore, credentials=nil, options={})
    classname = datastore.to_s.camel_case
    return unless ExtraLoop::Storage.const_defined?(classname)

    ExtraLoop::Storage.const_get(classname).new(credentials, options)
  end

  #
  # credentials - explicit credentials; when nil, the entry found in the
  #               config file under datastore -> <store name> is used.
  # options     - stored in @options for subclasses.
  #
  # Raises ExtraLoop::Storage::Exceptions::MissingCredentialsError when no
  # credentials can be found in either place.
  #
  def initialize(credentials, options={})
    # e.g. "ExtraLoop::Storage::FusionTables" -> :fusion_tables
    datastore = self.class.to_s.snake_case.split('/').last.to_sym
    load_config
    @options = options
    @credentials = credentials || config_for([:datastore, datastore])

    unless @credentials
      raise ExtraLoop::Storage::Exceptions::MissingCredentialsError,
            "Missing credentials for '#{datastore}' remote store"
    end

    @api = nil
  end

  protected

  #
  # Walks the configuration along +keys+ and returns the value found there.
  # Keys are compared as strings, so symbol and string keys are equivalent.
  # The +keys+ array is not modified.
  #
  # Returns nil when the first key is missing. When a value along the path
  # does not respond to #fetch, that value is returned as-is.
  #
  def config_for(keys, context=@@config)
    key, *remaining = keys
    value = context.stringify_keys[key.to_s]
    value && value.respond_to?(:fetch) && remaining.any? ? config_for(remaining, value) : value
  end

  #
  # (Re)loads .extraloop.yml from the current user's home directory into the
  # class-level configuration; falls back to an empty hash when the file is
  # missing or yields a falsy value.
  #
  def load_config
    config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
    @@config = File.exist?(config_file) && YAML::load_file(config_file) || {}
  end
end

module ExtraLoop::Storage::Exceptions
  # Raised when a remote store is given no credentials and none are configured.
  class MissingCredentialsError < StandardError; end
end
|
@@ -6,20 +6,21 @@ class ExtraLoop::Storage::ScrapingSession < Ohm::Model
|
|
6
6
|
|
7
7
|
attribute :title
|
8
8
|
reference :model, ExtraLoop::Storage::Model
|
9
|
+
index :model_id
|
9
10
|
|
10
11
|
def records(params={})
|
11
|
-
klass = if Object.const_defined?(model.
|
12
|
-
Object.const_get(model.
|
12
|
+
klass = if Object.const_defined?(model.id)
|
13
|
+
Object.const_get(model.id)
|
13
14
|
else
|
14
15
|
dynamic_class = Class.new(ExtraLoop::Storage::Record) do
|
15
|
-
# override default to_hash so that it will return the Redis hash
|
16
|
+
# override the default to_hash so that it will return the Redis hash
|
16
17
|
# internally stored by Ohm
|
17
18
|
def to_hash
|
18
19
|
Ohm.redis.hgetall self.key
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
22
|
-
Object.const_set(model.
|
23
|
+
Object.const_set(model.id, dynamic_class)
|
23
24
|
dynamic_class
|
24
25
|
end
|
25
26
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "rubygems"
|
2
|
+
require "bundler/setup"
|
2
3
|
require "json"
|
3
4
|
require "yaml"
|
4
5
|
require "rubygems"
|
@@ -6,6 +7,8 @@ require "redis"
|
|
6
7
|
require "ohm"
|
7
8
|
require "ohm/contrib"
|
8
9
|
require "extraloop"
|
10
|
+
require "fileutils"
|
11
|
+
require "pry"
|
9
12
|
|
10
13
|
begin
|
11
14
|
gem "fusion_tables", "~> 0.3.1"
|
@@ -14,14 +17,23 @@ rescue Gem::LoadError
|
|
14
17
|
end
|
15
18
|
|
16
19
|
|
20
|
+
begin
|
21
|
+
gem "cartodb-rb-client"
|
22
|
+
require "cartodb-rb-client"
|
23
|
+
rescue Gem::LoadError
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
|
17
28
|
base_path = File.realpath(File.dirname(__FILE__))
|
18
29
|
$: << "#{base_path}"
|
19
30
|
|
20
31
|
require "scraper_base"
|
32
|
+
require "support"
|
21
33
|
|
22
34
|
module ExtraLoop
|
23
35
|
module Storage
|
24
|
-
VERSION ||= "0.0.
|
36
|
+
VERSION ||= "0.0.8"
|
25
37
|
|
26
38
|
def self.connect(*args)
|
27
39
|
Ohm.connect(*args)
|
@@ -29,7 +41,7 @@ module ExtraLoop
|
|
29
41
|
|
30
42
|
# Tries to automatically locate the models directory and load all Ruby files within it
|
31
43
|
def self.autoload_models(dirname='models')
|
32
|
-
|
44
|
+
# Dir["**/**#{dirname}/*.rb"].each { |path| require "./#{path}" }
|
33
45
|
end
|
34
46
|
end
|
35
47
|
end
|
@@ -44,4 +56,6 @@ ExtraLoop::Storage.autoload :ScrapingSession, "#{base_path}/scraping_session.rb"
|
|
44
56
|
ExtraLoop::Storage.autoload :Model, "#{base_path}/model.rb"
|
45
57
|
ExtraLoop::Storage.autoload :DatasetFactory, "#{base_path}/dataset_factory.rb"
|
46
58
|
ExtraLoop::Storage.autoload :RemoteStore, "#{base_path}/remote_store.rb"
|
59
|
+
ExtraLoop::Storage.autoload :FusionTables, "#{base_path}/remote_store/fusion_tables.rb"
|
60
|
+
ExtraLoop::Storage.autoload :Cartodb, "#{base_path}/remote_store/cartodb.rb"
|
47
61
|
|
@@ -7,6 +7,7 @@ class ExtraLoop::ScraperBase
|
|
7
7
|
title ||= collection_name
|
8
8
|
|
9
9
|
@model = model_klass = model.respond_to?(:new) && model || ExtraLoop::Storage::DatasetFactory.new(model.to_sym, @extractor_args.map(&:first)).get_class
|
10
|
+
log_session! title
|
10
11
|
|
11
12
|
log_session! title
|
12
13
|
|
@@ -21,9 +22,7 @@ class ExtraLoop::ScraperBase
|
|
21
22
|
def log_session!(title="")
|
22
23
|
if !@session
|
23
24
|
ns = ExtraLoop::Storage
|
24
|
-
|
25
|
-
model = results.any? && results.first || ns::Model.create(:name => @model)
|
26
|
-
@session = ns::ScrapingSession.create :title => title, :model => model
|
25
|
+
@session = ns::ScrapingSession.create :title => title, :model => ExtraLoop::Storage::Model[@model.to_s]
|
27
26
|
end
|
28
27
|
end
|
29
28
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Key-conversion helpers added to Hash.
class Hash
  # Returns a new hash with every key converted via +to_s+.
  # The receiver is not modified; values are carried over unchanged.
  def stringify_keys
    _reduce_keys(&:to_s)
  end

  # Returns a new hash with every key converted via +to_sym+.
  # The receiver is not modified; values are carried over unchanged.
  def symbolize_keys
    _reduce_keys(&:to_sym)
  end

  private

  # Builds a new hash by passing each key through the given block.
  # When two keys convert to the same value, the later entry wins.
  def _reduce_keys
    each_with_object({}) { |(key, value), converted| converted[yield(key)] = value }
  end
end
|
15
|
+
|
16
|
+
class Object
  # Calls +method+ with +args+ (and the block, when one is given) if the
  # receiver responds to it; returns nil otherwise.
  def try(method, *args, &block)
    send(method, *args, &block) if respond_to?(method)
  end
end
|
21
|
+
|
22
|
+
# Naming helpers added to String.
class String
  # Returns a copy with the first character upcased and the rest untouched.
  # An empty string yields an empty string.
  def capitalize_first
    return dup if empty?

    self[0, 1].upcase + self[1..-1]
  end

  # Converts snake_case to CamelCase: the first character of each line and
  # every character following an underscore are upcased, and those
  # underscores are dropped ("fusion_tables" -> "FusionTables").
  def camel_case
    gsub(/^.|_./) { |match| match[-1, 1].upcase }
  end

  # Converts a CamelCase constant name to a snake_case path:
  # "::" becomes "/", word boundaries become "_", "-" becomes "_", and the
  # result is downcased ("ExtraLoop::Storage" -> "extra_loop/storage").
  def snake_case
    gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      tr("-", "_").
      downcase
  end
end
|
@@ -1,8 +1,6 @@
|
|
1
|
-
|
1
|
+
require "spec_helper"
|
2
2
|
|
3
3
|
describe ExtraLoop::Storage::DatasetFactory do
|
4
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
5
|
-
|
6
4
|
describe "#get_class" do
|
7
5
|
context "with invalid input" do
|
8
6
|
before do
|
@@ -25,7 +23,7 @@ describe ExtraLoop::Storage::DatasetFactory do
|
|
25
23
|
context "with valid input" do
|
26
24
|
before do
|
27
25
|
@factory = ExtraLoop::Storage::DatasetFactory.new(:blurb, [:a, :b, :c])
|
28
|
-
@session = ExtraLoop::Storage::ScrapingSession.create
|
26
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => ExtraLoop::Storage::Model[:Blurb]
|
29
27
|
end
|
30
28
|
|
31
29
|
subject { @factory.get_class.new :session => @session }
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
require 'gdata'

# Minimal record class used as the dataset model in these examples.
class MyRecord < ExtraLoop::Storage::Record
  attribute :index
end

describe ExtraLoop::Storage::FusionTables do

  describe "#push" do

    before do
      @dataset = ExtraLoop::Storage::ScrapingSession.create :title => 'test dataset', :model => ExtraLoop::Storage::Model[:MyRecord]

      5.times do |n|
        MyRecord.create :index => n, :session => @dataset
      end

      # Stand-in for the table object returned by create_table; it must
      # receive the session's records.
      table = Object.new
      mock(table).insert(@dataset.to_hash[:records])

      any_instance_of(GData::Client::FusionTables) do |ft|
        mock(ft).clientlogin(is_a(String), is_a(String)).times(any_times)
        # Capture the arguments passed to create_table so the examples
        # below can inspect the generated title and schema.
        stub(ft).create_table { |title, fields|
          @title = title
          @fields = fields

          table
        }
      end
    end

    context "without a schema definition" do

      before do
        # Mixed symbol/string keys are intentional here.
        @fusion_table = ExtraLoop::Storage::FusionTables.new :username => 'username', 'password' => "password"
        @fusion_table.push(@dataset)
      end

      it "should name the table with a string starting with 'Dataset'" do
        @title.should match /^Dataset/
      end

      subject { @fields }

      it { should include :name => 'session_id', :type => 'number' }
      it { should_not include :name => 'session_id', :type => 'string' }
      it { should include :name => 'index', :type => 'string' }
      it { should include :name => 'id', :type => 'number' }

    end

    context "with a schema definition" do

      before do
        # Credentials are passed as a hash so that this context does not rely
        # on a connection cached by a previously run example.
        @fusion_table = ExtraLoop::Storage::FusionTables.new({:username => "username", :password => "password"}, {:schema => {:session_id => 'string', :index => 'number'}})
        @fusion_table.push(@dataset)
      end

      subject { @fields }

      it { should_not include :name => 'session_id', :type => 'number' }
      it { should include :name => 'session_id', :type => 'string' }
      it { should include :name => 'index', :type => 'number' }
    end
  end
end
|
data/spec/model_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'

describe ExtraLoop::Storage::Model do
  describe "::[]()" do
    before do
      @model = ExtraLoop::Storage::Model[:My_model]
    end

    # A second lookup with the same id must return the record created by the
    # first one.
    it "should create a record if a model with id 'My_model' does not exist" do
      @model.should eql(ExtraLoop::Storage::Model[:My_model])
    end

    it "should throw an argument error if the model id is not capitalized" do
      lambda { ExtraLoop::Storage::Model[:my_model] }.should raise_error(ArgumentError)
    end

    after do
      Ohm.flush
    end
  end
end
|
data/spec/record_spec.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
-
|
1
|
+
require "spec_helper"
|
2
2
|
|
3
3
|
class MyRecord < ExtraLoop::Storage::Record
|
4
4
|
attribute :foo
|
5
5
|
attribute :bar
|
6
|
+
counter :n
|
7
|
+
index :bar
|
6
8
|
end
|
7
9
|
|
8
|
-
|
9
10
|
describe ExtraLoop::Storage::Record do
|
10
11
|
|
11
12
|
before do
|
12
|
-
@session = ExtraLoop::Storage::ScrapingSession.create
|
13
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => ExtraLoop::Storage::Model[:MyRecord]
|
13
14
|
end
|
14
15
|
|
15
16
|
context "record subclasses" do
|
@@ -21,6 +22,11 @@ describe ExtraLoop::Storage::Record do
|
|
21
22
|
it { subject.extracted_at.should be_a_kind_of(Time) }
|
22
23
|
it { subject.session.should eql(@session) }
|
23
24
|
|
25
|
+
it "should correctly increment counters" do
|
26
|
+
3.times { subject.incr :n }
|
27
|
+
subject.n.should eql(3)
|
28
|
+
end
|
29
|
+
|
24
30
|
context "without a session attribute" do
|
25
31
|
subject { MyRecord.new }
|
26
32
|
it { subject.valid?.should_not be_true }
|
@@ -33,6 +39,18 @@ describe ExtraLoop::Storage::Record do
|
|
33
39
|
it { subject.session.should eql(@session) }
|
34
40
|
end
|
35
41
|
|
42
|
+
describe "#find" do
|
43
|
+
before do
|
44
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
45
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
46
|
+
@target = MyRecord.create(:foo => 'foo', :bar => 'foo', :session => @session)
|
47
|
+
MyRecord.create(:foo => 'foo', :bar => 'bar', :session => @session)
|
48
|
+
end
|
49
|
+
|
50
|
+
subject { MyRecord.find(:bar => 'foo').first }
|
51
|
+
it { should eql(@target) }
|
52
|
+
end
|
53
|
+
|
36
54
|
describe "Record::last" do
|
37
55
|
before do
|
38
56
|
2.times { MyRecord.create(:session => @session) }
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require "spec_helper"


describe ExtraLoop::Storage::RemoteStore do

  describe "#get_transport" do
    context "with credentials" do
      subject { ExtraLoop::Storage::RemoteStore::get_transport(:fusion_tables, ['username', 'password'] ) }

      it { should be_a_kind_of ExtraLoop::Storage::FusionTables }
    end

    # NOTE(review): this example presumably expects that the user running the
    # suite has no ~/.extraloop.yml with fusion_tables credentials; confirm.
    context "without credentials or config file" do
      it "should raise a MissingCredentials error" do
        expect { ExtraLoop::Storage::RemoteStore::get_transport(:fusion_tables, nil, {:schema => {:session_id => 'string', :index => 'number'}}) }.to raise_exception(ExtraLoop::Storage::Exceptions::MissingCredentialsError)
      end
    end

    context "with credentials in config file" do
      before do
        config = {
          :datastore => {
            :fusion_tables => {
              :username => 'test_user',
              :password => 'password'
            }
          }
        }

        config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
        # Park any real config file; the suffix must match the one used in the
        # after hook, otherwise the file is never restored.
        FileUtils.mv(config_file, config_file + ".disabled") if File.exist? config_file
        File.open(config_file, 'w') { |f| f.write config.to_yaml }
      end

      it "should not raise a MissingCredentials error" do
        expect { ExtraLoop::Storage::RemoteStore::get_transport(:fusion_tables, nil, {:schema => {:session_id => 'string', :index => 'number'}}) }.to_not raise_exception(ExtraLoop::Storage::Exceptions::MissingCredentialsError)
      end

      after do
        config_file = File.join(Etc.getpwuid.dir, '.extraloop.yml')
        # Remove the fixture, then put the parked config file back.
        FileUtils.rm config_file
        FileUtils.mv(config_file + ".disabled", config_file) if File.exist? config_file + ".disabled"
      end

    end
  end
end
|
data/spec/scraper_base_spec.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
3
|
|
4
4
|
describe ExtraLoop::ScraperBase do
|
5
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
6
|
-
|
7
5
|
before(:each) do
|
8
6
|
@records = records = (1..10).to_a.map { |n| OpenStruct.new :foo => "foo#{n}" }
|
9
7
|
@scraper = ExtraLoop::ScraperBase.new("http://someurl.net").
|
@@ -91,8 +89,8 @@ describe ExtraLoop::ScraperBase do
|
|
91
89
|
end
|
92
90
|
|
93
91
|
it "should persist 10 records" do
|
94
|
-
|
95
|
-
|
92
|
+
@scraper.session.records.should have(10).records
|
93
|
+
@scraper.session.records.map(&:id).reject(&:nil?).should_not be_empty
|
96
94
|
end
|
97
95
|
end
|
98
96
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
$VERBOSE=nil
|
2
|
-
|
2
|
+
require "spec_helper"
|
3
3
|
|
4
4
|
describe ExtraLoop::Storage::ScrapingSession do
|
5
|
-
Ohm.connect :url => "redis://127.0.0.1:6379/7"
|
6
5
|
|
7
6
|
describe "#records" do
|
8
7
|
before(:each) do
|
9
8
|
my_collection = ExtraLoop::Storage::DatasetFactory.new(:MyCollection).get_class
|
10
|
-
@
|
9
|
+
@model = ExtraLoop::Storage::Model[:MyCollection]
|
10
|
+
@session = ExtraLoop::Storage::ScrapingSession.create :model => @model
|
11
11
|
5.times do
|
12
12
|
item = my_collection.create(:session => @session)
|
13
13
|
end
|
@@ -15,7 +15,7 @@ describe ExtraLoop::Storage::ScrapingSession do
|
|
15
15
|
|
16
16
|
context "dataset class exists" do
|
17
17
|
context "passing a constant" do
|
18
|
-
subject { @session.records
|
18
|
+
subject { @session.records }
|
19
19
|
it { should have(5).items }
|
20
20
|
it { subject.all? { |record| record.valid? }.should be_true }
|
21
21
|
end
|
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: extraloop-redis-storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03
|
12
|
+
date: 2012-04-03 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: extraloop
|
16
|
-
requirement: &
|
16
|
+
requirement: &17153680 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.0.3
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *17153680
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ohm
|
27
|
-
requirement: &
|
27
|
+
requirement: &17152940 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.1.3
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *17152940
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ohm-contrib
|
38
|
-
requirement: &
|
38
|
+
requirement: &17151620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *17151620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: thor
|
49
|
-
requirement: &
|
49
|
+
requirement: &17151040 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - =
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.14.6
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *17151040
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rake
|
60
|
-
requirement: &
|
60
|
+
requirement: &17148720 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *17148720
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &17147980 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,21 @@ dependencies:
|
|
76
76
|
version: 2.7.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *17147980
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: guard-rspec
|
82
|
+
requirement: &17147320 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: 0.7.0
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *17147320
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: rr
|
82
|
-
requirement: &
|
93
|
+
requirement: &17146760 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
96
|
- - ~>
|
@@ -87,10 +98,10 @@ dependencies:
|
|
87
98
|
version: 1.0.4
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *17146760
|
91
102
|
- !ruby/object:Gem::Dependency
|
92
103
|
name: pry
|
93
|
-
requirement: &
|
104
|
+
requirement: &17146020 !ruby/object:Gem::Requirement
|
94
105
|
none: false
|
95
106
|
requirements:
|
96
107
|
- - ~>
|
@@ -98,7 +109,29 @@ dependencies:
|
|
98
109
|
version: 0.9.7.4
|
99
110
|
type: :development
|
100
111
|
prerelease: false
|
101
|
-
version_requirements: *
|
112
|
+
version_requirements: *17146020
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: fusion_tables
|
115
|
+
requirement: &17145200 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ~>
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 0.3.1
|
121
|
+
type: :development
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *17145200
|
124
|
+
- !ruby/object:Gem::Dependency
|
125
|
+
name: geocoder
|
126
|
+
requirement: &17144660 !ruby/object:Gem::Requirement
|
127
|
+
none: false
|
128
|
+
requirements:
|
129
|
+
- - ~>
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.1.1
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: *17144660
|
102
135
|
description: Redis-based Persistence layer for the ExtraLoop data extraction toolkit.
|
103
136
|
Includes a convenient command line tool for listing, filtering, deleting, and exporting
|
104
137
|
harvested datasets
|
@@ -122,10 +155,15 @@ files:
|
|
122
155
|
- lib/extraloop/redis-storage/remote_store/fusion_tables.rb
|
123
156
|
- lib/extraloop/redis-storage/scraping_session.rb
|
124
157
|
- lib/extraloop/scraper_base.rb
|
158
|
+
- lib/extraloop/support.rb
|
125
159
|
- spec/dataset_factory_spec.rb
|
160
|
+
- spec/fusion_tables_spec.rb
|
161
|
+
- spec/model_spec.rb
|
126
162
|
- spec/record_spec.rb
|
163
|
+
- spec/remote_store_spec.rb
|
127
164
|
- spec/scraper_base_spec.rb
|
128
165
|
- spec/scraping_session_spec.rb
|
166
|
+
- spec/spec_helper.rb
|
129
167
|
- bin/extraloop
|
130
168
|
homepage: http://github.com/afiore/extraloop-redis-storage
|
131
169
|
licenses: []
|
@@ -142,7 +180,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
142
180
|
version: '0'
|
143
181
|
segments:
|
144
182
|
- 0
|
145
|
-
hash:
|
183
|
+
hash: 3286007203333335840
|
146
184
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
185
|
none: false
|
148
186
|
requirements:
|