storexplore 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.travis.yml +8 -0
- data/README.md +129 -1
- data/Rakefile +6 -0
- data/lib/storexplore.rb +2 -2
- data/lib/storexplore/api.rb +14 -4
- data/lib/storexplore/digger.rb +4 -4
- data/lib/storexplore/dsl.rb +61 -0
- data/lib/storexplore/testing/dummy_store_api.rb +2 -2
- data/lib/storexplore/version.rb +2 -2
- data/lib/storexplore/walker_page.rb +1 -14
- data/spec/lib/storexplore/api_spec.rb +14 -7
- data/spec/lib/storexplore/dsl_spec.rb +215 -0
- data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +38 -16
- data/spec/lib/storexplore/walker_page_spec.rb +1 -2
- data/spec/spec_helper.rb +11 -1
- data/storexplore.gemspec +1 -0
- metadata +20 -9
- metadata.gz.sig +0 -0
- data/lib/storexplore/api_builder.rb +0 -68
- data/spec/lib/storexplore/api_builder_spec.rb +0 -99
- data/spec/lib/storexplore/digger_spec.rb +0 -53
- data/spec/lib/storexplore/walker_spec.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7da700982a0fbe6f80b7aa37ff7e4f231f43935
|
4
|
+
data.tar.gz: 5fdb4f7684e506964f3fff42bdde441f70e71e92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 97e8fe6604856ffe5c4baa1287fe40ce5cfe40c4a950af74496bf848b3e42210973b218a117fc9a011d17814f82ca499e141fe810035e78a1d76a6bc45c58170
|
7
|
+
data.tar.gz: c5645631ef4ec8569277f15c2d7b9354f26a134833ee79e9cefd52ac0b3fea37919fabe8d51eab67be94479dd18c8cf2400aa8ca362d6aca846c6c1e41971c79
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -4,6 +4,9 @@ A declarative scrapping DSL that lets one define directory like apis to an onlin
|
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
+
In order to be able to enumerate all items of a store in constant memory,
|
8
|
+
Storexplore requires Matz Ruby 2.0 for its lazy enumerators.
|
9
|
+
|
7
10
|
Add this line to your application's Gemfile:
|
8
11
|
|
9
12
|
gem 'storexplore'
|
@@ -18,7 +21,132 @@ Or install it yourself as:
|
|
18
21
|
|
19
22
|
## Usage
|
20
23
|
|
21
|
-
|
24
|
+
The library builds hierarchical APIs on online stores. Stores are typically
|
25
|
+
organized in the following way :
|
26
|
+
|
27
|
+
Store > Categories > ... > Sub Categories > Items
|
28
|
+
|
29
|
+
The store is like a root category. Any category, at any depth level can have
|
30
|
+
both children categories and items. Items cannot have children of any kind.
|
31
|
+
Both categories and items can have attributes.
|
32
|
+
|
33
|
+
All searching of children and attributes is done through mechanize/nokogiri
|
34
|
+
selectors (css or xpath).
|
35
|
+
|
36
|
+
Here is a sample store api declaration :
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
Storexplore::define_api 'dummy-store.com' do
|
40
|
+
|
41
|
+
categories 'a.category' do
|
42
|
+
attributes do
|
43
|
+
{ :name => page.get_one("h1").content }
|
44
|
+
end
|
45
|
+
|
46
|
+
categories 'a.category' do
|
47
|
+
attributes do
|
48
|
+
{ :name => page.get_one("h1").content }
|
49
|
+
end
|
50
|
+
|
51
|
+
items 'a.item' do
|
52
|
+
attributes do
|
53
|
+
{
|
54
|
+
:name => page.get_one('h1').content,
|
55
|
+
:brand => page.get_one('#brand').content,
|
56
|
+
:price => page.get_one('#price').content.to_f,
|
57
|
+
:image => page.get_one('#image').content,
|
58
|
+
:remote_id => page.get_one('#remote_id').content
|
59
|
+
}
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
This builds a hierarchical API on the 'dummy-store.com' online store. This
|
68
|
+
registers a new api definition that will be used to browse any store whose
|
69
|
+
uri contains 'dummy-store.com'.
|
70
|
+
|
71
|
+
Now here is how this API can be accessed to pretty print all its content:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
Api.browse('http://www.dummy-store.com').categories.each do |category|
|
75
|
+
|
76
|
+
puts "category: #{category.title}"
|
77
|
+
puts "attributes: #{category.attributes}"
|
78
|
+
|
79
|
+
category.categories.each do |sub_category|
|
80
|
+
|
81
|
+
puts " category: #{sub_category.title}"
|
82
|
+
puts " attributes: #{sub_category.attributes}"
|
83
|
+
|
84
|
+
sub_category.items.each do |item|
|
85
|
+
|
86
|
+
puts " item: #{item.title}"
|
87
|
+
puts " attributes: #{item.attributes}"
|
88
|
+
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
94
|
+
### Testing
|
95
|
+
|
96
|
+
Storexplore ships with some dummy store generation utilities. Dummy stores can
|
97
|
+
be generated to the file system using the Storexplore::Testing::DummyStore and
|
98
|
+
Storexplore::Testing::DummyStoreGenerator classes. This is particularly useful
|
99
|
+
while testing.
|
100
|
+
|
101
|
+
To use it, add the following, to your spec_helper.rb for example :
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
require 'storexplore/testing'
|
105
|
+
|
106
|
+
Storexplore::Testing.config do |config|
|
107
|
+
config.dummy_store_generation_dir= File.join(Rails.root, '../tmp')
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
111
|
+
It is then possible to generate a store with the following :
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
DummyStore.wipe_out_store(store_name)
|
115
|
+
@store_generator = DummyStore.open(store_name)
|
116
|
+
@store_generator.generate(3).categories.and(3).categories.and(item_count).items
|
117
|
+
```
|
118
|
+
|
119
|
+
It is also possible to add elements with explicit values :
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
@store_generator.
|
123
|
+
category(cat_name = "extra long category name").
|
124
|
+
category(sub_cat_name = "extra long sub category name").
|
125
|
+
item(item_name = "super extra long item name").generate().
|
126
|
+
attributes(price: 12.3)
|
127
|
+
```
|
128
|
+
|
129
|
+
Storexplore provides an api definition for dummy stores in
|
130
|
+
'storexplore/testing/dummy_store_api'. It can be required independently if
|
131
|
+
needed.
|
132
|
+
|
133
|
+
### RSpec shared examples
|
134
|
+
|
135
|
+
Storexplore also ships with an rspec shared examples macro. It can be used for
|
136
|
+
any custom store API definition.
|
137
|
+
|
138
|
+
```ruby
|
139
|
+
require 'storexplore/testing'
|
140
|
+
|
141
|
+
describe "MyStoreApi" do
|
142
|
+
include Storexplore::Testing::ApiSpecMacros
|
143
|
+
|
144
|
+
it_should_behave_like_any_store_items_api
|
145
|
+
|
146
|
+
...
|
147
|
+
|
148
|
+
end
|
149
|
+
```
|
22
150
|
|
23
151
|
## Contributing
|
24
152
|
|
data/Rakefile
CHANGED
data/lib/storexplore.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# storexplore.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -22,9 +22,9 @@
|
|
22
22
|
require "storexplore/version"
|
23
23
|
require "storexplore/array_utils"
|
24
24
|
require "storexplore/api"
|
25
|
-
require "storexplore/api_builder"
|
26
25
|
require "storexplore/browsing_error"
|
27
26
|
require "storexplore/digger"
|
27
|
+
require "storexplore/dsl"
|
28
28
|
require "storexplore/hash_utils"
|
29
29
|
require "storexplore/null_digger"
|
30
30
|
require "storexplore/uri_utils"
|
data/lib/storexplore/api.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# api.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -24,12 +24,18 @@ module Storexplore
|
|
24
24
|
# Objects able to walk a store and discover available items
|
25
25
|
class Api
|
26
26
|
|
27
|
+
def self.define(name, &block)
|
28
|
+
builder = Dsl.walker_builder(&block)
|
29
|
+
|
30
|
+
register_builder(name, builder)
|
31
|
+
end
|
32
|
+
|
27
33
|
def self.browse(store_url)
|
28
|
-
builder(store_url).
|
34
|
+
builder(store_url).new_walker(WalkerPage.open(store_url))
|
29
35
|
end
|
30
36
|
|
31
|
-
def self.
|
32
|
-
builders
|
37
|
+
def self.undef(name)
|
38
|
+
builders.delete(name)
|
33
39
|
end
|
34
40
|
|
35
41
|
# Uri of the main page of the store
|
@@ -46,6 +52,10 @@ module Storexplore
|
|
46
52
|
|
47
53
|
private
|
48
54
|
|
55
|
+
def self.register_builder(name, builder)
|
56
|
+
builders[name] = builder
|
57
|
+
end
|
58
|
+
|
49
59
|
def self.builder(store_url)
|
50
60
|
builders.each do |name, builder|
|
51
61
|
if store_url.include?(name)
|
data/lib/storexplore/digger.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# digger.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,14 +21,14 @@
|
|
21
21
|
|
22
22
|
module Storexplore
|
23
23
|
class Digger
|
24
|
-
def initialize(selector,
|
24
|
+
def initialize(selector, sub_walker_builder)
|
25
25
|
@selector = selector
|
26
|
-
@
|
26
|
+
@sub_walker_builder = sub_walker_builder
|
27
27
|
end
|
28
28
|
|
29
29
|
def sub_walkers(page, father)
|
30
30
|
page.search_links(@selector).each_with_index.to_a.lazy.map do |link, i|
|
31
|
-
@
|
31
|
+
@sub_walker_builder.new_walker(link, father, i)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# dsl.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
|
23
|
+
|
24
|
+
class Dsl
|
25
|
+
|
26
|
+
def self.walker_builder(&block)
|
27
|
+
new.tap do |dsl|
|
28
|
+
dsl.instance_eval(&block)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize()
|
33
|
+
@scrap_attributes_block = lambda do |_| {} end
|
34
|
+
@categories_digger = NullDigger.new
|
35
|
+
@items_digger = NullDigger.new
|
36
|
+
end
|
37
|
+
|
38
|
+
def attributes(&block)
|
39
|
+
@scrap_attributes_block = block
|
40
|
+
end
|
41
|
+
|
42
|
+
def categories(selector, &block)
|
43
|
+
@categories_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
44
|
+
end
|
45
|
+
|
46
|
+
def items(selector, &block)
|
47
|
+
@items_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
48
|
+
end
|
49
|
+
|
50
|
+
def new_walker(page_getter, father = nil, index = nil)
|
51
|
+
Walker.new(page_getter).tap do |walker|
|
52
|
+
walker.categories_digger = @categories_digger
|
53
|
+
walker.items_digger = @items_digger
|
54
|
+
walker.scrap_attributes_block = @scrap_attributes_block
|
55
|
+
walker.father = father
|
56
|
+
walker.index = index
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# dummy_store_api.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -24,7 +24,7 @@ require_relative 'dummy_store_constants'
|
|
24
24
|
module Storexplore
|
25
25
|
module Testing
|
26
26
|
|
27
|
-
Storexplore::
|
27
|
+
Storexplore::Api.define DummyStoreConstants::NAME do
|
28
28
|
|
29
29
|
categories 'a.category' do
|
30
30
|
attributes do
|
data/lib/storexplore/version.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# version.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -20,5 +20,5 @@
|
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
22
|
module Storexplore
|
23
|
-
VERSION = "0.0
|
23
|
+
VERSION = "0.1.0"
|
24
24
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# walker_page.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,19 +21,6 @@
|
|
21
21
|
|
22
22
|
require 'mechanize'
|
23
23
|
|
24
|
-
# monkey patch to avoid a regex uri encoding error when importing
|
25
|
-
# incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
|
26
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
|
27
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
|
28
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
|
29
|
-
# from uri method
|
30
|
-
require "webrick/httputils"
|
31
|
-
module WEBrick::HTTPUtils
|
32
|
-
def self.escape(s)
|
33
|
-
URI.escape(s)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
24
|
module Storexplore
|
38
25
|
|
39
26
|
class WalkerPage
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# api_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (C) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -25,20 +25,27 @@ module Storexplore
|
|
25
25
|
|
26
26
|
describe Api do
|
27
27
|
|
28
|
-
before :
|
29
|
-
Api.
|
30
|
-
|
31
|
-
|
32
|
-
|
28
|
+
before :all do
|
29
|
+
Storexplore::Api.define 'cats' do
|
30
|
+
attributes do
|
31
|
+
{animal: :cats}
|
32
|
+
end
|
33
|
+
end
|
33
34
|
end
|
34
35
|
|
35
36
|
it "select the good store items api builder to browse a store" do
|
36
|
-
expect(Api.browse(
|
37
|
+
expect(Api.browse("http://www.cats.net").attributes[:animal]).to eq(:cats)
|
37
38
|
end
|
38
39
|
|
39
40
|
it "fails when it does not know how to browse a store" do
|
40
41
|
expect(lambda { Api.browse("http://unknown.store.com") }).to raise_error(NotImplementedError)
|
41
42
|
end
|
42
43
|
|
44
|
+
it "allows to unregister an installed api (mostly for testing)" do
|
45
|
+
Api.undef 'cats'
|
46
|
+
|
47
|
+
expect(lambda { Api.browse("http://www.cats.com") }).to raise_error(NotImplementedError)
|
48
|
+
end
|
49
|
+
|
43
50
|
end
|
44
51
|
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# dsl_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore
|
25
|
+
|
26
|
+
describe Dsl do
|
27
|
+
|
28
|
+
def browse
|
29
|
+
@walker = Storexplore::Api.browse("http://www.cats-surplus.com")
|
30
|
+
end
|
31
|
+
|
32
|
+
after :each do
|
33
|
+
Storexplore::Api.undef 'cats'
|
34
|
+
end
|
35
|
+
|
36
|
+
context 'a simple store' do
|
37
|
+
before :each do
|
38
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: <<-eos)
|
39
|
+
<html>
|
40
|
+
<body>
|
41
|
+
<a href="category1.html" class="category">Cats with fur</a>
|
42
|
+
<a href="category2.html" class="category">Naked cats</a>
|
43
|
+
<a href="category3.html" class="category">Cats with feathers</a>
|
44
|
+
|
45
|
+
<a href="item1.html" class="item">The first thing we sell</a>
|
46
|
+
<a href="item2.html" class="item">The second thing we sell</a>
|
47
|
+
<a href="legal.html" class="legal">How we sell it</a>
|
48
|
+
</body>
|
49
|
+
</html>
|
50
|
+
eos
|
51
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category1.html", content_type: 'text/html', body: <<-eos)
|
52
|
+
<html>
|
53
|
+
<body>
|
54
|
+
<a href="category4.html" class="sub-category">Cats with red fur</a>
|
55
|
+
<a href="category5.html" class="sub-category">Cats with green fur</a>
|
56
|
+
|
57
|
+
<a href="item3.html" class="item">The first thing we sell</a>
|
58
|
+
<a href="item4.html" class="item">The second thing we sell</a>
|
59
|
+
<a href="item5.html" class="item">The second thing we sell</a>
|
60
|
+
</body>
|
61
|
+
</html>
|
62
|
+
eos
|
63
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category2.html", content_type: 'text/html', body: "")
|
64
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category3.html", content_type: 'text/html', body: "")
|
65
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category4.html", content_type: 'text/html', body: "")
|
66
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category5.html", content_type: 'text/html', body: "")
|
67
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item1.html", content_type: 'text/html', body: "")
|
68
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item2.html", content_type: 'text/html', body: "")
|
69
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item3.html", content_type: 'text/html', body: "")
|
70
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item4.html", content_type: 'text/html', body: "")
|
71
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item5.html", content_type: 'text/html', body: "")
|
72
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/legal.html", content_type: 'text/html', body: "")
|
73
|
+
|
74
|
+
Storexplore::Api.define 'cats' do
|
75
|
+
items 'a.item' do
|
76
|
+
attributes do
|
77
|
+
{ page: page,
|
78
|
+
this_is_the: :item}
|
79
|
+
end
|
80
|
+
end
|
81
|
+
categories 'a.category' do
|
82
|
+
categories 'a.sub-category' do
|
83
|
+
end
|
84
|
+
items 'a.item' do
|
85
|
+
attributes do
|
86
|
+
raise WalkerPageError.new("Dummy error message")
|
87
|
+
end
|
88
|
+
end
|
89
|
+
attributes do
|
90
|
+
{ page: page,
|
91
|
+
this_is_the: :category}
|
92
|
+
end
|
93
|
+
end
|
94
|
+
attributes do
|
95
|
+
{ page: page,
|
96
|
+
this_is_the: :root}
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
browse
|
101
|
+
end
|
102
|
+
|
103
|
+
it "root walker has the uri of the home page" do
|
104
|
+
expect(@walker.uri).to eq URI("http://www.cats-surplus.com")
|
105
|
+
end
|
106
|
+
|
107
|
+
it "sub walkers have the uri of their page" do
|
108
|
+
expect(@walker.items.first.uri).to eq URI("http://www.cats-surplus.com/item1.html")
|
109
|
+
end
|
110
|
+
|
111
|
+
it "root walker title is the store uri" do
|
112
|
+
expect(@walker.title).to eq "http://www.cats-surplus.com"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "sub walkers title is the text of its origin link" do
|
116
|
+
expect(@walker.items.first.title).to eq "The first thing we sell"
|
117
|
+
end
|
118
|
+
|
119
|
+
it "uses a selector to spot sub items" do
|
120
|
+
expect(@walker.items).to have_exactly(2).items
|
121
|
+
end
|
122
|
+
it "uses a selector to spot sub categories" do
|
123
|
+
expect(@walker.categories).to have_exactly(3).categories
|
124
|
+
end
|
125
|
+
it "root walker can have attributes" do
|
126
|
+
expect(@walker.attributes[:this_is_the]).to eq :root
|
127
|
+
expect(@walker.attributes[:page]).to be_instance_of(WalkerPage)
|
128
|
+
end
|
129
|
+
it "categories can have sub categories" do
|
130
|
+
expect(@walker.categories.first.categories).to have_exactly(2).categories
|
131
|
+
end
|
132
|
+
it "categories can have sub items" do
|
133
|
+
expect(@walker.categories.first.items).to have_exactly(3).items
|
134
|
+
end
|
135
|
+
it "categories can have attributes" do
|
136
|
+
expect(@walker.categories.first.attributes[:this_is_the]).to eq :category
|
137
|
+
expect(@walker.categories.first.attributes[:page]).to be_instance_of(WalkerPage)
|
138
|
+
end
|
139
|
+
it "items can have attributes" do
|
140
|
+
expect(@walker.items.first.attributes[:this_is_the]).to eq :item
|
141
|
+
expect(@walker.items.first.attributes[:page]).to be_instance_of(WalkerPage)
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
context "when troubleshooting" do
|
146
|
+
|
147
|
+
before :each do
|
148
|
+
@sub_walker = @walker.categories.first.items.drop(1).first
|
149
|
+
end
|
150
|
+
|
151
|
+
it "walkers have an index" do
|
152
|
+
expect(@sub_walker.index).to eq 1
|
153
|
+
end
|
154
|
+
|
155
|
+
it "has a meaningfull string representation" do
|
156
|
+
expect(@sub_walker.to_s).to include(Walker.to_s)
|
157
|
+
expect(@sub_walker.to_s).to include("##{@sub_walker.index}")
|
158
|
+
expect(@sub_walker.to_s).to include("@#{@sub_walker.uri}")
|
159
|
+
end
|
160
|
+
|
161
|
+
it "has a full genealogy" do
|
162
|
+
genealogy = @sub_walker.genealogy.split("\n")
|
163
|
+
|
164
|
+
expect(genealogy).to eq [@walker.to_s, @walker.categories.first.to_s, @sub_walker.to_s]
|
165
|
+
end
|
166
|
+
|
167
|
+
it "wraps parsing errors with debug errors" do
|
168
|
+
expect(lambda { @sub_walker.attributes }).to raise_error(BrowsingError, "Dummy error message\n#{@sub_walker.genealogy}")
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
context 'a redirected home page' do
|
175
|
+
before :each do
|
176
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", status: [301, "Moved Permanently"], location: "http://www.cats-surplus.com/index.html")
|
177
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/index.html", content_type: 'text/html', body: "")
|
178
|
+
|
179
|
+
Storexplore::Api.define 'cats' do
|
180
|
+
end
|
181
|
+
|
182
|
+
browse
|
183
|
+
end
|
184
|
+
|
185
|
+
it "root walker has the uri of finaly page" do
|
186
|
+
expect(@walker.uri).to eq URI("http://www.cats-surplus.com/index.html")
|
187
|
+
end
|
188
|
+
|
189
|
+
it "root walker title is the store uri" do
|
190
|
+
expect(@walker.title).to eq "http://www.cats-surplus.com"
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
context 'an empty store' do
|
195
|
+
before :each do
|
196
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: "")
|
197
|
+
|
198
|
+
Storexplore::Api.define 'cats' do
|
199
|
+
end
|
200
|
+
|
201
|
+
browse
|
202
|
+
end
|
203
|
+
|
204
|
+
it "has no items" do
|
205
|
+
expect(@walker.items).to be_empty
|
206
|
+
end
|
207
|
+
it "has no sub categories" do
|
208
|
+
expect(@walker.categories).to be_empty
|
209
|
+
end
|
210
|
+
it "has no sub attributes" do
|
211
|
+
expect(@walker.attributes).to be_empty
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# dummy_store_api_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -59,19 +59,28 @@ module Storexplore
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it "should use constant memory" do
|
62
|
-
|
62
|
+
FEW = 10
|
63
|
+
MANY = 100
|
64
|
+
RUNS = 2
|
63
65
|
|
64
|
-
|
65
|
-
|
66
|
+
many_inputs_memory = memory_usage_for_items(MANY, RUNS)
|
67
|
+
few_inputs_memory = memory_usage_for_items(FEW, RUNS)
|
66
68
|
|
67
|
-
|
69
|
+
slope = (many_inputs_memory - few_inputs_memory) / (MANY - FEW)
|
70
|
+
|
71
|
+
zero_inputs_memory = few_inputs_memory - FEW * slope
|
72
|
+
|
73
|
+
expect(slope).to be_within(zero_inputs_memory * 0.05).of(0.0)
|
68
74
|
end
|
69
75
|
|
70
|
-
def memory_usage_for_items(item_count)
|
76
|
+
def memory_usage_for_items(item_count, runs)
|
71
77
|
generate_store(store_name = "www.spec-perf-store.com", item_count)
|
72
|
-
|
73
|
-
|
78
|
+
data = runs.times.map do
|
79
|
+
memory_peak_of do
|
80
|
+
walk_store(store_name)
|
81
|
+
end
|
74
82
|
end
|
83
|
+
mean(data)
|
75
84
|
end
|
76
85
|
|
77
86
|
def memory_peak_of
|
@@ -95,25 +104,38 @@ module Storexplore
|
|
95
104
|
end
|
96
105
|
|
97
106
|
def current_living_objects
|
98
|
-
GC.start
|
99
107
|
object_counts = ObjectSpace.count_objects
|
100
108
|
object_counts[:TOTAL] - object_counts[:FREE]
|
101
109
|
end
|
102
110
|
|
103
111
|
def walk_store(store_name)
|
104
112
|
new_store(store_name).categories.each do |category|
|
105
|
-
|
106
|
-
|
113
|
+
register(category)
|
114
|
+
|
107
115
|
category.categories.each do |sub_category|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@attributes = item.attributes
|
116
|
+
register(sub_category)
|
117
|
+
|
118
|
+
sub_category.items.each do |item|
|
119
|
+
register(item)
|
113
120
|
end
|
114
121
|
end
|
115
122
|
end
|
116
123
|
end
|
124
|
+
|
125
|
+
def register(store_node)
|
126
|
+
@title = store_node.title
|
127
|
+
@attributes = store_node.attributes
|
128
|
+
|
129
|
+
# No GC is explicitly done, because:
|
130
|
+
# - large inputs forces it anyway
|
131
|
+
# - it greatly slows tests
|
132
|
+
# - GCing should not change the complexity of the system
|
133
|
+
# GC.start
|
134
|
+
end
|
135
|
+
|
136
|
+
def mean(data)
|
137
|
+
data.reduce(:+)/data.size
|
138
|
+
end
|
117
139
|
end
|
118
140
|
|
119
141
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# walker_page_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -23,7 +23,6 @@ require 'spec_helper'
|
|
23
23
|
|
24
24
|
module Storexplore
|
25
25
|
|
26
|
-
# @integration
|
27
26
|
describe WalkerPage, slow: true do
|
28
27
|
|
29
28
|
before :each do
|
data/spec/spec_helper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# spec_helper.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -19,10 +19,20 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
+
require 'fakeweb'
|
22
23
|
require 'spec_combos'
|
23
24
|
require 'storexplore'
|
24
25
|
require 'storexplore/testing'
|
25
26
|
|
27
|
+
# Dummy store generation
|
26
28
|
Storexplore::Testing.config do |config|
|
27
29
|
config.dummy_store_generation_dir= File.join(File.dirname(__FILE__), '../tmp')
|
28
30
|
end
|
31
|
+
|
32
|
+
# Clean up fakeweb registry after every test
|
33
|
+
FakeWeb.allow_net_connect = false
|
34
|
+
RSpec.configure do |config|
|
35
|
+
config.after(:each) do
|
36
|
+
FakeWeb.clean_registry
|
37
|
+
end
|
38
|
+
end
|
data/storexplore.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storexplore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philou
|
@@ -31,7 +31,7 @@ cert_chain:
|
|
31
31
|
yLcl1cmm5ALtJ/+Bkkmp0i4amXeTDMvq9r8PBsVsQwxYOYJBP+Umxz3PX6HjFHrQ
|
32
32
|
XdkXx3oZ
|
33
33
|
-----END CERTIFICATE-----
|
34
|
-
date:
|
34
|
+
date: 2014-01-14 00:00:00.000000000 Z
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: mechanize
|
@@ -103,6 +103,20 @@ dependencies:
|
|
103
103
|
- - ~>
|
104
104
|
- !ruby/object:Gem::Version
|
105
105
|
version: '0.2'
|
106
|
+
- !ruby/object:Gem::Dependency
|
107
|
+
name: fakeweb
|
108
|
+
requirement: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ~>
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '1.3'
|
113
|
+
type: :development
|
114
|
+
prerelease: false
|
115
|
+
version_requirements: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ~>
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '1.3'
|
106
120
|
description: A declarative scrapping DSL that lets one define directory like apis
|
107
121
|
to an online store
|
108
122
|
email:
|
@@ -113,6 +127,7 @@ extra_rdoc_files: []
|
|
113
127
|
files:
|
114
128
|
- .gitignore
|
115
129
|
- .rspec
|
130
|
+
- .travis.yml
|
116
131
|
- Gemfile
|
117
132
|
- Guardfile
|
118
133
|
- LICENSE
|
@@ -120,10 +135,10 @@ files:
|
|
120
135
|
- Rakefile
|
121
136
|
- lib/storexplore.rb
|
122
137
|
- lib/storexplore/api.rb
|
123
|
-
- lib/storexplore/api_builder.rb
|
124
138
|
- lib/storexplore/array_utils.rb
|
125
139
|
- lib/storexplore/browsing_error.rb
|
126
140
|
- lib/storexplore/digger.rb
|
141
|
+
- lib/storexplore/dsl.rb
|
127
142
|
- lib/storexplore/hash_utils.rb
|
128
143
|
- lib/storexplore/null_digger.rb
|
129
144
|
- lib/storexplore/testing.rb
|
@@ -141,14 +156,12 @@ files:
|
|
141
156
|
- lib/storexplore/walker.rb
|
142
157
|
- lib/storexplore/walker_page.rb
|
143
158
|
- lib/storexplore/walker_page_error.rb
|
144
|
-
- spec/lib/storexplore/api_builder_spec.rb
|
145
159
|
- spec/lib/storexplore/api_spec.rb
|
146
|
-
- spec/lib/storexplore/
|
160
|
+
- spec/lib/storexplore/dsl_spec.rb
|
147
161
|
- spec/lib/storexplore/store_walker_page_spec_fixture.html
|
148
162
|
- spec/lib/storexplore/testing/dummy_store_api_spec.rb
|
149
163
|
- spec/lib/storexplore/uri_utils_spec.rb
|
150
164
|
- spec/lib/storexplore/walker_page_spec.rb
|
151
|
-
- spec/lib/storexplore/walker_spec.rb
|
152
165
|
- spec/spec_helper.rb
|
153
166
|
- storexplore.gemspec
|
154
167
|
homepage: https://github.com/philou/storexplore
|
@@ -176,12 +189,10 @@ signing_key:
|
|
176
189
|
specification_version: 4
|
177
190
|
summary: Online store scraping library
|
178
191
|
test_files:
|
179
|
-
- spec/lib/storexplore/api_builder_spec.rb
|
180
192
|
- spec/lib/storexplore/api_spec.rb
|
181
|
-
- spec/lib/storexplore/digger_spec.rb
|
193
|
+
- spec/lib/storexplore/dsl_spec.rb
|
182
194
|
- spec/lib/storexplore/store_walker_page_spec_fixture.html
|
183
195
|
- spec/lib/storexplore/testing/dummy_store_api_spec.rb
|
184
196
|
- spec/lib/storexplore/uri_utils_spec.rb
|
185
197
|
- spec/lib/storexplore/walker_page_spec.rb
|
186
|
-
- spec/lib/storexplore/walker_spec.rb
|
187
198
|
- spec/spec_helper.rb
|
metadata.gz.sig
CHANGED
Binary file
|
@@ -1,68 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# api_builder.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
module Storexplore
|
23
|
-
|
24
|
-
class ApiBuilder
|
25
|
-
|
26
|
-
def self.define(api_class, digger_class, &block)
|
27
|
-
new(api_class, digger_class).tap do |result|
|
28
|
-
result.instance_eval(&block)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def initialize(api_class, digger_class)
|
33
|
-
@api_class = api_class
|
34
|
-
@digger_class = digger_class
|
35
|
-
@scrap_attributes_block = lambda do {} end
|
36
|
-
@categories_digger = NullDigger.new
|
37
|
-
@items_digger = NullDigger.new
|
38
|
-
end
|
39
|
-
|
40
|
-
def attributes(&block)
|
41
|
-
@scrap_attributes_block = block
|
42
|
-
end
|
43
|
-
|
44
|
-
def categories(selector, &block)
|
45
|
-
@categories_digger = @digger_class.new(selector, ApiBuilder.define(@api_class, @digger_class, &block))
|
46
|
-
end
|
47
|
-
|
48
|
-
def items(selector, &block)
|
49
|
-
@items_digger = @digger_class.new(selector, ApiBuilder.define(@api_class, @digger_class, &block))
|
50
|
-
end
|
51
|
-
|
52
|
-
def new(page_getter, father = nil, index = nil)
|
53
|
-
@api_class.new(page_getter).tap do |result|
|
54
|
-
result.categories_digger = @categories_digger
|
55
|
-
result.items_digger = @items_digger
|
56
|
-
result.scrap_attributes_block = @scrap_attributes_block
|
57
|
-
result.father = father
|
58
|
-
result.index = index
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.define_api(name, &block)
|
64
|
-
builder = ApiBuilder.define(Walker, Digger, &block)
|
65
|
-
|
66
|
-
Api.register_builder(name, builder)
|
67
|
-
end
|
68
|
-
end
|
@@ -1,99 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# api_builder_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe ApiBuilder do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@url = "http://www.mega-store.com"
|
30
|
-
@api = double("Store api").as_null_object
|
31
|
-
@api_class = double("Store api class")
|
32
|
-
@api_class.stub(:new).with(@url).and_return(@api)
|
33
|
-
|
34
|
-
@selector = "a.child"
|
35
|
-
@digger = double("Digger")
|
36
|
-
@digger_class = double("Digger class")
|
37
|
-
end
|
38
|
-
|
39
|
-
context "using define method" do
|
40
|
-
it "creates new store api" do
|
41
|
-
@builder = ApiBuilder.define(@api_class, Digger) { }
|
42
|
-
|
43
|
-
expect(@builder.new(@url)).to eq @api
|
44
|
-
end
|
45
|
-
|
46
|
-
it "initializes nested definition through its block" do
|
47
|
-
ApiBuilder.stub(:new).and_return(builder = double(ApiBuilder))
|
48
|
-
|
49
|
-
expect(builder).to receive(:complex_builder_initialization)
|
50
|
-
|
51
|
-
ApiBuilder.define(@api_class, Digger) do
|
52
|
-
complex_builder_initialization
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
context "when nesting definitions" do
|
58
|
-
|
59
|
-
before :each do
|
60
|
-
@builder = ApiBuilder.new(@api_class, @digger_class)
|
61
|
-
end
|
62
|
-
|
63
|
-
after :each do
|
64
|
-
@builder.new(@url)
|
65
|
-
end
|
66
|
-
|
67
|
-
[:categories, :items].each do |sub_definition|
|
68
|
-
|
69
|
-
before :each do
|
70
|
-
ApiBuilder.stub(:new).and_return(@sub_builder = double(ApiBuilder))
|
71
|
-
@digger_class.stub(:new).with(@selector, @sub_builder).and_return(@digger)
|
72
|
-
end
|
73
|
-
|
74
|
-
it "tells the store api how to find sub #{sub_definition}" do
|
75
|
-
expect(@api).to receive("#{sub_definition}_digger=").with(@digger)
|
76
|
-
|
77
|
-
@builder.send(sub_definition, @selector) do end
|
78
|
-
end
|
79
|
-
|
80
|
-
it "initialises the sub #{sub_definition} builder" do
|
81
|
-
expect(@sub_builder).to receive(:sub_builder_initialization)
|
82
|
-
|
83
|
-
@builder.send(sub_definition, @selector) do
|
84
|
-
sub_builder_initialization
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
it "tells the store api how to parse attributes" do
|
90
|
-
scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }
|
91
|
-
|
92
|
-
expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)
|
93
|
-
|
94
|
-
@builder.attributes(&scrap_attributes_block)
|
95
|
-
end
|
96
|
-
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# digger_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe Digger do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@digger = Digger.new(@selector = "a.items", @factory = double("Sub walker factory"))
|
30
|
-
@page = double(WalkerPage)
|
31
|
-
@page.stub(:search_links).with(@selector).and_return(@links = [double("Link"),double("Link")])
|
32
|
-
end
|
33
|
-
|
34
|
-
it "creates sub walkers for each link it finds" do
|
35
|
-
@links.each do |link|
|
36
|
-
expect(@factory).to receive(:new).with(link, anything, anything)
|
37
|
-
end
|
38
|
-
|
39
|
-
@digger.sub_walkers(@page, nil).to_a
|
40
|
-
end
|
41
|
-
|
42
|
-
it "for debugging purpose, provides father walker and link index to sub walkers" do
|
43
|
-
father = double("Father walker")
|
44
|
-
|
45
|
-
@links.each_with_index do |link, index|
|
46
|
-
expect(@factory).to receive(:new).with(link, father, index)
|
47
|
-
end
|
48
|
-
|
49
|
-
@digger.sub_walkers(@page, father).to_a
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
end
|
@@ -1,97 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# walker_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (C) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe Walker do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@page = double("Page", :uri => "http://www.maxi-discount.com")
|
30
|
-
@page_getter = double("Getter", :get => @page, :text => "Conserves")
|
31
|
-
@walker = Walker.new(@page_getter)
|
32
|
-
|
33
|
-
@sub_walkers = [double("Sub walker")]
|
34
|
-
@digger = double(Digger)
|
35
|
-
@digger.stub(:sub_walkers).with(@page, @walker).and_return(@sub_walkers)
|
36
|
-
end
|
37
|
-
|
38
|
-
it "has the uri of its page" do
|
39
|
-
expect(@walker.uri).to eq @page.uri
|
40
|
-
end
|
41
|
-
|
42
|
-
it "it uses the text of its origin (ex: link) as title" do
|
43
|
-
expect(@walker.title).to eq @page_getter.text
|
44
|
-
end
|
45
|
-
|
46
|
-
context "by default" do
|
47
|
-
it "has no items" do
|
48
|
-
expect(@walker.items).to be_empty
|
49
|
-
end
|
50
|
-
it "has no sub categories" do
|
51
|
-
expect(@walker.categories).to be_empty
|
52
|
-
end
|
53
|
-
it "has no attributes" do
|
54
|
-
expect(@walker.attributes).to be_empty
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
it "uses its items digger to collect its items" do
|
59
|
-
@walker.items_digger = @digger
|
60
|
-
|
61
|
-
expect(@walker.items).to eq @sub_walkers
|
62
|
-
end
|
63
|
-
it "uses its categories digger to collect its sub categories" do
|
64
|
-
@walker.categories_digger = @digger
|
65
|
-
|
66
|
-
expect(@walker.categories).to eq @sub_walkers
|
67
|
-
end
|
68
|
-
it "uses its scrap attributes block to collect its attributes" do
|
69
|
-
attributes = { :name => "Candy" }
|
70
|
-
@walker.scrap_attributes_block = lambda { |page| attributes }
|
71
|
-
|
72
|
-
expect(@walker.attributes).to eq attributes
|
73
|
-
end
|
74
|
-
|
75
|
-
context "when troubleshooting" do
|
76
|
-
|
77
|
-
it "has a meaningfull string representation" do
|
78
|
-
walker = Walker.new(@page_getter)
|
79
|
-
walker.index= 23
|
80
|
-
expect(walker.to_s).to include(Walker.to_s)
|
81
|
-
expect(walker.to_s).to include("##{walker.index}")
|
82
|
-
expect(walker.to_s).to include("@#{walker.uri}")
|
83
|
-
end
|
84
|
-
it "has a full genealogy" do
|
85
|
-
link = double("Link")
|
86
|
-
link.stub_chain(:get, :uri).and_return(@page.uri + "/viandes")
|
87
|
-
child_walker = Walker.new(link)
|
88
|
-
child_walker.index = 12
|
89
|
-
child_walker.father = @walker
|
90
|
-
|
91
|
-
genealogy = child_walker.genealogy.split("\n")
|
92
|
-
|
93
|
-
expect(genealogy).to eq [@walker.to_s, child_walker.to_s]
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|