storexplore 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.travis.yml +8 -0
- data/README.md +129 -1
- data/Rakefile +6 -0
- data/lib/storexplore.rb +2 -2
- data/lib/storexplore/api.rb +14 -4
- data/lib/storexplore/digger.rb +4 -4
- data/lib/storexplore/dsl.rb +61 -0
- data/lib/storexplore/testing/dummy_store_api.rb +2 -2
- data/lib/storexplore/version.rb +2 -2
- data/lib/storexplore/walker_page.rb +1 -14
- data/spec/lib/storexplore/api_spec.rb +14 -7
- data/spec/lib/storexplore/dsl_spec.rb +215 -0
- data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +38 -16
- data/spec/lib/storexplore/walker_page_spec.rb +1 -2
- data/spec/spec_helper.rb +11 -1
- data/storexplore.gemspec +1 -0
- metadata +20 -9
- metadata.gz.sig +0 -0
- data/lib/storexplore/api_builder.rb +0 -68
- data/spec/lib/storexplore/api_builder_spec.rb +0 -99
- data/spec/lib/storexplore/digger_spec.rb +0 -53
- data/spec/lib/storexplore/walker_spec.rb +0 -97
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7da700982a0fbe6f80b7aa37ff7e4f231f43935
|
4
|
+
data.tar.gz: 5fdb4f7684e506964f3fff42bdde441f70e71e92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 97e8fe6604856ffe5c4baa1287fe40ce5cfe40c4a950af74496bf848b3e42210973b218a117fc9a011d17814f82ca499e141fe810035e78a1d76a6bc45c58170
|
7
|
+
data.tar.gz: c5645631ef4ec8569277f15c2d7b9354f26a134833ee79e9cefd52ac0b3fea37919fabe8d51eab67be94479dd18c8cf2400aa8ca362d6aca846c6c1e41971c79
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -4,6 +4,9 @@ A declarative scrapping DSL that lets one define directory like apis to an onlin
|
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
+
In order to be able to enumerate all items of a store in constant memory,
|
8
|
+
Storexplore requires Matz Ruby 2.0 for its lazy enumerators.
|
9
|
+
|
7
10
|
Add this line to your application's Gemfile:
|
8
11
|
|
9
12
|
gem 'storexplore'
|
@@ -18,7 +21,132 @@ Or install it yourself as:
|
|
18
21
|
|
19
22
|
## Usage
|
20
23
|
|
21
|
-
|
24
|
+
The library builds hierarchical APIs on online stores. Stores are typically
|
25
|
+
organized in the following way :
|
26
|
+
|
27
|
+
Store > Categories > ... > Sub Categories > Items
|
28
|
+
|
29
|
+
The store is like a root category. Any category, at any depth level can have
|
30
|
+
both children categories and items. Items cannot have children of any kind.
|
31
|
+
Both categories and items can have attributes.
|
32
|
+
|
33
|
+
All searching of children and attributes is done through mechanize/nokogiri
|
34
|
+
selectors (css or xpath).
|
35
|
+
|
36
|
+
Here is a sample store api declaration :
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
Storexplore::Api.define 'dummy-store.com' do
|
40
|
+
|
41
|
+
categories 'a.category' do
|
42
|
+
attributes do
|
43
|
+
{ :name => page.get_one("h1").content }
|
44
|
+
end
|
45
|
+
|
46
|
+
categories 'a.category' do
|
47
|
+
attributes do
|
48
|
+
{ :name => page.get_one("h1").content }
|
49
|
+
end
|
50
|
+
|
51
|
+
items 'a.item' do
|
52
|
+
attributes do
|
53
|
+
{
|
54
|
+
:name => page.get_one('h1').content,
|
55
|
+
:brand => page.get_one('#brand').content,
|
56
|
+
:price => page.get_one('#price').content.to_f,
|
57
|
+
:image => page.get_one('#image').content,
|
58
|
+
:remote_id => page.get_one('#remote_id').content
|
59
|
+
}
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
```
|
66
|
+
|
67
|
+
This builds a hierarchical API on the 'dummy-store.com' online store. This
|
68
|
+
registers a new api definition that will be used to browse any store whose
|
69
|
+
uri contains 'dummy-store.com'.
|
70
|
+
|
71
|
+
Now here is how this API can be accessed to pretty print all its content:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
Api.browse('http://www.dummy-store.com').categories.each do |category|
|
75
|
+
|
76
|
+
puts "category: #{category.title}"
|
77
|
+
puts "attributes: #{category.attributes}"
|
78
|
+
|
79
|
+
category.categories.each do |sub_category|
|
80
|
+
|
81
|
+
puts " category: #{sub_category.title}"
|
82
|
+
puts " attributes: #{sub_category.attributes}"
|
83
|
+
|
84
|
+
sub_category.items.each do |item|
|
85
|
+
|
86
|
+
puts " item: #{item.title}"
|
87
|
+
puts " attributes: #{item.attributes}"
|
88
|
+
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
94
|
+
### Testing
|
95
|
+
|
96
|
+
Storexplore ships with some dummy store generation utilities. Dummy stores can
|
97
|
+
be generated to the file system using the Storexplore::Testing::DummyStore and
|
98
|
+
Storexplore::Testing::DummyStoreGenerator classes. This is particularly useful
|
99
|
+
while testing.
|
100
|
+
|
101
|
+
To use it, add the following, to your spec_helper.rb for example :
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
require 'storexplore/testing'
|
105
|
+
|
106
|
+
Storexplore::Testing.config do |config|
|
107
|
+
config.dummy_store_generation_dir= File.join(Rails.root, '../tmp')
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
111
|
+
It is then possible to generate a store with the following :
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
DummyStore.wipe_out_store(store_name)
|
115
|
+
@store_generator = DummyStore.open(store_name)
|
116
|
+
@store_generator.generate(3).categories.and(3).categories.and(item_count).items
|
117
|
+
```
|
118
|
+
|
119
|
+
It is also possible to add elements with explicit values :
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
@store_generator.
|
123
|
+
category(cat_name = "extra long category name").
|
124
|
+
category(sub_cat_name = "extra long sub category name").
|
125
|
+
item(item_name = "super extra long item name").generate().
|
126
|
+
attributes(price: 12.3)
|
127
|
+
```
|
128
|
+
|
129
|
+
Storexplore provides an api definition for dummy stores in
|
130
|
+
'storexplore/testing/dummy_store_api'. It can be required independently if
|
131
|
+
needed.
|
132
|
+
|
133
|
+
### RSpec shared examples
|
134
|
+
|
135
|
+
Storexplore also ships with an rspec shared examples macro. It can be used for
|
136
|
+
any custom store API definition.
|
137
|
+
|
138
|
+
```ruby
|
139
|
+
require 'storexplore/testing'
|
140
|
+
|
141
|
+
describe "MyStoreApi" do
|
142
|
+
include Storexplore::Testing::ApiSpecMacros
|
143
|
+
|
144
|
+
it_should_behave_like_any_store_items_api
|
145
|
+
|
146
|
+
...
|
147
|
+
|
148
|
+
end
|
149
|
+
```
|
22
150
|
|
23
151
|
## Contributing
|
24
152
|
|
data/Rakefile
CHANGED
data/lib/storexplore.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# storexplore.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -22,9 +22,9 @@
|
|
22
22
|
require "storexplore/version"
|
23
23
|
require "storexplore/array_utils"
|
24
24
|
require "storexplore/api"
|
25
|
-
require "storexplore/api_builder"
|
26
25
|
require "storexplore/browsing_error"
|
27
26
|
require "storexplore/digger"
|
27
|
+
require "storexplore/dsl"
|
28
28
|
require "storexplore/hash_utils"
|
29
29
|
require "storexplore/null_digger"
|
30
30
|
require "storexplore/uri_utils"
|
data/lib/storexplore/api.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# api.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -24,12 +24,18 @@ module Storexplore
|
|
24
24
|
# Objects able to walk a store and discover available items
|
25
25
|
class Api
|
26
26
|
|
27
|
+
def self.define(name, &block)
|
28
|
+
builder = Dsl.walker_builder(&block)
|
29
|
+
|
30
|
+
register_builder(name, builder)
|
31
|
+
end
|
32
|
+
|
27
33
|
def self.browse(store_url)
|
28
|
-
builder(store_url).
|
34
|
+
builder(store_url).new_walker(WalkerPage.open(store_url))
|
29
35
|
end
|
30
36
|
|
31
|
-
def self.
|
32
|
-
builders
|
37
|
+
def self.undef(name)
|
38
|
+
builders.delete(name)
|
33
39
|
end
|
34
40
|
|
35
41
|
# Uri of the main page of the store
|
@@ -46,6 +52,10 @@ module Storexplore
|
|
46
52
|
|
47
53
|
private
|
48
54
|
|
55
|
+
def self.register_builder(name, builder)
|
56
|
+
builders[name] = builder
|
57
|
+
end
|
58
|
+
|
49
59
|
def self.builder(store_url)
|
50
60
|
builders.each do |name, builder|
|
51
61
|
if store_url.include?(name)
|
data/lib/storexplore/digger.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# digger.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,14 +21,14 @@
|
|
21
21
|
|
22
22
|
module Storexplore
|
23
23
|
class Digger
|
24
|
-
def initialize(selector,
|
24
|
+
def initialize(selector, sub_walker_builder)
|
25
25
|
@selector = selector
|
26
|
-
@
|
26
|
+
@sub_walker_builder = sub_walker_builder
|
27
27
|
end
|
28
28
|
|
29
29
|
def sub_walkers(page, father)
|
30
30
|
page.search_links(@selector).each_with_index.to_a.lazy.map do |link, i|
|
31
|
-
@
|
31
|
+
@sub_walker_builder.new_walker(link, father, i)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# dsl.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
|
23
|
+
|
24
|
+
class Dsl
|
25
|
+
|
26
|
+
def self.walker_builder(&block)
|
27
|
+
new.tap do |dsl|
|
28
|
+
dsl.instance_eval(&block)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize()
|
33
|
+
@scrap_attributes_block = lambda do |_| {} end
|
34
|
+
@categories_digger = NullDigger.new
|
35
|
+
@items_digger = NullDigger.new
|
36
|
+
end
|
37
|
+
|
38
|
+
def attributes(&block)
|
39
|
+
@scrap_attributes_block = block
|
40
|
+
end
|
41
|
+
|
42
|
+
def categories(selector, &block)
|
43
|
+
@categories_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
44
|
+
end
|
45
|
+
|
46
|
+
def items(selector, &block)
|
47
|
+
@items_digger = Digger.new(selector, Dsl.walker_builder(&block))
|
48
|
+
end
|
49
|
+
|
50
|
+
def new_walker(page_getter, father = nil, index = nil)
|
51
|
+
Walker.new(page_getter).tap do |walker|
|
52
|
+
walker.categories_digger = @categories_digger
|
53
|
+
walker.items_digger = @items_digger
|
54
|
+
walker.scrap_attributes_block = @scrap_attributes_block
|
55
|
+
walker.father = father
|
56
|
+
walker.index = index
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# dummy_store_api.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -24,7 +24,7 @@ require_relative 'dummy_store_constants'
|
|
24
24
|
module Storexplore
|
25
25
|
module Testing
|
26
26
|
|
27
|
-
Storexplore::
|
27
|
+
Storexplore::Api.define DummyStoreConstants::NAME do
|
28
28
|
|
29
29
|
categories 'a.category' do
|
30
30
|
attributes do
|
data/lib/storexplore/version.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# version.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -20,5 +20,5 @@
|
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
22
|
module Storexplore
|
23
|
-
VERSION = "0.0
|
23
|
+
VERSION = "0.1.0"
|
24
24
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# walker_page.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -21,19 +21,6 @@
|
|
21
21
|
|
22
22
|
require 'mechanize'
|
23
23
|
|
24
|
-
# monkey patch to avoid a regex uri encoding error when importing
|
25
|
-
# incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
|
26
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
|
27
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
|
28
|
-
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
|
29
|
-
# from uri method
|
30
|
-
require "webrick/httputils"
|
31
|
-
module WEBrick::HTTPUtils
|
32
|
-
def self.escape(s)
|
33
|
-
URI.escape(s)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
24
|
module Storexplore
|
38
25
|
|
39
26
|
class WalkerPage
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# api_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (C) 2010, 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -25,20 +25,27 @@ module Storexplore
|
|
25
25
|
|
26
26
|
describe Api do
|
27
27
|
|
28
|
-
before :
|
29
|
-
Api.
|
30
|
-
|
31
|
-
|
32
|
-
|
28
|
+
before :all do
|
29
|
+
Storexplore::Api.define 'cats' do
|
30
|
+
attributes do
|
31
|
+
{animal: :cats}
|
32
|
+
end
|
33
|
+
end
|
33
34
|
end
|
34
35
|
|
35
36
|
it "select the good store items api builder to browse a store" do
|
36
|
-
expect(Api.browse(
|
37
|
+
expect(Api.browse("http://www.cats.net").attributes[:animal]).to eq(:cats)
|
37
38
|
end
|
38
39
|
|
39
40
|
it "fails when it does not know how to browse a store" do
|
40
41
|
expect(lambda { Api.browse("http://unknown.store.com") }).to raise_error(NotImplementedError)
|
41
42
|
end
|
42
43
|
|
44
|
+
it "allows to unregister an installed api (mostly for testing)" do
|
45
|
+
Api.undef 'cats'
|
46
|
+
|
47
|
+
expect(lambda { Api.browse("http://www.cats.com") }).to raise_error(NotImplementedError)
|
48
|
+
end
|
49
|
+
|
43
50
|
end
|
44
51
|
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# dsl_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore
|
25
|
+
|
26
|
+
describe Dsl do
|
27
|
+
|
28
|
+
def browse
|
29
|
+
@walker = Storexplore::Api.browse("http://www.cats-surplus.com")
|
30
|
+
end
|
31
|
+
|
32
|
+
after :each do
|
33
|
+
Storexplore::Api.undef 'cats'
|
34
|
+
end
|
35
|
+
|
36
|
+
context 'a simple store' do
|
37
|
+
before :each do
|
38
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: <<-eos)
|
39
|
+
<html>
|
40
|
+
<body>
|
41
|
+
<a href="category1.html" class="category">Cats with fur</a>
|
42
|
+
<a href="category2.html" class="category">Naked cats</a>
|
43
|
+
<a href="category3.html" class="category">Cats with feathers</a>
|
44
|
+
|
45
|
+
<a href="item1.html" class="item">The first thing we sell</a>
|
46
|
+
<a href="item2.html" class="item">The second thing we sell</a>
|
47
|
+
<a href="legal.html" class="legal">How we sell it</a>
|
48
|
+
</body>
|
49
|
+
</html>
|
50
|
+
eos
|
51
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category1.html", content_type: 'text/html', body: <<-eos)
|
52
|
+
<html>
|
53
|
+
<body>
|
54
|
+
<a href="category4.html" class="sub-category">Cats with red fur</a>
|
55
|
+
<a href="category5.html" class="sub-category">Cats with green fur</a>
|
56
|
+
|
57
|
+
<a href="item3.html" class="item">The first thing we sell</a>
|
58
|
+
<a href="item4.html" class="item">The second thing we sell</a>
|
59
|
+
<a href="item5.html" class="item">The second thing we sell</a>
|
60
|
+
</body>
|
61
|
+
</html>
|
62
|
+
eos
|
63
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category2.html", content_type: 'text/html', body: "")
|
64
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category3.html", content_type: 'text/html', body: "")
|
65
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category4.html", content_type: 'text/html', body: "")
|
66
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/category5.html", content_type: 'text/html', body: "")
|
67
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item1.html", content_type: 'text/html', body: "")
|
68
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item2.html", content_type: 'text/html', body: "")
|
69
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item3.html", content_type: 'text/html', body: "")
|
70
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item4.html", content_type: 'text/html', body: "")
|
71
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/item5.html", content_type: 'text/html', body: "")
|
72
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/legal.html", content_type: 'text/html', body: "")
|
73
|
+
|
74
|
+
Storexplore::Api.define 'cats' do
|
75
|
+
items 'a.item' do
|
76
|
+
attributes do
|
77
|
+
{ page: page,
|
78
|
+
this_is_the: :item}
|
79
|
+
end
|
80
|
+
end
|
81
|
+
categories 'a.category' do
|
82
|
+
categories 'a.sub-category' do
|
83
|
+
end
|
84
|
+
items 'a.item' do
|
85
|
+
attributes do
|
86
|
+
raise WalkerPageError.new("Dummy error message")
|
87
|
+
end
|
88
|
+
end
|
89
|
+
attributes do
|
90
|
+
{ page: page,
|
91
|
+
this_is_the: :category}
|
92
|
+
end
|
93
|
+
end
|
94
|
+
attributes do
|
95
|
+
{ page: page,
|
96
|
+
this_is_the: :root}
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
browse
|
101
|
+
end
|
102
|
+
|
103
|
+
it "root walker has the uri of the home page" do
|
104
|
+
expect(@walker.uri).to eq URI("http://www.cats-surplus.com")
|
105
|
+
end
|
106
|
+
|
107
|
+
it "sub walkers have the uri of their page" do
|
108
|
+
expect(@walker.items.first.uri).to eq URI("http://www.cats-surplus.com/item1.html")
|
109
|
+
end
|
110
|
+
|
111
|
+
it "root walker title is the store uri" do
|
112
|
+
expect(@walker.title).to eq "http://www.cats-surplus.com"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "sub walkers title is the text of its origin link" do
|
116
|
+
expect(@walker.items.first.title).to eq "The first thing we sell"
|
117
|
+
end
|
118
|
+
|
119
|
+
it "uses a selector to spot sub items" do
|
120
|
+
expect(@walker.items).to have_exactly(2).items
|
121
|
+
end
|
122
|
+
it "uses a selector to spot sub categories" do
|
123
|
+
expect(@walker.categories).to have_exactly(3).categories
|
124
|
+
end
|
125
|
+
it "root walker can have attributes" do
|
126
|
+
expect(@walker.attributes[:this_is_the]).to eq :root
|
127
|
+
expect(@walker.attributes[:page]).to be_instance_of(WalkerPage)
|
128
|
+
end
|
129
|
+
it "categories can have sub categories" do
|
130
|
+
expect(@walker.categories.first.categories).to have_exactly(2).categories
|
131
|
+
end
|
132
|
+
it "categories can have sub items" do
|
133
|
+
expect(@walker.categories.first.items).to have_exactly(3).items
|
134
|
+
end
|
135
|
+
it "categories can have attributes" do
|
136
|
+
expect(@walker.categories.first.attributes[:this_is_the]).to eq :category
|
137
|
+
expect(@walker.categories.first.attributes[:page]).to be_instance_of(WalkerPage)
|
138
|
+
end
|
139
|
+
it "items can have attributes" do
|
140
|
+
expect(@walker.items.first.attributes[:this_is_the]).to eq :item
|
141
|
+
expect(@walker.items.first.attributes[:page]).to be_instance_of(WalkerPage)
|
142
|
+
end
|
143
|
+
|
144
|
+
|
145
|
+
context "when troubleshooting" do
|
146
|
+
|
147
|
+
before :each do
|
148
|
+
@sub_walker = @walker.categories.first.items.drop(1).first
|
149
|
+
end
|
150
|
+
|
151
|
+
it "walkers have an index" do
|
152
|
+
expect(@sub_walker.index).to eq 1
|
153
|
+
end
|
154
|
+
|
155
|
+
it "has a meaningfull string representation" do
|
156
|
+
expect(@sub_walker.to_s).to include(Walker.to_s)
|
157
|
+
expect(@sub_walker.to_s).to include("##{@sub_walker.index}")
|
158
|
+
expect(@sub_walker.to_s).to include("@#{@sub_walker.uri}")
|
159
|
+
end
|
160
|
+
|
161
|
+
it "has a full genealogy" do
|
162
|
+
genealogy = @sub_walker.genealogy.split("\n")
|
163
|
+
|
164
|
+
expect(genealogy).to eq [@walker.to_s, @walker.categories.first.to_s, @sub_walker.to_s]
|
165
|
+
end
|
166
|
+
|
167
|
+
it "wraps parsing errors with debug errors" do
|
168
|
+
expect(lambda { @sub_walker.attributes }).to raise_error(BrowsingError, "Dummy error message\n#{@sub_walker.genealogy}")
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
end
|
173
|
+
|
174
|
+
context 'a redirected home page' do
|
175
|
+
before :each do
|
176
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", status: [301, "Moved Permanently"], location: "http://www.cats-surplus.com/index.html")
|
177
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com/index.html", content_type: 'text/html', body: "")
|
178
|
+
|
179
|
+
Storexplore::Api.define 'cats' do
|
180
|
+
end
|
181
|
+
|
182
|
+
browse
|
183
|
+
end
|
184
|
+
|
185
|
+
it "root walker has the uri of finaly page" do
|
186
|
+
expect(@walker.uri).to eq URI("http://www.cats-surplus.com/index.html")
|
187
|
+
end
|
188
|
+
|
189
|
+
it "root walker title is the store uri" do
|
190
|
+
expect(@walker.title).to eq "http://www.cats-surplus.com"
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
context 'an empty store' do
|
195
|
+
before :each do
|
196
|
+
FakeWeb.register_uri(:get, "http://www.cats-surplus.com", content_type: 'text/html', body: "")
|
197
|
+
|
198
|
+
Storexplore::Api.define 'cats' do
|
199
|
+
end
|
200
|
+
|
201
|
+
browse
|
202
|
+
end
|
203
|
+
|
204
|
+
it "has no items" do
|
205
|
+
expect(@walker.items).to be_empty
|
206
|
+
end
|
207
|
+
it "has no sub categories" do
|
208
|
+
expect(@walker.categories).to be_empty
|
209
|
+
end
|
210
|
+
it "has no sub attributes" do
|
211
|
+
expect(@walker.attributes).to be_empty
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# dummy_store_api_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -59,19 +59,28 @@ module Storexplore
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it "should use constant memory" do
|
62
|
-
|
62
|
+
FEW = 10
|
63
|
+
MANY = 100
|
64
|
+
RUNS = 2
|
63
65
|
|
64
|
-
|
65
|
-
|
66
|
+
many_inputs_memory = memory_usage_for_items(MANY, RUNS)
|
67
|
+
few_inputs_memory = memory_usage_for_items(FEW, RUNS)
|
66
68
|
|
67
|
-
|
69
|
+
slope = (many_inputs_memory - few_inputs_memory) / (MANY - FEW)
|
70
|
+
|
71
|
+
zero_inputs_memory = few_inputs_memory - FEW * slope
|
72
|
+
|
73
|
+
expect(slope).to be_within(zero_inputs_memory * 0.05).of(0.0)
|
68
74
|
end
|
69
75
|
|
70
|
-
def memory_usage_for_items(item_count)
|
76
|
+
def memory_usage_for_items(item_count, runs)
|
71
77
|
generate_store(store_name = "www.spec-perf-store.com", item_count)
|
72
|
-
|
73
|
-
|
78
|
+
data = runs.times.map do
|
79
|
+
memory_peak_of do
|
80
|
+
walk_store(store_name)
|
81
|
+
end
|
74
82
|
end
|
83
|
+
mean(data)
|
75
84
|
end
|
76
85
|
|
77
86
|
def memory_peak_of
|
@@ -95,25 +104,38 @@ module Storexplore
|
|
95
104
|
end
|
96
105
|
|
97
106
|
def current_living_objects
|
98
|
-
GC.start
|
99
107
|
object_counts = ObjectSpace.count_objects
|
100
108
|
object_counts[:TOTAL] - object_counts[:FREE]
|
101
109
|
end
|
102
110
|
|
103
111
|
def walk_store(store_name)
|
104
112
|
new_store(store_name).categories.each do |category|
|
105
|
-
|
106
|
-
|
113
|
+
register(category)
|
114
|
+
|
107
115
|
category.categories.each do |sub_category|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@attributes = item.attributes
|
116
|
+
register(sub_category)
|
117
|
+
|
118
|
+
sub_category.items.each do |item|
|
119
|
+
register(item)
|
113
120
|
end
|
114
121
|
end
|
115
122
|
end
|
116
123
|
end
|
124
|
+
|
125
|
+
def register(store_node)
|
126
|
+
@title = store_node.title
|
127
|
+
@attributes = store_node.attributes
|
128
|
+
|
129
|
+
# No GC is explicitly done, because:
|
130
|
+
# - large inputs forces it anyway
|
131
|
+
# - it greatly slows tests
|
132
|
+
# - GCing should not change the complexity of the system
|
133
|
+
# GC.start
|
134
|
+
end
|
135
|
+
|
136
|
+
def mean(data)
|
137
|
+
data.reduce(:+)/data.size
|
138
|
+
end
|
117
139
|
end
|
118
140
|
|
119
141
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# walker_page_spec.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2011, 2012, 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -23,7 +23,6 @@ require 'spec_helper'
|
|
23
23
|
|
24
24
|
module Storexplore
|
25
25
|
|
26
|
-
# @integration
|
27
26
|
describe WalkerPage, slow: true do
|
28
27
|
|
29
28
|
before :each do
|
data/spec/spec_helper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
# spec_helper.rb
|
4
4
|
#
|
5
|
-
# Copyright (c) 2013 by Philippe Bourgau. All rights reserved.
|
5
|
+
# Copyright (c) 2013, 2014 by Philippe Bourgau. All rights reserved.
|
6
6
|
#
|
7
7
|
# This library is free software; you can redistribute it and/or
|
8
8
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -19,10 +19,20 @@
|
|
19
19
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
20
|
# MA 02110-1301 USA
|
21
21
|
|
22
|
+
require 'fakeweb'
|
22
23
|
require 'spec_combos'
|
23
24
|
require 'storexplore'
|
24
25
|
require 'storexplore/testing'
|
25
26
|
|
27
|
+
# Dummy store generation
|
26
28
|
Storexplore::Testing.config do |config|
|
27
29
|
config.dummy_store_generation_dir= File.join(File.dirname(__FILE__), '../tmp')
|
28
30
|
end
|
31
|
+
|
32
|
+
# Clean up fakeweb registry after every test
|
33
|
+
FakeWeb.allow_net_connect = false
|
34
|
+
RSpec.configure do |config|
|
35
|
+
config.after(:each) do
|
36
|
+
FakeWeb.clean_registry
|
37
|
+
end
|
38
|
+
end
|
data/storexplore.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storexplore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philou
|
@@ -31,7 +31,7 @@ cert_chain:
|
|
31
31
|
yLcl1cmm5ALtJ/+Bkkmp0i4amXeTDMvq9r8PBsVsQwxYOYJBP+Umxz3PX6HjFHrQ
|
32
32
|
XdkXx3oZ
|
33
33
|
-----END CERTIFICATE-----
|
34
|
-
date:
|
34
|
+
date: 2014-01-14 00:00:00.000000000 Z
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: mechanize
|
@@ -103,6 +103,20 @@ dependencies:
|
|
103
103
|
- - ~>
|
104
104
|
- !ruby/object:Gem::Version
|
105
105
|
version: '0.2'
|
106
|
+
- !ruby/object:Gem::Dependency
|
107
|
+
name: fakeweb
|
108
|
+
requirement: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ~>
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '1.3'
|
113
|
+
type: :development
|
114
|
+
prerelease: false
|
115
|
+
version_requirements: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ~>
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '1.3'
|
106
120
|
description: A declarative scrapping DSL that lets one define directory like apis
|
107
121
|
to an online store
|
108
122
|
email:
|
@@ -113,6 +127,7 @@ extra_rdoc_files: []
|
|
113
127
|
files:
|
114
128
|
- .gitignore
|
115
129
|
- .rspec
|
130
|
+
- .travis.yml
|
116
131
|
- Gemfile
|
117
132
|
- Guardfile
|
118
133
|
- LICENSE
|
@@ -120,10 +135,10 @@ files:
|
|
120
135
|
- Rakefile
|
121
136
|
- lib/storexplore.rb
|
122
137
|
- lib/storexplore/api.rb
|
123
|
-
- lib/storexplore/api_builder.rb
|
124
138
|
- lib/storexplore/array_utils.rb
|
125
139
|
- lib/storexplore/browsing_error.rb
|
126
140
|
- lib/storexplore/digger.rb
|
141
|
+
- lib/storexplore/dsl.rb
|
127
142
|
- lib/storexplore/hash_utils.rb
|
128
143
|
- lib/storexplore/null_digger.rb
|
129
144
|
- lib/storexplore/testing.rb
|
@@ -141,14 +156,12 @@ files:
|
|
141
156
|
- lib/storexplore/walker.rb
|
142
157
|
- lib/storexplore/walker_page.rb
|
143
158
|
- lib/storexplore/walker_page_error.rb
|
144
|
-
- spec/lib/storexplore/api_builder_spec.rb
|
145
159
|
- spec/lib/storexplore/api_spec.rb
|
146
|
-
- spec/lib/storexplore/
|
160
|
+
- spec/lib/storexplore/dsl_spec.rb
|
147
161
|
- spec/lib/storexplore/store_walker_page_spec_fixture.html
|
148
162
|
- spec/lib/storexplore/testing/dummy_store_api_spec.rb
|
149
163
|
- spec/lib/storexplore/uri_utils_spec.rb
|
150
164
|
- spec/lib/storexplore/walker_page_spec.rb
|
151
|
-
- spec/lib/storexplore/walker_spec.rb
|
152
165
|
- spec/spec_helper.rb
|
153
166
|
- storexplore.gemspec
|
154
167
|
homepage: https://github.com/philou/storexplore
|
@@ -176,12 +189,10 @@ signing_key:
|
|
176
189
|
specification_version: 4
|
177
190
|
summary: Online store scraping library
|
178
191
|
test_files:
|
179
|
-
- spec/lib/storexplore/api_builder_spec.rb
|
180
192
|
- spec/lib/storexplore/api_spec.rb
|
181
|
-
- spec/lib/storexplore/
|
193
|
+
- spec/lib/storexplore/dsl_spec.rb
|
182
194
|
- spec/lib/storexplore/store_walker_page_spec_fixture.html
|
183
195
|
- spec/lib/storexplore/testing/dummy_store_api_spec.rb
|
184
196
|
- spec/lib/storexplore/uri_utils_spec.rb
|
185
197
|
- spec/lib/storexplore/walker_page_spec.rb
|
186
|
-
- spec/lib/storexplore/walker_spec.rb
|
187
198
|
- spec/spec_helper.rb
|
metadata.gz.sig
CHANGED
Binary file
|
@@ -1,68 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# api_builder.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
module Storexplore
|
23
|
-
|
24
|
-
class ApiBuilder
|
25
|
-
|
26
|
-
def self.define(api_class, digger_class, &block)
|
27
|
-
new(api_class, digger_class).tap do |result|
|
28
|
-
result.instance_eval(&block)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
def initialize(api_class, digger_class)
|
33
|
-
@api_class = api_class
|
34
|
-
@digger_class = digger_class
|
35
|
-
@scrap_attributes_block = lambda do {} end
|
36
|
-
@categories_digger = NullDigger.new
|
37
|
-
@items_digger = NullDigger.new
|
38
|
-
end
|
39
|
-
|
40
|
-
def attributes(&block)
|
41
|
-
@scrap_attributes_block = block
|
42
|
-
end
|
43
|
-
|
44
|
-
def categories(selector, &block)
|
45
|
-
@categories_digger = @digger_class.new(selector, ApiBuilder.define(@api_class, @digger_class, &block))
|
46
|
-
end
|
47
|
-
|
48
|
-
def items(selector, &block)
|
49
|
-
@items_digger = @digger_class.new(selector, ApiBuilder.define(@api_class, @digger_class, &block))
|
50
|
-
end
|
51
|
-
|
52
|
-
def new(page_getter, father = nil, index = nil)
|
53
|
-
@api_class.new(page_getter).tap do |result|
|
54
|
-
result.categories_digger = @categories_digger
|
55
|
-
result.items_digger = @items_digger
|
56
|
-
result.scrap_attributes_block = @scrap_attributes_block
|
57
|
-
result.father = father
|
58
|
-
result.index = index
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.define_api(name, &block)
|
64
|
-
builder = ApiBuilder.define(Walker, Digger, &block)
|
65
|
-
|
66
|
-
Api.register_builder(name, builder)
|
67
|
-
end
|
68
|
-
end
|
@@ -1,99 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# api_builder_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe ApiBuilder do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@url = "http://www.mega-store.com"
|
30
|
-
@api = double("Store api").as_null_object
|
31
|
-
@api_class = double("Store api class")
|
32
|
-
@api_class.stub(:new).with(@url).and_return(@api)
|
33
|
-
|
34
|
-
@selector = "a.child"
|
35
|
-
@digger = double("Digger")
|
36
|
-
@digger_class = double("Digger class")
|
37
|
-
end
|
38
|
-
|
39
|
-
context "using define method" do
|
40
|
-
it "creates new store api" do
|
41
|
-
@builder = ApiBuilder.define(@api_class, Digger) { }
|
42
|
-
|
43
|
-
expect(@builder.new(@url)).to eq @api
|
44
|
-
end
|
45
|
-
|
46
|
-
it "initializes nested definition through its block" do
|
47
|
-
ApiBuilder.stub(:new).and_return(builder = double(ApiBuilder))
|
48
|
-
|
49
|
-
expect(builder).to receive(:complex_builder_initialization)
|
50
|
-
|
51
|
-
ApiBuilder.define(@api_class, Digger) do
|
52
|
-
complex_builder_initialization
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
context "when nesting definitions" do
|
58
|
-
|
59
|
-
before :each do
|
60
|
-
@builder = ApiBuilder.new(@api_class, @digger_class)
|
61
|
-
end
|
62
|
-
|
63
|
-
after :each do
|
64
|
-
@builder.new(@url)
|
65
|
-
end
|
66
|
-
|
67
|
-
[:categories, :items].each do |sub_definition|
|
68
|
-
|
69
|
-
before :each do
|
70
|
-
ApiBuilder.stub(:new).and_return(@sub_builder = double(ApiBuilder))
|
71
|
-
@digger_class.stub(:new).with(@selector, @sub_builder).and_return(@digger)
|
72
|
-
end
|
73
|
-
|
74
|
-
it "tells the store api how to find sub #{sub_definition}" do
|
75
|
-
expect(@api).to receive("#{sub_definition}_digger=").with(@digger)
|
76
|
-
|
77
|
-
@builder.send(sub_definition, @selector) do end
|
78
|
-
end
|
79
|
-
|
80
|
-
it "initialises the sub #{sub_definition} builder" do
|
81
|
-
expect(@sub_builder).to receive(:sub_builder_initialization)
|
82
|
-
|
83
|
-
@builder.send(sub_definition, @selector) do
|
84
|
-
sub_builder_initialization
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
it "tells the store api how to parse attributes" do
|
90
|
-
scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }
|
91
|
-
|
92
|
-
expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)
|
93
|
-
|
94
|
-
@builder.attributes(&scrap_attributes_block)
|
95
|
-
end
|
96
|
-
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# digger_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe Digger do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@digger = Digger.new(@selector = "a.items", @factory = double("Sub walker factory"))
|
30
|
-
@page = double(WalkerPage)
|
31
|
-
@page.stub(:search_links).with(@selector).and_return(@links = [double("Link"),double("Link")])
|
32
|
-
end
|
33
|
-
|
34
|
-
it "creates sub walkers for each link it finds" do
|
35
|
-
@links.each do |link|
|
36
|
-
expect(@factory).to receive(:new).with(link, anything, anything)
|
37
|
-
end
|
38
|
-
|
39
|
-
@digger.sub_walkers(@page, nil).to_a
|
40
|
-
end
|
41
|
-
|
42
|
-
it "for debugging purpose, provides father walker and link index to sub walkers" do
|
43
|
-
father = double("Father walker")
|
44
|
-
|
45
|
-
@links.each_with_index do |link, index|
|
46
|
-
expect(@factory).to receive(:new).with(link, father, index)
|
47
|
-
end
|
48
|
-
|
49
|
-
@digger.sub_walkers(@page, father).to_a
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
end
|
@@ -1,97 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# walker_spec.rb
|
4
|
-
#
|
5
|
-
# Copyright (C) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
-
#
|
7
|
-
# This library is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU Lesser General Public
|
9
|
-
# License as published by the Free Software Foundation; either
|
10
|
-
# version 3.0 of the License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This library is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# Lesser General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Lesser General Public
|
18
|
-
# License along with this library; if not, write to the Free Software
|
19
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
-
# MA 02110-1301 USA
|
21
|
-
|
22
|
-
require "spec_helper"
|
23
|
-
|
24
|
-
module Storexplore
|
25
|
-
|
26
|
-
describe Walker do
|
27
|
-
|
28
|
-
before :each do
|
29
|
-
@page = double("Page", :uri => "http://www.maxi-discount.com")
|
30
|
-
@page_getter = double("Getter", :get => @page, :text => "Conserves")
|
31
|
-
@walker = Walker.new(@page_getter)
|
32
|
-
|
33
|
-
@sub_walkers = [double("Sub walker")]
|
34
|
-
@digger = double(Digger)
|
35
|
-
@digger.stub(:sub_walkers).with(@page, @walker).and_return(@sub_walkers)
|
36
|
-
end
|
37
|
-
|
38
|
-
it "has the uri of its page" do
|
39
|
-
expect(@walker.uri).to eq @page.uri
|
40
|
-
end
|
41
|
-
|
42
|
-
it "it uses the text of its origin (ex: link) as title" do
|
43
|
-
expect(@walker.title).to eq @page_getter.text
|
44
|
-
end
|
45
|
-
|
46
|
-
context "by default" do
|
47
|
-
it "has no items" do
|
48
|
-
expect(@walker.items).to be_empty
|
49
|
-
end
|
50
|
-
it "has no sub categories" do
|
51
|
-
expect(@walker.categories).to be_empty
|
52
|
-
end
|
53
|
-
it "has no attributes" do
|
54
|
-
expect(@walker.attributes).to be_empty
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
it "uses its items digger to collect its items" do
|
59
|
-
@walker.items_digger = @digger
|
60
|
-
|
61
|
-
expect(@walker.items).to eq @sub_walkers
|
62
|
-
end
|
63
|
-
it "uses its categories digger to collect its sub categories" do
|
64
|
-
@walker.categories_digger = @digger
|
65
|
-
|
66
|
-
expect(@walker.categories).to eq @sub_walkers
|
67
|
-
end
|
68
|
-
it "uses its scrap attributes block to collect its attributes" do
|
69
|
-
attributes = { :name => "Candy" }
|
70
|
-
@walker.scrap_attributes_block = lambda { |page| attributes }
|
71
|
-
|
72
|
-
expect(@walker.attributes).to eq attributes
|
73
|
-
end
|
74
|
-
|
75
|
-
context "when troubleshooting" do
|
76
|
-
|
77
|
-
it "has a meaningfull string representation" do
|
78
|
-
walker = Walker.new(@page_getter)
|
79
|
-
walker.index= 23
|
80
|
-
expect(walker.to_s).to include(Walker.to_s)
|
81
|
-
expect(walker.to_s).to include("##{walker.index}")
|
82
|
-
expect(walker.to_s).to include("@#{walker.uri}")
|
83
|
-
end
|
84
|
-
it "has a full genealogy" do
|
85
|
-
link = double("Link")
|
86
|
-
link.stub_chain(:get, :uri).and_return(@page.uri + "/viandes")
|
87
|
-
child_walker = Walker.new(link)
|
88
|
-
child_walker.index = 12
|
89
|
-
child_walker.father = @walker
|
90
|
-
|
91
|
-
genealogy = child_walker.genealogy.split("\n")
|
92
|
-
|
93
|
-
expect(genealogy).to eq [@walker.to_s, child_walker.to_s]
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|