storexplore 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.gitignore +26 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/Guardfile +9 -0
- data/LICENSE +165 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/storexplore/api.rb +63 -0
- data/lib/storexplore/api_builder.rb +68 -0
- data/lib/storexplore/array_utils.rb +36 -0
- data/lib/storexplore/browsing_error.rb +26 -0
- data/lib/storexplore/digger.rb +35 -0
- data/lib/storexplore/hash_utils.rb +56 -0
- data/lib/storexplore/null_digger.rb +30 -0
- data/lib/storexplore/testing/api_shared_examples.rb +140 -0
- data/lib/storexplore/testing/configuration.rb +56 -0
- data/lib/storexplore/testing/dummy_data.rb +67 -0
- data/lib/storexplore/testing/dummy_store.rb +195 -0
- data/lib/storexplore/testing/dummy_store_api.rb +54 -0
- data/lib/storexplore/testing/dummy_store_constants.rb +31 -0
- data/lib/storexplore/testing/dummy_store_generator.rb +65 -0
- data/lib/storexplore/testing/matchers/have_unique_matcher.rb +74 -0
- data/lib/storexplore/testing/matchers/mostly_matcher.rb +45 -0
- data/lib/storexplore/testing.rb +30 -0
- data/lib/storexplore/uri_utils.rb +38 -0
- data/lib/storexplore/version.rb +24 -0
- data/lib/storexplore/walker.rb +84 -0
- data/lib/storexplore/walker_page.rb +142 -0
- data/lib/storexplore/walker_page_error.rb +25 -0
- data/lib/storexplore.rb +34 -0
- data/spec/lib/storexplore/api_builder_spec.rb +99 -0
- data/spec/lib/storexplore/api_spec.rb +44 -0
- data/spec/lib/storexplore/digger_spec.rb +53 -0
- data/spec/lib/storexplore/store_walker_page_spec_fixture.html +21 -0
- data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +120 -0
- data/spec/lib/storexplore/uri_utils_spec.rb +51 -0
- data/spec/lib/storexplore/walker_page_spec.rb +120 -0
- data/spec/lib/storexplore/walker_spec.rb +97 -0
- data/spec/spec_helper.rb +28 -0
- data/storexplore.gemspec +27 -0
- data.tar.gz.sig +0 -0
- metadata +187 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# mostly_matcher.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
# Matcher checking that a given item matcher holds for most of the items of
# a collection, "most" being at least the ratio returned by +threshold+.
RSpec::Matchers.define :mostly do |item_matcher|

  match do |actual_items|
    matching_items = actual_matches(actual_items, item_matcher)
    matching_items.length >= expected_uniques_count(actual_items)
  end

  description do
    "#{item_matcher.description} to be true for at least #{threshold*100}% of the items"
  end

  #private

  # Minimal ratio of items that must satisfy the item matcher
  def threshold
    0.7
  end

  # Minimal number of matching items required for a collection of this size
  def expected_uniques_count(actual_items)
    (threshold * actual_items.length).round
  end

  # Items of the collection that satisfy the item matcher
  def actual_matches(actual_items, item_matcher)
    actual_items.select { |item| item_matcher.matches?(item) }
  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# testing.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'storexplore'
|
23
|
+
require 'storexplore/testing/api_shared_examples'
|
24
|
+
require 'storexplore/testing/configuration'
|
25
|
+
require 'storexplore/testing/dummy_data'
|
26
|
+
require 'storexplore/testing/dummy_store'
|
27
|
+
require 'storexplore/testing/dummy_store_api'
|
28
|
+
require 'storexplore/testing/dummy_store_generator'
|
29
|
+
require 'storexplore/testing/matchers/have_unique_matcher'
|
30
|
+
require 'storexplore/testing/matchers/mostly_matcher'
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# uri_utils.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
|
23
|
+
module Storexplore

  # Extra URI utilities
  class UriUtils

    # Matches hosts made of four dot separated groups of digits (IPv4 style)
    IP_HOST = /\A[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\z/

    # Matches the last two dot separated labels of a host name
    LAST_TWO_LABELS = /[^.]+\.[^.]+\z/

    # Extracts the domain from an uri.
    #
    # uri:: an object responding to +scheme+ and +host+ (such as the
    #       result of URI.parse)
    #
    # Returns
    # * "localhost" for uris with the "file" scheme
    # * nil when the uri has no host, or when the host is a numeric IP
    # * the last two labels of the host otherwise ("www.store.com" gives
    #   "store.com")
    # * the host itself when it has a single label (like "localhost"),
    #   where a NoMethodError used to be raised
    def self.domain(uri)
      return "localhost" if uri.scheme == "file"

      host = uri.host
      return nil if host.nil?
      return nil if host =~ IP_HOST

      # String#[] with a regexp returns nil when nothing matches, in which
      # case the host has no dot and is its own domain
      host[LAST_TWO_LABELS] || host
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# version.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
  # Version of the storexplore gem (frozen to prevent accidental mutation)
  VERSION = "0.0.1".freeze
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore

  # Node of a store exploration. A walker wraps a "getter" (an object
  # responding to +text+ and +get+, the later returning a page responding
  # to +uri+) and exposes the title, uri, attributes, sub categories and
  # items found behind it.
  #
  # The diggers and the attributes scrapping block are meant to be injected
  # through the accessors; by default the walker uses NullDigger instances
  # and a block returning an empty hash.
  class Walker

    attr_accessor :categories_digger, :items_digger, :scrap_attributes_block, :father, :index

    # getter:: object responding to +text+ and +get+
    def initialize(getter)
      @getter = getter
      self.categories_digger = NullDigger.new
      self.items_digger = NullDigger.new
      self.scrap_attributes_block = proc { {} }
    end

    # Text of the getter
    def title
      @getter.text
    end

    # Uri of the page behind the getter (this gets the page if needed)
    def uri
      page.uri
    end

    # Result of the attributes scrapping block, evaluated in the context
    # of this walker. A truthy result is computed only once.
    #
    # Raises BrowsingError when the block raises a WalkerPageError.
    def attributes
      @attributes ||= scrap_attributes
    end

    # Sub walkers provided by the categories digger for the current page
    def categories
      categories_digger.sub_walkers(page, self)
    end

    # Sub walkers provided by the items digger for the current page
    def items
      items_digger.sub_walkers(page, self)
    end

    def to_s
      "#{self.class} ##{index} @#{uri}"
    end

    # Description of this walker preceded by the ones of its ancestors
    # (following +father+), one per line, oldest ancestor first
    def genealogy
      genealogy_prefix + to_s
    end

    private

    # The page behind the getter, only got once
    def page
      @page ||= @getter.get
    end

    def genealogy_prefix
      return "" if father.nil?

      father.genealogy + "\n"
    end

    # Evaluates the scrapping block, translating page errors into browsing
    # errors that embed the genealogy of this walker
    def scrap_attributes
      instance_eval(&@scrap_attributes_block)
    rescue WalkerPageError => e
      raise BrowsingError, "#{e.message}\n#{genealogy}"
    end
  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker_page.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'mechanize'
|
23
|
+
|
24
|
+
# monkey patch to avoid a regex uri encoding error when importing
|
25
|
+
# incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
|
26
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
|
27
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
|
28
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
|
29
|
+
# from uri method
|
30
|
+
require "webrick/httputils"
|
31
|
+
module WEBrick
  module HTTPUtils
    # Replacement for the WEBrick escaping, delegating to the URI library.
    #
    # URI.escape was removed from Ruby 3.0, the same escaping remains
    # available through the default URI parser.
    #
    # s:: the string to escape
    #
    # Returns the escaped string
    def self.escape(s)
      ::URI::DEFAULT_PARSER.escape(s)
    end
  end
end
|
36
|
+
|
37
|
+
module Storexplore

  # Wrapper around a Mechanize page, providing the searches needed to walk
  # a store: links restricted to the domain of the page, elements and
  # images. Searches that find nothing raise a WalkerPageError.
  class WalkerPage
    extend Forwardable

    # Returns a Getter on the given uri. The page itself is only fetched
    # when +get+ is called on the result.
    def self.open(uri)
      Getter.new(uri)
    end

    def_delegator :@mechanize_page, :uri

    # mechanize_page:: the wrapped page, must respond to +uri+, +search+,
    #                  +links_with+ and +images_with+
    def initialize(mechanize_page)
      @mechanize_page = mechanize_page
    end

    # Links (as Link instances) matching the selector whose target is
    # relative or in the same domain as this page.
    #
    # Only one link is kept per target uri (the last one found), and the
    # result is sorted by target uri. Links without any target uri (Mechanize
    # returns nil for anchors without href) are ignored instead of raising
    # a NoMethodError.
    def search_links(selector)
      uri2links = {}
      search_all_links(selector).each do |link|
        target_uri = link.uri
        next if target_uri.nil?

        uri2links[target_uri.to_s] = link if same_domain?(uri, target_uri)
      end
      # enforcing deterministicity for testing and debugging
      uri2links.values.sort_by { |link| link.uri.to_s }
    end

    # First element matching the selector.
    # Raises WalkerPageError if there is none.
    def get_one(selector)
      first_or_throw(@mechanize_page.search(selector), "elements", selector)
    end

    # Texts of all the elements matching the selector, joined with the
    # separator.
    # Raises WalkerPageError if no element matches.
    def get_all(selector, separator)
      elements = @mechanize_page.search(selector)
      throw_if_empty(elements, "elements", selector)

      elements.map(&:text).join(separator)
    end

    # First image matching the selector.
    # Raises WalkerPageError if there is none.
    def get_image(selector)
      first_or_throw(@mechanize_page.images_with(search: selector), "images", selector)
    end

    private

    def same_domain?(source_uri, target_uri)
      target_uri.relative? || (UriUtils.domain(source_uri) == UriUtils.domain(target_uri))
    end

    def search_all_links(selector)
      @mechanize_page.links_with(search: selector).map { |link| Link.new(link) }
    end

    def first_or_throw(elements, name, selector)
      throw_if_empty(elements, name, selector)
      elements.first
    end

    def throw_if_empty(elements, name, selector)
      return unless elements.empty?

      raise WalkerPageError, "Page \"#{uri}\" does not contain any #{name} like \"#{selector}\""
    end

    # Lazy access to the page at a given uri
    class Getter
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # The WalkerPage at the uri, fetched on the first call only
      def get
        @page ||= get_page
      end

      # The uri as a string
      def text
        @uri.to_s
      end

      private

      def get_page
        agent = Mechanize.new do |it|
          # NOTE: by default Mechanize has infinite history, and causes memory leaks
          it.history.max_size = 0
        end

        WalkerPage.new(agent.get(@uri))
      end
    end

    # Wrapper around a Mechanize link
    class Link
      extend Forwardable

      def initialize(mechanize_link)
        @mechanize_link = mechanize_link
      end

      def_delegator :@mechanize_link, :uri

      # The WalkerPage obtained by clicking the link (clicks at every call)
      def get
        WalkerPage.new(@mechanize_link.click)
      end

      # Text of the link
      def text
        @mechanize_link.text
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker_page_error.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
  # Error for a page that does not contain what was searched in it
  WalkerPageError = Class.new(StandardError)
end
|
data/lib/storexplore.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# storexplore.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "storexplore/version"
|
23
|
+
require "storexplore/array_utils"
|
24
|
+
require "storexplore/api"
|
25
|
+
require "storexplore/api_builder"
|
26
|
+
require "storexplore/browsing_error"
|
27
|
+
require "storexplore/digger"
|
28
|
+
require "storexplore/hash_utils"
|
29
|
+
require "storexplore/null_digger"
|
30
|
+
require "storexplore/uri_utils"
|
31
|
+
require "storexplore/walker"
|
32
|
+
require "storexplore/walker_page"
|
33
|
+
require "storexplore/walker_page_error"
|
34
|
+
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# api_builder_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore

  describe ApiBuilder do

    # Collaborators shared by all examples: a store api (null object)
    # created by a stubbed api class, and the doubles used to dig sub
    # definitions.
    before :each do
      @url = "http://www.mega-store.com"
      @selector = "a.child"

      @api = double("Store api").as_null_object
      @api_class = double("Store api class")
      @api_class.stub(:new).with(@url).and_return(@api)

      @digger = double("Digger")
      @digger_class = double("Digger class")
    end

    context "using define method" do

      it "creates new store api" do
        @builder = ApiBuilder.define(@api_class, Digger) { }
        built_api = @builder.new(@url)

        expect(built_api).to eq @api
      end

      it "initializes nested definition through its block" do
        builder = double(ApiBuilder)
        ApiBuilder.stub(:new).and_return(builder)

        expect(builder).to receive(:complex_builder_initialization)

        ApiBuilder.define(@api_class, Digger) { complex_builder_initialization }
      end
    end

    context "when nesting definitions" do

      before :each do
        @builder = ApiBuilder.new(@api_class, @digger_class)
      end

      # Expectations on the api are set in the examples, the api is only
      # built once they are all in place.
      after :each do
        @builder.new(@url)
      end

      [:categories, :items].each do |sub_definition|

        before :each do
          @sub_builder = double(ApiBuilder)
          ApiBuilder.stub(:new).and_return(@sub_builder)
          @digger_class.stub(:new).with(@selector, @sub_builder).and_return(@digger)
        end

        it "tells the store api how to find sub #{sub_definition}" do
          digger_writer = "#{sub_definition}_digger="
          expect(@api).to receive(digger_writer).with(@digger)

          @builder.send(sub_definition, @selector) { }
        end

        it "initialises the sub #{sub_definition} builder" do
          expect(@sub_builder).to receive(:sub_builder_initialization)

          @builder.send(sub_definition, @selector) { sub_builder_initialization }
        end
      end

      it "tells the store api how to parse attributes" do
        scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }

        expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)

        @builder.attributes(&scrap_attributes_block)
      end

    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# api_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'spec_helper'
|
23
|
+
|
24
|
+
module Storexplore

  describe Api do

    # Registers a builder double for "www.my-store.com", and stubs the page
    # opening so that browsing the store url should end up returning
    # @store_api.
    before :each do
      my_store = "www.my-store.com"
      @url = "http://#{my_store}"

      # the double is named after ApiBuilder itself (ApiBuilder.class is
      # Class, which gave a misleading name in failure messages)
      builder = double(ApiBuilder)
      Api.register_builder(my_store, builder)

      walker = double(WalkerPage)
      WalkerPage.stub(:open).with(@url).and_return(walker)

      @store_api = double("Store api")
      builder.stub(:new).with(walker).and_return(@store_api)
    end

    it "select the good store items api builder to browse a store" do
      expect(Api.browse(@url)).to eq @store_api
    end

    it "fails when it does not know how to browse a store" do
      expect { Api.browse("http://unknown.store.com") }.to raise_error(NotImplementedError)
    end

  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# digger_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore

  describe Digger do

    # A digger on the "a.items" selector with a sub walker factory double,
    # and a page double finding two links for this selector.
    before :each do
      @selector = "a.items"
      @factory = double("Sub walker factory")
      @digger = Digger.new(@selector, @factory)

      @links = [double("Link"),double("Link")]
      @page = double(WalkerPage)
      @page.stub(:search_links).with(@selector).and_return(@links)
    end

    it "creates sub walkers for each link it finds" do
      @links.each { |link| expect(@factory).to receive(:new).with(link, anything, anything) }

      @digger.sub_walkers(@page, nil).to_a
    end

    it "for debugging purpose, provides father walker and link index to sub walkers" do
      father = double("Father walker")

      @links.each_with_index do |link, position|
        expect(@factory).to receive(:new).with(link, father, position)
      end

      @digger.sub_walkers(@page, father).to_a
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
</head>
|
4
|
+
<body>
|
5
|
+
<div id="unique"></div>
|
6
|
+
<p class="number">0</p>
|
7
|
+
<p class="number">1</p>
|
8
|
+
|
9
|
+
<a href="b.html" class="letter">b</a>
|
10
|
+
<a href="a.html" class="letter">a</a>
|
11
|
+
|
12
|
+
<a href="http://www.twilight-zone.com" id="outbound">twilight</a>
|
13
|
+
|
14
|
+
<a href="unique.html" class="twin">first twin</a>
|
15
|
+
<a href="unique.html" class="twin">second twin</a>
|
16
|
+
|
17
|
+
<a href="store_walker_page_spec_fixture.html" id="myself">myself</a>
|
18
|
+
|
19
|
+
<img class="image" src="toto.txt"/>
|
20
|
+
</body>
|
21
|
+
</html>
|