storexplore 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.gitignore +26 -0
- data/.rspec +1 -0
- data/Gemfile +4 -0
- data/Guardfile +9 -0
- data/LICENSE +165 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/storexplore/api.rb +63 -0
- data/lib/storexplore/api_builder.rb +68 -0
- data/lib/storexplore/array_utils.rb +36 -0
- data/lib/storexplore/browsing_error.rb +26 -0
- data/lib/storexplore/digger.rb +35 -0
- data/lib/storexplore/hash_utils.rb +56 -0
- data/lib/storexplore/null_digger.rb +30 -0
- data/lib/storexplore/testing/api_shared_examples.rb +140 -0
- data/lib/storexplore/testing/configuration.rb +56 -0
- data/lib/storexplore/testing/dummy_data.rb +67 -0
- data/lib/storexplore/testing/dummy_store.rb +195 -0
- data/lib/storexplore/testing/dummy_store_api.rb +54 -0
- data/lib/storexplore/testing/dummy_store_constants.rb +31 -0
- data/lib/storexplore/testing/dummy_store_generator.rb +65 -0
- data/lib/storexplore/testing/matchers/have_unique_matcher.rb +74 -0
- data/lib/storexplore/testing/matchers/mostly_matcher.rb +45 -0
- data/lib/storexplore/testing.rb +30 -0
- data/lib/storexplore/uri_utils.rb +38 -0
- data/lib/storexplore/version.rb +24 -0
- data/lib/storexplore/walker.rb +84 -0
- data/lib/storexplore/walker_page.rb +142 -0
- data/lib/storexplore/walker_page_error.rb +25 -0
- data/lib/storexplore.rb +34 -0
- data/spec/lib/storexplore/api_builder_spec.rb +99 -0
- data/spec/lib/storexplore/api_spec.rb +44 -0
- data/spec/lib/storexplore/digger_spec.rb +53 -0
- data/spec/lib/storexplore/store_walker_page_spec_fixture.html +21 -0
- data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +120 -0
- data/spec/lib/storexplore/uri_utils_spec.rb +51 -0
- data/spec/lib/storexplore/walker_page_spec.rb +120 -0
- data/spec/lib/storexplore/walker_spec.rb +97 -0
- data/spec/spec_helper.rb +28 -0
- data/storexplore.gemspec +27 -0
- data.tar.gz.sig +0 -0
- metadata +187 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# mostly_matcher.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
# Matcher to verify that most items match something else
|
23
|
+
RSpec::Matchers.define :mostly do |item_matcher|

  # Passes when the number of items accepted by +item_matcher+ reaches the
  # rounded share of the collection given by +threshold+.
  match do |actual_items|
    matching_items = actual_matches(actual_items, item_matcher)
    matching_items.length >= expected_uniques_count(actual_items)
  end

  description do
    percentage = threshold * 100
    "#{item_matcher.description} to be true for at least #{percentage}% of the items"
  end

  # Helpers below are kept callable from the matcher blocks above.

  # Fraction of the items that has to satisfy the wrapped matcher.
  def threshold
    0.7
  end

  # Smallest number of matching items for which the matcher passes.
  def expected_uniques_count(actual_items)
    required = actual_items.length * threshold
    required.round
  end

  # Items of the collection that the wrapped matcher accepts.
  def actual_matches(actual_items, item_matcher)
    actual_items.find_all do |item|
      item_matcher.matches?(item)
    end
  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# storexplore.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'storexplore'
|
23
|
+
require 'storexplore/testing/api_shared_examples'
|
24
|
+
require 'storexplore/testing/configuration'
|
25
|
+
require 'storexplore/testing/dummy_data'
|
26
|
+
require 'storexplore/testing/dummy_store'
|
27
|
+
require 'storexplore/testing/dummy_store_api'
|
28
|
+
require 'storexplore/testing/dummy_store_generator'
|
29
|
+
require 'storexplore/testing/matchers/have_unique_matcher'
|
30
|
+
require 'storexplore/testing/matchers/mostly_matcher'
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# uri.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
|
23
|
+
module Storexplore

  # Extra URI utilities
  class UriUtils

    # Hosts written as a dotted-quad IPv4 address; they have no domain name.
    IPV4_HOST = /\A[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\z/

    # The last two dot-separated labels of a host name.
    LAST_TWO_LABELS = /[^.]+\.[^.]+\z/

    # Extracts the domain from an uri.
    #
    # @param uri [#scheme, #host] typically an URI instance
    # @return [String, nil] "localhost" for file uris, nil when the uri has
    #   no host or the host is an IPv4 address, otherwise the last two labels
    #   of the host ("www.mega-store.com" => "mega-store.com"). A host made
    #   of a single label (for example "localhost") is returned unchanged
    #   instead of raising NoMethodError.
    def self.domain(uri)
      return "localhost" if uri.scheme == "file"

      host = uri.host
      return nil if host.nil?
      return nil if host =~ IPV4_HOST

      # String#[] yields nil when the host has no dot, fall back to the host
      host[LAST_TWO_LABELS] || host
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# version.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
  # Release number of the storexplore gem.
  VERSION = '0.0.1'
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore

  # A node met while exploring a store. The page behind the node is obtained
  # lazily from the getter, and sub nodes are produced by the configured
  # diggers.
  class Walker

    attr_accessor :categories_digger, :items_digger, :scrap_attributes_block, :father, :index

    # getter must answer #text (used as the title) and #get (the page).
    # Until configured otherwise, the walker has no sub categories, no sub
    # items and an empty attributes hash.
    def initialize(getter)
      @getter = getter
      self.categories_digger = NullDigger.new
      self.items_digger = NullDigger.new
      self.scrap_attributes_block = proc { {} }
    end

    # Text of the getter this walker was built from.
    def title
      @getter.text
    end

    # Uri of the underlying page (this fetches the page on first use).
    def uri
      page.uri
    end

    # Result of the scrap attributes block, evaluated at most once as long
    # as it returns a truthy value.
    def attributes
      @attributes ||= scrap_attributes
    end

    # Sub walkers found by the categories digger on this page.
    def categories
      categories_digger.sub_walkers(page, self)
    end

    # Sub walkers found by the items digger on this page.
    def items
      items_digger.sub_walkers(page, self)
    end

    def to_s
      "#{self.class} ##{index} @#{uri}"
    end

    # One line per walker, from the oldest ancestor down to this walker.
    def genealogy
      genealogy_prefix + to_s
    end

    private

    # The page is asked to the getter once, then remembered.
    def page
      @page ||= @getter.get
    end

    def genealogy_prefix
      father.nil? ? "" : father.genealogy + "\n"
    end

    # Runs the scrap block in the context of this walker; page errors are
    # re-raised as browsing errors completed with the walker genealogy.
    def scrap_attributes
      instance_eval(&@scrap_attributes_block)
    rescue WalkerPageError => e
      raise BrowsingError, "#{e.message}\n#{genealogy}"
    end
  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker_page.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'mechanize'
|
23
|
+
|
24
|
+
# monkey patch to avoid a regex uri encoding error when importing
|
25
|
+
# incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
|
26
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
|
27
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
|
28
|
+
# /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
|
29
|
+
# from uri method
|
30
|
+
require "webrick/httputils"
|
31
|
+
module WEBrick
  module HTTPUtils
    # Replacement for WEBrick's own escape, delegating to the URI library.
    #
    # URI.escape was removed in Ruby 3.0; the RFC 2396 parser still provides
    # the same escaping, under URI::RFC2396_PARSER on recent versions of the
    # uri library and under URI::DEFAULT_PARSER on older ones.
    #
    # @param s [String] the text to escape
    # @return [String] s with the characters unsafe in an uri percent-encoded
    def self.escape(s)
      parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
      parser.escape(s)
    end
  end
end
|
36
|
+
|
37
|
+
module Storexplore

  # Thin wrapper around a Mechanize page, offering the few searches needed
  # to walk a store.
  class WalkerPage
    extend Forwardable

    # Returns a getter for uri; the page itself is only fetched on #get.
    def self.open(uri)
      Getter.new(uri)
    end

    def_delegator :@mechanize_page, :uri

    # Links matching selector that stay on the domain of this page, one per
    # target uri (the last link met wins), sorted by target uri.
    def search_links(selector)
      links_by_target = search_all_links(selector).each_with_object({}) do |link, found|
        target_uri = link.uri
        next unless same_domain?(uri, target_uri)

        found[target_uri.to_s] = link
      end

      # enforcing deterministicity for testing and debugging
      links_by_target.values.sort_by { |link| link.uri.to_s }
    end

    # First element matching selector; raises WalkerPageError if none.
    def get_one(selector)
      matches = @mechanize_page.search(selector)
      first_or_throw(matches, "elements", selector)
    end

    # Texts of all elements matching selector, joined with separator;
    # raises WalkerPageError if there is none.
    def get_all(selector, separator)
      matches = @mechanize_page.search(selector)
      throw_if_empty(matches, "elements", selector)

      matches.map(&:text).join(separator)
    end

    # First image matching selector; raises WalkerPageError if none.
    def get_image(selector)
      images = @mechanize_page.images_with(search: selector)
      first_or_throw(images, "images", selector)
    end

    private

    def initialize(mechanize_page)
      @mechanize_page = mechanize_page
    end

    # Relative targets are always considered to be on the same domain.
    def same_domain?(source_uri, target_uri)
      target_uri.relative? || (UriUtils.domain(source_uri) == UriUtils.domain(target_uri))
    end

    def search_all_links(selector)
      raw_links = @mechanize_page.links_with(search: selector)
      raw_links.map { |raw_link| Link.new(raw_link) }
    end

    def first_or_throw(elements, name, selector)
      throw_if_empty(elements, name, selector)
      elements.first
    end

    def throw_if_empty(elements, name, selector)
      return unless elements.empty?

      raise WalkerPageError, "Page \"#{uri}\" does not contain any #{name} like \"#{selector}\""
    end

    # Fetches the page behind an uri on demand, and remembers it.
    class Getter
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      def get
        @page ||= get_page
      end

      def text
        @uri.to_s
      end

      private

      def get_page
        # NOTE: by default Mechanize has infinite history, and causes memory leaks
        browser = Mechanize.new { |mechanize| mechanize.history.max_size = 0 }

        WalkerPage.new(browser.get(@uri))
      end
    end

    # A link found in a page; following it gives the target WalkerPage.
    class Link
      extend Forwardable

      def_delegator :@mechanize_link, :uri

      def initialize(mechanize_link)
        @mechanize_link = mechanize_link
      end

      def get
        WalkerPage.new(@mechanize_link.click)
      end

      def text
        @mechanize_link.text
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# walker_page_error.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
module Storexplore
  # Error type for failed page lookups; adds nothing to StandardError.
  WalkerPageError = Class.new(StandardError)
end
|
data/lib/storexplore.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# storexplore.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "storexplore/version"
|
23
|
+
require "storexplore/array_utils"
|
24
|
+
require "storexplore/api"
|
25
|
+
require "storexplore/api_builder"
|
26
|
+
require "storexplore/browsing_error"
|
27
|
+
require "storexplore/digger"
|
28
|
+
require "storexplore/hash_utils"
|
29
|
+
require "storexplore/null_digger"
|
30
|
+
require "storexplore/uri_utils"
|
31
|
+
require "storexplore/walker"
|
32
|
+
require "storexplore/walker_page"
|
33
|
+
require "storexplore/walker_page_error"
|
34
|
+
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# api_builder_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore

  # Specification of ApiBuilder. Stubs use the allow(...).to receive(...)
  # syntax, consistent with the expect(...).to receive(...) expectations.
  describe ApiBuilder do

    before :each do
      @url = "http://www.mega-store.com"
      @api = double("Store api").as_null_object
      @api_class = double("Store api class")
      allow(@api_class).to receive(:new).with(@url).and_return(@api)

      @selector = "a.child"
      @digger = double("Digger")
      @digger_class = double("Digger class")
    end

    context "using define method" do
      it "creates new store api" do
        @builder = ApiBuilder.define(@api_class, Digger) { }

        expect(@builder.new(@url)).to eq @api
      end

      it "initializes nested definition through its block" do
        builder = double(ApiBuilder)
        allow(ApiBuilder).to receive(:new).and_return(builder)

        expect(builder).to receive(:complex_builder_initialization)

        ApiBuilder.define(@api_class, Digger) do
          complex_builder_initialization
        end
      end
    end

    context "when nesting definitions" do

      before :each do
        @builder = ApiBuilder.new(@api_class, @digger_class)
      end

      # Sub builders are stubbed only once the builder under test exists.
      # This hook is declared once for the whole context instead of once
      # per generated group of examples.
      before :each do
        @sub_builder = double(ApiBuilder)
        allow(ApiBuilder).to receive(:new).and_return(@sub_builder)
        allow(@digger_class).to receive(:new).with(@selector, @sub_builder).and_return(@digger)
      end

      # Building the api is what triggers the expectations set in examples.
      after :each do
        @builder.new(@url)
      end

      [:categories, :items].each do |sub_definition|

        it "tells the store api how to find sub #{sub_definition}" do
          expect(@api).to receive("#{sub_definition}_digger=").with(@digger)

          @builder.send(sub_definition, @selector) do end
        end

        it "initialises the sub #{sub_definition} builder" do
          expect(@sub_builder).to receive(:sub_builder_initialization)

          @builder.send(sub_definition, @selector) do
            sub_builder_initialization
          end
        end
      end

      it "tells the store api how to parse attributes" do
        scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }

        expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)

        @builder.attributes(&scrap_attributes_block)
      end

    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# api_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require 'spec_helper'
|
23
|
+
|
24
|
+
module Storexplore

  # Specification of Api: browsing an url uses the builder registered for
  # its store, and fails for stores without a registered builder.
  describe Api do

    before :each do
      my_store = "www.my-store.com"
      builder = double("ApiBuilder class")
      Api.register_builder(my_store, builder)

      @url = "http://#{my_store}"
      walker = double(WalkerPage)
      allow(WalkerPage).to receive(:open).with(@url).and_return(walker)

      @store_api = double(ApiBuilder)
      allow(builder).to receive(:new).with(walker).and_return(@store_api)
    end

    it "selects the good store items api builder to browse a store" do
      expect(Api.browse(@url)).to eq @store_api
    end

    it "fails when it does not know how to browse a store" do
      expect { Api.browse("http://unknown.store.com") }.to raise_error(NotImplementedError)
    end

  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#
|
3
|
+
# digger_spec.rb
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
|
6
|
+
#
|
7
|
+
# This library is free software; you can redistribute it and/or
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
9
|
+
# License as published by the Free Software Foundation; either
|
10
|
+
# version 3.0 of the License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
+
# Lesser General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
18
|
+
# License along with this library; if not, write to the Free Software
|
19
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
20
|
+
# MA 02110-1301 USA
|
21
|
+
|
22
|
+
require "spec_helper"
|
23
|
+
|
24
|
+
module Storexplore

  # Specification of Digger: one sub walker is created per link found on
  # the page with the digger selector.
  describe Digger do

    before :each do
      @selector = "a.items"
      @factory = double("Sub walker factory")
      @digger = Digger.new(@selector, @factory)

      @links = [double("Link"), double("Link")]
      @page = double(WalkerPage)
      allow(@page).to receive(:search_links).with(@selector).and_return(@links)
    end

    it "creates sub walkers for each link it finds" do
      @links.each do |link|
        expect(@factory).to receive(:new).with(link, anything, anything)
      end

      # to_a makes sure the sub walkers are actually enumerated
      @digger.sub_walkers(@page, nil).to_a
    end

    it "for debugging purpose, provides father walker and link index to sub walkers" do
      father = double("Father walker")

      @links.each_with_index do |link, index|
        expect(@factory).to receive(:new).with(link, father, index)
      end

      @digger.sub_walkers(@page, father).to_a
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
</head>
|
4
|
+
<body>
|
5
|
+
<div id="unique"></div>
|
6
|
+
<p class="number">0</p>
|
7
|
+
<p class="number">1</p>
|
8
|
+
|
9
|
+
<a href="b.html" class="letter">b</a>
|
10
|
+
<a href="a.html" class="letter">a</a>
|
11
|
+
|
12
|
+
<a href="http://www.twilight-zone.com" id="outbound">twilight</a>
|
13
|
+
|
14
|
+
<a href="unique.html" class="twin">first twin</a>
|
15
|
+
<a href="unique.html" class="twin">second twin</a>
|
16
|
+
|
17
|
+
<a href="store_walker_page_spec_fixture.html" id="myself">myself</a>
|
18
|
+
|
19
|
+
<img class="image" src="toto.txt"/>
|
20
|
+
</body>
|
21
|
+
</html>
|