storexplore 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.gitignore +26 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +9 -0
  7. data/LICENSE +165 -0
  8. data/README.md +29 -0
  9. data/Rakefile +1 -0
  10. data/lib/storexplore/api.rb +63 -0
  11. data/lib/storexplore/api_builder.rb +68 -0
  12. data/lib/storexplore/array_utils.rb +36 -0
  13. data/lib/storexplore/browsing_error.rb +26 -0
  14. data/lib/storexplore/digger.rb +35 -0
  15. data/lib/storexplore/hash_utils.rb +56 -0
  16. data/lib/storexplore/null_digger.rb +30 -0
  17. data/lib/storexplore/testing/api_shared_examples.rb +140 -0
  18. data/lib/storexplore/testing/configuration.rb +56 -0
  19. data/lib/storexplore/testing/dummy_data.rb +67 -0
  20. data/lib/storexplore/testing/dummy_store.rb +195 -0
  21. data/lib/storexplore/testing/dummy_store_api.rb +54 -0
  22. data/lib/storexplore/testing/dummy_store_constants.rb +31 -0
  23. data/lib/storexplore/testing/dummy_store_generator.rb +65 -0
  24. data/lib/storexplore/testing/matchers/have_unique_matcher.rb +74 -0
  25. data/lib/storexplore/testing/matchers/mostly_matcher.rb +45 -0
  26. data/lib/storexplore/testing.rb +30 -0
  27. data/lib/storexplore/uri_utils.rb +38 -0
  28. data/lib/storexplore/version.rb +24 -0
  29. data/lib/storexplore/walker.rb +84 -0
  30. data/lib/storexplore/walker_page.rb +142 -0
  31. data/lib/storexplore/walker_page_error.rb +25 -0
  32. data/lib/storexplore.rb +34 -0
  33. data/spec/lib/storexplore/api_builder_spec.rb +99 -0
  34. data/spec/lib/storexplore/api_spec.rb +44 -0
  35. data/spec/lib/storexplore/digger_spec.rb +53 -0
  36. data/spec/lib/storexplore/store_walker_page_spec_fixture.html +21 -0
  37. data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +120 -0
  38. data/spec/lib/storexplore/uri_utils_spec.rb +51 -0
  39. data/spec/lib/storexplore/walker_page_spec.rb +120 -0
  40. data/spec/lib/storexplore/walker_spec.rb +97 -0
  41. data/spec/spec_helper.rb +28 -0
  42. data/storexplore.gemspec +27 -0
  43. data.tar.gz.sig +0 -0
  44. metadata +187 -0
  45. metadata.gz.sig +0 -0
@@ -0,0 +1,45 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # mostly_matcher.rb
4
+ #
5
+ # Copyright (c) 2010, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
# Matcher verifying that a nested matcher holds for most of the items of a
# collection ("most" meaning at least the ratio returned by #threshold).
RSpec::Matchers.define :mostly do |item_matcher|

  match do |actual_items|
    matching_count = actual_matches(actual_items, item_matcher).length
    matching_count >= expected_uniques_count(actual_items)
  end

  description do
    "#{item_matcher.description} to be true for at least #{threshold*100}% of the items"
  end

  #private

  # Minimum number of items that must satisfy the nested matcher.
  def expected_uniques_count(actual_items)
    (threshold * actual_items.length).round
  end

  # Items for which the nested matcher succeeds.
  def actual_matches(actual_items, item_matcher)
    actual_items.find_all { |item| item_matcher.matches?(item) }
  end

  # Ratio of the items that are required to match.
  def threshold
    0.7
  end

end
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # testing.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'storexplore'
23
+ require 'storexplore/testing/api_shared_examples'
24
+ require 'storexplore/testing/configuration'
25
+ require 'storexplore/testing/dummy_data'
26
+ require 'storexplore/testing/dummy_store'
27
+ require 'storexplore/testing/dummy_store_api'
28
+ require 'storexplore/testing/dummy_store_generator'
29
+ require 'storexplore/testing/matchers/have_unique_matcher'
30
+ require 'storexplore/testing/matchers/mostly_matcher'
@@ -0,0 +1,38 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # uri_utils.rb
4
+ #
5
+ # Copyright (c) 2011, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+
23
module Storexplore

  # Extra URI utilities
  class UriUtils

    # Hosts that are a dotted IPv4 address
    IPV4_HOST = /\A[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\z/

    # The last two dot separated labels of a host name
    LAST_TWO_LABELS = /[^.]+\.[^.]+\z/

    # Extracts the domain from a URI.
    #
    # uri:: an object responding to +scheme+ and +host+ (for example a URI
    #       built by URI.parse)
    #
    # Returns
    # * "localhost" for uris with the "file" scheme
    # * nil when the uri has no host (relative uris) or when the host is an
    #   IPv4 address
    # * the last two labels of the host otherwise ("www.mega-store.com"
    #   gives "mega-store.com")
    # * the host itself when it does not contain two labels (for example
    #   "localhost"); this case used to fail with a NoMethodError on nil
    def self.domain(uri)
      return "localhost" if uri.scheme == "file"

      host = uri.host
      return nil if host.nil? || host.empty?
      return nil if host =~ IPV4_HOST

      labels = LAST_TWO_LABELS.match(host)
      labels ? labels[0] : host
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # version.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore
  # Version number of the storexplore gem
  VERSION = '0.0.1'
end
@@ -0,0 +1,84 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore

  # A node met while browsing a store. It is built around a getter, an
  # object responding to +text+ and to +get+ (which returns the page), and
  # delegates the discovery of sub categories and items to its diggers.
  class Walker

    attr_accessor :categories_digger, :items_digger, :scrap_attributes_block, :father, :index

    # getter:: object responding to +text+ and +get+
    #
    # A new walker finds no categories, no items, and has empty attributes
    # until its diggers and scrap_attributes_block are replaced.
    def initialize(getter)
      @getter = getter
      self.categories_digger = NullDigger.new
      self.items_digger = NullDigger.new
      self.scrap_attributes_block = proc { {} }
    end

    # Text of the getter this walker was built from
    def title
      @getter.text
    end

    # Uri of the page (the page is fetched on first use)
    def uri
      page.uri
    end

    # Result of scrap_attributes_block evaluated on this walker, memoized
    def attributes
      @attributes ||= scrap_attributes
    end

    # Sub walkers found on the page by the categories digger
    def categories
      categories_digger.sub_walkers(page, self)
    end

    # Sub walkers found on the page by the items digger
    def items
      items_digger.sub_walkers(page, self)
    end

    def to_s
      "#{self.class} ##{index} @#{uri}"
    end

    # One line per ancestor, from the oldest father down to this walker
    def genealogy
      return "" + to_s if father.nil?

      father.genealogy + "\n" + to_s
    end

    private

    # The page behind the getter, fetched once
    def page
      @page ||= @getter.get
    end

    # Runs the scraping block in the context of this walker; page errors
    # are re-raised as BrowsingError with the genealogy appended.
    def scrap_attributes
      instance_eval(&@scrap_attributes_block)
    rescue WalkerPageError => e
      raise BrowsingError.new("#{e.message}\n#{genealogy}")
    end
  end
end
@@ -0,0 +1,142 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker_page.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'mechanize'
23
+
24
+ # monkey patch to avoid a regex uri encoding error when importing
25
+ # incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
26
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
27
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
28
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
29
+ # from uri method
30
+ require "webrick/httputils"
31
module WEBrick::HTTPUtils
  # Escapes s with the URI library instead of WEBrick's own implementation
  # (see the encoding error described above this patch).
  #
  # URI.escape only forwarded to URI::DEFAULT_PARSER.escape, and was removed
  # in ruby 3.0: calling the parser directly gives the same result on old
  # rubies and keeps working on recent ones.
  def self.escape(s)
    URI::DEFAULT_PARSER.escape(s)
  end
end
36
+
37
module Storexplore

  # Wrapper around a Mechanize page, exposing only what walkers need:
  # searching links staying on the same domain, elements and images.
  class WalkerPage
    extend Forwardable

    # Returns a Getter for uri. Nothing is downloaded until Getter#get is
    # called.
    def self.open(uri)
      Getter.new(uri)
    end

    def_delegator :@mechanize_page, :uri

    # Links of the page matching selector, as Link instances.
    #
    # Only links that are relative, or whose domain is the same as the
    # page's, are kept. Links pointing to the same uri are returned once
    # (the last one found wins), and the result is sorted by uri.
    def search_links(selector)
      uri2links = {}
      search_all_links(selector).each do |link|
        target_uri = link.uri
        # Mechanize gives a nil uri to anchors without href, there is
        # nothing to follow there (this used to fail with NoMethodError)
        next if target_uri.nil?

        uri2links[target_uri.to_s] = link if same_domain?(uri, target_uri)
      end
      # enforcing determinism for testing and debugging
      uri2links.values.sort_by { |link| link.uri.to_s }
    end

    # First element matching selector.
    # Raises WalkerPageError if there is none.
    def get_one(selector)
      first_or_throw(@mechanize_page.search(selector), "elements", selector)
    end

    # Texts of all the elements matching selector, joined with separator.
    # Raises WalkerPageError if there is none.
    def get_all(selector, separator)
      elements = @mechanize_page.search(selector)
      throw_if_empty(elements, "elements", selector)

      elements.map(&:text).join(separator)
    end

    # First image matching selector.
    # Raises WalkerPageError if there is none.
    def get_image(selector)
      first_or_throw(@mechanize_page.images_with(search: selector), "images", selector)
    end

    private

    def initialize(mechanize_page)
      @mechanize_page = mechanize_page
    end

    # Relative targets stay on the current site, absolute ones are
    # compared through their domain (see UriUtils.domain)
    def same_domain?(source_uri, target_uri)
      target_uri.relative? || (UriUtils.domain(source_uri) == UriUtils.domain(target_uri))
    end

    def search_all_links(selector)
      @mechanize_page.links_with(search: selector).map { |link| Link.new(link) }
    end

    def first_or_throw(elements, name, selector)
      throw_if_empty(elements, name, selector)
      elements.first
    end

    def throw_if_empty(elements, name, selector)
      if elements.empty?
        raise WalkerPageError.new("Page \"#{uri}\" does not contain any #{name} like \"#{selector}\"")
      end
    end

    # Lazy access to the page at a given uri
    class Getter
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # The WalkerPage at uri, downloaded on the first call only
      def get
        @page ||= get_page
      end

      def text
        @uri.to_s
      end

      private

      def get_page
        agent = Mechanize.new do |it|
          # NOTE: by default Mechanize has infinite history, and causes memory leaks
          it.history.max_size = 0
        end

        WalkerPage.new(agent.get(@uri))
      end
    end

    # A link found in a page, can be followed with #get
    class Link
      extend Forwardable

      def initialize(mechanize_link)
        @mechanize_link = mechanize_link
      end

      def_delegator :@mechanize_link, :uri

      # Clicks the link and wraps the resulting page
      def get
        WalkerPage.new(@mechanize_link.click)
      end

      def text
        @mechanize_link.text
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker_page_error.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore
  # Error raised when a page does not contain what was searched in it
  class WalkerPageError < StandardError; end
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # storexplore.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "storexplore/version"
23
+ require "storexplore/array_utils"
24
+ require "storexplore/api"
25
+ require "storexplore/api_builder"
26
+ require "storexplore/browsing_error"
27
+ require "storexplore/digger"
28
+ require "storexplore/hash_utils"
29
+ require "storexplore/null_digger"
30
+ require "storexplore/uri_utils"
31
+ require "storexplore/walker"
32
+ require "storexplore/walker_page"
33
+ require "storexplore/walker_page_error"
34
+
@@ -0,0 +1,99 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # api_builder_spec.rb
4
+ #
5
+ # Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "spec_helper"
23
+
24
module Storexplore

  # Specification of ApiBuilder: the api class and the digger class are
  # doubles, only the collaboration between them is verified.
  describe ApiBuilder do

    before :each do
      @url = "http://www.mega-store.com"
      @selector = "a.child"

      @api = double("Store api").as_null_object
      @api_class = double("Store api class")
      @api_class.stub(:new).with(@url).and_return(@api)

      @digger = double("Digger")
      @digger_class = double("Digger class")
    end

    context "using define method" do
      it "creates new store api" do
        @builder = ApiBuilder.define(@api_class, Digger) { }

        expect(@builder.new(@url)).to eq @api
      end

      it "initializes nested definition through its block" do
        builder = double(ApiBuilder)
        ApiBuilder.stub(:new).and_return(builder)

        expect(builder).to receive(:complex_builder_initialization)

        ApiBuilder.define(@api_class, Digger) { complex_builder_initialization }
      end
    end

    context "when nesting definitions" do

      before :each do
        @builder = ApiBuilder.new(@api_class, @digger_class)
      end

      # the expectations are triggered when the api is actually built
      after :each do
        @builder.new(@url)
      end

      [:categories, :items].each do |sub_definition|

        before :each do
          @sub_builder = double(ApiBuilder)
          ApiBuilder.stub(:new).and_return(@sub_builder)
          @digger_class.stub(:new).with(@selector, @sub_builder).and_return(@digger)
        end

        it "tells the store api how to find sub #{sub_definition}" do
          expect(@api).to receive("#{sub_definition}_digger=").with(@digger)

          @builder.send(sub_definition, @selector) { }
        end

        it "initialises the sub #{sub_definition} builder" do
          expect(@sub_builder).to receive(:sub_builder_initialization)

          @builder.send(sub_definition, @selector) { sub_builder_initialization }
        end
      end

      it "tells the store api how to parse attributes" do
        scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }

        expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)

        @builder.attributes(&scrap_attributes_block)
      end

    end
  end
end
@@ -0,0 +1,44 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # api_spec.rb
4
+ #
5
+ # Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'spec_helper'
23
+
24
module Storexplore

  # Specification of Api: a builder is registered for a store, and
  # browsing a url of this store must go through this builder.
  describe Api do

    before :each do
      my_store = "www.my-store.com"
      builder = double(ApiBuilder.class)
      Api.register_builder(my_store, builder)

      @url = "http://#{my_store}"
      walker = double(WalkerPage)
      WalkerPage.stub(:open).with(@url).and_return(walker)

      @store_api = double(ApiBuilder)
      builder.stub(:new).with(walker).and_return(@store_api)
    end

    it "select the good store items api builder to browse a store" do
      expect(Api.browse(@url)).to eq @store_api
    end

    it "fails when it does not know how to browse a store" do
      expect { Api.browse("http://unknown.store.com") }.to raise_error(NotImplementedError)
    end

  end
end
@@ -0,0 +1,53 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # digger_spec.rb
4
+ #
5
+ # Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "spec_helper"
23
+
24
module Storexplore

  # Specification of Digger: given a page with two links matching the
  # selector, the factory must be asked for one sub walker per link.
  describe Digger do

    before :each do
      @selector = "a.items"
      @factory = double("Sub walker factory")
      @digger = Digger.new(@selector, @factory)

      @links = [double("Link"), double("Link")]
      @page = double(WalkerPage)
      @page.stub(:search_links).with(@selector).and_return(@links)
    end

    it "creates sub walkers for each link it finds" do
      @links.each do |link|
        expect(@factory).to receive(:new).with(link, anything, anything)
      end

      # to_a forces the evaluation of every sub walker
      @digger.sub_walkers(@page, nil).to_a
    end

    it "for debugging purpose, provides father walker and link index to sub walkers" do
      father = double("Father walker")

      @links.each_with_index do |link, index|
        expect(@factory).to receive(:new).with(link, father, index)
      end

      @digger.sub_walkers(@page, father).to_a
    end

  end
end
@@ -0,0 +1,21 @@
1
+ <html>
2
+ <head>
3
+ </head>
4
+ <body>
5
+ <div id="unique"></div>
6
+ <p class="number">0</p>
7
+ <p class="number">1</p>
8
+
9
+ <a href="b.html" class="letter">b</a>
10
+ <a href="a.html" class="letter">a</a>
11
+
12
+ <a href="http://www.twilight-zone.com" id="outbound">twilight</a>
13
+
14
+ <a href="unique.html" class="twin">first twin</a>
15
+ <a href="unique.html" class="twin">second twin</a>
16
+
17
+ <a href="store_walker_page_spec_fixture.html" id="myself">myself</a>
18
+
19
+ <img class="image" src="toto.txt"/>
20
+ </body>
21
+ </html>