storexplore 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.gitignore +26 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +9 -0
  7. data/LICENSE +165 -0
  8. data/README.md +29 -0
  9. data/Rakefile +1 -0
  10. data/lib/storexplore/api.rb +63 -0
  11. data/lib/storexplore/api_builder.rb +68 -0
  12. data/lib/storexplore/array_utils.rb +36 -0
  13. data/lib/storexplore/browsing_error.rb +26 -0
  14. data/lib/storexplore/digger.rb +35 -0
  15. data/lib/storexplore/hash_utils.rb +56 -0
  16. data/lib/storexplore/null_digger.rb +30 -0
  17. data/lib/storexplore/testing/api_shared_examples.rb +140 -0
  18. data/lib/storexplore/testing/configuration.rb +56 -0
  19. data/lib/storexplore/testing/dummy_data.rb +67 -0
  20. data/lib/storexplore/testing/dummy_store.rb +195 -0
  21. data/lib/storexplore/testing/dummy_store_api.rb +54 -0
  22. data/lib/storexplore/testing/dummy_store_constants.rb +31 -0
  23. data/lib/storexplore/testing/dummy_store_generator.rb +65 -0
  24. data/lib/storexplore/testing/matchers/have_unique_matcher.rb +74 -0
  25. data/lib/storexplore/testing/matchers/mostly_matcher.rb +45 -0
  26. data/lib/storexplore/testing.rb +30 -0
  27. data/lib/storexplore/uri_utils.rb +38 -0
  28. data/lib/storexplore/version.rb +24 -0
  29. data/lib/storexplore/walker.rb +84 -0
  30. data/lib/storexplore/walker_page.rb +142 -0
  31. data/lib/storexplore/walker_page_error.rb +25 -0
  32. data/lib/storexplore.rb +34 -0
  33. data/spec/lib/storexplore/api_builder_spec.rb +99 -0
  34. data/spec/lib/storexplore/api_spec.rb +44 -0
  35. data/spec/lib/storexplore/digger_spec.rb +53 -0
  36. data/spec/lib/storexplore/store_walker_page_spec_fixture.html +21 -0
  37. data/spec/lib/storexplore/testing/dummy_store_api_spec.rb +120 -0
  38. data/spec/lib/storexplore/uri_utils_spec.rb +51 -0
  39. data/spec/lib/storexplore/walker_page_spec.rb +120 -0
  40. data/spec/lib/storexplore/walker_spec.rb +97 -0
  41. data/spec/spec_helper.rb +28 -0
  42. data/storexplore.gemspec +27 -0
  43. data.tar.gz.sig +0 -0
  44. metadata +187 -0
  45. metadata.gz.sig +0 -0
@@ -0,0 +1,45 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # mostly_matcher.rb
4
+ #
5
+ # Copyright (c) 2010, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
# Matcher verifying that most items of a collection satisfy another matcher.
#
# The expectation passes when the number of items accepted by +item_matcher+
# is at least +threshold+ (70%) of the collection size, rounded.
RSpec::Matchers.define :mostly do |item_matcher|

  match do |actual_items|
    actual_matches(actual_items, item_matcher).length >= expected_uniques_count(actual_items)
  end

  description do
    "#{item_matcher.description} to be true for at least #{threshold*100}% of the items"
  end

  # Helpers below are intentionally left public, as in the original
  # (the `private` marker was commented out).

  # Share of the items that must satisfy the item matcher.
  def threshold
    0.7
  end

  # Minimum number of matching items required for the expectation to pass.
  def expected_uniques_count(actual_items)
    (threshold * actual_items.length).round
  end

  # Items for which +item_matcher.matches?+ answers a truthy value.
  def actual_matches(actual_items, item_matcher)
    actual_items.select { |candidate| item_matcher.matches?(candidate) }
  end

end
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # testing.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'storexplore'
23
+ require 'storexplore/testing/api_shared_examples'
24
+ require 'storexplore/testing/configuration'
25
+ require 'storexplore/testing/dummy_data'
26
+ require 'storexplore/testing/dummy_store'
27
+ require 'storexplore/testing/dummy_store_api'
28
+ require 'storexplore/testing/dummy_store_generator'
29
+ require 'storexplore/testing/matchers/have_unique_matcher'
30
+ require 'storexplore/testing/matchers/mostly_matcher'
@@ -0,0 +1,38 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # uri_utils.rb
4
+ #
5
+ # Copyright (c) 2011, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+
23
module Storexplore

  # Extra URI utilities
  class UriUtils

    # Extracts the domain (last two dot-separated labels of the host) from an uri.
    #
    # uri : an object answering +scheme+ and +host+ (for example a parsed URI).
    #
    # Returns
    # * "localhost" for file:// uris,
    # * nil when the uri has no host (relative uris) or when the host is a
    #   dotted-quad IPv4 address,
    # * the host itself when it contains no dot (for example "localhost"),
    # * the last two labels of the host otherwise ("www.store.com" -> "store.com").
    def self.domain(uri)
      return "localhost" if uri.scheme == "file"

      host = uri.host
      return nil if host.nil?
      return nil if host =~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/

      # Hosts without any dot do not match the two-label pattern: fall back to
      # the host itself instead of calling [] on a nil match.
      labels = /([^\.]+\.[^\.]+)$/.match(host)
      labels ? labels[0] : host
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # version.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore
  # Version of the storexplore gem.
  # Frozen so that the shared constant cannot be mutated in place by client code.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,84 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore

  # A node visited while exploring a store.
  #
  # A walker is built from a getter (an object answering +text+ and +get+).
  # The page is only fetched, through +getter.get+, the first time it is needed.
  # Sub categories and items are delegated to the configured diggers, and
  # attributes are scraped by evaluating +scrap_attributes_block+ in the context
  # of the walker.
  class Walker

    attr_accessor :categories_digger, :items_digger, :scrap_attributes_block, :father, :index

    # getter : object providing the walker's text and, lazily, its page.
    # Diggers default to NullDigger instances and the scraping block to one
    # returning an empty hash.
    def initialize(getter)
      @getter = getter
      self.categories_digger = NullDigger.new
      self.items_digger = NullDigger.new
      self.scrap_attributes_block = proc { {} }
    end

    # Text of the getter this walker was built from.
    def title
      @getter.text
    end

    # Uri of the underlying page (fetches the page if not done yet).
    def uri
      page.uri
    end

    # Result of the scraping block, computed once and memoized.
    def attributes
      @attributes ||= scrap_attributes
    end

    # Sub walkers found by the categories digger on this page.
    def categories
      categories_digger.sub_walkers(page, self)
    end

    # Sub walkers found by the items digger on this page.
    def items
      items_digger.sub_walkers(page, self)
    end

    def to_s
      "#{self.class} ##{index} @#{uri}"
    end

    # One line per ancestor, from the oldest father down to this walker.
    def genealogy
      genealogy_prefix + to_s
    end

    private

    # The page, fetched from the getter on first use.
    def page
      @page ||= @getter.get
    end

    # Genealogy of the father followed by a newline, or "" for a root walker.
    def genealogy_prefix
      return "" if father.nil?

      father.genealogy + "\n"
    end

    # Runs the scraping block with the walker as self; page errors are
    # re-raised as BrowsingError, with the genealogy appended for debugging.
    def scrap_attributes
      instance_eval(&@scrap_attributes_block)
    rescue WalkerPageError => error
      raise BrowsingError, "#{error.message}\n#{genealogy}"
    end
  end
end
@@ -0,0 +1,142 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker_page.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'mechanize'
23
+
24
+ # monkey patch to avoid a regex uri encoding error when importing
25
+ # incompatible encoding regexp match (ASCII-8BIT regexp with UTF-8 string) (Encoding::CompatibilityError)
26
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `gsub'
27
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:353:in `_escape'
28
+ # /home/philou/.rbenv/versions/1.9.3-p194/lib/ruby/1.9.1/webrick/httputils.rb:363:in `escape'
29
+ # from uri method
30
+ require "webrick/httputils"
31
# Monkey patch: replaces WEBrick::HTTPUtils.escape with URI's RFC 2396 escaping.
module WEBrick
  module HTTPUtils
    # Escapes the characters of +s+ that are unsafe in a URI.
    #
    # URI.escape was removed in Ruby 3.0; it delegated to the RFC 2396 parser,
    # so that parser is called directly here. URI::RFC2396_PARSER only exists
    # in recent versions of the uri library, hence the fallback on
    # URI::DEFAULT_PARSER for older rubies.
    def self.escape(s)
      parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
      parser.escape(s)
    end
  end
end
36
+
37
module Storexplore

  # Wrapper around a Mechanize page, offering the few search operations
  # needed to walk a store: links, single elements, joined texts and images.
  class WalkerPage
    extend Forwardable

    # Builds a getter for the page at +uri+. Nothing is downloaded until
    # Getter#get is called.
    def self.open(uri)
      Getter.new(uri)
    end

    # Uri of the wrapped page.
    def_delegator :@mechanize_page, :uri

    # Links matching +selector+ that stay in the domain of the page.
    #
    # Links are made unique by target uri (the last link wins) and are
    # returned sorted by uri. Links without any uri (Mechanize answers nil
    # for anchors that have no href) lead nowhere and are ignored instead of
    # raising a NoMethodError.
    def search_links(selector)
      uri2links = {}
      search_all_links(selector).each do |link|
        target_uri = link.uri
        next if target_uri.nil?

        uri2links[target_uri.to_s] = link if same_domain?(uri, target_uri)
      end
      # enforcing determinism for testing and debugging
      uri2links.values.sort_by { |link| link.uri.to_s }
    end

    # First element matching +selector+.
    # Raises WalkerPageError if there is none.
    def get_one(selector)
      first_or_throw(@mechanize_page.search(selector), "elements", selector)
    end

    # Texts of all the elements matching +selector+, joined with +separator+.
    # Raises WalkerPageError if there is none.
    def get_all(selector, separator)
      elements = @mechanize_page.search(selector)
      throw_if_empty(elements, "elements", selector)

      elements.map(&:text).join(separator)
    end

    # First image matching +selector+.
    # Raises WalkerPageError if there is none.
    def get_image(selector)
      first_or_throw(@mechanize_page.images_with(search: selector), "images", selector)
    end

    private

    def initialize(mechanize_page)
      @mechanize_page = mechanize_page
    end

    # Relative targets are always considered to be in the same domain.
    def same_domain?(source_uri, target_uri)
      target_uri.relative? || (UriUtils.domain(source_uri) == UriUtils.domain(target_uri))
    end

    # All the links matching +selector+, wrapped as Link instances.
    def search_all_links(selector)
      @mechanize_page.links_with(search: selector).map { |link| Link.new(link) }
    end

    def first_or_throw(elements, name, selector)
      throw_if_empty(elements, name, selector)
      elements.first
    end

    # Despite its name, this raises (not throws) a WalkerPageError.
    def throw_if_empty(elements, name, selector)
      if elements.empty?
        raise WalkerPageError.new("Page \"#{uri}\" does not contain any #{name} like \"#{selector}\"")
      end
    end

    # Lazy access to the page at a given uri.
    class Getter
      attr_reader :uri

      def initialize(uri)
        @uri = uri
      end

      # The page, downloaded on the first call and memoized afterwards.
      def get
        @page ||= get_page
      end

      def text
        @uri.to_s
      end

      private

      def get_page
        agent = Mechanize.new do |it|
          # NOTE: by default Mechanize has infinite history, and causes memory leaks
          it.history.max_size = 0
        end

        WalkerPage.new(agent.get(@uri))
      end
    end

    # Lazy access to the page a link points to.
    class Link
      extend Forwardable

      def initialize(mechanize_link)
        @mechanize_link = mechanize_link
      end

      # Target uri of the link.
      def_delegator :@mechanize_link, :uri

      # Follows the link and wraps the resulting page (not memoized).
      def get
        WalkerPage.new(@mechanize_link.click)
      end

      def text
        @mechanize_link.text
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # walker_page_error.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
module Storexplore
  # Error raised by WalkerPage when a page does not contain any element
  # matching the requested selector.
  WalkerPageError = Class.new(StandardError)
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # storexplore.rb
4
+ #
5
+ # Copyright (c) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "storexplore/version"
23
+ require "storexplore/array_utils"
24
+ require "storexplore/api"
25
+ require "storexplore/api_builder"
26
+ require "storexplore/browsing_error"
27
+ require "storexplore/digger"
28
+ require "storexplore/hash_utils"
29
+ require "storexplore/null_digger"
30
+ require "storexplore/uri_utils"
31
+ require "storexplore/walker"
32
+ require "storexplore/walker_page"
33
+ require "storexplore/walker_page_error"
34
+
@@ -0,0 +1,99 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # api_builder_spec.rb
4
+ #
5
+ # Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "spec_helper"
23
+
24
module Storexplore

  # Specs of ApiBuilder: building store apis and nesting sub definitions.
  # Stubs use the allow(...).to receive(...) syntax, consistently with the
  # expect(...).to receive(...) expectations.
  describe ApiBuilder do

    before :each do
      @url = "http://www.mega-store.com"
      @api = double("Store api").as_null_object
      @api_class = double("Store api class")
      allow(@api_class).to receive(:new).with(@url).and_return(@api)

      @selector = "a.child"
      @digger = double("Digger")
      @digger_class = double("Digger class")
    end

    context "using define method" do
      it "creates new store api" do
        @builder = ApiBuilder.define(@api_class, Digger) { }

        expect(@builder.new(@url)).to eq @api
      end

      it "initializes nested definition through its block" do
        builder = double(ApiBuilder)
        allow(ApiBuilder).to receive(:new).and_return(builder)

        expect(builder).to receive(:complex_builder_initialization)

        ApiBuilder.define(@api_class, Digger) do
          complex_builder_initialization
        end
      end
    end

    context "when nesting definitions" do

      before :each do
        # the real builder must be created before ApiBuilder.new gets stubbed
        @builder = ApiBuilder.new(@api_class, @digger_class)

        # This hook used to be declared inside the [:categories, :items] loop,
        # which registered it twice on this very context; once is enough.
        @sub_builder = double(ApiBuilder)
        allow(ApiBuilder).to receive(:new).and_return(@sub_builder)
        allow(@digger_class).to receive(:new).with(@selector, @sub_builder).and_return(@digger)
      end

      # The expectations set by the examples are triggered here, when the
      # builder actually creates the store api.
      after :each do
        @builder.new(@url)
      end

      [:categories, :items].each do |sub_definition|

        it "tells the store api how to find sub #{sub_definition}" do
          expect(@api).to receive("#{sub_definition}_digger=").with(@digger)

          @builder.send(sub_definition, @selector) { }
        end

        it "initialises the sub #{sub_definition} builder" do
          expect(@sub_builder).to receive(:sub_builder_initialization)

          @builder.send(sub_definition, @selector) do
            sub_builder_initialization
          end
        end
      end

      it "tells the store api how to parse attributes" do
        scrap_attributes_block = Proc.new { |page| @scrap_attributes_block_is_unique = true }

        expect(@api).to receive(:scrap_attributes_block=).with(scrap_attributes_block)

        @builder.attributes(&scrap_attributes_block)
      end

    end
  end
end
@@ -0,0 +1,44 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # api_spec.rb
4
+ #
5
+ # Copyright (C) 2010, 2011, 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require 'spec_helper'
23
+
24
module Storexplore

  # Specs of Api: selection of the registered builder matching a store url.
  describe Api do

    before :each do
      my_store = "www.my-store.com"
      @url = "http://#{my_store}"

      # The builder double used to be created with double(ApiBuilder.class),
      # which is a double named "Class"; it now has a meaningful name.
      builder = double("Api builder")
      walker = double(WalkerPage)
      @store_api = double(ApiBuilder)

      Api.register_builder(my_store, builder)
      allow(WalkerPage).to receive(:open).with(@url).and_return(walker)
      allow(builder).to receive(:new).with(walker).and_return(@store_api)
    end

    it "select the good store items api builder to browse a store" do
      expect(Api.browse(@url)).to eq @store_api
    end

    it "fails when it does not know how to browse a store" do
      expect { Api.browse("http://unknown.store.com") }.to raise_error(NotImplementedError)
    end

  end
end
@@ -0,0 +1,53 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ # digger_spec.rb
4
+ #
5
+ # Copyright (c) 2012, 2013 by Philippe Bourgau. All rights reserved.
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 3.0 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20
+ # MA 02110-1301 USA
21
+
22
+ require "spec_helper"
23
+
24
module Storexplore

  # Specs of Digger: creation of one sub walker per link found on a page.
  describe Digger do

    before :each do
      @selector = "a.items"
      @factory = double("Sub walker factory")
      @digger = Digger.new(@selector, @factory)

      @links = [double("Link"), double("Link")]
      @page = double(WalkerPage)
      allow(@page).to receive(:search_links).with(@selector).and_return(@links)
    end

    it "creates sub walkers for each link it finds" do
      @links.each do |link|
        expect(@factory).to receive(:new).with(link, anything, anything)
      end

      # to_a forces the enumeration of the sub walkers
      @digger.sub_walkers(@page, nil).to_a
    end

    it "for debugging purpose, provides father walker and link index to sub walkers" do
      father = double("Father walker")

      @links.each_with_index do |link, index|
        expect(@factory).to receive(:new).with(link, father, index)
      end

      @digger.sub_walkers(@page, father).to_a
    end

  end
end
@@ -0,0 +1,21 @@
1
+ <html>
2
+ <head>
3
+ </head>
4
+ <body>
5
+ <div id="unique"></div>
6
+ <p class="number">0</p>
7
+ <p class="number">1</p>
8
+
9
+ <a href="b.html" class="letter">b</a>
10
+ <a href="a.html" class="letter">a</a>
11
+
12
+ <a href="http://www.twilight-zone.com" id="outbound">twilight</a>
13
+
14
+ <a href="unique.html" class="twin">first twin</a>
15
+ <a href="unique.html" class="twin">second twin</a>
16
+
17
+ <a href="store_walker_page_spec_fixture.html" id="myself">myself</a>
18
+
19
+ <img class="image" src="toto.txt"/>
20
+ </body>
21
+ </html>