image_scraper 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ script: bundle exec rake
2
+ language: ruby
3
+ rvm: 1.9.3
4
+
data/Gemfile CHANGED
@@ -11,7 +11,7 @@ gem "rails"
11
11
  # Include everything needed to run rake, tests, features, etc.
12
12
  group :development do
13
13
  gem "shoulda", ">= 0"
14
- gem "bundler", "~> 1.0.0"
15
- gem "jeweler", "~> 1.5.2"
16
- gem "rcov", ">= 0"
14
+ gem "bundler", "~> 1.2"
15
+ gem "jeweler", "~> 1.5"
16
+ #gem "rcov", ">= 0"
17
17
  end
@@ -0,0 +1,114 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ actionmailer (3.2.10)
5
+ actionpack (= 3.2.10)
6
+ mail (~> 2.4.4)
7
+ actionpack (3.2.10)
8
+ activemodel (= 3.2.10)
9
+ activesupport (= 3.2.10)
10
+ builder (~> 3.0.0)
11
+ erubis (~> 2.7.0)
12
+ journey (~> 1.0.4)
13
+ rack (~> 1.4.0)
14
+ rack-cache (~> 1.2)
15
+ rack-test (~> 0.6.1)
16
+ sprockets (~> 2.2.1)
17
+ activemodel (3.2.10)
18
+ activesupport (= 3.2.10)
19
+ builder (~> 3.0.0)
20
+ activerecord (3.2.10)
21
+ activemodel (= 3.2.10)
22
+ activesupport (= 3.2.10)
23
+ arel (~> 3.0.2)
24
+ tzinfo (~> 0.3.29)
25
+ activeresource (3.2.10)
26
+ activemodel (= 3.2.10)
27
+ activesupport (= 3.2.10)
28
+ activesupport (3.2.10)
29
+ i18n (~> 0.6)
30
+ multi_json (~> 1.0)
31
+ addressable (2.3.2)
32
+ arel (3.0.2)
33
+ bourne (1.1.2)
34
+ mocha (= 0.10.5)
35
+ builder (3.0.4)
36
+ css_parser (1.2.6)
37
+ addressable
38
+ rdoc
39
+ erubis (2.7.0)
40
+ git (1.2.5)
41
+ hike (1.2.1)
42
+ i18n (0.6.1)
43
+ jeweler (1.8.4)
44
+ bundler (~> 1.0)
45
+ git (>= 1.2.5)
46
+ rake
47
+ rdoc
48
+ journey (1.0.4)
49
+ json (1.7.6)
50
+ mail (2.4.4)
51
+ i18n (>= 0.4.0)
52
+ mime-types (~> 1.16)
53
+ treetop (~> 1.4.8)
54
+ metaclass (0.0.1)
55
+ mime-types (1.19)
56
+ mocha (0.10.5)
57
+ metaclass (~> 0.0.1)
58
+ multi_json (1.5.0)
59
+ nokogiri (1.5.6)
60
+ polyglot (0.3.3)
61
+ rack (1.4.3)
62
+ rack-cache (1.2)
63
+ rack (>= 0.4)
64
+ rack-ssl (1.3.2)
65
+ rack
66
+ rack-test (0.6.2)
67
+ rack (>= 1.0)
68
+ rails (3.2.10)
69
+ actionmailer (= 3.2.10)
70
+ actionpack (= 3.2.10)
71
+ activerecord (= 3.2.10)
72
+ activeresource (= 3.2.10)
73
+ activesupport (= 3.2.10)
74
+ bundler (~> 1.0)
75
+ railties (= 3.2.10)
76
+ railties (3.2.10)
77
+ actionpack (= 3.2.10)
78
+ activesupport (= 3.2.10)
79
+ rack-ssl (~> 1.3.2)
80
+ rake (>= 0.8.7)
81
+ rdoc (~> 3.4)
82
+ thor (>= 0.14.6, < 2.0)
83
+ rake (10.0.3)
84
+ rdoc (3.12)
85
+ json (~> 1.4)
86
+ shoulda (3.3.2)
87
+ shoulda-context (~> 1.0.1)
88
+ shoulda-matchers (~> 1.4.1)
89
+ shoulda-context (1.0.2)
90
+ shoulda-matchers (1.4.2)
91
+ activesupport (>= 3.0.0)
92
+ bourne (~> 1.1.2)
93
+ sprockets (2.2.2)
94
+ hike (~> 1.2)
95
+ multi_json (~> 1.0)
96
+ rack (~> 1.0)
97
+ tilt (~> 1.1, != 1.3.0)
98
+ thor (0.16.0)
99
+ tilt (1.3.3)
100
+ treetop (1.4.12)
101
+ polyglot
102
+ polyglot (>= 0.3.1)
103
+ tzinfo (0.3.35)
104
+
105
+ PLATFORMS
106
+ ruby
107
+
108
+ DEPENDENCIES
109
+ bundler (~> 1.2)
110
+ css_parser
111
+ jeweler (~> 1.5)
112
+ nokogiri
113
+ rails
114
+ shoulda
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # image_scraper
2
2
 
3
+ [![Build Status](https://travis-ci.org/charlotte-ruby/image_scraper.png?branch=master)](http://travis-ci.org/charlotte-ruby/image_scraper)
4
+
3
5
  Simple utility that pulls image URLS from web page
4
6
 
5
7
  ## INSTALL
data/Rakefile CHANGED
@@ -32,16 +32,16 @@ Rake::TestTask.new(:test) do |test|
32
32
  test.verbose = true
33
33
  end
34
34
 
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |test|
37
- test.libs << 'test'
38
- test.pattern = 'test/**/test_*.rb'
39
- test.verbose = true
40
- end
35
+ #require 'rcov/rcovtask'
36
+ #Rcov::RcovTask.new do |test|
37
+ # test.libs << 'test'
38
+ # test.pattern = 'test/**/test_*.rb'
39
+ # test.verbose = true
40
+ #end
41
41
 
42
42
  task :default => :test
43
43
 
44
- require 'rake/rdoctask'
44
+ require 'rdoc/task'
45
45
  Rake::RDocTask.new do |rdoc|
46
46
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
47
47
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.7
1
+ 0.1.8
@@ -4,21 +4,23 @@
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
- s.name = %q{image_scraper}
8
- s.version = "0.1.7"
7
+ s.name = "image_scraper"
8
+ s.version = "0.1.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John McAliley"]
12
- s.date = %q{2012-01-02}
13
- s.description = %q{Simple utility to pull image urls from web page}
14
- s.email = %q{john.mcaliley@gmail.com}
12
+ s.date = "2013-01-17"
13
+ s.description = "Simple utility to pull image urls from web page"
14
+ s.email = "john.mcaliley@gmail.com"
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE.txt",
17
17
  "README.md"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
+ ".travis.yml",
21
22
  "Gemfile",
23
+ "Gemfile.lock",
22
24
  "LICENSE.txt",
23
25
  "README.md",
24
26
  "Rakefile",
@@ -29,18 +31,13 @@ Gem::Specification.new do |s|
29
31
  "lib/image_scraper/railtie.rb",
30
32
  "lib/image_scraper/util.rb"
31
33
  ]
32
- s.homepage = %q{http://github.com/charlotte-ruby/image_scraper}
34
+ s.homepage = "http://github.com/charlotte-ruby/image_scraper"
33
35
  s.licenses = ["MIT"]
34
36
  s.require_paths = ["lib"]
35
- s.rubygems_version = %q{1.3.7}
36
- s.summary = %q{Simple utility to pull image urls from web page}
37
- s.test_files = [
38
- "test/helper.rb",
39
- "test/test_image_scraper.rb"
40
- ]
37
+ s.rubygems_version = "1.8.24"
38
+ s.summary = "Simple utility to pull image urls from web page"
41
39
 
42
40
  if s.respond_to? :specification_version then
43
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
41
  s.specification_version = 3
45
42
 
46
43
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -48,9 +45,8 @@ Gem::Specification.new do |s|
48
45
  s.add_runtime_dependency(%q<css_parser>, [">= 0"])
49
46
  s.add_runtime_dependency(%q<rails>, [">= 0"])
50
47
  s.add_development_dependency(%q<shoulda>, [">= 0"])
51
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
- s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
53
- s.add_development_dependency(%q<rcov>, [">= 0"])
48
+ s.add_development_dependency(%q<bundler>, ["~> 1.2"])
49
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
54
50
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
55
51
  s.add_runtime_dependency(%q<css_parser>, [">= 0"])
56
52
  else
@@ -58,9 +54,8 @@ Gem::Specification.new do |s|
58
54
  s.add_dependency(%q<css_parser>, [">= 0"])
59
55
  s.add_dependency(%q<rails>, [">= 0"])
60
56
  s.add_dependency(%q<shoulda>, [">= 0"])
61
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
63
- s.add_dependency(%q<rcov>, [">= 0"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.2"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
64
59
  s.add_dependency(%q<nokogiri>, [">= 0"])
65
60
  s.add_dependency(%q<css_parser>, [">= 0"])
66
61
  end
@@ -69,9 +64,8 @@ Gem::Specification.new do |s|
69
64
  s.add_dependency(%q<css_parser>, [">= 0"])
70
65
  s.add_dependency(%q<rails>, [">= 0"])
71
66
  s.add_dependency(%q<shoulda>, [">= 0"])
72
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
73
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
74
- s.add_dependency(%q<rcov>, [">= 0"])
67
+ s.add_dependency(%q<bundler>, ["~> 1.2"])
68
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
75
69
  s.add_dependency(%q<nokogiri>, [">= 0"])
76
70
  s.add_dependency(%q<css_parser>, [">= 0"])
77
71
  end
@@ -33,8 +33,8 @@ module ImageScraper
33
33
  def stylesheet_images
34
34
  images = []
35
35
  stylesheets.each do |stylesheet|
36
- file = open(stylesheet)
37
- css = file.string rescue IO.read(file)
36
+ file = open(stylesheet) rescue next
37
+ css = file.string rescue IO.read(file) rescue next
38
38
 
39
39
  images += css.scan(/url\((.*?)\)/).collect do |image_url|
40
40
  image_url = URI.escape image_url[0]
metadata CHANGED
@@ -1,165 +1,156 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: image_scraper
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 7
9
- segments_generated: true
10
- version: 0.1.7
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ prerelease:
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - John McAliley
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-01-02 00:00:00 -05:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
12
+ date: 2013-01-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
22
15
  name: nokogiri
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- segments_generated: true
31
- version: "0"
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :runtime
33
23
  prerelease: false
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
36
31
  name: css_parser
37
- requirement: &id002 !ruby/object:Gem::Requirement
38
- none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- segments:
43
- - 0
44
- segments_generated: true
45
- version: "0"
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
46
38
  type: :runtime
47
39
  prerelease: false
48
- version_requirements: *id002
49
- - !ruby/object:Gem::Dependency
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
50
47
  name: rails
51
- requirement: &id003 !ruby/object:Gem::Requirement
52
- none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
- segments_generated: true
59
- version: "0"
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
60
54
  type: :runtime
61
55
  prerelease: false
62
- version_requirements: *id003
63
- - !ruby/object:Gem::Dependency
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
64
63
  name: shoulda
65
- requirement: &id004 !ruby/object:Gem::Requirement
66
- none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- segments:
71
- - 0
72
- segments_generated: true
73
- version: "0"
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
74
70
  type: :development
75
71
  prerelease: false
76
- version_requirements: *id004
77
- - !ruby/object:Gem::Dependency
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
78
79
  name: bundler
79
- requirement: &id005 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
80
81
  none: false
81
- requirements:
82
+ requirements:
82
83
  - - ~>
83
- - !ruby/object:Gem::Version
84
- segments:
85
- - 1
86
- - 0
87
- - 0
88
- segments_generated: true
89
- version: 1.0.0
84
+ - !ruby/object:Gem::Version
85
+ version: '1.2'
90
86
  type: :development
91
87
  prerelease: false
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '1.2'
94
+ - !ruby/object:Gem::Dependency
94
95
  name: jeweler
95
- requirement: &id006 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
96
97
  none: false
97
- requirements:
98
+ requirements:
98
99
  - - ~>
99
- - !ruby/object:Gem::Version
100
- segments:
101
- - 1
102
- - 5
103
- - 2
104
- segments_generated: true
105
- version: 1.5.2
106
- type: :development
107
- prerelease: false
108
- version_requirements: *id006
109
- - !ruby/object:Gem::Dependency
110
- name: rcov
111
- requirement: &id007 !ruby/object:Gem::Requirement
112
- none: false
113
- requirements:
114
- - - ">="
115
- - !ruby/object:Gem::Version
116
- segments:
117
- - 0
118
- segments_generated: true
119
- version: "0"
100
+ - !ruby/object:Gem::Version
101
+ version: '1.5'
120
102
  type: :development
121
103
  prerelease: false
122
- version_requirements: *id007
123
- - !ruby/object:Gem::Dependency
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '1.5'
110
+ - !ruby/object:Gem::Dependency
124
111
  name: nokogiri
125
- requirement: &id008 !ruby/object:Gem::Requirement
126
- none: false
127
- requirements:
128
- - - ">="
129
- - !ruby/object:Gem::Version
130
- segments:
131
- - 0
132
- segments_generated: true
133
- version: "0"
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
134
118
  type: :runtime
135
119
  prerelease: false
136
- version_requirements: *id008
137
- - !ruby/object:Gem::Dependency
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
138
127
  name: css_parser
139
- requirement: &id009 !ruby/object:Gem::Requirement
140
- none: false
141
- requirements:
142
- - - ">="
143
- - !ruby/object:Gem::Version
144
- segments:
145
- - 0
146
- segments_generated: true
147
- version: "0"
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
148
134
  type: :runtime
149
135
  prerelease: false
150
- version_requirements: *id009
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
151
142
  description: Simple utility to pull image urls from web page
152
143
  email: john.mcaliley@gmail.com
153
144
  executables: []
154
-
155
145
  extensions: []
156
-
157
- extra_rdoc_files:
146
+ extra_rdoc_files:
158
147
  - LICENSE.txt
159
148
  - README.md
160
- files:
149
+ files:
161
150
  - .document
151
+ - .travis.yml
162
152
  - Gemfile
153
+ - Gemfile.lock
163
154
  - LICENSE.txt
164
155
  - README.md
165
156
  - Rakefile
@@ -169,43 +160,32 @@ files:
169
160
  - lib/image_scraper/client.rb
170
161
  - lib/image_scraper/railtie.rb
171
162
  - lib/image_scraper/util.rb
172
- - test/helper.rb
173
- - test/test_image_scraper.rb
174
- has_rdoc: true
175
163
  homepage: http://github.com/charlotte-ruby/image_scraper
176
- licenses:
164
+ licenses:
177
165
  - MIT
178
166
  post_install_message:
179
167
  rdoc_options: []
180
-
181
- require_paths:
168
+ require_paths:
182
169
  - lib
183
- required_ruby_version: !ruby/object:Gem::Requirement
170
+ required_ruby_version: !ruby/object:Gem::Requirement
184
171
  none: false
185
- requirements:
186
- - - ">="
187
- - !ruby/object:Gem::Version
188
- hash: -168406416917257246
189
- segments:
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ segments:
190
177
  - 0
191
- segments_generated: true
192
- version: "0"
193
- required_rubygems_version: !ruby/object:Gem::Requirement
178
+ hash: 3624342284109897327
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
194
180
  none: false
195
- requirements:
196
- - - ">="
197
- - !ruby/object:Gem::Version
198
- segments:
199
- - 0
200
- segments_generated: true
201
- version: "0"
181
+ requirements:
182
+ - - ! '>='
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
202
185
  requirements: []
203
-
204
186
  rubyforge_project:
205
- rubygems_version: 1.3.7
187
+ rubygems_version: 1.8.24
206
188
  signing_key:
207
189
  specification_version: 3
208
190
  summary: Simple utility to pull image urls from web page
209
- test_files:
210
- - test/helper.rb
211
- - test/test_image_scraper.rb
191
+ test_files: []
@@ -1,18 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'test/unit'
11
- require 'shoulda'
12
-
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
- $LOAD_PATH.unshift(File.dirname(__FILE__))
15
- require 'image_scraper'
16
-
17
- class Test::Unit::TestCase
18
- end
@@ -1,112 +0,0 @@
1
- require 'pp'
2
- require 'helper'
3
-
4
-
5
- #TODO: these tests will not work forever. Try to test against a static web page instead of external URLs
6
- # Consider using https://raw.github.com/charlotte-ruby/image_scraper urls
7
-
8
- class TestImageScraper < Test::Unit::TestCase
9
- should "return list of all image urls on a web page with absolute paths" do
10
- images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
11
- "http://bits.wikimedia.org/images/wikimedia-button.png",
12
- "http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
13
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:include_css_images=>false)
14
- assert_equal images, scraper.image_urls
15
- end
16
-
17
- should "return a list of images with whitespace stripped from the src" do
18
- client = ImageScraper::Client.new("http://www.google.com")
19
- html = IO.read(File.dirname(__FILE__)+"/resources/extra_whitespace.html")
20
- client.doc = Nokogiri::HTML(html)
21
- images = ["http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter._V164348457_.jpg","http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter.jpg"]
22
- assert_equal images, client.image_urls
23
- end
24
-
25
- should "return list of all image urls on a web page with relative paths" do
26
- images = ["//bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
27
- "//bits.wikimedia.org/images/wikimedia-button.png",
28
- "//bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
29
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:convert_to_absolute_url=>false,:include_css_images=>false)
30
- assert_equal images, scraper.image_urls
31
- end
32
-
33
- should "return list of stylesheets contained in html page (relative path)" do
34
- doc = Nokogiri::HTML(IO.read(File.dirname(__FILE__)+"/resources/stylesheet_test.html"))
35
- domain = "http://test.com"
36
- assert_equal ["http://test.com/phoenix/testcentral.css","http://test.com/engine1/style.css"], ImageScraper::Client.new("http://test.com").stylesheets
37
- end
38
-
39
- should "return proper absolute url for a page and asset" do
40
- assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com","image.gif")
41
- assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","images/image.gif")
42
- assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","/images/image.gif")
43
- assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","image.gif")
44
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
45
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","images/image.gif")
46
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
47
- assert_equal "http://www.test.com/", ImageScraper::Util.absolute_url("http://www.test.com/")
48
- assert_equal "http://www.test.com/123/test.html", ImageScraper::Util.absolute_url("http://www.test.com/123/test.html")
49
- end
50
-
51
- should "return images from a stylesheet" do
52
- scraper = ImageScraper::Client.new("http://couponshack.com")
53
- assert scraper.stylesheet_images.include? ("http://couponshack.com/images/bg.jpg")
54
- end
55
-
56
- should "strip quotes from a url" do
57
- assert_equal "/images/test.png", ImageScraper::Util.strip_quotes("'/images/test.png'")
58
- assert_equal "http://www.somsite.com/images/test.png", ImageScraper::Util.strip_quotes("'http://www.somsite.com/images/test.png'")
59
- assert_equal "/images/test.png", ImageScraper::Util.strip_quotes('"/images/test.png"')
60
- end
61
-
62
- should "return domain section from a url" do
63
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this.html")
64
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this/")
65
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what")
66
- assert_equal "http://www.ug.ly", ImageScraper::Util.domain("http://www.ug.ly/what/is/this/")
67
- end
68
-
69
- should "return nil for doc if URL is invalid" do
70
- scraper = ImageScraper::Client.new("couponshack.com")
71
- assert scraper.doc.nil?
72
- end
73
-
74
- should "return empty arrays if URL is invalid" do
75
- scraper = ImageScraper::Client.new("couponshack.com")
76
- assert_equal [], scraper.image_urls
77
- assert_equal [], scraper.stylesheets
78
- assert_equal [], scraper.stylesheet_images
79
- assert_equal [], scraper.page_images
80
- end
81
-
82
- should "Handle a URL with unescaped spaces" do
83
- images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
84
- "http://bits.wikimedia.org/images/wikimedia-button.png",
85
- "http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
86
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard test image",:include_css_images=>false)
87
- assert_equal images, scraper.image_urls
88
- end
89
-
90
- should "Handle a page image with an unescaped url" do
91
- scraper = ImageScraper::Client.new ''
92
- scraper.doc = Nokogiri::HTML("<img src='http://test.com/unescaped path'>")
93
- assert_equal ['http://test.com/unescaped%20path'], scraper.page_images
94
- end
95
-
96
- should "Handle a stylesheet with an unescaped url" do
97
- scraper = ImageScraper::Client.new ''
98
- scraper.url = 'http://test.com'
99
- scraper.doc = Nokogiri::HTML("<link rel='stylesheet' href='http://test.com/unescaped path.css'>")
100
- assert_equal ['http://test.com/unescaped%20path.css'], scraper.stylesheets
101
- end
102
-
103
- should "Handle a stylesheet image with an unescaped url" do
104
- scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/stylesheet_unescaped_image.html', :include_css_images => true
105
- assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/some%20image.png'], scraper.stylesheet_images
106
- end
107
-
108
- should "Handle a stylesheet image with a relative url" do
109
- scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/relative_image_url.html', :include_css_images => true
110
- assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/test/images/some_image.png'], scraper.stylesheet_images
111
- end
112
- end