image_scraper 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ script: bundle exec rake
2
+ language: ruby
3
+ rvm: 1.9.3
4
+
data/Gemfile CHANGED
@@ -11,7 +11,7 @@ gem "rails"
11
11
  # Include everything needed to run rake, tests, features, etc.
12
12
  group :development do
13
13
  gem "shoulda", ">= 0"
14
- gem "bundler", "~> 1.0.0"
15
- gem "jeweler", "~> 1.5.2"
16
- gem "rcov", ">= 0"
14
+ gem "bundler", "~> 1.2"
15
+ gem "jeweler", "~> 1.5"
16
+ #gem "rcov", ">= 0"
17
17
  end
@@ -0,0 +1,114 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ actionmailer (3.2.10)
5
+ actionpack (= 3.2.10)
6
+ mail (~> 2.4.4)
7
+ actionpack (3.2.10)
8
+ activemodel (= 3.2.10)
9
+ activesupport (= 3.2.10)
10
+ builder (~> 3.0.0)
11
+ erubis (~> 2.7.0)
12
+ journey (~> 1.0.4)
13
+ rack (~> 1.4.0)
14
+ rack-cache (~> 1.2)
15
+ rack-test (~> 0.6.1)
16
+ sprockets (~> 2.2.1)
17
+ activemodel (3.2.10)
18
+ activesupport (= 3.2.10)
19
+ builder (~> 3.0.0)
20
+ activerecord (3.2.10)
21
+ activemodel (= 3.2.10)
22
+ activesupport (= 3.2.10)
23
+ arel (~> 3.0.2)
24
+ tzinfo (~> 0.3.29)
25
+ activeresource (3.2.10)
26
+ activemodel (= 3.2.10)
27
+ activesupport (= 3.2.10)
28
+ activesupport (3.2.10)
29
+ i18n (~> 0.6)
30
+ multi_json (~> 1.0)
31
+ addressable (2.3.2)
32
+ arel (3.0.2)
33
+ bourne (1.1.2)
34
+ mocha (= 0.10.5)
35
+ builder (3.0.4)
36
+ css_parser (1.2.6)
37
+ addressable
38
+ rdoc
39
+ erubis (2.7.0)
40
+ git (1.2.5)
41
+ hike (1.2.1)
42
+ i18n (0.6.1)
43
+ jeweler (1.8.4)
44
+ bundler (~> 1.0)
45
+ git (>= 1.2.5)
46
+ rake
47
+ rdoc
48
+ journey (1.0.4)
49
+ json (1.7.6)
50
+ mail (2.4.4)
51
+ i18n (>= 0.4.0)
52
+ mime-types (~> 1.16)
53
+ treetop (~> 1.4.8)
54
+ metaclass (0.0.1)
55
+ mime-types (1.19)
56
+ mocha (0.10.5)
57
+ metaclass (~> 0.0.1)
58
+ multi_json (1.5.0)
59
+ nokogiri (1.5.6)
60
+ polyglot (0.3.3)
61
+ rack (1.4.3)
62
+ rack-cache (1.2)
63
+ rack (>= 0.4)
64
+ rack-ssl (1.3.2)
65
+ rack
66
+ rack-test (0.6.2)
67
+ rack (>= 1.0)
68
+ rails (3.2.10)
69
+ actionmailer (= 3.2.10)
70
+ actionpack (= 3.2.10)
71
+ activerecord (= 3.2.10)
72
+ activeresource (= 3.2.10)
73
+ activesupport (= 3.2.10)
74
+ bundler (~> 1.0)
75
+ railties (= 3.2.10)
76
+ railties (3.2.10)
77
+ actionpack (= 3.2.10)
78
+ activesupport (= 3.2.10)
79
+ rack-ssl (~> 1.3.2)
80
+ rake (>= 0.8.7)
81
+ rdoc (~> 3.4)
82
+ thor (>= 0.14.6, < 2.0)
83
+ rake (10.0.3)
84
+ rdoc (3.12)
85
+ json (~> 1.4)
86
+ shoulda (3.3.2)
87
+ shoulda-context (~> 1.0.1)
88
+ shoulda-matchers (~> 1.4.1)
89
+ shoulda-context (1.0.2)
90
+ shoulda-matchers (1.4.2)
91
+ activesupport (>= 3.0.0)
92
+ bourne (~> 1.1.2)
93
+ sprockets (2.2.2)
94
+ hike (~> 1.2)
95
+ multi_json (~> 1.0)
96
+ rack (~> 1.0)
97
+ tilt (~> 1.1, != 1.3.0)
98
+ thor (0.16.0)
99
+ tilt (1.3.3)
100
+ treetop (1.4.12)
101
+ polyglot
102
+ polyglot (>= 0.3.1)
103
+ tzinfo (0.3.35)
104
+
105
+ PLATFORMS
106
+ ruby
107
+
108
+ DEPENDENCIES
109
+ bundler (~> 1.2)
110
+ css_parser
111
+ jeweler (~> 1.5)
112
+ nokogiri
113
+ rails
114
+ shoulda
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # image_scraper
2
2
 
3
+ [![Build Status](https://travis-ci.org/charlotte-ruby/image_scraper.png?branch=master)](http://travis-ci.org/charlotte-ruby/image_scraper)
4
+
3
5
  Simple utility that pulls image URLS from web page
4
6
 
5
7
  ## INSTALL
data/Rakefile CHANGED
@@ -32,16 +32,16 @@ Rake::TestTask.new(:test) do |test|
32
32
  test.verbose = true
33
33
  end
34
34
 
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |test|
37
- test.libs << 'test'
38
- test.pattern = 'test/**/test_*.rb'
39
- test.verbose = true
40
- end
35
+ #require 'rcov/rcovtask'
36
+ #Rcov::RcovTask.new do |test|
37
+ # test.libs << 'test'
38
+ # test.pattern = 'test/**/test_*.rb'
39
+ # test.verbose = true
40
+ #end
41
41
 
42
42
  task :default => :test
43
43
 
44
- require 'rake/rdoctask'
44
+ require 'rdoc/task'
45
45
  Rake::RDocTask.new do |rdoc|
46
46
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
47
47
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.7
1
+ 0.1.8
@@ -4,21 +4,23 @@
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
- s.name = %q{image_scraper}
8
- s.version = "0.1.7"
7
+ s.name = "image_scraper"
8
+ s.version = "0.1.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John McAliley"]
12
- s.date = %q{2012-01-02}
13
- s.description = %q{Simple utility to pull image urls from web page}
14
- s.email = %q{john.mcaliley@gmail.com}
12
+ s.date = "2013-01-17"
13
+ s.description = "Simple utility to pull image urls from web page"
14
+ s.email = "john.mcaliley@gmail.com"
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE.txt",
17
17
  "README.md"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
+ ".travis.yml",
21
22
  "Gemfile",
23
+ "Gemfile.lock",
22
24
  "LICENSE.txt",
23
25
  "README.md",
24
26
  "Rakefile",
@@ -29,18 +31,13 @@ Gem::Specification.new do |s|
29
31
  "lib/image_scraper/railtie.rb",
30
32
  "lib/image_scraper/util.rb"
31
33
  ]
32
- s.homepage = %q{http://github.com/charlotte-ruby/image_scraper}
34
+ s.homepage = "http://github.com/charlotte-ruby/image_scraper"
33
35
  s.licenses = ["MIT"]
34
36
  s.require_paths = ["lib"]
35
- s.rubygems_version = %q{1.3.7}
36
- s.summary = %q{Simple utility to pull image urls from web page}
37
- s.test_files = [
38
- "test/helper.rb",
39
- "test/test_image_scraper.rb"
40
- ]
37
+ s.rubygems_version = "1.8.24"
38
+ s.summary = "Simple utility to pull image urls from web page"
41
39
 
42
40
  if s.respond_to? :specification_version then
43
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
41
  s.specification_version = 3
45
42
 
46
43
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -48,9 +45,8 @@ Gem::Specification.new do |s|
48
45
  s.add_runtime_dependency(%q<css_parser>, [">= 0"])
49
46
  s.add_runtime_dependency(%q<rails>, [">= 0"])
50
47
  s.add_development_dependency(%q<shoulda>, [">= 0"])
51
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
- s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
53
- s.add_development_dependency(%q<rcov>, [">= 0"])
48
+ s.add_development_dependency(%q<bundler>, ["~> 1.2"])
49
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
54
50
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
55
51
  s.add_runtime_dependency(%q<css_parser>, [">= 0"])
56
52
  else
@@ -58,9 +54,8 @@ Gem::Specification.new do |s|
58
54
  s.add_dependency(%q<css_parser>, [">= 0"])
59
55
  s.add_dependency(%q<rails>, [">= 0"])
60
56
  s.add_dependency(%q<shoulda>, [">= 0"])
61
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
63
- s.add_dependency(%q<rcov>, [">= 0"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.2"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
64
59
  s.add_dependency(%q<nokogiri>, [">= 0"])
65
60
  s.add_dependency(%q<css_parser>, [">= 0"])
66
61
  end
@@ -69,9 +64,8 @@ Gem::Specification.new do |s|
69
64
  s.add_dependency(%q<css_parser>, [">= 0"])
70
65
  s.add_dependency(%q<rails>, [">= 0"])
71
66
  s.add_dependency(%q<shoulda>, [">= 0"])
72
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
73
- s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
74
- s.add_dependency(%q<rcov>, [">= 0"])
67
+ s.add_dependency(%q<bundler>, ["~> 1.2"])
68
+ s.add_dependency(%q<jeweler>, ["~> 1.5"])
75
69
  s.add_dependency(%q<nokogiri>, [">= 0"])
76
70
  s.add_dependency(%q<css_parser>, [">= 0"])
77
71
  end
@@ -33,8 +33,8 @@ module ImageScraper
33
33
  def stylesheet_images
34
34
  images = []
35
35
  stylesheets.each do |stylesheet|
36
- file = open(stylesheet)
37
- css = file.string rescue IO.read(file)
36
+ file = open(stylesheet) rescue next
37
+ css = file.string rescue IO.read(file) rescue next
38
38
 
39
39
  images += css.scan(/url\((.*?)\)/).collect do |image_url|
40
40
  image_url = URI.escape image_url[0]
metadata CHANGED
@@ -1,165 +1,156 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: image_scraper
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 1
8
- - 7
9
- segments_generated: true
10
- version: 0.1.7
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ prerelease:
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - John McAliley
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-01-02 00:00:00 -05:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
12
+ date: 2013-01-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
22
15
  name: nokogiri
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- segments_generated: true
31
- version: "0"
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :runtime
33
23
  prerelease: false
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
36
31
  name: css_parser
37
- requirement: &id002 !ruby/object:Gem::Requirement
38
- none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- segments:
43
- - 0
44
- segments_generated: true
45
- version: "0"
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
46
38
  type: :runtime
47
39
  prerelease: false
48
- version_requirements: *id002
49
- - !ruby/object:Gem::Dependency
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
50
47
  name: rails
51
- requirement: &id003 !ruby/object:Gem::Requirement
52
- none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- segments:
57
- - 0
58
- segments_generated: true
59
- version: "0"
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
60
54
  type: :runtime
61
55
  prerelease: false
62
- version_requirements: *id003
63
- - !ruby/object:Gem::Dependency
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
64
63
  name: shoulda
65
- requirement: &id004 !ruby/object:Gem::Requirement
66
- none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- segments:
71
- - 0
72
- segments_generated: true
73
- version: "0"
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
74
70
  type: :development
75
71
  prerelease: false
76
- version_requirements: *id004
77
- - !ruby/object:Gem::Dependency
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
78
79
  name: bundler
79
- requirement: &id005 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
80
81
  none: false
81
- requirements:
82
+ requirements:
82
83
  - - ~>
83
- - !ruby/object:Gem::Version
84
- segments:
85
- - 1
86
- - 0
87
- - 0
88
- segments_generated: true
89
- version: 1.0.0
84
+ - !ruby/object:Gem::Version
85
+ version: '1.2'
90
86
  type: :development
91
87
  prerelease: false
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '1.2'
94
+ - !ruby/object:Gem::Dependency
94
95
  name: jeweler
95
- requirement: &id006 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
96
97
  none: false
97
- requirements:
98
+ requirements:
98
99
  - - ~>
99
- - !ruby/object:Gem::Version
100
- segments:
101
- - 1
102
- - 5
103
- - 2
104
- segments_generated: true
105
- version: 1.5.2
106
- type: :development
107
- prerelease: false
108
- version_requirements: *id006
109
- - !ruby/object:Gem::Dependency
110
- name: rcov
111
- requirement: &id007 !ruby/object:Gem::Requirement
112
- none: false
113
- requirements:
114
- - - ">="
115
- - !ruby/object:Gem::Version
116
- segments:
117
- - 0
118
- segments_generated: true
119
- version: "0"
100
+ - !ruby/object:Gem::Version
101
+ version: '1.5'
120
102
  type: :development
121
103
  prerelease: false
122
- version_requirements: *id007
123
- - !ruby/object:Gem::Dependency
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '1.5'
110
+ - !ruby/object:Gem::Dependency
124
111
  name: nokogiri
125
- requirement: &id008 !ruby/object:Gem::Requirement
126
- none: false
127
- requirements:
128
- - - ">="
129
- - !ruby/object:Gem::Version
130
- segments:
131
- - 0
132
- segments_generated: true
133
- version: "0"
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
134
118
  type: :runtime
135
119
  prerelease: false
136
- version_requirements: *id008
137
- - !ruby/object:Gem::Dependency
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
138
127
  name: css_parser
139
- requirement: &id009 !ruby/object:Gem::Requirement
140
- none: false
141
- requirements:
142
- - - ">="
143
- - !ruby/object:Gem::Version
144
- segments:
145
- - 0
146
- segments_generated: true
147
- version: "0"
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
148
134
  type: :runtime
149
135
  prerelease: false
150
- version_requirements: *id009
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
151
142
  description: Simple utility to pull image urls from web page
152
143
  email: john.mcaliley@gmail.com
153
144
  executables: []
154
-
155
145
  extensions: []
156
-
157
- extra_rdoc_files:
146
+ extra_rdoc_files:
158
147
  - LICENSE.txt
159
148
  - README.md
160
- files:
149
+ files:
161
150
  - .document
151
+ - .travis.yml
162
152
  - Gemfile
153
+ - Gemfile.lock
163
154
  - LICENSE.txt
164
155
  - README.md
165
156
  - Rakefile
@@ -169,43 +160,32 @@ files:
169
160
  - lib/image_scraper/client.rb
170
161
  - lib/image_scraper/railtie.rb
171
162
  - lib/image_scraper/util.rb
172
- - test/helper.rb
173
- - test/test_image_scraper.rb
174
- has_rdoc: true
175
163
  homepage: http://github.com/charlotte-ruby/image_scraper
176
- licenses:
164
+ licenses:
177
165
  - MIT
178
166
  post_install_message:
179
167
  rdoc_options: []
180
-
181
- require_paths:
168
+ require_paths:
182
169
  - lib
183
- required_ruby_version: !ruby/object:Gem::Requirement
170
+ required_ruby_version: !ruby/object:Gem::Requirement
184
171
  none: false
185
- requirements:
186
- - - ">="
187
- - !ruby/object:Gem::Version
188
- hash: -168406416917257246
189
- segments:
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ segments:
190
177
  - 0
191
- segments_generated: true
192
- version: "0"
193
- required_rubygems_version: !ruby/object:Gem::Requirement
178
+ hash: 3624342284109897327
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
194
180
  none: false
195
- requirements:
196
- - - ">="
197
- - !ruby/object:Gem::Version
198
- segments:
199
- - 0
200
- segments_generated: true
201
- version: "0"
181
+ requirements:
182
+ - - ! '>='
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
202
185
  requirements: []
203
-
204
186
  rubyforge_project:
205
- rubygems_version: 1.3.7
187
+ rubygems_version: 1.8.24
206
188
  signing_key:
207
189
  specification_version: 3
208
190
  summary: Simple utility to pull image urls from web page
209
- test_files:
210
- - test/helper.rb
211
- - test/test_image_scraper.rb
191
+ test_files: []
@@ -1,18 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'test/unit'
11
- require 'shoulda'
12
-
13
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
- $LOAD_PATH.unshift(File.dirname(__FILE__))
15
- require 'image_scraper'
16
-
17
- class Test::Unit::TestCase
18
- end
@@ -1,112 +0,0 @@
1
- require 'pp'
2
- require 'helper'
3
-
4
-
5
- #TODO: these tests will not work forever. Try to test against a static web page instead of external URLs
6
- # Consider using https://raw.github.com/charlotte-ruby/image_scraper urls
7
-
8
- class TestImageScraper < Test::Unit::TestCase
9
- should "return list of all image urls on a web page with absolute paths" do
10
- images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
11
- "http://bits.wikimedia.org/images/wikimedia-button.png",
12
- "http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
13
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:include_css_images=>false)
14
- assert_equal images, scraper.image_urls
15
- end
16
-
17
- should "return a list of images with whitespace stripped from the src" do
18
- client = ImageScraper::Client.new("http://www.google.com")
19
- html = IO.read(File.dirname(__FILE__)+"/resources/extra_whitespace.html")
20
- client.doc = Nokogiri::HTML(html)
21
- images = ["http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter._V164348457_.jpg","http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter.jpg"]
22
- assert_equal images, client.image_urls
23
- end
24
-
25
- should "return list of all image urls on a web page with relative paths" do
26
- images = ["//bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
27
- "//bits.wikimedia.org/images/wikimedia-button.png",
28
- "//bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
29
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:convert_to_absolute_url=>false,:include_css_images=>false)
30
- assert_equal images, scraper.image_urls
31
- end
32
-
33
- should "return list of stylesheets contained in html page (relative path)" do
34
- doc = Nokogiri::HTML(IO.read(File.dirname(__FILE__)+"/resources/stylesheet_test.html"))
35
- domain = "http://test.com"
36
- assert_equal ["http://test.com/phoenix/testcentral.css","http://test.com/engine1/style.css"], ImageScraper::Client.new("http://test.com").stylesheets
37
- end
38
-
39
- should "return proper absolute url for a page and asset" do
40
- assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com","image.gif")
41
- assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","images/image.gif")
42
- assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","/images/image.gif")
43
- assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","image.gif")
44
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
45
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","images/image.gif")
46
- assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
47
- assert_equal "http://www.test.com/", ImageScraper::Util.absolute_url("http://www.test.com/")
48
- assert_equal "http://www.test.com/123/test.html", ImageScraper::Util.absolute_url("http://www.test.com/123/test.html")
49
- end
50
-
51
- should "return images from a stylesheet" do
52
- scraper = ImageScraper::Client.new("http://couponshack.com")
53
- assert scraper.stylesheet_images.include? ("http://couponshack.com/images/bg.jpg")
54
- end
55
-
56
- should "strip quotes from a url" do
57
- assert_equal "/images/test.png", ImageScraper::Util.strip_quotes("'/images/test.png'")
58
- assert_equal "http://www.somsite.com/images/test.png", ImageScraper::Util.strip_quotes("'http://www.somsite.com/images/test.png'")
59
- assert_equal "/images/test.png", ImageScraper::Util.strip_quotes('"/images/test.png"')
60
- end
61
-
62
- should "return domain section from a url" do
63
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this.html")
64
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this/")
65
- assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what")
66
- assert_equal "http://www.ug.ly", ImageScraper::Util.domain("http://www.ug.ly/what/is/this/")
67
- end
68
-
69
- should "return nil for doc if URL is invalid" do
70
- scraper = ImageScraper::Client.new("couponshack.com")
71
- assert scraper.doc.nil?
72
- end
73
-
74
- should "return empty arrays if URL is invalid" do
75
- scraper = ImageScraper::Client.new("couponshack.com")
76
- assert_equal [], scraper.image_urls
77
- assert_equal [], scraper.stylesheets
78
- assert_equal [], scraper.stylesheet_images
79
- assert_equal [], scraper.page_images
80
- end
81
-
82
- should "Handle a URL with unescaped spaces" do
83
- images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
84
- "http://bits.wikimedia.org/images/wikimedia-button.png",
85
- "http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
86
- scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard test image",:include_css_images=>false)
87
- assert_equal images, scraper.image_urls
88
- end
89
-
90
- should "Handle a page image with an unescaped url" do
91
- scraper = ImageScraper::Client.new ''
92
- scraper.doc = Nokogiri::HTML("<img src='http://test.com/unescaped path'>")
93
- assert_equal ['http://test.com/unescaped%20path'], scraper.page_images
94
- end
95
-
96
- should "Handle a stylesheet with an unescaped url" do
97
- scraper = ImageScraper::Client.new ''
98
- scraper.url = 'http://test.com'
99
- scraper.doc = Nokogiri::HTML("<link rel='stylesheet' href='http://test.com/unescaped path.css'>")
100
- assert_equal ['http://test.com/unescaped%20path.css'], scraper.stylesheets
101
- end
102
-
103
- should "Handle a stylesheet image with an unescaped url" do
104
- scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/stylesheet_unescaped_image.html', :include_css_images => true
105
- assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/some%20image.png'], scraper.stylesheet_images
106
- end
107
-
108
- should "Handle a stylesheet image with a relative url" do
109
- scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/relative_image_url.html', :include_css_images => true
110
- assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/test/images/some_image.png'], scraper.stylesheet_images
111
- end
112
- end