image_scraper 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +4 -0
- data/Gemfile +3 -3
- data/Gemfile.lock +114 -0
- data/README.md +2 -0
- data/Rakefile +7 -7
- data/VERSION +1 -1
- data/image_scraper.gemspec +16 -22
- data/lib/image_scraper/client.rb +2 -2
- metadata +127 -147
- data/test/helper.rb +0 -18
- data/test/test_image_scraper.rb +0 -112
data/.travis.yml
ADDED
data/Gemfile
CHANGED
@@ -11,7 +11,7 @@ gem "rails"
|
|
11
11
|
# Include everything needed to run rake, tests, features, etc.
|
12
12
|
group :development do
|
13
13
|
gem "shoulda", ">= 0"
|
14
|
-
gem "bundler", "~> 1.0.0"
|
15
|
-
gem "jeweler", "~> 1.5.2"
|
16
|
-
gem "rcov", ">= 0"
|
14
|
+
gem "bundler", "~> 1.2"
|
15
|
+
gem "jeweler", "~> 1.5"
|
16
|
+
#gem "rcov", ">= 0"
|
17
17
|
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
actionmailer (3.2.10)
|
5
|
+
actionpack (= 3.2.10)
|
6
|
+
mail (~> 2.4.4)
|
7
|
+
actionpack (3.2.10)
|
8
|
+
activemodel (= 3.2.10)
|
9
|
+
activesupport (= 3.2.10)
|
10
|
+
builder (~> 3.0.0)
|
11
|
+
erubis (~> 2.7.0)
|
12
|
+
journey (~> 1.0.4)
|
13
|
+
rack (~> 1.4.0)
|
14
|
+
rack-cache (~> 1.2)
|
15
|
+
rack-test (~> 0.6.1)
|
16
|
+
sprockets (~> 2.2.1)
|
17
|
+
activemodel (3.2.10)
|
18
|
+
activesupport (= 3.2.10)
|
19
|
+
builder (~> 3.0.0)
|
20
|
+
activerecord (3.2.10)
|
21
|
+
activemodel (= 3.2.10)
|
22
|
+
activesupport (= 3.2.10)
|
23
|
+
arel (~> 3.0.2)
|
24
|
+
tzinfo (~> 0.3.29)
|
25
|
+
activeresource (3.2.10)
|
26
|
+
activemodel (= 3.2.10)
|
27
|
+
activesupport (= 3.2.10)
|
28
|
+
activesupport (3.2.10)
|
29
|
+
i18n (~> 0.6)
|
30
|
+
multi_json (~> 1.0)
|
31
|
+
addressable (2.3.2)
|
32
|
+
arel (3.0.2)
|
33
|
+
bourne (1.1.2)
|
34
|
+
mocha (= 0.10.5)
|
35
|
+
builder (3.0.4)
|
36
|
+
css_parser (1.2.6)
|
37
|
+
addressable
|
38
|
+
rdoc
|
39
|
+
erubis (2.7.0)
|
40
|
+
git (1.2.5)
|
41
|
+
hike (1.2.1)
|
42
|
+
i18n (0.6.1)
|
43
|
+
jeweler (1.8.4)
|
44
|
+
bundler (~> 1.0)
|
45
|
+
git (>= 1.2.5)
|
46
|
+
rake
|
47
|
+
rdoc
|
48
|
+
journey (1.0.4)
|
49
|
+
json (1.7.6)
|
50
|
+
mail (2.4.4)
|
51
|
+
i18n (>= 0.4.0)
|
52
|
+
mime-types (~> 1.16)
|
53
|
+
treetop (~> 1.4.8)
|
54
|
+
metaclass (0.0.1)
|
55
|
+
mime-types (1.19)
|
56
|
+
mocha (0.10.5)
|
57
|
+
metaclass (~> 0.0.1)
|
58
|
+
multi_json (1.5.0)
|
59
|
+
nokogiri (1.5.6)
|
60
|
+
polyglot (0.3.3)
|
61
|
+
rack (1.4.3)
|
62
|
+
rack-cache (1.2)
|
63
|
+
rack (>= 0.4)
|
64
|
+
rack-ssl (1.3.2)
|
65
|
+
rack
|
66
|
+
rack-test (0.6.2)
|
67
|
+
rack (>= 1.0)
|
68
|
+
rails (3.2.10)
|
69
|
+
actionmailer (= 3.2.10)
|
70
|
+
actionpack (= 3.2.10)
|
71
|
+
activerecord (= 3.2.10)
|
72
|
+
activeresource (= 3.2.10)
|
73
|
+
activesupport (= 3.2.10)
|
74
|
+
bundler (~> 1.0)
|
75
|
+
railties (= 3.2.10)
|
76
|
+
railties (3.2.10)
|
77
|
+
actionpack (= 3.2.10)
|
78
|
+
activesupport (= 3.2.10)
|
79
|
+
rack-ssl (~> 1.3.2)
|
80
|
+
rake (>= 0.8.7)
|
81
|
+
rdoc (~> 3.4)
|
82
|
+
thor (>= 0.14.6, < 2.0)
|
83
|
+
rake (10.0.3)
|
84
|
+
rdoc (3.12)
|
85
|
+
json (~> 1.4)
|
86
|
+
shoulda (3.3.2)
|
87
|
+
shoulda-context (~> 1.0.1)
|
88
|
+
shoulda-matchers (~> 1.4.1)
|
89
|
+
shoulda-context (1.0.2)
|
90
|
+
shoulda-matchers (1.4.2)
|
91
|
+
activesupport (>= 3.0.0)
|
92
|
+
bourne (~> 1.1.2)
|
93
|
+
sprockets (2.2.2)
|
94
|
+
hike (~> 1.2)
|
95
|
+
multi_json (~> 1.0)
|
96
|
+
rack (~> 1.0)
|
97
|
+
tilt (~> 1.1, != 1.3.0)
|
98
|
+
thor (0.16.0)
|
99
|
+
tilt (1.3.3)
|
100
|
+
treetop (1.4.12)
|
101
|
+
polyglot
|
102
|
+
polyglot (>= 0.3.1)
|
103
|
+
tzinfo (0.3.35)
|
104
|
+
|
105
|
+
PLATFORMS
|
106
|
+
ruby
|
107
|
+
|
108
|
+
DEPENDENCIES
|
109
|
+
bundler (~> 1.2)
|
110
|
+
css_parser
|
111
|
+
jeweler (~> 1.5)
|
112
|
+
nokogiri
|
113
|
+
rails
|
114
|
+
shoulda
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -32,16 +32,16 @@ Rake::TestTask.new(:test) do |test|
|
|
32
32
|
test.verbose = true
|
33
33
|
end
|
34
34
|
|
35
|
-
require 'rcov/rcovtask'
|
36
|
-
Rcov::RcovTask.new do |test|
|
37
|
-
test.libs << 'test'
|
38
|
-
test.pattern = 'test/**/test_*.rb'
|
39
|
-
test.verbose = true
|
40
|
-
end
|
35
|
+
#require 'rcov/rcovtask'
|
36
|
+
#Rcov::RcovTask.new do |test|
|
37
|
+
# test.libs << 'test'
|
38
|
+
# test.pattern = 'test/**/test_*.rb'
|
39
|
+
# test.verbose = true
|
40
|
+
#end
|
41
41
|
|
42
42
|
task :default => :test
|
43
43
|
|
44
|
-
require '
|
44
|
+
require 'rdoc/task'
|
45
45
|
Rake::RDocTask.new do |rdoc|
|
46
46
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
47
47
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.7
|
1
|
+
0.1.8
|
data/image_scraper.gemspec
CHANGED
@@ -4,21 +4,23 @@
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
8
|
-
s.version = "0.1.7"
|
7
|
+
s.name = "image_scraper"
|
8
|
+
s.version = "0.1.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John McAliley"]
|
12
|
-
s.date =
|
13
|
-
s.description =
|
14
|
-
s.email =
|
12
|
+
s.date = "2013-01-17"
|
13
|
+
s.description = "Simple utility to pull image urls from web page"
|
14
|
+
s.email = "john.mcaliley@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE.txt",
|
17
17
|
"README.md"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
|
+
".travis.yml",
|
21
22
|
"Gemfile",
|
23
|
+
"Gemfile.lock",
|
22
24
|
"LICENSE.txt",
|
23
25
|
"README.md",
|
24
26
|
"Rakefile",
|
@@ -29,18 +31,13 @@ Gem::Specification.new do |s|
|
|
29
31
|
"lib/image_scraper/railtie.rb",
|
30
32
|
"lib/image_scraper/util.rb"
|
31
33
|
]
|
32
|
-
s.homepage =
|
34
|
+
s.homepage = "http://github.com/charlotte-ruby/image_scraper"
|
33
35
|
s.licenses = ["MIT"]
|
34
36
|
s.require_paths = ["lib"]
|
35
|
-
s.rubygems_version =
|
36
|
-
s.summary =
|
37
|
-
s.test_files = [
|
38
|
-
"test/helper.rb",
|
39
|
-
"test/test_image_scraper.rb"
|
40
|
-
]
|
37
|
+
s.rubygems_version = "1.8.24"
|
38
|
+
s.summary = "Simple utility to pull image urls from web page"
|
41
39
|
|
42
40
|
if s.respond_to? :specification_version then
|
43
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
41
|
s.specification_version = 3
|
45
42
|
|
46
43
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
@@ -48,9 +45,8 @@ Gem::Specification.new do |s|
|
|
48
45
|
s.add_runtime_dependency(%q<css_parser>, [">= 0"])
|
49
46
|
s.add_runtime_dependency(%q<rails>, [">= 0"])
|
50
47
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
51
|
-
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
52
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
|
53
|
-
s.add_development_dependency(%q<rcov>, [">= 0"])
|
48
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.2"])
|
49
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
|
54
50
|
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
55
51
|
s.add_runtime_dependency(%q<css_parser>, [">= 0"])
|
56
52
|
else
|
@@ -58,9 +54,8 @@ Gem::Specification.new do |s|
|
|
58
54
|
s.add_dependency(%q<css_parser>, [">= 0"])
|
59
55
|
s.add_dependency(%q<rails>, [">= 0"])
|
60
56
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
61
|
-
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
62
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
63
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
57
|
+
s.add_dependency(%q<bundler>, ["~> 1.2"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
64
59
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
65
60
|
s.add_dependency(%q<css_parser>, [">= 0"])
|
66
61
|
end
|
@@ -69,9 +64,8 @@ Gem::Specification.new do |s|
|
|
69
64
|
s.add_dependency(%q<css_parser>, [">= 0"])
|
70
65
|
s.add_dependency(%q<rails>, [">= 0"])
|
71
66
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
72
|
-
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
73
|
-
s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
|
74
|
-
s.add_dependency(%q<rcov>, [">= 0"])
|
67
|
+
s.add_dependency(%q<bundler>, ["~> 1.2"])
|
68
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
75
69
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
76
70
|
s.add_dependency(%q<css_parser>, [">= 0"])
|
77
71
|
end
|
data/lib/image_scraper/client.rb
CHANGED
@@ -33,8 +33,8 @@ module ImageScraper
|
|
33
33
|
def stylesheet_images
|
34
34
|
images = []
|
35
35
|
stylesheets.each do |stylesheet|
|
36
|
-
file = open(stylesheet)
|
37
|
-
css = file.string rescue IO.read(file)
|
36
|
+
file = open(stylesheet) rescue next
|
37
|
+
css = file.string rescue IO.read(file) rescue next
|
38
38
|
|
39
39
|
images += css.scan(/url\((.*?)\)/).collect do |image_url|
|
40
40
|
image_url = URI.escape image_url[0]
|
metadata
CHANGED
@@ -1,165 +1,156 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: image_scraper
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 1
|
8
|
-
- 7
|
9
|
-
segments_generated: true
|
10
|
-
version: 0.1.7
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
prerelease:
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- John McAliley
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2013-01-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: nokogiri
|
23
|
-
requirement:
|
24
|
-
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
- 0
|
30
|
-
segments_generated: true
|
31
|
-
version: "0"
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :runtime
|
33
23
|
prerelease: false
|
34
|
-
version_requirements:
|
35
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
36
31
|
name: css_parser
|
37
|
-
requirement:
|
38
|
-
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
- 0
|
44
|
-
segments_generated: true
|
45
|
-
version: "0"
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
46
38
|
type: :runtime
|
47
39
|
prerelease: false
|
48
|
-
version_requirements:
|
49
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
50
47
|
name: rails
|
51
|
-
requirement:
|
52
|
-
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
- 0
|
58
|
-
segments_generated: true
|
59
|
-
version: "0"
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
60
54
|
type: :runtime
|
61
55
|
prerelease: false
|
62
|
-
version_requirements:
|
63
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
64
63
|
name: shoulda
|
65
|
-
requirement:
|
66
|
-
none: false
|
67
|
-
requirements:
|
68
|
-
- -
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
|
71
|
-
- 0
|
72
|
-
segments_generated: true
|
73
|
-
version: "0"
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
74
70
|
type: :development
|
75
71
|
prerelease: false
|
76
|
-
version_requirements:
|
77
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
78
79
|
name: bundler
|
79
|
-
requirement:
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
80
81
|
none: false
|
81
|
-
requirements:
|
82
|
+
requirements:
|
82
83
|
- - ~>
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
|
85
|
-
- 1
|
86
|
-
- 0
|
87
|
-
- 0
|
88
|
-
segments_generated: true
|
89
|
-
version: 1.0.0
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '1.2'
|
90
86
|
type: :development
|
91
87
|
prerelease: false
|
92
|
-
version_requirements:
|
93
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '1.2'
|
94
|
+
- !ruby/object:Gem::Dependency
|
94
95
|
name: jeweler
|
95
|
-
requirement:
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
96
97
|
none: false
|
97
|
-
requirements:
|
98
|
+
requirements:
|
98
99
|
- - ~>
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
|
101
|
-
- 1
|
102
|
-
- 5
|
103
|
-
- 2
|
104
|
-
segments_generated: true
|
105
|
-
version: 1.5.2
|
106
|
-
type: :development
|
107
|
-
prerelease: false
|
108
|
-
version_requirements: *id006
|
109
|
-
- !ruby/object:Gem::Dependency
|
110
|
-
name: rcov
|
111
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
112
|
-
none: false
|
113
|
-
requirements:
|
114
|
-
- - ">="
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
segments:
|
117
|
-
- 0
|
118
|
-
segments_generated: true
|
119
|
-
version: "0"
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '1.5'
|
120
102
|
type: :development
|
121
103
|
prerelease: false
|
122
|
-
version_requirements:
|
123
|
-
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.5'
|
110
|
+
- !ruby/object:Gem::Dependency
|
124
111
|
name: nokogiri
|
125
|
-
requirement:
|
126
|
-
none: false
|
127
|
-
requirements:
|
128
|
-
- -
|
129
|
-
- !ruby/object:Gem::Version
|
130
|
-
|
131
|
-
- 0
|
132
|
-
segments_generated: true
|
133
|
-
version: "0"
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
134
118
|
type: :runtime
|
135
119
|
prerelease: false
|
136
|
-
version_requirements:
|
137
|
-
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
138
127
|
name: css_parser
|
139
|
-
requirement:
|
140
|
-
none: false
|
141
|
-
requirements:
|
142
|
-
- -
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
|
145
|
-
- 0
|
146
|
-
segments_generated: true
|
147
|
-
version: "0"
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
148
134
|
type: :runtime
|
149
135
|
prerelease: false
|
150
|
-
version_requirements:
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
151
142
|
description: Simple utility to pull image urls from web page
|
152
143
|
email: john.mcaliley@gmail.com
|
153
144
|
executables: []
|
154
|
-
|
155
145
|
extensions: []
|
156
|
-
|
157
|
-
extra_rdoc_files:
|
146
|
+
extra_rdoc_files:
|
158
147
|
- LICENSE.txt
|
159
148
|
- README.md
|
160
|
-
files:
|
149
|
+
files:
|
161
150
|
- .document
|
151
|
+
- .travis.yml
|
162
152
|
- Gemfile
|
153
|
+
- Gemfile.lock
|
163
154
|
- LICENSE.txt
|
164
155
|
- README.md
|
165
156
|
- Rakefile
|
@@ -169,43 +160,32 @@ files:
|
|
169
160
|
- lib/image_scraper/client.rb
|
170
161
|
- lib/image_scraper/railtie.rb
|
171
162
|
- lib/image_scraper/util.rb
|
172
|
-
- test/helper.rb
|
173
|
-
- test/test_image_scraper.rb
|
174
|
-
has_rdoc: true
|
175
163
|
homepage: http://github.com/charlotte-ruby/image_scraper
|
176
|
-
licenses:
|
164
|
+
licenses:
|
177
165
|
- MIT
|
178
166
|
post_install_message:
|
179
167
|
rdoc_options: []
|
180
|
-
|
181
|
-
require_paths:
|
168
|
+
require_paths:
|
182
169
|
- lib
|
183
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
184
171
|
none: false
|
185
|
-
requirements:
|
186
|
-
- -
|
187
|
-
- !ruby/object:Gem::Version
|
188
|
-
|
189
|
-
segments:
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
segments:
|
190
177
|
- 0
|
191
|
-
|
192
|
-
|
193
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
|
+
hash: 3624342284109897327
|
179
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
194
180
|
none: false
|
195
|
-
requirements:
|
196
|
-
- -
|
197
|
-
- !ruby/object:Gem::Version
|
198
|
-
|
199
|
-
- 0
|
200
|
-
segments_generated: true
|
201
|
-
version: "0"
|
181
|
+
requirements:
|
182
|
+
- - ! '>='
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
202
185
|
requirements: []
|
203
|
-
|
204
186
|
rubyforge_project:
|
205
|
-
rubygems_version: 1.
|
187
|
+
rubygems_version: 1.8.24
|
206
188
|
signing_key:
|
207
189
|
specification_version: 3
|
208
190
|
summary: Simple utility to pull image urls from web page
|
209
|
-
test_files:
|
210
|
-
- test/helper.rb
|
211
|
-
- test/test_image_scraper.rb
|
191
|
+
test_files: []
|
data/test/helper.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
begin
|
4
|
-
Bundler.setup(:default, :development)
|
5
|
-
rescue Bundler::BundlerError => e
|
6
|
-
$stderr.puts e.message
|
7
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
-
exit e.status_code
|
9
|
-
end
|
10
|
-
require 'test/unit'
|
11
|
-
require 'shoulda'
|
12
|
-
|
13
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
-
require 'image_scraper'
|
16
|
-
|
17
|
-
class Test::Unit::TestCase
|
18
|
-
end
|
data/test/test_image_scraper.rb
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'pp'
|
2
|
-
require 'helper'
|
3
|
-
|
4
|
-
|
5
|
-
#TODO: these tests will not work forever. Try to test against a static web page instead of external URLs
|
6
|
-
# Consider using https://raw.github.com/charlotte-ruby/image_scraper urls
|
7
|
-
|
8
|
-
class TestImageScraper < Test::Unit::TestCase
|
9
|
-
should "return list of all image urls on a web page with absolute paths" do
|
10
|
-
images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
|
11
|
-
"http://bits.wikimedia.org/images/wikimedia-button.png",
|
12
|
-
"http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
|
13
|
-
scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:include_css_images=>false)
|
14
|
-
assert_equal images, scraper.image_urls
|
15
|
-
end
|
16
|
-
|
17
|
-
should "return a list of images with whitespace stripped from the src" do
|
18
|
-
client = ImageScraper::Client.new("http://www.google.com")
|
19
|
-
html = IO.read(File.dirname(__FILE__)+"/resources/extra_whitespace.html")
|
20
|
-
client.doc = Nokogiri::HTML(html)
|
21
|
-
images = ["http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter._V164348457_.jpg","http://g-ecx.images-amazon.com/images/G/01/SIMON/IsaacsonWalter.jpg"]
|
22
|
-
assert_equal images, client.image_urls
|
23
|
-
end
|
24
|
-
|
25
|
-
should "return list of all image urls on a web page with relative paths" do
|
26
|
-
images = ["//bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
|
27
|
-
"//bits.wikimedia.org/images/wikimedia-button.png",
|
28
|
-
"//bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
|
29
|
-
scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard_test_image",:convert_to_absolute_url=>false,:include_css_images=>false)
|
30
|
-
assert_equal images, scraper.image_urls
|
31
|
-
end
|
32
|
-
|
33
|
-
should "return list of stylesheets contained in html page (relative path)" do
|
34
|
-
doc = Nokogiri::HTML(IO.read(File.dirname(__FILE__)+"/resources/stylesheet_test.html"))
|
35
|
-
domain = "http://test.com"
|
36
|
-
assert_equal ["http://test.com/phoenix/testcentral.css","http://test.com/engine1/style.css"], ImageScraper::Client.new("http://test.com").stylesheets
|
37
|
-
end
|
38
|
-
|
39
|
-
should "return proper absolute url for a page and asset" do
|
40
|
-
assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com","image.gif")
|
41
|
-
assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","images/image.gif")
|
42
|
-
assert_equal "http://www.test.com/images/image.gif",ImageScraper::Util.absolute_url("http://www.test.com","/images/image.gif")
|
43
|
-
assert_equal "http://www.test.com/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","image.gif")
|
44
|
-
assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
|
45
|
-
assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","images/image.gif")
|
46
|
-
assert_equal "http://www.test.com/images/image.gif", ImageScraper::Util.absolute_url("http://www.test.com/","/images/image.gif")
|
47
|
-
assert_equal "http://www.test.com/", ImageScraper::Util.absolute_url("http://www.test.com/")
|
48
|
-
assert_equal "http://www.test.com/123/test.html", ImageScraper::Util.absolute_url("http://www.test.com/123/test.html")
|
49
|
-
end
|
50
|
-
|
51
|
-
should "return images from a stylesheet" do
|
52
|
-
scraper = ImageScraper::Client.new("http://couponshack.com")
|
53
|
-
assert scraper.stylesheet_images.include? ("http://couponshack.com/images/bg.jpg")
|
54
|
-
end
|
55
|
-
|
56
|
-
should "strip quotes from a url" do
|
57
|
-
assert_equal "/images/test.png", ImageScraper::Util.strip_quotes("'/images/test.png'")
|
58
|
-
assert_equal "http://www.somsite.com/images/test.png", ImageScraper::Util.strip_quotes("'http://www.somsite.com/images/test.png'")
|
59
|
-
assert_equal "/images/test.png", ImageScraper::Util.strip_quotes('"/images/test.png"')
|
60
|
-
end
|
61
|
-
|
62
|
-
should "return domain section from a url" do
|
63
|
-
assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this.html")
|
64
|
-
assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what/is/this/")
|
65
|
-
assert_equal "http://ug.ly", ImageScraper::Util.domain("http://ug.ly/what")
|
66
|
-
assert_equal "http://www.ug.ly", ImageScraper::Util.domain("http://www.ug.ly/what/is/this/")
|
67
|
-
end
|
68
|
-
|
69
|
-
should "return nil for doc if URL is invalid" do
|
70
|
-
scraper = ImageScraper::Client.new("couponshack.com")
|
71
|
-
assert scraper.doc.nil?
|
72
|
-
end
|
73
|
-
|
74
|
-
should "return empty arrays if URL is invalid" do
|
75
|
-
scraper = ImageScraper::Client.new("couponshack.com")
|
76
|
-
assert_equal [], scraper.image_urls
|
77
|
-
assert_equal [], scraper.stylesheets
|
78
|
-
assert_equal [], scraper.stylesheet_images
|
79
|
-
assert_equal [], scraper.page_images
|
80
|
-
end
|
81
|
-
|
82
|
-
should "Handle a URL with unescaped spaces" do
|
83
|
-
images = ["http://bits.wikimedia.org/skins-1.18/vector/images/search-ltr.png?303-4",
|
84
|
-
"http://bits.wikimedia.org/images/wikimedia-button.png",
|
85
|
-
"http://bits.wikimedia.org/skins-1.18/common/images/poweredby_mediawiki_88x31.png"]
|
86
|
-
scraper = ImageScraper::Client.new("http://en.wikipedia.org/wiki/Standard test image",:include_css_images=>false)
|
87
|
-
assert_equal images, scraper.image_urls
|
88
|
-
end
|
89
|
-
|
90
|
-
should "Handle a page image with an unescaped url" do
|
91
|
-
scraper = ImageScraper::Client.new ''
|
92
|
-
scraper.doc = Nokogiri::HTML("<img src='http://test.com/unescaped path'>")
|
93
|
-
assert_equal ['http://test.com/unescaped%20path'], scraper.page_images
|
94
|
-
end
|
95
|
-
|
96
|
-
should "Handle a stylesheet with an unescaped url" do
|
97
|
-
scraper = ImageScraper::Client.new ''
|
98
|
-
scraper.url = 'http://test.com'
|
99
|
-
scraper.doc = Nokogiri::HTML("<link rel='stylesheet' href='http://test.com/unescaped path.css'>")
|
100
|
-
assert_equal ['http://test.com/unescaped%20path.css'], scraper.stylesheets
|
101
|
-
end
|
102
|
-
|
103
|
-
should "Handle a stylesheet image with an unescaped url" do
|
104
|
-
scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/stylesheet_unescaped_image.html', :include_css_images => true
|
105
|
-
assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/some%20image.png'], scraper.stylesheet_images
|
106
|
-
end
|
107
|
-
|
108
|
-
should "Handle a stylesheet image with a relative url" do
|
109
|
-
scraper = ImageScraper::Client.new 'https://raw.github.com/charlotte-ruby/image_scraper/master/test/resources/relative_image_url.html', :include_css_images => true
|
110
|
-
assert_equal ['https://raw.github.com/charlotte-ruby/image_scraper/master/test/images/some_image.png'], scraper.stylesheet_images
|
111
|
-
end
|
112
|
-
end
|