image_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+ gem "nokogiri"
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.5.2"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,22 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.5.2)
6
+ bundler (~> 1.0.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ nokogiri (1.4.4)
10
+ rake (0.9.0)
11
+ rcov (0.9.9)
12
+ shoulda (2.11.3)
13
+
14
+ PLATFORMS
15
+ ruby
16
+
17
+ DEPENDENCIES
18
+ bundler (~> 1.0.0)
19
+ jeweler (~> 1.5.2)
20
+ nokogiri
21
+ rcov
22
+ shoulda
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 John McAliley
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # image_scraper
2
+
3
+ Simple utility that pulls image URLS from web page
4
+
5
+ ## INSTALL
6
+
7
+ Add to your gemfile
8
+
9
+ gem "image_scraper"
10
+
11
+ Install w/ Bundler
12
+
13
+ bundle install
14
+
15
+ ## USAGE
16
+
17
+ Pull all the image URLs from a specified URL and convert all images URLs to absolute paths
18
+
19
+ ImageScraper.image_urls("http://www.rubygems.org")
20
+
21
+ Pull all images URLs from specified URL and use relative URLs
22
+
23
+ ImageScraper.image_urls("http://www.rubygems.org",false)
24
+
25
+ ## TODO
26
+
27
+ 1. Parse CSS files for images
28
+
29
+ ## Contributing to image_scraper
30
+
31
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
32
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
33
+ * Fork the project
34
+ * Start a feature/bugfix branch
35
+ * Commit and push until you are happy with your contribution
36
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
37
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
38
+
39
+ ## Copyright
40
+
41
+ Copyright (c) 2011 John McAliley. See LICENSE.txt for
42
+ further details.
43
+
data/Rakefile ADDED
@@ -0,0 +1,50 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "image_scraper"
16
+ gem.homepage = "http://github.com/charlotte-ruby/image_scraper"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{Simple utility to pull image urls from web page}
19
+ gem.description = %Q{Simple utility to pull image urls from web page}
20
+ gem.email = "john.mcaliley@gmail.com"
21
+ gem.authors = ["John McAliley"]
22
+ gem.add_dependency "nokogiri"
23
+ end
24
+ Jeweler::RubygemsDotOrgTasks.new
25
+
26
+ require 'rake/testtask'
27
+ Rake::TestTask.new(:test) do |test|
28
+ test.libs << 'lib' << 'test'
29
+ test.pattern = 'test/**/test_*.rb'
30
+ test.verbose = true
31
+ end
32
+
33
+ require 'rcov/rcovtask'
34
+ Rcov::RcovTask.new do |test|
35
+ test.libs << 'test'
36
+ test.pattern = 'test/**/test_*.rb'
37
+ test.verbose = true
38
+ end
39
+
40
+ task :default => :test
41
+
42
+ require 'rake/rdoctask'
43
+ Rake::RDocTask.new do |rdoc|
44
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
45
+
46
+ rdoc.rdoc_dir = 'rdoc'
47
+ rdoc.title = "image_scraper #{version}"
48
+ rdoc.rdoc_files.include('README*')
49
+ rdoc.rdoc_files.include('lib/**/*.rb')
50
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,70 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{image_scraper}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["John McAliley"]
12
+ s.date = %q{2011-05-27}
13
+ s.description = %q{Simple utility to pull image urls from web page}
14
+ s.email = %q{john.mcaliley@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.md"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ "Gemfile",
22
+ "Gemfile.lock",
23
+ "LICENSE.txt",
24
+ "README.md",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "image_scraper.gemspec",
28
+ "lib/image_scraper.rb",
29
+ "test/helper.rb",
30
+ "test/test_image_scraper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/charlotte-ruby/image_scraper}
33
+ s.licenses = ["MIT"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.7}
36
+ s.summary = %q{Simple utility to pull image urls from web page}
37
+ s.test_files = [
38
+ "test/helper.rb",
39
+ "test/test_image_scraper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
48
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
49
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.2"])
51
+ s.add_development_dependency(%q<rcov>, [">= 0"])
52
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
53
+ else
54
+ s.add_dependency(%q<nokogiri>, [">= 0"])
55
+ s.add_dependency(%q<shoulda>, [">= 0"])
56
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
58
+ s.add_dependency(%q<rcov>, [">= 0"])
59
+ s.add_dependency(%q<nokogiri>, [">= 0"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<nokogiri>, [">= 0"])
63
+ s.add_dependency(%q<shoulda>, [">= 0"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.5.2"])
66
+ s.add_dependency(%q<rcov>, [">= 0"])
67
+ s.add_dependency(%q<nokogiri>, [">= 0"])
68
+ end
69
+ end
70
+
@@ -0,0 +1,17 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+ module ImageScraper
5
+ def self.image_urls(url, convert_to_absolute_url=true)
6
+ uri = URI.parse(url)
7
+ domain = "#{uri.scheme}://#{uri.host}"
8
+ doc = Nokogiri::HTML(open url)
9
+ urls = []
10
+ doc.xpath("//img").each do |img|
11
+ image = img["src"]
12
+ image = domain + image if convert_to_absolute_url and !image.include?("://")
13
+ urls << image
14
+ end
15
+ urls
16
+ end
17
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'image_scraper'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ class TestImageScraper < Test::Unit::TestCase
4
+ should "return list of all image urls on a web page with absolute paths" do
5
+ images = ["http://upload.wikimedia.org/wikipedia/en/thumb/2/24/Lenna.png/200px-Lenna.png", "http://bits.wikimedia.org/skins-1.17/common/images/magnify-clip.png", "http://bits.wikimedia.org/skins-1.17/vector/images/search-ltr.png?301-2", "http://en.wikipedia.org/images/wikimedia-button.png", "http://bits.wikimedia.org/skins-1.17/common/images/poweredby_mediawiki_88x31.png"]
6
+ assert_equal ImageScraper.image_urls("http://en.wikipedia.org/wiki/Standard_test_image"), images
7
+ end
8
+
9
+ should "return list of all image urls on a web page with relative paths" do
10
+ images = ["http://upload.wikimedia.org/wikipedia/en/thumb/2/24/Lenna.png/200px-Lenna.png","http://bits.wikimedia.org/skins-1.17/common/images/magnify-clip.png","http://bits.wikimedia.org/skins-1.17/vector/images/search-ltr.png?301-2","/images/wikimedia-button.png","http://bits.wikimedia.org/skins-1.17/common/images/poweredby_mediawiki_88x31.png"]
11
+ assert_equal ImageScraper.image_urls("http://en.wikipedia.org/wiki/Standard_test_image",false), images
12
+ end
13
+ end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: image_scraper
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - John McAliley
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-05-27 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ prerelease: false
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: shoulda
35
+ requirement: &id002 !ruby/object:Gem::Requirement
36
+ none: false
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ segments:
41
+ - 0
42
+ version: "0"
43
+ type: :development
44
+ prerelease: false
45
+ version_requirements: *id002
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: &id003 !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ segments:
54
+ - 1
55
+ - 0
56
+ - 0
57
+ version: 1.0.0
58
+ type: :development
59
+ prerelease: false
60
+ version_requirements: *id003
61
+ - !ruby/object:Gem::Dependency
62
+ name: jeweler
63
+ requirement: &id004 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ segments:
69
+ - 1
70
+ - 5
71
+ - 2
72
+ version: 1.5.2
73
+ type: :development
74
+ prerelease: false
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: rcov
78
+ requirement: &id005 !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ segments:
84
+ - 0
85
+ version: "0"
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: *id005
89
+ - !ruby/object:Gem::Dependency
90
+ name: nokogiri
91
+ requirement: &id006 !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ segments:
97
+ - 0
98
+ version: "0"
99
+ type: :runtime
100
+ prerelease: false
101
+ version_requirements: *id006
102
+ description: Simple utility to pull image urls from web page
103
+ email: john.mcaliley@gmail.com
104
+ executables: []
105
+
106
+ extensions: []
107
+
108
+ extra_rdoc_files:
109
+ - LICENSE.txt
110
+ - README.md
111
+ files:
112
+ - .document
113
+ - Gemfile
114
+ - Gemfile.lock
115
+ - LICENSE.txt
116
+ - README.md
117
+ - Rakefile
118
+ - VERSION
119
+ - image_scraper.gemspec
120
+ - lib/image_scraper.rb
121
+ - test/helper.rb
122
+ - test/test_image_scraper.rb
123
+ has_rdoc: true
124
+ homepage: http://github.com/charlotte-ruby/image_scraper
125
+ licenses:
126
+ - MIT
127
+ post_install_message:
128
+ rdoc_options: []
129
+
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ none: false
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ hash: -2865953740112575330
138
+ segments:
139
+ - 0
140
+ version: "0"
141
+ required_rubygems_version: !ruby/object:Gem::Requirement
142
+ none: false
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ segments:
147
+ - 0
148
+ version: "0"
149
+ requirements: []
150
+
151
+ rubyforge_project:
152
+ rubygems_version: 1.3.7
153
+ signing_key:
154
+ specification_version: 3
155
+ summary: Simple utility to pull image urls from web page
156
+ test_files:
157
+ - test/helper.rb
158
+ - test/test_image_scraper.rb