url-fetcher 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Binary file
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
# Fetch gems over HTTPS so that packages cannot be tampered with in transit.
source "https://rubygems.org"

# Gems needed by the library itself when it runs.
group :runtime do
  gem 'image_size'
  gem 'mechanize'
end

# Gems needed only to develop the gem:
# everything required to run rake, tests, features, etc.
group :development do
  gem "minitest", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "bundler", "~> 1.1.5"
  gem "jeweler", "~> 1.8.4"
  gem "rcov", '0.9.11'
  gem 'metrical'
end
@@ -0,0 +1,114 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ Saikuro (1.1.0)
5
+ activesupport (3.2.7)
6
+ i18n (~> 0.6)
7
+ multi_json (~> 1.0)
8
+ awesome_print (1.0.2)
9
+ chronic (0.3.0)
10
+ churn (0.0.13)
11
+ chronic (>= 0.2.3)
12
+ hirb
13
+ json_pure
14
+ main
15
+ ruby_parser (~> 2.0.4)
16
+ sexp_processor (~> 3.0.3)
17
+ colored (1.2)
18
+ domain_name (0.5.3)
19
+ unf (~> 0.0.3)
20
+ erubis (2.7.0)
21
+ flay (1.4.3)
22
+ ruby_parser (~> 2.0)
23
+ sexp_processor (~> 3.0)
24
+ flog (2.5.3)
25
+ ruby_parser (~> 2.0)
26
+ sexp_processor (~> 3.0)
27
+ git (1.2.5)
28
+ hirb (0.7.0)
29
+ i18n (0.6.0)
30
+ image_size (1.1.1)
31
+ jeweler (1.8.4)
32
+ bundler (~> 1.0)
33
+ git (>= 1.2.5)
34
+ rake
35
+ rdoc
36
+ json (1.7.4)
37
+ json_pure (1.7.4)
38
+ main (4.7.1)
39
+ mechanize (2.5.1)
40
+ domain_name (~> 0.5, >= 0.5.1)
41
+ mime-types (~> 1.17, >= 1.17.2)
42
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
43
+ net-http-persistent (~> 2.5, >= 2.5.2)
44
+ nokogiri (~> 1.4)
45
+ ntlm-http (~> 0.1, >= 0.1.1)
46
+ webrobots (~> 0.0, >= 0.0.9)
47
+ metric_fu (2.1.1)
48
+ Saikuro (>= 1.1.0)
49
+ activesupport (>= 2.0.0)
50
+ chronic (~> 0.3.0)
51
+ churn (>= 0.0.7)
52
+ flay (>= 1.2.1)
53
+ flog (>= 2.3.0)
54
+ rails_best_practices (>= 0.6.4)
55
+ rcov (>= 0.8.3.3)
56
+ reek (>= 1.2.6)
57
+ roodi (>= 2.1.0)
58
+ syntax
59
+ metrical (0.1.0)
60
+ metric_fu (~> 2.1.1)
61
+ rcov (~> 0.9)
62
+ mime-types (1.19)
63
+ minitest (3.3.0)
64
+ multi_json (1.3.6)
65
+ net-http-digest_auth (1.2.1)
66
+ net-http-persistent (2.7)
67
+ nokogiri (1.5.5)
68
+ ntlm-http (0.1.1)
69
+ progressbar (0.11.0)
70
+ rails_best_practices (1.10.1)
71
+ activesupport
72
+ awesome_print
73
+ colored
74
+ erubis
75
+ i18n
76
+ progressbar
77
+ sexp_processor
78
+ rake (0.9.2.2)
79
+ rcov (0.9.11)
80
+ rdoc (3.12)
81
+ json (~> 1.4)
82
+ reek (1.2.12)
83
+ ripper_ruby_parser (~> 0.0.7)
84
+ ruby2ruby (~> 1.2.5)
85
+ ruby_parser (~> 2.0)
86
+ sexp_processor (~> 3.0)
87
+ ripper_ruby_parser (0.0.8)
88
+ sexp_processor (~> 3.0)
89
+ roodi (2.1.0)
90
+ ruby_parser
91
+ ruby2ruby (1.2.5)
92
+ ruby_parser (~> 2.0)
93
+ sexp_processor (~> 3.0)
94
+ ruby_parser (2.0.6)
95
+ sexp_processor (~> 3.0)
96
+ sexp_processor (3.0.10)
97
+ syntax (1.0.0)
98
+ unf (0.0.5)
99
+ unf_ext
100
+ unf_ext (0.0.5)
101
+ webrobots (0.0.13)
102
+
103
+ PLATFORMS
104
+ ruby
105
+
106
+ DEPENDENCIES
107
+ bundler (~> 1.1.5)
108
+ image_size
109
+ jeweler (~> 1.8.4)
110
+ mechanize
111
+ metrical
112
+ minitest
113
+ rcov (= 0.9.11)
114
+ rdoc (~> 3.12)
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Nuwan Sameera
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,21 @@
1
+ url-fetcher
2
+ ===========
3
+ An easy way to retrieve the image URLs of a web page.
4
+
5
+ Contributing to url-fetcher
6
+ ===========
7
+
8
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
9
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
10
+ * Fork the project.
11
+ * Start a feature/bugfix branch.
12
+ * Commit and push until you are happy with your contribution.
13
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
14
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
15
+
16
+ Copyright
17
+ ===========
18
+
19
+ Copyright (c) 2012 Nuwan Sameera. See LICENSE.txt for further details.
20
+
21
+
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "url-fetcher"
18
+ gem.homepage = "https://github.com/nuwansh/url-fetcher"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Easy way to retrieve images urls of a webpage}
21
+ gem.description = %Q{Url fetcher gem scan given webpage and will find all images. This gem can have to sort images from size.}
22
+ gem.email = "nuwan28@gmail.com"
23
+ gem.authors = ["Nuwan Sameera"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'rcov/rcovtask'
36
+ Rcov::RcovTask.new do |test|
37
+ test.libs << 'test'
38
+ test.pattern = 'test/**/test_*.rb'
39
+ test.verbose = true
40
+ test.rcov_opts << '--exclude "gems/*"'
41
+ end
42
+
43
+ task :default => :test
44
+
45
+ require 'rdoc/task'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "url-fetcher #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,94 @@
1
+ require 'uri'
2
+ require 'open-uri'
3
+
4
module Fetch

  # Finds the images on a web page that are wider than a minimum width.
  #
  # Usage:
  #   fetcher = Fetch::UrlFetcher.new(:url => "example.com", :width => 150)
  #   fetcher.find
  #   fetcher.image_urls # => absolute URLs of the matching images
  #   fetcher.title      # => title of the fetched page
  #
  # NOTE: Mechanize and ImageSize are used below but are not required by this
  # file; they must already be loaded when #find is called.
  class UrlFetcher
    # Minimum image width used when no :width option is given.
    DEFAULT_WIDTH = 100

    attr_accessor :url
    attr_reader :title, :image_urls, :width

    # options[:url]   - address of the page to scan; "http://" is prepended
    #                   when the address has no scheme.
    # options[:width] - only images wider than this are reported
    #                   (defaults to DEFAULT_WIDTH).
    #
    # Raises ArgumentError when the URL is missing or cannot be parsed.
    def initialize(options={})
      given = options[:url]
      parsed = given.nil? ? nil : get_url_params(given)
      raise ArgumentError, "invalid URL: #{given.inspect}" if parsed.nil?

      @url = parsed.scheme.nil? ? "http://#{given}" : given
      @width = options[:width].nil? ? DEFAULT_WIDTH : options[:width]
    end

    # Fetches the page at #url and stores the page title in #title and the
    # URLs of all sufficiently wide images in #image_urls.
    #
    # Images whose address cannot be parsed, that have no file extension, or
    # whose size cannot be determined are skipped; they never abort the scan.
    #
    # Returns the page title.
    def find
      agent = Mechanize.new
      doc = agent.get(url)

      sources = doc.parser.xpath("//img/@src | //a/img/@src").map { |attr| attr.value }

      # map + compact keeps this usable on Rubies that predate filter_map.
      @image_urls = sources.map { |src| wide_image_uri(src) }.compact
      @title = doc.title
    end

    private

    # Returns the absolute URL for the image source +src+ when the image is
    # wider than #width, otherwise nil.
    def wide_image_uri(src)
      parsed = get_url_params(src)
      return nil if parsed.nil?

      # A source without a scheme is relative to the page being scanned.
      image_uri = parsed.scheme.nil? ? absolute_uri(src) : src
      return nil if image_uri.nil? || has_extention?(image_uri).empty?

      size = calculate_img_size(image_uri)
      return nil unless size

      size[0].to_i > width ? image_uri : nil
    end

    # Resolves +relative+ against the page URL; returns nil (after logging)
    # when the two cannot be combined.
    def absolute_uri(relative)
      URI.join(url, relative).to_s
    rescue URI::Error => err
      STDERR.puts err.message
      nil
    end

    # Parses +url+ into a URI object; logs the problem and returns nil when
    # the string is not a valid URI.
    def get_url_params(url)
      URI.parse(url)
    rescue URI::Error => err
      STDERR.puts err.message
      nil
    end

    # Returns the file extension found at the end of +uri+ ("" when there is
    # none or it cannot be determined).
    def has_extention?(uri)
      File.extname("#{uri}")
    rescue StandardError => err
      STDERR.puts err.message
      ""
    end

    # Downloads the image at +uri+ and returns what ImageSize#size reports
    # for it, or nil (after logging) when the download or parsing fails.
    #
    # The URI object's own #open (provided by open-uri) is used instead of
    # Kernel#open so that a page-supplied string can never be treated as a
    # local file or a command.
    def calculate_img_size(uri)
      URI.parse(uri).open("rb") { |fh| ImageSize.new(fh.read).size }
    rescue StandardError => err
      STDERR.puts err.message
      nil
    end

  end

end
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ require 'mechanize'
4
+ require 'image_size'
5
+ require 'uri'
6
+ require 'json'
7
+ require 'open-uri'
8
+
9
+ begin
10
+ Bundler.setup(:default, :development)
11
+ rescue Bundler::BundlerError => e
12
+ $stderr.puts e.message
13
+ $stderr.puts "Run `bundle install` to install missing gems"
14
+ exit e.status_code
15
+ end
16
+ require 'minitest/spec'
17
+ require 'minitest/unit'
18
+
19
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
20
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
21
+ require 'url-fetcher'
22
+
23
+ class MiniTest::Unit::TestCase
24
+ end
25
+
26
+ MiniTest::Unit.autorun
@@ -0,0 +1,41 @@
1
+ require 'helper'
2
+
3
+ describe Fetch::UrlFetcher do
4
+ before do
5
+ @submit_url = "http://google.com"
6
+ @submit_url_without_schem = "google.com"
7
+ @submit_wrong = "google"
8
+ end
9
+
10
+ describe "when submited correct URL" do
11
+ it "must respond with correct url" do
12
+ uri = Fetch::UrlFetcher.new({:url => @submit_url, :width => 150})
13
+ assert_equal @submit_url, uri.url
14
+ end
15
+ end
16
+
17
+ describe "when user submit URLs without schema" do
18
+ it "must respond with correct url" do
19
+ uri = Fetch::UrlFetcher.new({:url => @submit_url_without_schem, :width => 100})
20
+ assert_equal @submit_url, uri.url
21
+ end
22
+ end
23
+
24
+ # describe "when user submit wrong URL(google)" do
25
+ # it "must respond with error" do
26
+ # proc {
27
+ # uri = Fetch::UrlFetcher.new({:url => @submit_url, :width => 100})
28
+ # uri.custom_error
29
+ # }.must_raise Fetch::MyCustomException
30
+ # end
31
+ # end
32
+
33
+ describe "When system process given correct URL" do
34
+ it "must respond with paths of cannced images" do
35
+ uri = Fetch::UrlFetcher.new({:url => @submit_url, :width => 100})
36
+ uri.find
37
+ assert uri
38
+ end
39
+ end
40
+
41
+ end
@@ -0,0 +1,71 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "url-fetcher"
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Nuwan Sameera"]
12
+ s.date = "2012-08-03"
13
+ s.description = "Url fetcher gem scan given webpage and will find all images. This gem can have to sort images from size."
14
+ s.email = "nuwan28@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.md"
18
+ ]
19
+ s.files = [
20
+ ".DS_Store",
21
+ "Gemfile",
22
+ "Gemfile.lock",
23
+ "LICENSE.txt",
24
+ "README.md",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "lib/url-fetcher.rb",
28
+ "test/helper.rb",
29
+ "test/test_url-fetcher.rb",
30
+ "url-fetcher.gemspec"
31
+ ]
32
+ s.homepage = "https://github.com/nuwansh/url-fetcher"
33
+ s.licenses = ["MIT"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = "1.8.11"
36
+ s.summary = "Easy way to retrieve images urls of a webpage"
37
+
38
+ if s.respond_to? :specification_version then
39
+ s.specification_version = 3
40
+
41
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
42
+ s.add_runtime_dependency(%q<image_size>, [">= 0"])
43
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
44
+ s.add_development_dependency(%q<minitest>, [">= 0"])
45
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
46
+ s.add_development_dependency(%q<bundler>, ["~> 1.1.5"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
48
+ s.add_development_dependency(%q<rcov>, ["= 0.9.11"])
49
+ s.add_development_dependency(%q<metrical>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<image_size>, [">= 0"])
52
+ s.add_dependency(%q<mechanize>, [">= 0"])
53
+ s.add_dependency(%q<minitest>, [">= 0"])
54
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
55
+ s.add_dependency(%q<bundler>, ["~> 1.1.5"])
56
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
57
+ s.add_dependency(%q<rcov>, ["= 0.9.11"])
58
+ s.add_dependency(%q<metrical>, [">= 0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<image_size>, [">= 0"])
62
+ s.add_dependency(%q<mechanize>, [">= 0"])
63
+ s.add_dependency(%q<minitest>, [">= 0"])
64
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
65
+ s.add_dependency(%q<bundler>, ["~> 1.1.5"])
66
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
67
+ s.add_dependency(%q<rcov>, ["= 0.9.11"])
68
+ s.add_dependency(%q<metrical>, [">= 0"])
69
+ end
70
+ end
71
+
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url-fetcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Nuwan Sameera
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: image_size
16
+ requirement: &2160853960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2160853960
25
+ - !ruby/object:Gem::Dependency
26
+ name: mechanize
27
+ requirement: &2160852500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *2160852500
36
+ - !ruby/object:Gem::Dependency
37
+ name: minitest
38
+ requirement: &2160850220 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2160850220
47
+ - !ruby/object:Gem::Dependency
48
+ name: rdoc
49
+ requirement: &2160848140 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '3.12'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2160848140
58
+ - !ruby/object:Gem::Dependency
59
+ name: bundler
60
+ requirement: &2160862820 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 1.1.5
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2160862820
69
+ - !ruby/object:Gem::Dependency
70
+ name: jeweler
71
+ requirement: &2160858240 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: 1.8.4
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *2160858240
80
+ - !ruby/object:Gem::Dependency
81
+ name: rcov
82
+ requirement: &2160893600 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - =
86
+ - !ruby/object:Gem::Version
87
+ version: 0.9.11
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *2160893600
91
+ - !ruby/object:Gem::Dependency
92
+ name: metrical
93
+ requirement: &2160892720 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *2160892720
102
+ description: Url fetcher gem scan given webpage and will find all images. This gem
103
+ can have to sort images from size.
104
+ email: nuwan28@gmail.com
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files:
108
+ - LICENSE.txt
109
+ - README.md
110
+ files:
111
+ - .DS_Store
112
+ - Gemfile
113
+ - Gemfile.lock
114
+ - LICENSE.txt
115
+ - README.md
116
+ - Rakefile
117
+ - VERSION
118
+ - lib/url-fetcher.rb
119
+ - test/helper.rb
120
+ - test/test_url-fetcher.rb
121
+ - url-fetcher.gemspec
122
+ homepage: https://github.com/nuwansh/url-fetcher
123
+ licenses:
124
+ - MIT
125
+ post_install_message:
126
+ rdoc_options: []
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ! '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ segments:
136
+ - 0
137
+ hash: 3732475507414345840
138
+ required_rubygems_version: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubyforge_project:
146
+ rubygems_version: 1.8.11
147
+ signing_key:
148
+ specification_version: 3
149
+ summary: Easy way to retrieve images urls of a webpage
150
+ test_files: []