video_scraper 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/README +71 -0
- data/Rakefile +146 -0
- data/lib/www/video_scraper.rb +88 -0
- data/lib/www/video_scraper/adult_satellites.rb +27 -0
- data/lib/www/video_scraper/age_sage.rb +28 -0
- data/lib/www/video_scraper/ameba_vision.rb +22 -0
- data/lib/www/video_scraper/base.rb +88 -0
- data/lib/www/video_scraper/dailymotion.rb +30 -0
- data/lib/www/video_scraper/eic_book.rb +34 -0
- data/lib/www/video_scraper/moro_tube.rb +31 -0
- data/lib/www/video_scraper/nico_video.rb +68 -0
- data/lib/www/video_scraper/pornhub.rb +24 -0
- data/lib/www/video_scraper/pornotube.rb +39 -0
- data/lib/www/video_scraper/red_tube.rb +89 -0
- data/lib/www/video_scraper/tube8.rb +31 -0
- data/lib/www/video_scraper/veoh.rb +28 -0
- data/lib/www/video_scraper/you_porn.rb +26 -0
- data/lib/www/video_scraper/you_tube.rb +53 -0
- data/lib/www/video_scraper/your_file_host.rb +54 -0
- data/test/test_helper.rb +23 -0
- data/test/www/test_video_scraper.rb +43 -0
- data/test/www/video_scraper/test_adult_satellites.rb +13 -0
- data/test/www/video_scraper/test_age_sage.rb +13 -0
- data/test/www/video_scraper/test_ameba_vision.rb +12 -0
- data/test/www/video_scraper/test_base.rb +14 -0
- data/test/www/video_scraper/test_dailymotion.rb +14 -0
- data/test/www/video_scraper/test_eic_book.rb +14 -0
- data/test/www/video_scraper/test_moro_tube.rb +13 -0
- data/test/www/video_scraper/test_nico_video.rb +23 -0
- data/test/www/video_scraper/test_pornhub.rb +14 -0
- data/test/www/video_scraper/test_pornotube.rb +21 -0
- data/test/www/video_scraper/test_red_tube.rb +13 -0
- data/test/www/video_scraper/test_tube8.rb +14 -0
- data/test/www/video_scraper/test_veoh.rb +24 -0
- data/test/www/video_scraper/test_you_porn.rb +13 -0
- data/test/www/video_scraper/test_you_tube.rb +32 -0
- data/test/www/video_scraper/test_your_file_host.rb +14 -0
- metadata +133 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
= WWW::VideoScraper
|
2
|
+
|
3
|
+
* http://coderepos.org/share/browser/lang/ruby/video_scraper
|
4
|
+
* http://github.com/valda/video_scraper/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Web scraping library for video sharing sites.
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
Supported sites
|
13
|
+
|
14
|
+
* AdultSatellites
|
15
|
+
* AmebaVision
|
16
|
+
* Dailymotion
|
17
|
+
* MoroTube
|
18
|
+
* NICO NICO DOUGA
|
19
|
+
* Pornhub
|
20
|
+
* Pornotube
|
21
|
+
* RedTube
|
22
|
+
* Tube8
|
23
|
+
* Ura Agesage
|
24
|
+
* Veoh
|
25
|
+
* YouPorn
|
26
|
+
* YouTube
|
27
|
+
* YourFileHost
|
28
|
+
|
29
|
+
== SYNOPSIS:
|
30
|
+
|
31
|
+
>> require 'www/video_scraper'
|
32
|
+
>> scraper = WWW::VideoScraper.scrape('http://www.youtube.com/watch?v=OFPnvARUOHI')
|
33
|
+
>> scraper.video_url
|
34
|
+
=> "http://www.youtube.com/get_video?video_id=OFPnvARUOHI&t=OEgsToPDskIpQJU48rm4-sS1RtbItouY"
|
35
|
+
>> scraper.thumb_url
|
36
|
+
=> "http://i.ytimg.com/vi/OFPnvARUOHI/default.jpg"
|
37
|
+
|
38
|
+
== REQUIREMENTS:
|
39
|
+
|
40
|
+
* WWW::Mechanize
|
41
|
+
* Hpricot
|
42
|
+
* CGIAlt (recommended)
|
43
|
+
|
44
|
+
== INSTALL:
|
45
|
+
|
46
|
+
* sudo gem install valda-video_scraper
|
47
|
+
|
48
|
+
== LICENSE:
|
49
|
+
|
50
|
+
(The MIT License)
|
51
|
+
|
52
|
+
Copyright (c) 2009 YAMAGUCHI Seiji <valda at underscore.jp>
|
53
|
+
|
54
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
55
|
+
a copy of this software and associated documentation files (the
|
56
|
+
'Software'), to deal in the Software without restriction, including
|
57
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
58
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
59
|
+
permit persons to whom the Software is furnished to do so, subject to
|
60
|
+
the following conditions:
|
61
|
+
|
62
|
+
The above copyright notice and this permission notice shall be
|
63
|
+
included in all copies or substantial portions of the Software.
|
64
|
+
|
65
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
66
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
67
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
68
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
69
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
70
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
71
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'rake/contrib/sshpublisher'
|
10
|
+
require 'fileutils'
|
11
|
+
require 'lib/www/video_scraper'
|
12
|
+
include FileUtils
|
13
|
+
|
14
|
+
# Project metadata shared by the gem specification and the tasks below.
NAME = "video_scraper"
AUTHOR = "YAMAGUCHI Seiji"
EMAIL = "valda@underscore.jp"
DESCRIPTION = "Web scraping library for video sharing sites."
RUBYFORGE_PROJECT = "video_scraper"
HOMEPATH = "http://github.com/valda/video_scraper"
BIN_FILES = %w( )

# The gem version is taken from the library itself.
VERS = WWW::VideoScraper::VERSION
# Best-effort lookup of the committed revision from .svn/entries; nil when the
# file is missing or has no match. The capture must be \d+ (digits) -- a bare
# "d+" only matches literal "d" characters and never yields a revision.
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
# Extra files removed by the :clean task.
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
# RDoc command-line options used by both the gem spec and the rdoc task.
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]
|
33
|
+
|
34
|
+
# Running plain `rake` runs the tests; packaging always starts from a clean tree.
task :default => [:test]
task :package => [:clean]

# Unit tests. The test files in this project are named test_*.rb and live
# under test/www, so the pattern has to match that prefix form; the previous
# "*_test.rb" suffix pattern matched no file and the task ran nothing.
Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.pattern = "test/www/**/test_*.rb"
  t.verbose = true
end
|
42
|
+
|
43
|
+
# Gem specification built from the constants defined at the top of this file.
spec = Gem::Specification.new do |s|
  s.name = NAME
  s.version = VERS
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "ChangeLog"]
  s.rdoc_options += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary = DESCRIPTION
  s.description = DESCRIPTION
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPATH
  s.executables = BIN_FILES
  s.rubyforge_project = RUBYFORGE_PROJECT
  s.bindir = "bin"
  s.require_path = "lib"
  #s.autorequire = ""
  # Test files are named test_*.rb and are nested below test/; the former
  # "test/*_test.rb" glob matched nothing, leaving test_files empty.
  s.test_files = Dir["test/**/test_*.rb"]

  s.add_dependency('mechanize', '>=0.8.4')
  s.add_dependency('hpricot', '>=0.6.164')
  s.add_dependency('json', '>=1.1.3')
  #s.required_ruby_version = '>= 1.8.2'

  # Everything that ships in the gem.
  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end

# Defines the package tasks for the spec above (a tarball is built as well).
Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.gem_spec = spec
end
|
81
|
+
|
82
|
+
# Build the gem through `rake package`, then install the resulting file.
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Clean the working tree first, then remove the installed gem.
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end
|
91
|
+
|
92
|
+
|
93
|
+
# Generate HTML documentation into ./html.
# Set DOC_FILES to a comma-separated list to document only those files.
Rake::RDocTask.new do |doc_task|
  doc_task.rdoc_dir = 'html'
  doc_task.options += RDOC_OPTS
  doc_task.template = "resh"
  #doc_task.template = "#{ENV['template']}.rb" if ENV['template']
  requested = ENV['DOC_FILES']
  if requested
    doc_task.rdoc_files.include(requested.split(/,\s*/))
  else
    ['README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c'].each do |pattern|
      doc_task.rdoc_files.include(pattern)
    end
  end
end
|
106
|
+
|
107
|
+
desc "Publish to RubyForge"
task :rubyforge => [:rdoc, :package] do
  require 'rubyforge'
  publisher = Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, 'yamaguchi')
  publisher.upload
end

desc 'Package and upload the release to rubyforge.'
task :release => [:clean, :package] do |t|
  # The caller must pass VERSION=x.y.z and it must equal the library version.
  requested = ENV["VERSION"]
  abort "Must supply VERSION=x.y.z" unless requested
  abort "Versions don't match #{requested} vs #{VERS}" unless requested == VERS
  base_path = "pkg/#{NAME}-#{VERS}"

  require 'rubyforge'
  forge = RubyForge.new.configure
  puts "Logging in"
  forge.login

  config = forge.userconfig
  # config["release_notes"] = description if description
  # config["release_changes"] = changes if changes
  config["preformatted"] = true

  # Upload both the tarball and the gem produced by the package task.
  artifacts = %w(tgz gem).map { |ext| "#{base_path}.#{ext}" }.compact

  puts "Releasing #{NAME} v. #{VERS}"
  forge.add_release RUBYFORGE_PROJECT, NAME, VERS, *artifacts
end
|
137
|
+
|
138
|
+
desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form so the file handle is flushed and closed as soon as the spec
  # has been written, instead of being left open until the process exits.
  File.open("#{NAME}.gemspec", 'w') { |f| f.write spec.to_ruby }
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'mechanize'
|
7
|
+
require 'kconv'
|
8
|
+
require 'json'
|
9
|
+
require 'uri'
|
10
|
+
begin
|
11
|
+
require 'cgialt' unless defined? CGI
|
12
|
+
rescue LoadError
|
13
|
+
require 'cgi'
|
14
|
+
end
|
15
|
+
|
16
|
+
module WWW
  # Library entry point. Loads one scraper class per supported site and
  # dispatches a page URL to the first class whose URL pattern accepts it.
  module VideoScraper
    VERSION = '1.0.5'

    # File names (below video_scraper/) of the site-specific scrapers.
    MODULES_NAME = %w(adult_satellites age_sage ameba_vision dailymotion eic_book
                      moro_tube nico_video pornhub pornotube red_tube tube8 veoh
                      you_porn you_tube your_file_host)

    # Require each scraper file and resolve its class by camel-casing the
    # file name (e.g. "you_tube" -> YouTube).
    @@modules = MODULES_NAME.map do |name|
      require File.expand_path(File.join(File.dirname(__FILE__), 'video_scraper', name))
      const_get( name.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase } )
    end

    # Default options merged into every scrape call.
    @@options = {
      :logger => nil,
      :cache => nil,
    }

    class << self
      # Returns the list of loaded scraper classes.
      # (Previously read the misspelled, never-initialized @@nodules and
      # raised NameError on every call.)
      def modules
        @@modules
      end

      # Returns the default options hash.
      def options
        @@options
      end

      # Replaces the default options hash.
      def options=(opts)
        @@options = opts
      end

      # Yields the default options hash so the caller can modify it.
      # Raises ArgumentError when no block is given.
      def configure(&proc)
        raise ArgumentError, "Block is required." unless block_given?
        yield @@options
      end

      # Returns the first scraper class whose valid_url? accepts +url+,
      # or nil when none does.
      def find_module(url)
        @@modules.find { |mod| mod.valid_url?(url) }
      end

      # Searches @@modules for a scraper that can handle the given URL and
      # runs it.
      #
      # url:: page URL to scrape (required).
      # opt:: per-call options, merged over the defaults.
      #
      # Returns whatever the matching scraper class's scrape returns, or nil
      # when no scraper accepts the URL.
      #
      # Raises TryAgainLater on timeouts and on HTTP errors whose message
      # contains a 50x code, FileNotFound on HTTP errors whose message
      # contains a 40x code; any other exception is logged and re-raised.
      def scrape(url, opt = nil)
        opt = @@options.merge(opt || {})
        opt[:logger] ||= logger
        raise StandardError, "url param is requred" unless url

        logger.info "url: #{url}"
        if mod = find_module(url)
          logger.info "found module: #{mod.to_s}"
          return mod.scrape(url, opt)
        end
        logger.info "unsupport url."
        return nil
      # Only Timeout::Error is named here: the top-level TimeoutError constant
      # was merely an alias of it and no longer exists on Ruby 3.0+, where
      # referencing it makes this rescue clause itself raise NameError.
      rescue Timeout::Error, Errno::ETIMEDOUT => e
        logger.warn "  Timeout : #{e.to_s}"
        raise TryAgainLater, e.to_s
      rescue OpenURI::HTTPError => e
        raise TryAgainLater, e.to_s if e.to_s.match(/50\d/)
        raise FileNotFound, e.to_s if e.to_s.match(/40\d/)
        raise
      rescue Exception => e
        # Logged for diagnostics only; the exception is always re-raised.
        logger.error "#{e.class}: #{e.to_s}"
        raise e
      end

      private
      # Returns the configured default logger, installing a NullLogger in the
      # default options on first use when none was configured.
      def logger
        return @@options[:logger] if @@options[:logger]
        @@options[:logger] = NullLogger.new
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for asa.tv movie detail pages.
    class AdultSatellites < Base
      url_regex %r!http://(?:www\.)?asa\.tv/movie_detail\.php.*!

      # Fetches the page and fills in video_url, thumb_url, title and
      # embed_tag from its markup. Raises FileNotFound when the page has no
      # FlashVars <param> inside an <object>.
      def scrape
        page = Hpricot(http_get(@page_url).toutf8)
        param = page.at('//object //param[@name="FlashVars"]')
        raise FileNotFound unless param
        vars = CGI.parse(param.attributes['value'])
        @video_url = vars['videoName'][0]
        location = URI.parse(@page_url)
        # The thumbnail path is derived from the last two path segments of
        # the .flv URL.
        captured = @video_url.match(%r!/([[:alnum:]]+/[[:alnum:]]+)\.flv!)
        @thumb_url = "#{location.scheme}://#{location.host}/captured/#{captured[1]}_1.jpg" if captured
        # The title stays nil when the title element cannot be read.
        @title = page.at('//strong[@class="ptitle"]').inner_html rescue nil
        embed_field = page.at('//input[@name="embed"]')
        @embed_tag = CGI.unescapeHTML(embed_field.attributes['value']) if embed_field
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for adult.agesage.jp content pages.
    class AgeSage < Base
      url_regex %r!\Ahttp://adult\.agesage\.jp/contentsPage\.html\?mcd=[[:alnum:]]{16}!

      # Requests the XML counterpart of the page URL (".html" replaced by
      # ".xml") and reads video_url, thumb_url and title from its <movie>
      # element; embed_tag is built from the mcd id in the page URL.
      # Raises FileNotFound when the response body is nil or empty.
      def scrape
        @request_url = @page_url.sub('.html', '.xml')
        @response_body = http_get(@request_url)
        raise FileNotFound unless @response_body && !@response_body.empty?
        movie = Hpricot.XML(@response_body.toutf8).at('/movie')
        return unless movie
        @video_url = movie.at('/movieurl').inner_html
        @thumb_url = movie.at('/thumbnail').inner_html
        @title = movie.at('/title').inner_html
        id_match = @page_url.match(%r|agesage\.jp/contentsPage\.html\?mcd=([[:alnum:]]{16})|)
        mcd = id_match[1]
        @embed_tag = <<-HTML
<script type="text/javascript" src="http://adult.agesage.jp/js/past_uraui.js"></script>
<script type="text/javascript">Purauifla("mcd=#{mcd}", 320, 275);</script>
        HTML
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/base')
|
4
|
+
|
5
|
+
module WWW
  module VideoScraper
    # Scraper for vision.ameba.jp watch pages.
    class AmebaVision < Base
      url_regex %r!\Ahttp://vision\.ameba\.jp/watch\.do.*?\?movie=(\d+)!

      # Queries the detailMovie API with the movie id captured from the page
      # URL and reads title, page_url and thumb_url from the first <item>.
      # The video URL is derived from the thumbnail URL by string
      # substitution.
      def scrape
        movie_id = url_regex_match[1]
        api_url = "http://vision.ameba.jp/api/get/detailMovie.do?movie=#{movie_id}"
        feed = Hpricot.XML(http_get(api_url).toutf8)
        @title = feed.at('//item/title').inner_html
        @page_url = feed.at('//item/link').inner_html
        @thumb_url = feed.at('//item/imageUrlLarge').inner_html
        # Apply the thumbnail -> video rewrites in order, each replacing the
        # first occurrence only.
        rewrites = [['//vi', '//vm'], ['/jpg/', '/flv/'], ['_4.jpg', '.flv']]
        @video_url = rewrites.inject(@thumb_url) { |link, (from, to)| link.sub(from, to) }
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# -*- mode:ruby; coding:utf-8 -*-
|
2
|
+
|
3
|
+
module WWW
  module VideoScraper
    # Raised when the remote side timed out or answered 503.
    class TryAgainLater < RuntimeError; end
    # Raised by scrapers when the requested video cannot be found.
    class FileNotFound < RuntimeError; end

    # Logger stand-in that accepts any message and returns nil.
    class NullLogger
      def method_missing(name, *args); return nil; end
    end

    # Common behaviour of all site scrapers. A subclass declares the URLs it
    # handles with +url_regex+ and implements #scrape, which is expected to
    # fill in the attributes exposed below.
    class Base
      attr_reader :page_url, :video_url, :thumb_url, :embed_tag, :title

      ## class methods
      class << self
        # Declares the pattern (or array of patterns) of page URLs handled
        # by this class.
        def url_regex(regex)
          @url_regex = regex
        end

        # True when +url+ matches one of the declared patterns.
        def valid_url?(url)
          Array(@url_regex).any? { |r| r.match(url) }
        end

        # Builds an instance for +url+, runs its #scrape and returns the
        # instance.
        def scrape(url, opt = nil)
          instance = self.new(url, opt)
          instance.scrape
          instance
        end
      end

      # url:: page URL; must match one of the class's declared patterns.
      # opt:: options hash; :logger and :cache are read by this class.
      #
      # Raises StandardError when +url+ matches none of the patterns.
      def initialize(url, opt = nil)
        @page_url = url
        @opt = (opt || {})
        url_regex = self.class.instance_variable_get(:@url_regex)
        # any? stops at the first pattern that matches, leaving its MatchData
        # in @url_regex_match (nil when nothing matched).
        Array(url_regex).any? do |r|
          @url_regex_match = r.match(@page_url).freeze
        end
        raise StandardError, "url is not #{self.class.name} link: #{url}" if @url_regex_match.nil?
      end

      # Must be overridden by subclasses.
      def scrape
        raise StandardError, 'not implemented yet'
      end

      protected
      # MatchData of the page URL against the pattern that accepted it.
      def url_regex_match
        @url_regex_match
      end

      # Lazily created Mechanize agent.
      def agent
        @agent ||= WWW::Mechanize.new do |a|
          a.user_agent_alias = 'Windows IE 6'
        end
      end

      # Logger from the options; a NullLogger is installed when none was given.
      def logger
        return @opt[:logger] if @opt[:logger]
        @opt[:logger] = NullLogger.new
      end

      # Fetches +url+ and returns the response body.
      #
      # opt:: extra options merged over the default User-Agent header and
      #       passed to open.
      #
      # When a :cache option is present it must respond to :get and :set
      # (RuntimeError otherwise); the body is looked up there first and
      # stored after a miss.
      #
      # Raises TryAgainLater on timeouts and on HTTP errors whose message
      # contains "503"; other HTTP errors are re-raised unchanged.
      def http_get(url, opt = nil)
        open_opt = {
          "User-Agent" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
        }.merge( opt || {} )
        if @opt[:cache]
          unless @opt[:cache].respond_to?(:get) and @opt[:cache].respond_to?(:set)
            raise RuntimeError, 'As for cache object what responds to :get and :set is required.'
          end
          # Go through the logger helper: @opt[:logger] is nil when the caller
          # supplied a cache but no logger, and calling debug on it crashed.
          logger.debug 'use cache.'
          cache_key = "#{url}|#{open_opt}"
          unless content = @opt[:cache].get(cache_key)
            content = open(url, open_opt) {|fh| fh.read }
            @opt[:cache].set(cache_key, content)
          end
        else
          content = open(url, open_opt) {|fh| fh.read }
        end
        content
      rescue OpenURI::HTTPError => e
        raise TryAgainLater, e.to_s if e.to_s.include?('503')
        raise e
      # Timeout::Error alone is sufficient: the top-level TimeoutError was an
      # alias of it and is gone on Ruby 3.0+, where naming it raises NameError.
      rescue Timeout::Error, Errno::ETIMEDOUT => e
        raise TryAgainLater, e.to_s
      end
    end
  end
end
|