ero_getter 0.1.6 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -1
- data/Guardfile +9 -0
- data/VERSION +1 -1
- data/ero_getter.gemspec +20 -9
- data/lib/downloader/nijigazou_sokuhou.rb +28 -0
- data/lib/ero_getter/base.rb +147 -0
- data/lib/ero_getter/utils.rb +11 -0
- data/lib/ero_getter.rb +23 -2
- data/spec/downloader/nijigazou_sokuhou_spec.rb +47 -0
- data/spec/ero_getter/base_spec.rb +113 -0
- data/spec/samples/nijigazou_sokuhou/first.html +1532 -0
- data/spec/samples/nijigazou_sokuhou/last.html +1171 -0
- data/spec/samples/nijigazou_sokuhou/middle.html +1499 -0
- data/spec/spec_helper.rb +4 -0
- metadata +45 -8
- data/lib/ero_getter/downloader/base.rb +0 -81
- data/lib/ero_getter/downloader.rb +0 -41
- data/spec/downloader/base_spec.rb +0 -57
- data/spec/downloader_spec.rb +0 -6
data/Gemfile
CHANGED
@@ -2,7 +2,7 @@ source :rubygems
|
|
2
2
|
|
3
3
|
gem 'activesupport'
|
4
4
|
gem 'httpclient'
|
5
|
-
gem '
|
5
|
+
gem 'zipruby'
|
6
6
|
gem 'nokogiri'
|
7
7
|
|
8
8
|
platforms :jruby do
|
@@ -11,6 +11,8 @@ end
|
|
11
11
|
|
12
12
|
group :development, :test do
|
13
13
|
gem 'rspec'
|
14
|
+
gem 'guard-rspec'
|
14
15
|
gem 'bundler'
|
15
16
|
gem 'jeweler'
|
17
|
+
gem 'fakeweb'
|
16
18
|
end
|
data/Guardfile
ADDED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
1.0.0
|
data/ero_getter.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "ero_getter"
|
8
|
-
s.version = "0.1.6"
|
8
|
+
s.version = "1.0.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["masarakki"]
|
12
|
-
s.date = "2012-05-
|
12
|
+
s.date = "2012-05-26"
|
13
13
|
s.description = "ero getter"
|
14
14
|
s.email = "masaki@hisme.net"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -22,17 +22,22 @@ Gem::Specification.new do |s|
|
|
22
22
|
".rvmrc",
|
23
23
|
".travis.yml",
|
24
24
|
"Gemfile",
|
25
|
+
"Guardfile",
|
25
26
|
"LICENSE.txt",
|
26
27
|
"README.rdoc",
|
27
28
|
"Rakefile",
|
28
29
|
"VERSION",
|
29
30
|
"ero_getter.gemspec",
|
31
|
+
"lib/downloader/nijigazou_sokuhou.rb",
|
30
32
|
"lib/ero_getter.rb",
|
31
|
-
"lib/ero_getter/downloader.rb",
|
32
|
-
"lib/ero_getter/downloader/base.rb",
|
33
|
-
"spec/downloader/base_spec.rb",
|
34
|
-
"spec/downloader_spec.rb",
|
33
|
+
"lib/ero_getter/base.rb",
|
34
|
+
"lib/ero_getter/utils.rb",
|
35
|
+
"spec/downloader/nijigazou_sokuhou_spec.rb",
|
36
|
+
"spec/ero_getter/base_spec.rb",
|
35
37
|
"spec/ero_getter_spec.rb",
|
38
|
+
"spec/samples/nijigazou_sokuhou/first.html",
|
39
|
+
"spec/samples/nijigazou_sokuhou/last.html",
|
40
|
+
"spec/samples/nijigazou_sokuhou/middle.html",
|
36
41
|
"spec/samples/sample.html",
|
37
42
|
"spec/spec_helper.rb"
|
38
43
|
]
|
@@ -48,31 +53,37 @@ Gem::Specification.new do |s|
|
|
48
53
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
54
|
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
50
55
|
s.add_runtime_dependency(%q<httpclient>, [">= 0"])
|
51
|
-
s.add_runtime_dependency(%q<
|
56
|
+
s.add_runtime_dependency(%q<zipruby>, [">= 0"])
|
52
57
|
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
53
58
|
s.add_runtime_dependency(%q<jruby-openssl>, [">= 0"])
|
54
59
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
60
|
+
s.add_development_dependency(%q<guard-rspec>, [">= 0"])
|
55
61
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
56
62
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
63
|
+
s.add_development_dependency(%q<fakeweb>, [">= 0"])
|
57
64
|
else
|
58
65
|
s.add_dependency(%q<activesupport>, [">= 0"])
|
59
66
|
s.add_dependency(%q<httpclient>, [">= 0"])
|
60
|
-
s.add_dependency(%q<
|
67
|
+
s.add_dependency(%q<zipruby>, [">= 0"])
|
61
68
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
62
69
|
s.add_dependency(%q<jruby-openssl>, [">= 0"])
|
63
70
|
s.add_dependency(%q<rspec>, [">= 0"])
|
71
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
64
72
|
s.add_dependency(%q<bundler>, [">= 0"])
|
65
73
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
66
75
|
end
|
67
76
|
else
|
68
77
|
s.add_dependency(%q<activesupport>, [">= 0"])
|
69
78
|
s.add_dependency(%q<httpclient>, [">= 0"])
|
70
|
-
s.add_dependency(%q<
|
79
|
+
s.add_dependency(%q<zipruby>, [">= 0"])
|
71
80
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
72
81
|
s.add_dependency(%q<jruby-openssl>, [">= 0"])
|
73
82
|
s.add_dependency(%q<rspec>, [">= 0"])
|
83
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
74
84
|
s.add_dependency(%q<bundler>, [">= 0"])
|
75
85
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
86
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
76
87
|
end
|
77
88
|
end
|
78
89
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class NijigazouSokuhou < EroGetter::Base
|
5
|
+
|
6
|
+
name '二次画像速報'
|
7
|
+
url %r{http://nijigazo.2chblog.jp/archives/\d+.html}
|
8
|
+
|
9
|
+
target ".article-body-more > a > img" do |path|
|
10
|
+
path.parent[:href] if path.parent[:href] =~ /jpe?g|png|gif$/
|
11
|
+
end
|
12
|
+
|
13
|
+
sub_directory do
|
14
|
+
path = targets.first.split('/')
|
15
|
+
d = path[3..5].join('')
|
16
|
+
chara = path[6]
|
17
|
+
File.join(chara, d)
|
18
|
+
end
|
19
|
+
|
20
|
+
after ["//a[@rel='prev']", "//a[@rel='next']"] do |path|
|
21
|
+
path.text.match(Regexp.escape(title_part))
|
22
|
+
end
|
23
|
+
|
24
|
+
def title_part
|
25
|
+
@title_part ||= title.split(/:/).last.match(/(.+?)(その.+)?$/)[1].strip.gsub(/&amp;/, '&')
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'active_support/inflector'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'zipruby'
|
6
|
+
|
7
|
+
class EroGetter::Base
|
8
|
+
def initialize(url, direction = :none)
|
9
|
+
raise unless url.match url_regex
|
10
|
+
@url = url
|
11
|
+
@direction = direction
|
12
|
+
end
|
13
|
+
|
14
|
+
def base_dir
|
15
|
+
self.class.to_s.underscore
|
16
|
+
end
|
17
|
+
|
18
|
+
def directory
|
19
|
+
unless @dir
|
20
|
+
@dir = File.join(EroGetter.directory, base_dir, sub_directory)
|
21
|
+
EroGetter.mkdir(@dir)
|
22
|
+
end
|
23
|
+
@dir
|
24
|
+
end
|
25
|
+
|
26
|
+
def http_client
|
27
|
+
@http_client ||= HTTPClient.new
|
28
|
+
end
|
29
|
+
|
30
|
+
def url
|
31
|
+
@url
|
32
|
+
end
|
33
|
+
|
34
|
+
def direction
|
35
|
+
@direction
|
36
|
+
end
|
37
|
+
|
38
|
+
def document
|
39
|
+
@document ||= Nokogiri::HTML(open(url).read)
|
40
|
+
end
|
41
|
+
|
42
|
+
def title
|
43
|
+
@title ||= document.title
|
44
|
+
end
|
45
|
+
|
46
|
+
def run
|
47
|
+
targets.each do |target_url|
|
48
|
+
if target_url =~ /.*\.zip$/
|
49
|
+
save_zip(target_url)
|
50
|
+
else
|
51
|
+
save_image(target_url)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
self.class.new(self.prev, :prev).run if run_prev?
|
55
|
+
self.class.new(self.next, :next).run if run_next?
|
56
|
+
end
|
57
|
+
|
58
|
+
def get_target(target)
|
59
|
+
response = http_client.get(target, :header => {:referer => url}, :follow_redirect => true)
|
60
|
+
raise unless response.status == 200
|
61
|
+
response
|
62
|
+
end
|
63
|
+
|
64
|
+
def save_image(target_url)
|
65
|
+
filename = File.basename(target_url)
|
66
|
+
response = get_target(target_url)
|
67
|
+
File.open(File.join(directory, filename), "wb") {|f| f.write response.body }
|
68
|
+
end
|
69
|
+
|
70
|
+
def save_zip(target_url)
|
71
|
+
response = get_target(target_url)
|
72
|
+
unzip(response.body).each do |filename, data|
|
73
|
+
File.open(File.join(dir, filename), "wb") {|f| f.write data }
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def unzip(zip_data)
|
78
|
+
result = []
|
79
|
+
Zip::Archive.open_buffer(zip_data) do |archive|
|
80
|
+
archive.num_files.times do |i|
|
81
|
+
entry_name = archive.get_name(i)
|
82
|
+
archive.fopen(entry_name) do |f|
|
83
|
+
result << [f.name, f.read]
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
result
|
88
|
+
end
|
89
|
+
|
90
|
+
class << self
|
91
|
+
def name(site_name)
|
92
|
+
define_method(:name) do
|
93
|
+
site_name
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def url(regex)
|
98
|
+
define_method(:url_regex) do
|
99
|
+
regex
|
100
|
+
end
|
101
|
+
EroGetter.add_mapping(regex, self)
|
102
|
+
end
|
103
|
+
|
104
|
+
def target(css_selector, &block)
|
105
|
+
define_method(:targets) do
|
106
|
+
unless instance_variable_defined?(:@targets)
|
107
|
+
items = document.css(css_selector).map do |elm|
|
108
|
+
yield(elm)
|
109
|
+
end
|
110
|
+
instance_variable_set(:@targets, items.compact)
|
111
|
+
end
|
112
|
+
instance_variable_get(:@targets)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def sub_directory(&block)
|
117
|
+
define_method(:sub_directory) do
|
118
|
+
unless instance_variable_defined?(:@sub_directory)
|
119
|
+
instance_variable_set(:@sub_directory, self.instance_eval(&block))
|
120
|
+
end
|
121
|
+
instance_variable_get(:@sub_directory)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def after(xpath, &block)
|
126
|
+
[:prev, :next].each_with_index do |method_name, index|
|
127
|
+
var_name = "@#{method_name}".to_sym
|
128
|
+
define_method(method_name) do
|
129
|
+
unless instance_variable_defined?(var_name)
|
130
|
+
tag = document.xpath(xpath[index]).first
|
131
|
+
instance_variable_set(var_name, instance_exec(tag, &block) ? tag[:href] : nil)
|
132
|
+
end
|
133
|
+
instance_variable_get(var_name)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
private
|
140
|
+
def run_next?
|
141
|
+
direction != :prev && respond_to?(:next) && self.next != nil
|
142
|
+
end
|
143
|
+
|
144
|
+
def run_prev?
|
145
|
+
direction != :next && respond_to?(:prev) && self.prev != nil
|
146
|
+
end
|
147
|
+
end
|
data/lib/ero_getter.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class EroGetter
|
2
|
-
autoload :Downloader, 'ero_getter/downloader'
|
2
|
+
autoload :Base, 'ero_getter/base'
|
3
|
+
autoload :Utils, 'ero_getter/utils'
|
3
4
|
|
4
5
|
def detect(url)
|
5
6
|
self.class.url_mapping.each_pair do |regex, klazz|
|
@@ -17,17 +18,37 @@ class EroGetter
|
|
17
18
|
|
18
19
|
class << self
|
19
20
|
def directory
|
20
|
-
File.join ENV['HOME'], 'ero_getter'
|
21
|
+
path = File.join ENV['HOME'], 'ero_getter'
|
22
|
+
puts path unless Dir.exists?(path)
|
23
|
+
path
|
24
|
+
end
|
25
|
+
|
26
|
+
def mkdir(path)
|
27
|
+
unless File.exists?(path)
|
28
|
+
basedir = File.dirname(path)
|
29
|
+
mkdir(basedir) unless File.exists?(basedir)
|
30
|
+
Dir.mkdir(path)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
|
23
34
|
def url_mapping
|
24
35
|
@url_mapping ||= {}
|
25
36
|
end
|
37
|
+
|
26
38
|
def add_mapping(regex, strategy)
|
27
39
|
url_mapping[regex] = strategy
|
28
40
|
end
|
41
|
+
|
29
42
|
def clean
|
30
43
|
@url_mapping = {}
|
31
44
|
end
|
32
45
|
end
|
33
46
|
end
|
47
|
+
|
48
|
+
Dir.glob(File.expand_path(File.dirname(__FILE__) + '/downloader/*.rb')) do |file|
|
49
|
+
begin
|
50
|
+
require file
|
51
|
+
rescue
|
52
|
+
puts "Load error in #{file}"
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe NijigazouSokuhou do
|
5
|
+
let(:url) { 'http://nijigazo.2chblog.jp/archives/52249806.html' }
|
6
|
+
subject { @dl = NijigazouSokuhou.new(url) }
|
7
|
+
|
8
|
+
before do
|
9
|
+
fake(:get, url, 'nijigazou_sokuhou/middle.html')
|
10
|
+
EroGetter.stub(:mkdir)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe :sample do
|
14
|
+
context :first_page do
|
15
|
+
before do
|
16
|
+
fake(:get, url, 'nijigazou_sokuhou/first.html')
|
17
|
+
end
|
18
|
+
|
19
|
+
its(:next) { should == url }
|
20
|
+
its(:prev) { should be_nil }
|
21
|
+
end
|
22
|
+
|
23
|
+
context :middle do
|
24
|
+
before do
|
25
|
+
fake(:get, url, 'nijigazou_sokuhou/middle.html')
|
26
|
+
end
|
27
|
+
|
28
|
+
its(:name) { should == '二次画像速報' }
|
29
|
+
its(:title) { should == '二次画像速報 : 【けいおん!】中野梓【あずにゃん】 その2' }
|
30
|
+
its(:title_part) { should == '【けいおん!】中野梓【あずにゃん】' }
|
31
|
+
its("targets.count") { should == 50 }
|
32
|
+
its("targets.first") { should == "http://img03.nijigazo.com/2012/05/11/azusa/0511azusa_0051.jpg" }
|
33
|
+
its(:sub_directory) { should == 'azusa/20120511' }
|
34
|
+
its(:prev) { should == 'http://nijigazo.2chblog.jp/archives/52249804.html' }
|
35
|
+
its(:next) { should == 'http://nijigazo.2chblog.jp/archives/52249808.html' }
|
36
|
+
end
|
37
|
+
|
38
|
+
context :last do
|
39
|
+
before do
|
40
|
+
fake(:get, url, 'nijigazou_sokuhou/last.html')
|
41
|
+
end
|
42
|
+
|
43
|
+
its(:next) { should be_nil }
|
44
|
+
its(:prev) { should == url }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EroGetter::Base do
|
4
|
+
let(:regex) { %r{http://example.net/\d+.html} }
|
5
|
+
before do
|
6
|
+
_regex = regex
|
7
|
+
fake(:get, regex, 'sample.html')
|
8
|
+
@klazz = Class.new(EroGetter::Base) do
|
9
|
+
name 'NijiEro BBS'
|
10
|
+
url _regex
|
11
|
+
|
12
|
+
target "ul#sources li a" do |elm|
|
13
|
+
elm[:href]
|
14
|
+
end
|
15
|
+
|
16
|
+
sub_directory do
|
17
|
+
targets.map{|x| x.split(%r{/}).last }.join('/')
|
18
|
+
end
|
19
|
+
end
|
20
|
+
@klazz.stub(:to_s).and_return('TestClass')
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "assign url_mapping" do
|
24
|
+
it { EroGetter.url_mapping.should have_key regex }
|
25
|
+
it { EroGetter.url_mapping[regex].should == @klazz }
|
26
|
+
end
|
27
|
+
|
28
|
+
describe :instance_methods do
|
29
|
+
subject { @dl }
|
30
|
+
context :good do
|
31
|
+
before do
|
32
|
+
@dl = @klazz.new('http://example.net/10101010.html')
|
33
|
+
EroGetter.stub(:mkdir).and_return(true)
|
34
|
+
EroGetter.stub('directory').and_return('/tmp')
|
35
|
+
end
|
36
|
+
its(:name) { should == 'NijiEro BBS' }
|
37
|
+
its(:url_regex) { should == regex }
|
38
|
+
its(:base_dir) { should == 'test_class' }
|
39
|
+
its(:http_client) { should be_a HTTPClient }
|
40
|
+
its(:document) { should be_a Nokogiri::HTML::Document }
|
41
|
+
its(:title) { should == 'EroGetter Server' }
|
42
|
+
its(:url) { should == 'http://example.net/10101010.html' }
|
43
|
+
its(:direction) { should == :none }
|
44
|
+
its(:targets) { should == ['https://github.com/masarakki/ero_getter_server',
|
45
|
+
'https://github.com/masarakki/ero_getter_chrome_extension'] }
|
46
|
+
its(:sub_directory) { should == 'ero_getter_server/ero_getter_chrome_extension' }
|
47
|
+
its(:directory) { should == '/tmp/test_class/ero_getter_server/ero_getter_chrome_extension' }
|
48
|
+
describe :after_run do
|
49
|
+
context :not_set_after do
|
50
|
+
its(:run_next?) { should be_false }
|
51
|
+
its(:run_prev?) { should be_false }
|
52
|
+
end
|
53
|
+
|
54
|
+
context :direction_none do
|
55
|
+
context :has_next do
|
56
|
+
before do
|
57
|
+
@dl.stub(:next).and_return('hoge')
|
58
|
+
end
|
59
|
+
its(:run_next?) { should be_true }
|
60
|
+
its(:run_prev?) { should be_false }
|
61
|
+
end
|
62
|
+
context :has_prev do
|
63
|
+
before do
|
64
|
+
@dl.stub(:prev).and_return('hoge')
|
65
|
+
end
|
66
|
+
its(:run_next?) { should be_false }
|
67
|
+
its(:run_prev?) { should be_true }
|
68
|
+
end
|
69
|
+
context :has_next_and_prev do
|
70
|
+
before do
|
71
|
+
@dl.stub(:prev).and_return('hoge')
|
72
|
+
@dl.stub(:next).and_return('hoge')
|
73
|
+
end
|
74
|
+
its(:run_next?) { should be_true }
|
75
|
+
its(:run_prev?) { should be_true }
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
context :direction_prev do
|
80
|
+
before do
|
81
|
+
@dl.stub(:direction).and_return(:prev)
|
82
|
+
end
|
83
|
+
context :has_next do
|
84
|
+
before do
|
85
|
+
@dl.stub(:next).and_return('hoge')
|
86
|
+
end
|
87
|
+
its(:run_next?) { should be_false }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context :direction_next do
|
92
|
+
before do
|
93
|
+
@dl.stub(:direction).and_return(:next)
|
94
|
+
end
|
95
|
+
context :has_pref do
|
96
|
+
before do
|
97
|
+
@dl.stub(:prev).and_return('hoge')
|
98
|
+
end
|
99
|
+
its(:run_prev?) { should be_false }
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
context :url_mismatch do
|
106
|
+
it {
|
107
|
+
lambda {
|
108
|
+
@klazz.new('http://example.com/10101010.html')
|
109
|
+
}.should raise_error
|
110
|
+
}
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|