ero_getter 0.1.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -1
- data/Guardfile +9 -0
- data/VERSION +1 -1
- data/ero_getter.gemspec +20 -9
- data/lib/downloader/nijigazou_sokuhou.rb +28 -0
- data/lib/ero_getter/base.rb +147 -0
- data/lib/ero_getter/utils.rb +11 -0
- data/lib/ero_getter.rb +23 -2
- data/spec/downloader/nijigazou_sokuhou_spec.rb +47 -0
- data/spec/ero_getter/base_spec.rb +113 -0
- data/spec/samples/nijigazou_sokuhou/first.html +1532 -0
- data/spec/samples/nijigazou_sokuhou/last.html +1171 -0
- data/spec/samples/nijigazou_sokuhou/middle.html +1499 -0
- data/spec/spec_helper.rb +4 -0
- metadata +45 -8
- data/lib/ero_getter/downloader/base.rb +0 -81
- data/lib/ero_getter/downloader.rb +0 -41
- data/spec/downloader/base_spec.rb +0 -57
- data/spec/downloader_spec.rb +0 -6
data/Gemfile
CHANGED
@@ -2,7 +2,7 @@ source :rubygems
|
|
2
2
|
|
3
3
|
gem 'activesupport'
|
4
4
|
gem 'httpclient'
|
5
|
-
gem '
|
5
|
+
gem 'zipruby'
|
6
6
|
gem 'nokogiri'
|
7
7
|
|
8
8
|
platforms :jruby do
|
@@ -11,6 +11,8 @@ end
|
|
11
11
|
|
12
12
|
group :development, :test do
|
13
13
|
gem 'rspec'
|
14
|
+
gem 'guard-rspec'
|
14
15
|
gem 'bundler'
|
15
16
|
gem 'jeweler'
|
17
|
+
gem 'fakeweb'
|
16
18
|
end
|
data/Guardfile
ADDED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
1.0.0
|
data/ero_getter.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "ero_getter"
|
8
|
-
s.version = "0.1.6"
|
8
|
+
s.version = "1.0.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["masarakki"]
|
12
|
-
s.date = "2012-05-
|
12
|
+
s.date = "2012-05-26"
|
13
13
|
s.description = "ero getter"
|
14
14
|
s.email = "masaki@hisme.net"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -22,17 +22,22 @@ Gem::Specification.new do |s|
|
|
22
22
|
".rvmrc",
|
23
23
|
".travis.yml",
|
24
24
|
"Gemfile",
|
25
|
+
"Guardfile",
|
25
26
|
"LICENSE.txt",
|
26
27
|
"README.rdoc",
|
27
28
|
"Rakefile",
|
28
29
|
"VERSION",
|
29
30
|
"ero_getter.gemspec",
|
31
|
+
"lib/downloader/nijigazou_sokuhou.rb",
|
30
32
|
"lib/ero_getter.rb",
|
31
|
-
"lib/ero_getter/downloader.rb",
|
32
|
-
"lib/ero_getter/downloader/base.rb",
|
33
|
-
"spec/downloader/base_spec.rb",
|
34
|
-
"spec/downloader_spec.rb",
|
33
|
+
"lib/ero_getter/base.rb",
|
34
|
+
"lib/ero_getter/utils.rb",
|
35
|
+
"spec/downloader/nijigazou_sokuhou_spec.rb",
|
36
|
+
"spec/ero_getter/base_spec.rb",
|
35
37
|
"spec/ero_getter_spec.rb",
|
38
|
+
"spec/samples/nijigazou_sokuhou/first.html",
|
39
|
+
"spec/samples/nijigazou_sokuhou/last.html",
|
40
|
+
"spec/samples/nijigazou_sokuhou/middle.html",
|
36
41
|
"spec/samples/sample.html",
|
37
42
|
"spec/spec_helper.rb"
|
38
43
|
]
|
@@ -48,31 +53,37 @@ Gem::Specification.new do |s|
|
|
48
53
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
54
|
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
50
55
|
s.add_runtime_dependency(%q<httpclient>, [">= 0"])
|
51
|
-
s.add_runtime_dependency(%q<
|
56
|
+
s.add_runtime_dependency(%q<zipruby>, [">= 0"])
|
52
57
|
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
53
58
|
s.add_runtime_dependency(%q<jruby-openssl>, [">= 0"])
|
54
59
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
60
|
+
s.add_development_dependency(%q<guard-rspec>, [">= 0"])
|
55
61
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
56
62
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
63
|
+
s.add_development_dependency(%q<fakeweb>, [">= 0"])
|
57
64
|
else
|
58
65
|
s.add_dependency(%q<activesupport>, [">= 0"])
|
59
66
|
s.add_dependency(%q<httpclient>, [">= 0"])
|
60
|
-
s.add_dependency(%q<
|
67
|
+
s.add_dependency(%q<zipruby>, [">= 0"])
|
61
68
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
62
69
|
s.add_dependency(%q<jruby-openssl>, [">= 0"])
|
63
70
|
s.add_dependency(%q<rspec>, [">= 0"])
|
71
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
64
72
|
s.add_dependency(%q<bundler>, [">= 0"])
|
65
73
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
66
75
|
end
|
67
76
|
else
|
68
77
|
s.add_dependency(%q<activesupport>, [">= 0"])
|
69
78
|
s.add_dependency(%q<httpclient>, [">= 0"])
|
70
|
-
s.add_dependency(%q<
|
79
|
+
s.add_dependency(%q<zipruby>, [">= 0"])
|
71
80
|
s.add_dependency(%q<nokogiri>, [">= 0"])
|
72
81
|
s.add_dependency(%q<jruby-openssl>, [">= 0"])
|
73
82
|
s.add_dependency(%q<rspec>, [">= 0"])
|
83
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
74
84
|
s.add_dependency(%q<bundler>, [">= 0"])
|
75
85
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
86
|
+
s.add_dependency(%q<fakeweb>, [">= 0"])
|
76
87
|
end
|
77
88
|
end
|
78
89
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class NijigazouSokuhou < EroGetter::Base
|
5
|
+
|
6
|
+
name '二次画像速報'
|
7
|
+
url %r{http://nijigazo.2chblog.jp/archives/\d+.html}
|
8
|
+
|
9
|
+
target ".article-body-more > a > img" do |path|
|
10
|
+
path.parent[:href] if path.parent[:href] =~ /jpe?g|png|gif$/
|
11
|
+
end
|
12
|
+
|
13
|
+
sub_directory do
|
14
|
+
path = targets.first.split('/')
|
15
|
+
d = path[3..5].join('')
|
16
|
+
chara = path[6]
|
17
|
+
File.join(chara, d)
|
18
|
+
end
|
19
|
+
|
20
|
+
after ["//a[@rel='prev']", "//a[@rel='next']"] do |path|
|
21
|
+
path.text.match(Regexp.escape(title_part))
|
22
|
+
end
|
23
|
+
|
24
|
+
def title_part
|
25
|
+
@title_part ||= title.split(/:/).last.match(/(.+?)(その.+)?$/)[1].strip.gsub(/&amp;/, '&')
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'active_support/inflector'
|
2
|
+
require 'httpclient'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'zipruby'
|
6
|
+
|
7
|
+
class EroGetter::Base
|
8
|
+
def initialize(url, direction = :none)
|
9
|
+
raise unless url.match url_regex
|
10
|
+
@url = url
|
11
|
+
@direction = direction
|
12
|
+
end
|
13
|
+
|
14
|
+
def base_dir
|
15
|
+
self.class.to_s.underscore
|
16
|
+
end
|
17
|
+
|
18
|
+
def directory
|
19
|
+
unless @dir
|
20
|
+
@dir = File.join(EroGetter.directory, base_dir, sub_directory)
|
21
|
+
EroGetter.mkdir(@dir)
|
22
|
+
end
|
23
|
+
@dir
|
24
|
+
end
|
25
|
+
|
26
|
+
def http_client
|
27
|
+
@http_client ||= HTTPClient.new
|
28
|
+
end
|
29
|
+
|
30
|
+
def url
|
31
|
+
@url
|
32
|
+
end
|
33
|
+
|
34
|
+
def direction
|
35
|
+
@direction
|
36
|
+
end
|
37
|
+
|
38
|
+
def document
|
39
|
+
@document ||= Nokogiri::HTML(open(url).read)
|
40
|
+
end
|
41
|
+
|
42
|
+
def title
|
43
|
+
@title ||= document.title
|
44
|
+
end
|
45
|
+
|
46
|
+
def run
|
47
|
+
targets.each do |target_url|
|
48
|
+
if target_url =~ /.*\.zip$/
|
49
|
+
save_zip(target_url)
|
50
|
+
else
|
51
|
+
save_image(target_url)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
self.class.new(self.prev, :prev).run if run_prev?
|
55
|
+
self.class.new(self.next, :next).run if run_next?
|
56
|
+
end
|
57
|
+
|
58
|
+
def get_target(target)
|
59
|
+
response = http_client.get(target, :header => {:referer => url}, :follow_redirect => true)
|
60
|
+
raise unless response.status == 200
|
61
|
+
response
|
62
|
+
end
|
63
|
+
|
64
|
+
def save_image(target_url)
|
65
|
+
filename = File.basename(target_url)
|
66
|
+
response = get_target(target_url)
|
67
|
+
File.open(File.join(directory, filename), "wb") {|f| f.write response.body }
|
68
|
+
end
|
69
|
+
|
70
|
+
def save_zip(target_url)
|
71
|
+
response = get_target(target_url)
|
72
|
+
unzip(response.body).each do |filename, data|
|
73
|
+
File.open(File.join(dir, filename), "wb") {|f| f.write data }
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def unzip(zip_data)
|
78
|
+
result = []
|
79
|
+
Zip::Archive.open_buffer(zip_data) do |archive|
|
80
|
+
archive.num_files.times do |i|
|
81
|
+
entry_name = archive.get_name(i)
|
82
|
+
archive.fopen(entry_name) do |f|
|
83
|
+
result << [f.name, f.read]
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
result
|
88
|
+
end
|
89
|
+
|
90
|
+
class << self
|
91
|
+
def name(site_name)
|
92
|
+
define_method(:name) do
|
93
|
+
site_name
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def url(regex)
|
98
|
+
define_method(:url_regex) do
|
99
|
+
regex
|
100
|
+
end
|
101
|
+
EroGetter.add_mapping(regex, self)
|
102
|
+
end
|
103
|
+
|
104
|
+
def target(css_selector, &block)
|
105
|
+
define_method(:targets) do
|
106
|
+
unless instance_variable_defined?(:@targets)
|
107
|
+
items = document.css(css_selector).map do |elm|
|
108
|
+
yield(elm)
|
109
|
+
end
|
110
|
+
instance_variable_set(:@targets, items.compact)
|
111
|
+
end
|
112
|
+
instance_variable_get(:@targets)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def sub_directory(&block)
|
117
|
+
define_method(:sub_directory) do
|
118
|
+
unless instance_variable_defined?(:@sub_directory)
|
119
|
+
instance_variable_set(:@sub_directory, self.instance_eval(&block))
|
120
|
+
end
|
121
|
+
instance_variable_get(:@sub_directory)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def after(xpath, &block)
|
126
|
+
[:prev, :next].each_with_index do |method_name, index|
|
127
|
+
var_name = "@#{method_name}".to_sym
|
128
|
+
define_method(method_name) do
|
129
|
+
unless instance_variable_defined?(var_name)
|
130
|
+
tag = document.xpath(xpath[index]).first
|
131
|
+
instance_variable_set(var_name, instance_exec(tag, &block) ? tag[:href] : nil)
|
132
|
+
end
|
133
|
+
instance_variable_get(var_name)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
private
|
140
|
+
def run_next?
|
141
|
+
direction != :prev && respond_to?(:next) && self.next != nil
|
142
|
+
end
|
143
|
+
|
144
|
+
def run_prev?
|
145
|
+
direction != :next && respond_to?(:prev) && self.prev != nil
|
146
|
+
end
|
147
|
+
end
|
data/lib/ero_getter.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class EroGetter
|
2
|
-
autoload :
|
2
|
+
autoload :Base, 'ero_getter/base'
|
3
|
+
autoload :Utils, 'ero_getter/utils'
|
3
4
|
|
4
5
|
def detect(url)
|
5
6
|
self.class.url_mapping.each_pair do |regex, klazz|
|
@@ -17,17 +18,37 @@ class EroGetter
|
|
17
18
|
|
18
19
|
class << self
|
19
20
|
def directory
|
20
|
-
File.join ENV['HOME'], 'ero_getter'
|
21
|
+
path = File.join ENV['HOME'], 'ero_getter'
|
22
|
+
puts path unless Dir.exists?(path)
|
23
|
+
path
|
24
|
+
end
|
25
|
+
|
26
|
+
def mkdir(path)
|
27
|
+
unless File.exists?(path)
|
28
|
+
basedir = File.dirname(path)
|
29
|
+
mkdir(basedir) unless File.exists?(basedir)
|
30
|
+
Dir.mkdir(path)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
|
23
34
|
def url_mapping
|
24
35
|
@url_mapping ||= {}
|
25
36
|
end
|
37
|
+
|
26
38
|
def add_mapping(regex, strategy)
|
27
39
|
url_mapping[regex] = strategy
|
28
40
|
end
|
41
|
+
|
29
42
|
def clean
|
30
43
|
@url_mapping = {}
|
31
44
|
end
|
32
45
|
end
|
33
46
|
end
|
47
|
+
|
48
|
+
Dir.glob(File.expand_path(File.dirname(__FILE__) + '/downloader/*.rb')) do |file|
|
49
|
+
begin
|
50
|
+
require file
|
51
|
+
rescue
|
52
|
+
puts "Load error in #{file}"
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe NijigazouSokuhou do
|
5
|
+
let(:url) { 'http://nijigazo.2chblog.jp/archives/52249806.html' }
|
6
|
+
subject { @dl = NijigazouSokuhou.new(url) }
|
7
|
+
|
8
|
+
before do
|
9
|
+
fake(:get, url, 'nijigazou_sokuhou/middle.html')
|
10
|
+
EroGetter.stub(:mkdir)
|
11
|
+
end
|
12
|
+
|
13
|
+
describe :sample do
|
14
|
+
context :first_page do
|
15
|
+
before do
|
16
|
+
fake(:get, url, 'nijigazou_sokuhou/first.html')
|
17
|
+
end
|
18
|
+
|
19
|
+
its(:next) { should == url }
|
20
|
+
its(:prev) { should be_nil }
|
21
|
+
end
|
22
|
+
|
23
|
+
context :middle do
|
24
|
+
before do
|
25
|
+
fake(:get, url, 'nijigazou_sokuhou/middle.html')
|
26
|
+
end
|
27
|
+
|
28
|
+
its(:name) { should == '二次画像速報' }
|
29
|
+
its(:title) { should == '二次画像速報 : 【けいおん!】中野梓【あずにゃん】 その2' }
|
30
|
+
its(:title_part) { should == '【けいおん!】中野梓【あずにゃん】' }
|
31
|
+
its("targets.count") { should == 50 }
|
32
|
+
its("targets.first") { should == "http://img03.nijigazo.com/2012/05/11/azusa/0511azusa_0051.jpg" }
|
33
|
+
its(:sub_directory) { should == 'azusa/20120511' }
|
34
|
+
its(:prev) { should == 'http://nijigazo.2chblog.jp/archives/52249804.html' }
|
35
|
+
its(:next) { should == 'http://nijigazo.2chblog.jp/archives/52249808.html' }
|
36
|
+
end
|
37
|
+
|
38
|
+
context :last do
|
39
|
+
before do
|
40
|
+
fake(:get, url, 'nijigazou_sokuhou/last.html')
|
41
|
+
end
|
42
|
+
|
43
|
+
its(:next) { should be_nil }
|
44
|
+
its(:prev) { should == url }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EroGetter::Base do
|
4
|
+
let(:regex) { %r{http://example.net/\d+.html} }
|
5
|
+
before do
|
6
|
+
_regex = regex
|
7
|
+
fake(:get, regex, 'sample.html')
|
8
|
+
@klazz = Class.new(EroGetter::Base) do
|
9
|
+
name 'NijiEro BBS'
|
10
|
+
url _regex
|
11
|
+
|
12
|
+
target "ul#sources li a" do |elm|
|
13
|
+
elm[:href]
|
14
|
+
end
|
15
|
+
|
16
|
+
sub_directory do
|
17
|
+
targets.map{|x| x.split(%r{/}).last }.join('/')
|
18
|
+
end
|
19
|
+
end
|
20
|
+
@klazz.stub(:to_s).and_return('TestClass')
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "assign url_mapping" do
|
24
|
+
it { EroGetter.url_mapping.should have_key regex }
|
25
|
+
it { EroGetter.url_mapping[regex].should == @klazz }
|
26
|
+
end
|
27
|
+
|
28
|
+
describe :instance_methods do
|
29
|
+
subject { @dl }
|
30
|
+
context :good do
|
31
|
+
before do
|
32
|
+
@dl = @klazz.new('http://example.net/10101010.html')
|
33
|
+
EroGetter.stub(:mkdir).and_return(true)
|
34
|
+
EroGetter.stub('directory').and_return('/tmp')
|
35
|
+
end
|
36
|
+
its(:name) { should == 'NijiEro BBS' }
|
37
|
+
its(:url_regex) { should == regex }
|
38
|
+
its(:base_dir) { should == 'test_class' }
|
39
|
+
its(:http_client) { should be_a HTTPClient }
|
40
|
+
its(:document) { should be_a Nokogiri::HTML::Document }
|
41
|
+
its(:title) { should == 'EroGetter Server' }
|
42
|
+
its(:url) { should == 'http://example.net/10101010.html' }
|
43
|
+
its(:direction) { should == :none }
|
44
|
+
its(:targets) { should == ['https://github.com/masarakki/ero_getter_server',
|
45
|
+
'https://github.com/masarakki/ero_getter_chrome_extension'] }
|
46
|
+
its(:sub_directory) { should == 'ero_getter_server/ero_getter_chrome_extension' }
|
47
|
+
its(:directory) { should == '/tmp/test_class/ero_getter_server/ero_getter_chrome_extension' }
|
48
|
+
describe :after_run do
|
49
|
+
context :not_set_after do
|
50
|
+
its(:run_next?) { should be_false }
|
51
|
+
its(:run_prev?) { should be_false }
|
52
|
+
end
|
53
|
+
|
54
|
+
context :direction_none do
|
55
|
+
context :has_next do
|
56
|
+
before do
|
57
|
+
@dl.stub(:next).and_return('hoge')
|
58
|
+
end
|
59
|
+
its(:run_next?) { should be_true }
|
60
|
+
its(:run_prev?) { should be_false }
|
61
|
+
end
|
62
|
+
context :has_prev do
|
63
|
+
before do
|
64
|
+
@dl.stub(:prev).and_return('hoge')
|
65
|
+
end
|
66
|
+
its(:run_next?) { should be_false }
|
67
|
+
its(:run_prev?) { should be_true }
|
68
|
+
end
|
69
|
+
context :has_next_and_prev do
|
70
|
+
before do
|
71
|
+
@dl.stub(:prev).and_return('hoge')
|
72
|
+
@dl.stub(:next).and_return('hoge')
|
73
|
+
end
|
74
|
+
its(:run_next?) { should be_true }
|
75
|
+
its(:run_prev?) { should be_true }
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
context :direction_prev do
|
80
|
+
before do
|
81
|
+
@dl.stub(:direction).and_return(:prev)
|
82
|
+
end
|
83
|
+
context :has_next do
|
84
|
+
before do
|
85
|
+
@dl.stub(:next).and_return('hoge')
|
86
|
+
end
|
87
|
+
its(:run_next?) { should be_false }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context :direction_next do
|
92
|
+
before do
|
93
|
+
@dl.stub(:direction).and_return(:next)
|
94
|
+
end
|
95
|
+
context :has_pref do
|
96
|
+
before do
|
97
|
+
@dl.stub(:prev).and_return('hoge')
|
98
|
+
end
|
99
|
+
its(:run_prev?) { should be_false }
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
context :url_mismatch do
|
106
|
+
it {
|
107
|
+
lambda {
|
108
|
+
@klazz.new('http://example.com/10101010.html')
|
109
|
+
}.should raise_error
|
110
|
+
}
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|