ero_getter 0.1.6 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -2,7 +2,7 @@ source :rubygems
2
2
 
3
3
  gem 'activesupport'
4
4
  gem 'httpclient'
5
- gem 'rubyzip'
5
+ gem 'zipruby'
6
6
  gem 'nokogiri'
7
7
 
8
8
  platforms :jruby do
@@ -11,6 +11,8 @@ end
11
11
 
12
12
  group :development, :test do
13
13
  gem 'rspec'
14
+ gem 'guard-rspec'
14
15
  gem 'bundler'
15
16
  gem 'jeweler'
17
+ gem 'fakeweb'
16
18
  end
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
# Guard configuration: runs RSpec (version 2) when watched files change.
# More info at https://github.com/guard/guard#readme

guard('rspec', :version => 2) do
  # Spec files are watched as-is.
  watch(%r{^spec/.+_spec\.rb$})

  # A file under lib/ maps to the spec file with the same relative name.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # The spec helper maps to the whole spec directory.
  watch('spec/spec_helper.rb') do
    "spec"
  end
end
9
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 1.0.0
data/ero_getter.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "ero_getter"
8
- s.version = "0.1.6"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["masarakki"]
12
- s.date = "2012-05-25"
12
+ s.date = "2012-05-26"
13
13
  s.description = "ero getter"
14
14
  s.email = "masaki@hisme.net"
15
15
  s.extra_rdoc_files = [
@@ -22,17 +22,22 @@ Gem::Specification.new do |s|
22
22
  ".rvmrc",
23
23
  ".travis.yml",
24
24
  "Gemfile",
25
+ "Guardfile",
25
26
  "LICENSE.txt",
26
27
  "README.rdoc",
27
28
  "Rakefile",
28
29
  "VERSION",
29
30
  "ero_getter.gemspec",
31
+ "lib/downloader/nijigazou_sokuhou.rb",
30
32
  "lib/ero_getter.rb",
31
- "lib/ero_getter/downloader.rb",
32
- "lib/ero_getter/downloader/base.rb",
33
- "spec/downloader/base_spec.rb",
34
- "spec/downloader_spec.rb",
33
+ "lib/ero_getter/base.rb",
34
+ "lib/ero_getter/utils.rb",
35
+ "spec/downloader/nijigazou_sokuhou_spec.rb",
36
+ "spec/ero_getter/base_spec.rb",
35
37
  "spec/ero_getter_spec.rb",
38
+ "spec/samples/nijigazou_sokuhou/first.html",
39
+ "spec/samples/nijigazou_sokuhou/last.html",
40
+ "spec/samples/nijigazou_sokuhou/middle.html",
36
41
  "spec/samples/sample.html",
37
42
  "spec/spec_helper.rb"
38
43
  ]
@@ -48,31 +53,37 @@ Gem::Specification.new do |s|
48
53
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
54
  s.add_runtime_dependency(%q<activesupport>, [">= 0"])
50
55
  s.add_runtime_dependency(%q<httpclient>, [">= 0"])
51
- s.add_runtime_dependency(%q<rubyzip>, [">= 0"])
56
+ s.add_runtime_dependency(%q<zipruby>, [">= 0"])
52
57
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
53
58
  s.add_runtime_dependency(%q<jruby-openssl>, [">= 0"])
54
59
  s.add_development_dependency(%q<rspec>, [">= 0"])
60
+ s.add_development_dependency(%q<guard-rspec>, [">= 0"])
55
61
  s.add_development_dependency(%q<bundler>, [">= 0"])
56
62
  s.add_development_dependency(%q<jeweler>, [">= 0"])
63
+ s.add_development_dependency(%q<fakeweb>, [">= 0"])
57
64
  else
58
65
  s.add_dependency(%q<activesupport>, [">= 0"])
59
66
  s.add_dependency(%q<httpclient>, [">= 0"])
60
- s.add_dependency(%q<rubyzip>, [">= 0"])
67
+ s.add_dependency(%q<zipruby>, [">= 0"])
61
68
  s.add_dependency(%q<nokogiri>, [">= 0"])
62
69
  s.add_dependency(%q<jruby-openssl>, [">= 0"])
63
70
  s.add_dependency(%q<rspec>, [">= 0"])
71
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
64
72
  s.add_dependency(%q<bundler>, [">= 0"])
65
73
  s.add_dependency(%q<jeweler>, [">= 0"])
74
+ s.add_dependency(%q<fakeweb>, [">= 0"])
66
75
  end
67
76
  else
68
77
  s.add_dependency(%q<activesupport>, [">= 0"])
69
78
  s.add_dependency(%q<httpclient>, [">= 0"])
70
- s.add_dependency(%q<rubyzip>, [">= 0"])
79
+ s.add_dependency(%q<zipruby>, [">= 0"])
71
80
  s.add_dependency(%q<nokogiri>, [">= 0"])
72
81
  s.add_dependency(%q<jruby-openssl>, [">= 0"])
73
82
  s.add_dependency(%q<rspec>, [">= 0"])
83
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
74
84
  s.add_dependency(%q<bundler>, [">= 0"])
75
85
  s.add_dependency(%q<jeweler>, [">= 0"])
86
+ s.add_dependency(%q<fakeweb>, [">= 0"])
76
87
  end
77
88
  end
78
89
 
@@ -0,0 +1,28 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'nokogiri'
3
+
4
# Downloader for nijigazo.2chblog.jp article pages.
#
# Declares, through the EroGetter::Base macros, which URLs it handles, which
# links on a page are download targets, where the files are stored, and which
# neighbouring articles belong to the same series.
class NijigazouSokuhou < EroGetter::Base

  name '二次画像速報'
  # Dots are escaped so that only the literal host name and ".html" match.
  url %r{http://nijigazo\.2chblog\.jp/archives/\d+\.html}

  # Each thumbnail <img> is wrapped in an <a> pointing at the full-size file;
  # keep the link only when it ends in an image extension. The alternation is
  # grouped so the anchor applies to every extension, not just "gif".
  target ".article-body-more > a > img" do |path|
    href = path.parent[:href]
    href if href =~ /\.(?:jpe?g|png|gif)\z/
  end

  # Builds "<segment 6>/<segments 3..5 joined>" from the first target URL,
  # e.g. ".../2012/05/11/azusa/xxx.jpg" => "azusa/20120511".
  sub_directory do
    path = targets.first.split('/')
    d = path[3..5].join('')
    chara = path[6]
    File.join(chara, d)
  end

  # A prev/next link is followed only when its text contains this article's
  # title part. The nil check covers pages that have no such link at all.
  after ["//a[@rel='prev']", "//a[@rel='next']"] do |path|
    path && path.text.match(Regexp.escape(title_part))
  end

  # Returns the part of the page title after the last ':' with a trailing
  # "その…" suffix removed, surrounding whitespace stripped and "&amp;"
  # decoded to "&".
  def title_part
    @title_part ||= title.split(/:/).last.match(/(.+?)(その.+)?$/)[1].strip.gsub(/&amp;/, '&')
  end

end
@@ -0,0 +1,147 @@
1
+ require 'active_support/inflector'
2
+ require 'httpclient'
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require 'zipruby'
6
+
7
# Base class for site-specific downloaders.
#
# A subclass describes a site with the class-level macros defined at the
# bottom of this class (name, url, target, sub_directory and, optionally,
# after). An instance downloads every target found on one page and, when
# +after+ was declared, continues with the neighbouring pages.
class EroGetter::Base
  # url       - the page this instance works on.
  # direction - :none for a starting page, or :prev / :next when the instance
  #             was created while walking in that direction.
  attr_reader :url, :direction

  # Raises RuntimeError when +url+ does not match the regex registered with
  # the +url+ macro.
  def initialize(url, direction = :none)
    raise "url does not match #{url_regex.inspect}: #{url}" unless url.match(url_regex)
    @url = url
    @direction = direction
  end

  # Directory name derived from the class name (underscored).
  # Uses to_s because the class-level +name+ macro shadows Class#name.
  def base_dir
    self.class.to_s.underscore
  end

  # Full path of the download directory; created through EroGetter.mkdir on
  # first access and memoized afterwards.
  def directory
    unless @dir
      @dir = File.join(EroGetter.directory, base_dir, sub_directory)
      EroGetter.mkdir(@dir)
    end
    @dir
  end

  # Memoized HTTPClient used for fetching targets.
  def http_client
    @http_client ||= HTTPClient.new
  end

  # Parsed HTML of the page (memoized).
  #
  # The URL is parsed first and read through open-uri's URI#read instead of
  # Kernel#open: Kernel#open would run a command for a string starting with
  # "|" and no longer opens URLs on current Rubies; URI#read also closes the
  # connection once the body has been read.
  def document
    @document ||= Nokogiri::HTML(URI.parse(url).read)
  end

  # Title of the page (memoized).
  def title
    @title ||= document.title
  end

  # Downloads every target of this page, then walks to the previous and/or
  # next page when one is available (see run_prev? / run_next?).
  def run
    targets.each do |target_url|
      if target_url =~ /\.zip\z/
        save_zip(target_url)
      else
        save_image(target_url)
      end
    end
    self.class.new(self.prev, :prev).run if run_prev?
    self.class.new(self.next, :next).run if run_next?
  end

  # Fetches +target+ with the page URL as referer, following redirects.
  # Raises RuntimeError unless the final status is 200.
  def get_target(target)
    response = http_client.get(target, :header => {:referer => url}, :follow_redirect => true)
    raise "unexpected status #{response.status} for #{target}" unless response.status == 200
    response
  end

  # Saves the response body under the target's basename inside +directory+.
  def save_image(target_url)
    filename = File.basename(target_url)
    response = get_target(target_url)
    File.open(File.join(directory, filename), "wb") {|f| f.write response.body }
  end

  # Downloads a zip archive and writes each contained file into +directory+.
  def save_zip(target_url)
    response = get_target(target_url)
    unzip(response.body).each do |filename, data|
      # Entries whose name ends in "/" are directories, not files.
      next if filename =~ %r{/\z}
      # Only the basename is used so an entry name such as "../../x" cannot
      # write outside of the download directory.
      File.open(File.join(directory, File.basename(filename)), "wb") {|f| f.write data }
    end
  end

  # Returns an array of [entry_name, content] pairs for every entry of the
  # in-memory zip data.
  def unzip(zip_data)
    result = []
    Zip::Archive.open_buffer(zip_data) do |archive|
      archive.num_files.times do |i|
        entry_name = archive.get_name(i)
        archive.fopen(entry_name) do |f|
          result << [f.name, f.read]
        end
      end
    end
    result
  end

  class << self
    # Macro: defines the instance method +name+ returning +site_name+.
    def name(site_name)
      define_method(:name) do
        site_name
      end
    end

    # Macro: defines the instance method +url_regex+ and registers this class
    # for +regex+ in EroGetter's url mapping.
    def url(regex)
      define_method(:url_regex) do
        regex
      end
      EroGetter.add_mapping(regex, self)
    end

    # Macro: defines the instance method +targets+. Every element matched by
    # +css_selector+ is passed to the block; nil results are dropped and the
    # resulting list is memoized per instance.
    def target(css_selector, &block)
      define_method(:targets) do
        unless instance_variable_defined?(:@targets)
          items = document.css(css_selector).map { |elm| block.call(elm) }
          instance_variable_set(:@targets, items.compact)
        end
        instance_variable_get(:@targets)
      end
    end

    # Macro: defines the instance method +sub_directory+, whose value is the
    # result of evaluating the block in the instance (memoized).
    def sub_directory(&block)
      define_method(:sub_directory) do
        unless instance_variable_defined?(:@sub_directory)
          instance_variable_set(:@sub_directory, self.instance_eval(&block))
        end
        instance_variable_get(:@sub_directory)
      end
    end

    # Macro: defines the instance methods +prev+ and +next+.
    #
    # xpath - two-element array: the XPath of the "prev" link followed by the
    #         XPath of the "next" link.
    # block - evaluated in the instance with the first matching node; the
    #         node's href is returned when the block is truthy, nil otherwise.
    #
    # When the XPath matches nothing the result is nil and the block is not
    # called, so blocks do not have to deal with a nil node.
    def after(xpath, &block)
      [:prev, :next].each_with_index do |method_name, index|
        var_name = "@#{method_name}".to_sym
        define_method(method_name) do
          unless instance_variable_defined?(var_name)
            tag = document.xpath(xpath[index]).first
            href = tag[:href] if tag && instance_exec(tag, &block)
            instance_variable_set(var_name, href)
          end
          instance_variable_get(var_name)
        end
      end
    end
  end

  private

  # True when walking forward is allowed: this instance was not reached by
  # walking backwards, the subclass declared +after+, and a next page exists.
  def run_next?
    direction != :prev && respond_to?(:next) && !self.next.nil?
  end

  # True when walking backward is allowed: this instance was not reached by
  # walking forwards, the subclass declared +after+, and a previous page exists.
  def run_prev?
    direction != :next && respond_to?(:prev) && !self.prev.nil?
  end
end
@@ -0,0 +1,11 @@
1
# File-system helpers for EroGetter.
class EroGetter::Utils
  class << self
    # Creates +path+ and any missing parent directories.
    # Does nothing (and returns nil) when +path+ already exists.
    #
    # File.exist? is used because the File.exists? alias was removed in
    # Ruby 3.2.
    def mkdir(path)
      return if File.exist?(path)

      basedir = File.dirname(path)
      mkdir(basedir) unless File.exist?(basedir)
      Dir.mkdir(path)
    end
  end
end
data/lib/ero_getter.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  class EroGetter
2
- autoload :Downloader, 'ero_getter/downloader'
2
+ autoload :Base, 'ero_getter/base'
3
+ autoload :Utils, 'ero_getter/utils'
3
4
 
4
5
  def detect(url)
5
6
  self.class.url_mapping.each_pair do |regex, klazz|
@@ -17,17 +18,37 @@ class EroGetter
17
18
 
18
19
  class << self
19
20
  def directory
20
- File.join ENV['HOME'], 'ero_getter'
21
+ path = File.join ENV['HOME'], 'ero_getter'
22
+ puts path unless Dir.exists?(path)
23
+ path
24
+ end
25
+
26
+ def mkdir(path)
27
+ unless File.exists?(path)
28
+ basedir = File.dirname(path)
29
+ mkdir(basedir) unless File.exists?(basedir)
30
+ Dir.mkdir(path)
31
+ end
21
32
  end
22
33
 
23
34
  def url_mapping
24
35
  @url_mapping ||= {}
25
36
  end
37
+
26
38
  def add_mapping(regex, strategy)
27
39
  url_mapping[regex] = strategy
28
40
  end
41
+
29
42
  def clean
30
43
  @url_mapping = {}
31
44
  end
32
45
  end
33
46
  end
47
+
48
# Load every downloader definition found in the "downloader" directory next
# to this file. A file that fails to load is reported and skipped so the
# remaining downloaders stay available.
Dir.glob(File.expand_path(File.dirname(__FILE__) + '/downloader/*.rb')).sort.each do |file|
  begin
    require file
  rescue ScriptError, StandardError => e
    # A bare `rescue` only covers StandardError; the LoadError and SyntaxError
    # that `require` raises are ScriptErrors and have to be named explicitly.
    warn "Load error in #{file}: #{e.class}: #{e.message}"
  end
end
@@ -0,0 +1,47 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Exercises NijigazouSokuhou against three stored sample pages: the first,
# a middle and the last article of a series.
describe NijigazouSokuhou do
  let(:url) { 'http://nijigazo.2chblog.jp/archives/52249806.html' }

  subject do
    @dl = NijigazouSokuhou.new(url)
  end

  before do
    fake(:get, url, 'nijigazou_sokuhou/middle.html')
    EroGetter.stub(:mkdir)
  end

  describe :sample do
    # First article of a series: only a next link is expected.
    context :first_page do
      before { fake(:get, url, 'nijigazou_sokuhou/first.html') }

      its(:next) { should == url }
      its(:prev) { should be_nil }
    end

    # Middle article: metadata, targets and both neighbours are checked.
    context :middle do
      before { fake(:get, url, 'nijigazou_sokuhou/middle.html') }

      its(:name) { should == '二次画像速報' }
      its(:title) { should == '二次画像速報 : 【けいおん!】中野梓【あずにゃん】 その2' }
      its(:title_part) { should == '【けいおん!】中野梓【あずにゃん】' }
      its("targets.count") { should == 50 }
      its("targets.first") { should == "http://img03.nijigazo.com/2012/05/11/azusa/0511azusa_0051.jpg" }
      its(:sub_directory) { should == 'azusa/20120511' }
      its(:prev) { should == 'http://nijigazo.2chblog.jp/archives/52249804.html' }
      its(:next) { should == 'http://nijigazo.2chblog.jp/archives/52249808.html' }
    end

    # Last article of a series: only a prev link is expected.
    context :last do
      before { fake(:get, url, 'nijigazou_sokuhou/last.html') }

      its(:next) { should be_nil }
      its(:prev) { should == url }
    end
  end
end
@@ -0,0 +1,113 @@
1
+ require 'spec_helper'
2
+
3
# Exercises EroGetter::Base through an anonymous subclass configured with the
# class-level macros (name, url, target, sub_directory).
describe EroGetter::Base do
  let(:regex) { %r{http://example.net/\d+.html} }

  before do
    # Copied into a local because the Class.new block below is evaluated in
    # the new class, where the `regex` helper is not available.
    pattern = regex
    fake(:get, regex, 'sample.html')
    @klazz = Class.new(EroGetter::Base) do
      name 'NijiEro BBS'
      url pattern

      target("ul#sources li a") { |elm| elm[:href] }

      sub_directory do
        targets.map { |link| link.split(%r{/}).last }.join('/')
      end
    end
    @klazz.stub(:to_s).and_return('TestClass')
  end

  describe "assign url_mapping" do
    it { EroGetter.url_mapping.should have_key regex }
    it { EroGetter.url_mapping[regex].should == @klazz }
  end

  describe :instance_methods do
    subject { @dl }

    context :good do
      before do
        @dl = @klazz.new('http://example.net/10101010.html')
        EroGetter.stub(:mkdir).and_return(true)
        EroGetter.stub('directory').and_return('/tmp')
      end

      its(:name) { should == 'NijiEro BBS' }
      its(:url_regex) { should == regex }
      its(:base_dir) { should == 'test_class' }
      its(:http_client) { should be_a HTTPClient }
      its(:document) { should be_a Nokogiri::HTML::Document }
      its(:title) { should == 'EroGetter Server' }
      its(:url) { should == 'http://example.net/10101010.html' }
      its(:direction) { should == :none }
      its(:targets) do
        should == ['https://github.com/masarakki/ero_getter_server',
                   'https://github.com/masarakki/ero_getter_chrome_extension']
      end
      its(:sub_directory) { should == 'ero_getter_server/ero_getter_chrome_extension' }
      its(:directory) { should == '/tmp/test_class/ero_getter_server/ero_getter_chrome_extension' }

      describe :after_run do
        # The test class declares no `after`, so neither walk is possible.
        context :not_set_after do
          its(:run_next?) { should be_false }
          its(:run_prev?) { should be_false }
        end

        context :direction_none do
          context :has_next do
            before { @dl.stub(:next).and_return('hoge') }

            its(:run_next?) { should be_true }
            its(:run_prev?) { should be_false }
          end

          context :has_prev do
            before { @dl.stub(:prev).and_return('hoge') }

            its(:run_next?) { should be_false }
            its(:run_prev?) { should be_true }
          end

          context :has_next_and_prev do
            before do
              @dl.stub(:prev).and_return('hoge')
              @dl.stub(:next).and_return('hoge')
            end

            its(:run_next?) { should be_true }
            its(:run_prev?) { should be_true }
          end
        end

        # An instance reached by walking backwards must not walk forwards.
        context :direction_prev do
          before { @dl.stub(:direction).and_return(:prev) }

          context :has_next do
            before { @dl.stub(:next).and_return('hoge') }

            its(:run_next?) { should be_false }
          end
        end

        # An instance reached by walking forwards must not walk backwards.
        context :direction_next do
          before { @dl.stub(:direction).and_return(:next) }

          context :has_pref do
            before { @dl.stub(:prev).and_return('hoge') }

            its(:run_prev?) { should be_false }
          end
        end
      end
    end

    # A URL on a different host does not match the registered regex.
    context :url_mismatch do
      it do
        lambda do
          @klazz.new('http://example.com/10101010.html')
        end.should raise_error
      end
    end
  end
end