ero_getter 0.1.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,7 +2,7 @@ source :rubygems
2
2
 
3
3
  gem 'activesupport'
4
4
  gem 'httpclient'
5
- gem 'rubyzip'
5
+ gem 'zipruby'
6
6
  gem 'nokogiri'
7
7
 
8
8
  platforms :jruby do
@@ -11,6 +11,8 @@ end
11
11
 
12
12
  group :development, :test do
13
13
  gem 'rspec'
14
+ gem 'guard-rspec'
14
15
  gem 'bundler'
15
16
  gem 'jeweler'
17
+ gem 'fakeweb'
16
18
  end
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
+ # Guardfile: declares which file changes trigger which RSpec paths.
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec', :version => 2 do
5
+ watch(%r{^spec/.+_spec\.rb$}) # watch every spec file under spec/
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" } # lib/foo.rb maps to spec/foo_spec.rb
7
+ watch('spec/spec_helper.rb') { "spec" } # a helper change maps to the whole spec directory
8
+ end
9
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 1.0.0
data/ero_getter.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "ero_getter"
8
- s.version = "0.1.6"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["masarakki"]
12
- s.date = "2012-05-25"
12
+ s.date = "2012-05-26"
13
13
  s.description = "ero getter"
14
14
  s.email = "masaki@hisme.net"
15
15
  s.extra_rdoc_files = [
@@ -22,17 +22,22 @@ Gem::Specification.new do |s|
22
22
  ".rvmrc",
23
23
  ".travis.yml",
24
24
  "Gemfile",
25
+ "Guardfile",
25
26
  "LICENSE.txt",
26
27
  "README.rdoc",
27
28
  "Rakefile",
28
29
  "VERSION",
29
30
  "ero_getter.gemspec",
31
+ "lib/downloader/nijigazou_sokuhou.rb",
30
32
  "lib/ero_getter.rb",
31
- "lib/ero_getter/downloader.rb",
32
- "lib/ero_getter/downloader/base.rb",
33
- "spec/downloader/base_spec.rb",
34
- "spec/downloader_spec.rb",
33
+ "lib/ero_getter/base.rb",
34
+ "lib/ero_getter/utils.rb",
35
+ "spec/downloader/nijigazou_sokuhou_spec.rb",
36
+ "spec/ero_getter/base_spec.rb",
35
37
  "spec/ero_getter_spec.rb",
38
+ "spec/samples/nijigazou_sokuhou/first.html",
39
+ "spec/samples/nijigazou_sokuhou/last.html",
40
+ "spec/samples/nijigazou_sokuhou/middle.html",
36
41
  "spec/samples/sample.html",
37
42
  "spec/spec_helper.rb"
38
43
  ]
@@ -48,31 +53,37 @@ Gem::Specification.new do |s|
48
53
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
54
  s.add_runtime_dependency(%q<activesupport>, [">= 0"])
50
55
  s.add_runtime_dependency(%q<httpclient>, [">= 0"])
51
- s.add_runtime_dependency(%q<rubyzip>, [">= 0"])
56
+ s.add_runtime_dependency(%q<zipruby>, [">= 0"])
52
57
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
53
58
  s.add_runtime_dependency(%q<jruby-openssl>, [">= 0"])
54
59
  s.add_development_dependency(%q<rspec>, [">= 0"])
60
+ s.add_development_dependency(%q<guard-rspec>, [">= 0"])
55
61
  s.add_development_dependency(%q<bundler>, [">= 0"])
56
62
  s.add_development_dependency(%q<jeweler>, [">= 0"])
63
+ s.add_development_dependency(%q<fakeweb>, [">= 0"])
57
64
  else
58
65
  s.add_dependency(%q<activesupport>, [">= 0"])
59
66
  s.add_dependency(%q<httpclient>, [">= 0"])
60
- s.add_dependency(%q<rubyzip>, [">= 0"])
67
+ s.add_dependency(%q<zipruby>, [">= 0"])
61
68
  s.add_dependency(%q<nokogiri>, [">= 0"])
62
69
  s.add_dependency(%q<jruby-openssl>, [">= 0"])
63
70
  s.add_dependency(%q<rspec>, [">= 0"])
71
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
64
72
  s.add_dependency(%q<bundler>, [">= 0"])
65
73
  s.add_dependency(%q<jeweler>, [">= 0"])
74
+ s.add_dependency(%q<fakeweb>, [">= 0"])
66
75
  end
67
76
  else
68
77
  s.add_dependency(%q<activesupport>, [">= 0"])
69
78
  s.add_dependency(%q<httpclient>, [">= 0"])
70
- s.add_dependency(%q<rubyzip>, [">= 0"])
79
+ s.add_dependency(%q<zipruby>, [">= 0"])
71
80
  s.add_dependency(%q<nokogiri>, [">= 0"])
72
81
  s.add_dependency(%q<jruby-openssl>, [">= 0"])
73
82
  s.add_dependency(%q<rspec>, [">= 0"])
83
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
74
84
  s.add_dependency(%q<bundler>, [">= 0"])
75
85
  s.add_dependency(%q<jeweler>, [">= 0"])
86
+ s.add_dependency(%q<fakeweb>, [">= 0"])
76
87
  end
77
88
  end
78
89
 
@@ -0,0 +1,28 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'nokogiri'
3
+
4
# Downloader definition for article pages on nijigazo.2chblog.jp.
#
# Everything is declared through the EroGetter::Base class macros:
# the site name, the URL pattern handled, which links are download targets,
# where the files are stored and when to follow the prev/next links.
class NijigazouSokuhou < EroGetter::Base

  name '二次画像速報'

  # Article pages only; dots are escaped so they match a literal "." and
  # not any character.
  url %r{http://nijigazo\.2chblog\.jp/archives/\d+\.html}

  # Each thumbnail <img> is wrapped in an <a> pointing at the full-size file.
  # Keep the link only when it really ends in an image extension; the block
  # returns nil otherwise.
  target ".article-body-more > a > img" do |img|
    href = img.parent[:href]
    href if href =~ /\.(?:jpe?g|png|gif)\z/
  end

  # Builds "<character>/<yyyymmdd>" from the first target URL, e.g.
  # http://img03.nijigazo.com/2012/05/11/azusa/0511azusa_0051.jpg
  # gives "azusa/20120511".
  # NOTE(review): assumes at least one target with this path layout exists;
  # an article without targets raises NoMethodError here.
  sub_directory do
    segments = targets.first.split('/')
    date = segments[3..5].join('')
    chara = segments[6]
    File.join(chara, date)
  end

  # Follow a prev/next link only when its text contains this article's
  # series title, i.e. it points at another part of the same series.
  after ["//a[@rel='prev']", "//a[@rel='next']"] do |link|
    link.text.include?(title_part)
  end

  # Series title: the part of the page title after the last ":", without a
  # trailing "その<n>" part marker, stripped, and with "&amp;" decoded.
  def title_part
    @title_part ||= title.split(/:/).last.match(/(.+?)(その.+)?$/)[1].strip.gsub(/&amp;/, '&')
  end

end
@@ -0,0 +1,147 @@
1
+ require 'active_support/inflector'
2
+ require 'httpclient'
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require 'zipruby'
6
+
7
class EroGetter
  # Base class for site-specific downloaders.
  #
  # A subclass describes a site with the class macros below (+name+, +url+,
  # +target+, +sub_directory+, +after+); an instance then downloads every
  # target of one page and optionally walks to the previous/next pages.
  class Base
    # @return [String] page URL this instance works on
    # @return [Symbol] :none, :prev or :next - the direction this instance
    #   was reached from, used to avoid walking back and forth
    attr_reader :url, :direction

    # @param url [String] page URL; must match the subclass's +url+ pattern
    # @param direction [Symbol] :none for the starting page, :prev / :next
    #   when created while following a link
    # @raise [RuntimeError] when +url+ does not match +url_regex+
    def initialize(url, direction = :none)
      raise "unsupported url: #{url}" unless url.match(url_regex)
      @url = url
      @direction = direction
    end

    # Directory name derived from the class name (String#underscore comes
    # from ActiveSupport's inflector).
    def base_dir
      self.class.to_s.underscore
    end

    # Full download directory: EroGetter.directory / base_dir / sub_directory.
    # EroGetter.mkdir is called for it on first use.
    def directory
      return @dir if @dir
      @dir = File.join(EroGetter.directory, base_dir, sub_directory)
      EroGetter.mkdir(@dir)
      @dir
    end

    # Memoized HTTPClient used for downloading targets.
    def http_client
      @http_client ||= HTTPClient.new
    end

    # Parsed HTML of the page (memoized).
    #
    # The URL is parsed and read through open-uri's URI#read instead of
    # Kernel#open: Kernel#open would treat a string starting with "|" as a
    # command to run, and no longer accepts URLs on Ruby 3.
    def document
      @document ||= Nokogiri::HTML(URI.parse(url).read)
    end

    # Page title taken from the document.
    def title
      @title ||= document.title
    end

    # Downloads every target of this page, then recurses into the previous
    # and next pages when the subclass declared +after+ and a link qualifies.
    def run
      targets.each do |target_url|
        if target_url.end_with?('.zip')
          save_zip(target_url)
        else
          save_image(target_url)
        end
      end
      self.class.new(self.prev, :prev).run if run_prev?
      self.class.new(self.next, :next).run if run_next?
    end

    # Fetches +target+ with this page as referer, following redirects.
    #
    # @return the HTTPClient response
    # @raise [RuntimeError] when the final status is not 200
    def get_target(target)
      response = http_client.get(target, :header => {:referer => url}, :follow_redirect => true)
      raise "failed to fetch #{target} (status #{response.status})" unless response.status == 200
      response
    end

    # Downloads +target_url+ into +directory+ under its basename.
    def save_image(target_url)
      filename = File.basename(target_url)
      response = get_target(target_url)
      File.open(File.join(directory, filename), "wb") { |f| f.write response.body }
    end

    # Downloads a zip archive and writes each contained file into +directory+.
    #
    # Entry names come from the remote archive, so only their basename is
    # used: this keeps "../" style names from escaping the download
    # directory. Directory entries (names ending in "/") carry no data and
    # are skipped.
    def save_zip(target_url)
      response = get_target(target_url)
      unzip(response.body).each do |filename, data|
        next if filename.end_with?('/')
        File.open(File.join(directory, File.basename(filename)), "wb") { |f| f.write data }
      end
    end

    # Extracts an in-memory zip archive.
    #
    # @param zip_data [String] raw archive bytes
    # @return [Array<Array(String, String)>] [entry name, entry content] pairs
    def unzip(zip_data)
      entries = []
      Zip::Archive.open_buffer(zip_data) do |archive|
        archive.num_files.times do |index|
          archive.fopen(archive.get_name(index)) do |file|
            entries << [file.name, file.read]
          end
        end
      end
      entries
    end

    class << self
      # With an argument: declares the human-readable site name, exposed as
      # the instance method +name+.
      # Without an argument: behaves like the regular Class#name, so
      # reflection on downloader classes keeps working.
      def name(site_name = nil)
        return super() if site_name.nil?
        define_method(:name) { site_name }
      end

      # Declares the URL pattern handled by this class (instance method
      # +url_regex+) and registers the class through EroGetter.add_mapping.
      def url(regex)
        define_method(:url_regex) { regex }
        EroGetter.add_mapping(regex, self)
      end

      # Declares the download targets: every element matching +css_selector+
      # is passed to the block, and the non-nil results become +targets+
      # (memoized per instance).
      def target(css_selector, &block)
        define_method(:targets) do
          @targets ||= document.css(css_selector).map { |elm| block.call(elm) }.compact
        end
      end

      # Declares how the per-page sub directory is computed; the block runs
      # in the context of the instance and its result is memoized.
      def sub_directory(&block)
        define_method(:sub_directory) do
          @sub_directory = instance_eval(&block) unless defined?(@sub_directory)
          @sub_directory
        end
      end

      # Declares the +prev+ and +next+ instance methods.
      #
      # @param xpath [Array<String>] two XPath expressions: [prev link, next link]
      # @yield [tag] first node matching the expression, evaluated in the
      #   context of the instance; a truthy result means "follow this link"
      #
      # Each method returns the link's href, or nil when the link is missing
      # or the block rejects it. nil is a valid cached result, hence the
      # explicit "defined" check rather than ||=.
      def after(xpath, &block)
        [:prev, :next].each_with_index do |method_name, index|
          var_name = "@#{method_name}".to_sym
          define_method(method_name) do
            unless instance_variable_defined?(var_name)
              tag = document.xpath(xpath[index]).first
              follow = tag && instance_exec(tag, &block)
              instance_variable_set(var_name, follow ? tag[:href] : nil)
            end
            instance_variable_get(var_name)
          end
        end
      end
    end

    private

    # True when this instance may continue to the next page: it was not
    # reached by walking backwards, +after+ was declared, and a link exists.
    def run_next?
      direction != :prev && respond_to?(:next) && !self.next.nil?
    end

    # Mirror of run_next? for the previous page.
    def run_prev?
      direction != :next && respond_to?(:prev) && !self.prev.nil?
    end
  end
end
@@ -0,0 +1,11 @@
1
require 'fileutils'

class EroGetter
  # Small filesystem helpers.
  class Utils
    class << self
      # Creates +path+ together with any missing parent directories.
      #
      # Nothing happens when +path+ already exists, whether it is a
      # directory or a regular file. The return value is not meaningful.
      #
      # @param path [String] directory to create
      def mkdir(path)
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        FileUtils.mkdir_p(path) unless File.exist?(path)
      end
    end
  end
end
data/lib/ero_getter.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  class EroGetter
2
- autoload :Downloader, 'ero_getter/downloader'
2
+ autoload :Base, 'ero_getter/base'
3
+ autoload :Utils, 'ero_getter/utils'
3
4
 
4
5
  def detect(url)
5
6
  self.class.url_mapping.each_pair do |regex, klazz|
@@ -17,17 +18,37 @@ class EroGetter
17
18
 
18
19
  class << self
19
20
  def directory
20
- File.join ENV['HOME'], 'ero_getter'
21
+ path = File.join ENV['HOME'], 'ero_getter'
22
+ puts path unless Dir.exists?(path)
23
+ path
24
+ end
25
+
26
+ def mkdir(path)
27
+ unless File.exists?(path)
28
+ basedir = File.dirname(path)
29
+ mkdir(basedir) unless File.exists?(basedir)
30
+ Dir.mkdir(path)
31
+ end
21
32
  end
22
33
 
23
34
  def url_mapping
24
35
  @url_mapping ||= {}
25
36
  end
37
+
26
38
  def add_mapping(regex, strategy)
27
39
  url_mapping[regex] = strategy
28
40
  end
41
+
29
42
  def clean
30
43
  @url_mapping = {}
31
44
  end
32
45
  end
33
46
  end
47
+
48
# Load every downloader definition found in the downloader/ directory next to
# this file. Loading is best-effort: a downloader that fails to load is
# reported and skipped so the remaining ones stay usable.
#
# LoadError is listed explicitly because it descends from ScriptError, not
# StandardError, so a bare `rescue` would let a missing dependency abort the
# whole require. Files are loaded in sorted order for a predictable result.
Dir.glob(File.expand_path('../downloader/*.rb', __FILE__)).sort.each do |file|
  begin
    require file
  rescue LoadError, StandardError
    puts "Load error in #{file}"
  end
end
@@ -0,0 +1,47 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for the NijigazouSokuhou downloader.
#
# Every example requests the same article URL; which saved sample page is
# registered for it decides whether the downloader sees the first, a middle
# or the last page.
# NOTE(review): `fake` is presumably a helper from spec_helper that registers
# a canned response for the URL - confirm there.
describe NijigazouSokuhou do
  let(:url) { 'http://nijigazo.2chblog.jp/archives/52249806.html' }
  subject { NijigazouSokuhou.new(url) }

  before do
    fake(:get, url, 'nijigazou_sokuhou/middle.html')
    # Never touch the real filesystem from the specs.
    EroGetter.stub(:mkdir)
  end

  describe :sample do
    context :first_page do
      before do
        fake(:get, url, 'nijigazou_sokuhou/first.html')
      end

      its(:next) { should == url }
      its(:prev) { should be_nil }
    end

    context :middle do
      before do
        fake(:get, url, 'nijigazou_sokuhou/middle.html')
      end

      its(:name) { should == '二次画像速報' }
      its(:title) { should == '二次画像速報 : 【けいおん!】中野梓【あずにゃん】 その2' }
      its(:title_part) { should == '【けいおん!】中野梓【あずにゃん】' }
      its("targets.count") { should == 50 }
      its("targets.first") { should == "http://img03.nijigazo.com/2012/05/11/azusa/0511azusa_0051.jpg" }
      its(:sub_directory) { should == 'azusa/20120511' }
      its(:prev) { should == 'http://nijigazo.2chblog.jp/archives/52249804.html' }
      its(:next) { should == 'http://nijigazo.2chblog.jp/archives/52249808.html' }
    end

    context :last do
      before do
        fake(:get, url, 'nijigazou_sokuhou/last.html')
      end

      its(:next) { should be_nil }
      its(:prev) { should == url }
    end
  end
end
@@ -0,0 +1,113 @@
1
+ require 'spec_helper'
2
+
3
# Specs for EroGetter::Base, exercised through an anonymous subclass that is
# configured with the class macros (name / url / target / sub_directory).
describe EroGetter::Base do
  let(:regex) { %r{http://example.net/\d+.html} }
  before do
    # Class.new's block is evaluated with the new class as self, so `regex`
    # (an example-group helper) is not callable inside it; copy the value
    # into a local that the block closes over.
    _regex = regex
    fake(:get, regex, 'sample.html')
    @klazz = Class.new(EroGetter::Base) do
      name 'NijiEro BBS'
      url _regex

      target "ul#sources li a" do |elm|
        elm[:href]
      end

      sub_directory do
        targets.map{|x| x.split(%r{/}).last }.join('/')
      end
    end
    # Give the anonymous class a stable string form so base_dir is predictable.
    @klazz.stub(:to_s).and_return('TestClass')
  end

  describe "assign url_mapping" do
    it { EroGetter.url_mapping.should have_key regex }
    it { EroGetter.url_mapping[regex].should == @klazz }
  end

  describe :instance_methods do
    subject { @dl }
    context :good do
      before do
        @dl = @klazz.new('http://example.net/10101010.html')
        EroGetter.stub(:mkdir).and_return(true)
        EroGetter.stub('directory').and_return('/tmp')
      end
      its(:name) { should == 'NijiEro BBS' }
      its(:url_regex) { should == regex }
      its(:base_dir) { should == 'test_class' }
      its(:http_client) { should be_a HTTPClient }
      its(:document) { should be_a Nokogiri::HTML::Document }
      its(:title) { should == 'EroGetter Server' }
      its(:url) { should == 'http://example.net/10101010.html' }
      its(:direction) { should == :none }
      its(:targets) { should == ['https://github.com/masarakki/ero_getter_server',
                                 'https://github.com/masarakki/ero_getter_chrome_extension'] }
      its(:sub_directory) { should == 'ero_getter_server/ero_getter_chrome_extension' }
      its(:directory) { should == '/tmp/test_class/ero_getter_server/ero_getter_chrome_extension' }
      describe :after_run do
        # The test class never calls `after`, so prev/next do not exist.
        context :not_set_after do
          its(:run_next?) { should be_false }
          its(:run_prev?) { should be_false }
        end

        context :direction_none do
          context :has_next do
            before do
              @dl.stub(:next).and_return('hoge')
            end
            its(:run_next?) { should be_true }
            its(:run_prev?) { should be_false }
          end
          context :has_prev do
            before do
              @dl.stub(:prev).and_return('hoge')
            end
            its(:run_next?) { should be_false }
            its(:run_prev?) { should be_true }
          end
          context :has_next_and_prev do
            before do
              @dl.stub(:prev).and_return('hoge')
              @dl.stub(:next).and_return('hoge')
            end
            its(:run_next?) { should be_true }
            its(:run_prev?) { should be_true }
          end
        end

        # Reached while walking backwards: must not turn around to "next".
        context :direction_prev do
          before do
            @dl.stub(:direction).and_return(:prev)
          end
          context :has_next do
            before do
              @dl.stub(:next).and_return('hoge')
            end
            its(:run_next?) { should be_false }
          end
        end

        # Reached while walking forwards: must not turn around to "prev".
        context :direction_next do
          before do
            @dl.stub(:direction).and_return(:next)
          end
          context :has_prev do
            before do
              @dl.stub(:prev).and_return('hoge')
            end
            its(:run_prev?) { should be_false }
          end
        end
      end
    end

    # example.com does not match the example.net pattern declared above.
    context :url_mismatch do
      it {
        lambda {
          @klazz.new('http://example.com/10101010.html')
        }.should raise_error
      }
    end
  end
end