ero_getter 0.1.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -1
- data/Guardfile +9 -0
- data/VERSION +1 -1
- data/ero_getter.gemspec +20 -9
- data/lib/downloader/nijigazou_sokuhou.rb +28 -0
- data/lib/ero_getter/base.rb +147 -0
- data/lib/ero_getter/utils.rb +11 -0
- data/lib/ero_getter.rb +23 -2
- data/spec/downloader/nijigazou_sokuhou_spec.rb +47 -0
- data/spec/ero_getter/base_spec.rb +113 -0
- data/spec/samples/nijigazou_sokuhou/first.html +1532 -0
- data/spec/samples/nijigazou_sokuhou/last.html +1171 -0
- data/spec/samples/nijigazou_sokuhou/middle.html +1499 -0
- data/spec/spec_helper.rb +4 -0
- metadata +45 -8
- data/lib/ero_getter/downloader/base.rb +0 -81
- data/lib/ero_getter/downloader.rb +0 -41
- data/spec/downloader/base_spec.rb +0 -57
- data/spec/downloader_spec.rb +0 -6
data/spec/spec_helper.rb
CHANGED
@@ -2,6 +2,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
2
2
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
3
|
require 'rspec'
|
4
4
|
require 'ero_getter'
|
5
|
+
require 'fakeweb'
|
5
6
|
|
6
7
|
# Requires supporting files with custom matchers and macros, etc,
|
7
8
|
# in ./support/ and its subdirectories.
|
@@ -11,6 +12,9 @@ RSpec.configure do |config|
|
|
11
12
|
|
12
13
|
end
|
13
14
|
|
15
|
+
def fake(method, url, file)
|
16
|
+
FakeWeb.register_uri(method, url, :body => File.read(sample_path(file)))
|
17
|
+
end
|
14
18
|
|
15
19
|
def sample_path(file)
|
16
20
|
File.join(File.dirname(__FILE__), 'samples', file)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ero_getter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
47
|
+
name: zipruby
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
@@ -107,6 +107,22 @@ dependencies:
|
|
107
107
|
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: guard-rspec
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
110
126
|
- !ruby/object:Gem::Dependency
|
111
127
|
name: bundler
|
112
128
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,6 +155,22 @@ dependencies:
|
|
139
155
|
- - ! '>='
|
140
156
|
- !ruby/object:Gem::Version
|
141
157
|
version: '0'
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: fakeweb
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ! '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
142
174
|
description: ero getter
|
143
175
|
email: masaki@hisme.net
|
144
176
|
executables: []
|
@@ -152,17 +184,22 @@ files:
|
|
152
184
|
- .rvmrc
|
153
185
|
- .travis.yml
|
154
186
|
- Gemfile
|
187
|
+
- Guardfile
|
155
188
|
- LICENSE.txt
|
156
189
|
- README.rdoc
|
157
190
|
- Rakefile
|
158
191
|
- VERSION
|
159
192
|
- ero_getter.gemspec
|
193
|
+
- lib/downloader/nijigazou_sokuhou.rb
|
160
194
|
- lib/ero_getter.rb
|
161
|
-
- lib/ero_getter/downloader.rb
|
162
|
-
- lib/ero_getter/downloader/base.rb
|
163
|
-
- spec/downloader/base_spec.rb
|
164
|
-
- spec/downloader_spec.rb
|
195
|
+
- lib/ero_getter/base.rb
|
196
|
+
- lib/ero_getter/utils.rb
|
197
|
+
- spec/downloader/nijigazou_sokuhou_spec.rb
|
198
|
+
- spec/ero_getter/base_spec.rb
|
165
199
|
- spec/ero_getter_spec.rb
|
200
|
+
- spec/samples/nijigazou_sokuhou/first.html
|
201
|
+
- spec/samples/nijigazou_sokuhou/last.html
|
202
|
+
- spec/samples/nijigazou_sokuhou/middle.html
|
166
203
|
- spec/samples/sample.html
|
167
204
|
- spec/spec_helper.rb
|
168
205
|
homepage: http://github.com/masarakki/ero_getter
|
@@ -180,7 +217,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
180
217
|
version: '0'
|
181
218
|
segments:
|
182
219
|
- 0
|
183
|
-
hash:
|
220
|
+
hash: -971160779263307343
|
184
221
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
185
222
|
none: false
|
186
223
|
requirements:
|
@@ -1,81 +0,0 @@
|
|
1
|
-
require 'active_support/inflector'
|
2
|
-
require 'httpclient'
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'open-uri'
|
5
|
-
|
6
|
-
class EroGetter::Downloader::Base
|
7
|
-
def initialize(url, direction = 0)
|
8
|
-
raise unless url.match url_regex
|
9
|
-
@url = url
|
10
|
-
@direction = direction
|
11
|
-
end
|
12
|
-
|
13
|
-
def base_dir
|
14
|
-
self.class.to_s.underscore
|
15
|
-
end
|
16
|
-
|
17
|
-
def directory
|
18
|
-
unless @dir
|
19
|
-
@dir = File.join(EroGetter.directory, base_dir, sub_directory)
|
20
|
-
EroGetter::Downloader.mkdir(@dir)
|
21
|
-
end
|
22
|
-
@dir
|
23
|
-
end
|
24
|
-
|
25
|
-
def http_client
|
26
|
-
@http_client ||= HTTPClient.new
|
27
|
-
end
|
28
|
-
|
29
|
-
def url
|
30
|
-
@url
|
31
|
-
end
|
32
|
-
|
33
|
-
def direction
|
34
|
-
@direction
|
35
|
-
end
|
36
|
-
|
37
|
-
def document
|
38
|
-
@document ||= Nokogiri::HTML(open(url).read)
|
39
|
-
end
|
40
|
-
|
41
|
-
def title
|
42
|
-
@title ||= document.title
|
43
|
-
end
|
44
|
-
|
45
|
-
class << self
|
46
|
-
def name(site_name)
|
47
|
-
define_method(:name) do
|
48
|
-
site_name
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def url(regex)
|
53
|
-
define_method(:url_regex) do
|
54
|
-
regex
|
55
|
-
end
|
56
|
-
EroGetter.add_mapping(regex, self)
|
57
|
-
end
|
58
|
-
|
59
|
-
def target(css_selector, &block)
|
60
|
-
define_method(:targets) do
|
61
|
-
unless instance_variable_defined?(:@targets)
|
62
|
-
items = document.css(css_selector).map do |elm|
|
63
|
-
yield(elm)
|
64
|
-
end
|
65
|
-
instance_variable_set(:@targets, items.compact)
|
66
|
-
end
|
67
|
-
instance_variable_get(:@targets)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
def sub_directory(&block)
|
72
|
-
define_method(:sub_directory) do
|
73
|
-
unless instance_variable_defined?(:@sub_directory)
|
74
|
-
instance_variable_set(:@sub_directory, self.instance_eval(&block))
|
75
|
-
end
|
76
|
-
instance_variable_get(:@sub_directory)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'zip/zip'
|
2
|
-
require 'digest/md5'
|
3
|
-
|
4
|
-
class EroGetter::Downloader
|
5
|
-
autoload :Base, 'ero_getter/downloader/base'
|
6
|
-
|
7
|
-
class << self
|
8
|
-
def base_path
|
9
|
-
path = File.join(ENV['HOME'], 'ero_getter')
|
10
|
-
mkdir(path) unless Dir.exists?(path)
|
11
|
-
path
|
12
|
-
end
|
13
|
-
|
14
|
-
def mkdir(path)
|
15
|
-
unless File.exists?(path)
|
16
|
-
basedir = File.dirname(path)
|
17
|
-
mkdir(basedir) unless File.exists?(basedir)
|
18
|
-
Dir.mkdir(path)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def unzip(zip_data)
|
23
|
-
tmp_file = File.join(base_path, Digest::MD5.hexdigest(zip_data))
|
24
|
-
begin
|
25
|
-
File.open(tmp_file, 'wb') { |f| f.write zip_data }
|
26
|
-
result = []
|
27
|
-
Zip::ZipInputStream.open(tmp_file) do |zip|
|
28
|
-
while entry = zip.get_next_entry
|
29
|
-
filename = entry.name_in(entry.name_encoding)
|
30
|
-
if entry.file? && !filename.match(/\.(txt|html)$/)
|
31
|
-
result << [filename, entry.get_input_stream {|f| f.read}]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
ensure
|
36
|
-
File.delete tmp_file if File.exists?(tmp_file)
|
37
|
-
end
|
38
|
-
result
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe EroGetter::Downloader::Base do
|
4
|
-
let(:regex) { %r{http://example.net/\d+.html} }
|
5
|
-
before do
|
6
|
-
_regex = regex
|
7
|
-
@klazz = Class.new(EroGetter::Downloader::Base) do
|
8
|
-
name 'NijiEro BBS'
|
9
|
-
url _regex
|
10
|
-
|
11
|
-
target "ul#sources li a" do |elm|
|
12
|
-
elm[:href]
|
13
|
-
end
|
14
|
-
|
15
|
-
sub_directory do
|
16
|
-
targets.map{|x| x.split(%r{/}).last }.join('/')
|
17
|
-
end
|
18
|
-
end
|
19
|
-
@klazz.stub(:to_s).and_return('TestClass')
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "assign url_mapping" do
|
23
|
-
it { EroGetter.url_mapping.should have_key regex }
|
24
|
-
it { EroGetter.url_mapping[regex].should == @klazz }
|
25
|
-
end
|
26
|
-
|
27
|
-
describe :instance_methods do
|
28
|
-
subject { @dl }
|
29
|
-
context :good do
|
30
|
-
before do
|
31
|
-
@dl = @klazz.new('http://example.net/10101010.html')
|
32
|
-
@dl.stub(:open).and_return(File.open(sample_path('sample.html')))
|
33
|
-
EroGetter::Downloader.stub(:mkdir).and_return(true)
|
34
|
-
EroGetter.stub('directory').and_return('/tmp')
|
35
|
-
end
|
36
|
-
its(:name) { should == 'NijiEro BBS' }
|
37
|
-
its(:url_regex) { should == regex }
|
38
|
-
its(:base_dir) { should == 'test_class' }
|
39
|
-
its(:http_client) { should be_a HTTPClient }
|
40
|
-
its(:document) { should be_a Nokogiri::HTML::Document }
|
41
|
-
its(:title) { should == 'EroGetter Server' }
|
42
|
-
its(:url) { should == 'http://example.net/10101010.html' }
|
43
|
-
its(:direction) { should == 0 }
|
44
|
-
its(:targets) { should == ['https://github.com/masarakki/ero_getter_server',
|
45
|
-
'https://github.com/masarakki/ero_getter_chrome_extension'] }
|
46
|
-
its(:sub_directory) { should == 'ero_getter_server/ero_getter_chrome_extension' }
|
47
|
-
its(:directory) { should == '/tmp/test_class/ero_getter_server/ero_getter_chrome_extension' }
|
48
|
-
end
|
49
|
-
context :url_mismatch do
|
50
|
-
it {
|
51
|
-
lambda {
|
52
|
-
@klazz.new('http://example.com/10101010.html')
|
53
|
-
}.should raise_error
|
54
|
-
}
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|