bookscan 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bookscan.gemspec +2 -2
- data/lib/bookscan.rb +2 -2
- data/lib/bookscan/agent.rb +29 -13
- data/lib/bookscan/book.rb +16 -6
- data/lib/bookscan/cache.rb +15 -3
- data/lib/bookscan/commands.rb +59 -13
- metadata +5 -5
data/Rakefile
CHANGED
@@ -16,7 +16,7 @@ Jeweler::Tasks.new do |gem|
|
|
16
16
|
gem.homepage = "http://github.com/tumf/bookscan"
|
17
17
|
gem.license = "MIT"
|
18
18
|
gem.summary = %Q{BookScan Scraper}
|
19
|
-
gem.description = %Q{This is a scraper of
|
19
|
+
gem.description = %Q{This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.}
|
20
20
|
gem.email = "y.takahara@gmail.com"
|
21
21
|
gem.authors = ["Yoshihiro TAKAHARA"]
|
22
22
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.2.0
|
data/bookscan.gemspec
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bookscan}
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Yoshihiro TAKAHARA"]
|
12
12
|
s.date = %q{2011-01-18}
|
13
13
|
s.default_executable = %q{bookscan}
|
14
|
-
s.description = %q{This is a scraper of
|
14
|
+
s.description = %q{This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.}
|
15
15
|
s.email = %q{y.takahara@gmail.com}
|
16
16
|
s.executables = ["bookscan"]
|
17
17
|
s.extra_rdoc_files = [
|
data/lib/bookscan.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
module Bookscan
|
3
3
|
BSURL = "http://system.bookscan.co.jp"
|
4
|
-
TUNE_TYPES = ["ipad","
|
5
|
-
TUNED_PATTERN = /((
|
4
|
+
TUNE_TYPES = ["ipad","iphone4","kindle3","kindledx","android","sonyreader","nook","jpg"]
|
5
|
+
TUNED_PATTERN = /((iphone4|ipad|kindle3|kindledx|android|sonyreader|nook|jpg)_([^>%]*\.pdf))/
|
6
6
|
end
|
data/lib/bookscan/agent.rb
CHANGED
@@ -64,19 +64,35 @@ module Bookscan
|
|
64
64
|
bs
|
65
65
|
end
|
66
66
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
if
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
67
|
+
def tuning?(book,type)
|
68
|
+
@tuning = tuning unless @tuning
|
69
|
+
@tuning.each { |b|
|
70
|
+
return true if b.title == type+"_"+book.title
|
71
|
+
}
|
72
|
+
false
|
73
|
+
end
|
74
|
+
|
75
|
+
def tune(book,type,is_premium = true)
|
76
|
+
if is_premium
|
77
|
+
max_queue = 10
|
78
|
+
else
|
79
|
+
max_queue = 1
|
79
80
|
end
|
81
|
+
|
82
|
+
@tuning = tuning unless @tuning
|
83
|
+
# チューニングいっぱい
|
84
|
+
raise "tune queue is full" if @tuning.length >= max_queue
|
85
|
+
# チューニング
|
86
|
+
return false if tuning?(book,type)
|
87
|
+
# tune
|
88
|
+
getr(book.tune_url)
|
89
|
+
page.forms.first["optimize_type"] = type;
|
90
|
+
page.forms.first["cover_flg"] = "1";
|
91
|
+
page.forms.first.submit
|
92
|
+
tuned = book.clone
|
93
|
+
tuned.title = type +"_"+book.title
|
94
|
+
@tuning << tuned
|
95
|
+
tuned
|
80
96
|
end
|
81
97
|
|
82
98
|
def groups
|
@@ -131,7 +147,7 @@ module Bookscan
|
|
131
147
|
book.title = u.text.to_s
|
132
148
|
book.url = u.attributes["href"].value.to_s
|
133
149
|
book.group_url = url
|
134
|
-
books[book.
|
150
|
+
books[book.book_id] = book
|
135
151
|
end
|
136
152
|
end
|
137
153
|
books
|
data/lib/bookscan/book.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
require 'digest/md5'
|
4
|
+
require 'uri'
|
4
5
|
require 'rubygems'
|
5
6
|
require 'mutter'
|
6
7
|
|
@@ -14,7 +15,7 @@ module Bookscan
|
|
14
15
|
end
|
15
16
|
|
16
17
|
each do |b|
|
17
|
-
table << [b.
|
18
|
+
table << [b.book_id,b.title]
|
18
19
|
end
|
19
20
|
table.to_s if length > 0
|
20
21
|
end
|
@@ -22,20 +23,20 @@ module Bookscan
|
|
22
23
|
def ids
|
23
24
|
a = Array.new
|
24
25
|
each do |b|
|
25
|
-
a << b.
|
26
|
+
a << b.book_id
|
26
27
|
end
|
27
28
|
a
|
28
29
|
end
|
29
30
|
|
30
31
|
def by_id(book_id)
|
31
32
|
each do |b|
|
32
|
-
return b if b.
|
33
|
+
return b if b.book_id == book_id
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
36
37
|
def has?(book_id)
|
37
38
|
each do |b|
|
38
|
-
return true if b.
|
39
|
+
return true if b.book_id == book_id
|
39
40
|
end
|
40
41
|
false
|
41
42
|
end
|
@@ -43,6 +44,15 @@ module Bookscan
|
|
43
44
|
|
44
45
|
class Book
|
45
46
|
attr_accessor :url,:title,:group_url
|
47
|
+
def tune_url
|
48
|
+
"/bookoptimize.php?hash=%s&d=%s&filename=%s" % [hash,d,URI.encode(@title)]
|
49
|
+
end
|
50
|
+
def d
|
51
|
+
return $1 if /.*download.php\?d=([^&]+)/ =~ @url
|
52
|
+
end
|
53
|
+
def hash
|
54
|
+
return $1 if /.*bookdetail.php\?hash=(.*)/ =~ @group_url
|
55
|
+
end
|
46
56
|
|
47
57
|
def to_s
|
48
58
|
@title
|
@@ -51,7 +61,7 @@ module Bookscan
|
|
51
61
|
def filename
|
52
62
|
return @title if isbn
|
53
63
|
if /(.*)\.pdf$/ =~ @title
|
54
|
-
return $1 + "_" +
|
64
|
+
return $1 + "_" + book_id + ".pdf"
|
55
65
|
end
|
56
66
|
raise "Can't make filename"
|
57
67
|
end
|
@@ -68,7 +78,7 @@ module Bookscan
|
|
68
78
|
return $1 if /_([0-9a-zA-Z]+)\.pdf$/ =~ @title
|
69
79
|
end
|
70
80
|
|
71
|
-
def
|
81
|
+
def book_id
|
72
82
|
return isbn if isbn
|
73
83
|
title = @title
|
74
84
|
if TUNED_PATTERN =~ title
|
data/lib/bookscan/cache.rb
CHANGED
@@ -22,9 +22,21 @@ module Bookscan
|
|
22
22
|
ts
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
26
|
-
|
27
|
-
|
25
|
+
def tuned?(book,type)
|
26
|
+
tuned.has?(book.book_id) and tuned.by_id(book.book_id).tune_type == type
|
27
|
+
end
|
28
|
+
|
29
|
+
def books(group = nil)
|
30
|
+
if group
|
31
|
+
groups.each do |g|
|
32
|
+
return g.books if g.hash == group.hash
|
33
|
+
end
|
34
|
+
else
|
35
|
+
bs = Books.new
|
36
|
+
groups.each do |g|
|
37
|
+
bs += g.books
|
38
|
+
end
|
39
|
+
bs
|
28
40
|
end
|
29
41
|
end
|
30
42
|
|
data/lib/bookscan/commands.rb
CHANGED
@@ -172,6 +172,7 @@ module Bookscan
|
|
172
172
|
directory = "."
|
173
173
|
hash = nil
|
174
174
|
type = nil
|
175
|
+
dry_run = false
|
175
176
|
opt.on('-d DIR','--directory=DIR', 'download directory') do |v|
|
176
177
|
directory = v
|
177
178
|
end
|
@@ -181,31 +182,59 @@ module Bookscan
|
|
181
182
|
opt.on('-t TYPE','--tuned=TYPE', 'download tuned') do |v|
|
182
183
|
type = v
|
183
184
|
end
|
185
|
+
opt.on('--dry-run', 'dry-run mode') do |v|
|
186
|
+
dry_run = true
|
187
|
+
end
|
184
188
|
opt.parse!(@command_options)
|
185
189
|
book_id = @command_options.shift
|
186
|
-
|
187
|
-
|
190
|
+
|
191
|
+
if book_id == "all"
|
192
|
+
if type
|
193
|
+
bs = @cache.tuned
|
194
|
+
else
|
195
|
+
bs = @cache.books
|
196
|
+
end
|
197
|
+
bs.each { |book|
|
198
|
+
if Dir.glob(directory + "/**/*" + book.book_id + "*.pdf").length == 0
|
199
|
+
path = directory + "/" +book.filename
|
200
|
+
puts "download: " + path
|
201
|
+
unless dry_run
|
202
|
+
start
|
203
|
+
@agent.download(book.url,path)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
}
|
188
207
|
else
|
189
|
-
|
208
|
+
if type
|
209
|
+
book = ask_tuned_book_id(book_id,type)
|
210
|
+
else
|
211
|
+
book = ask_book_id(book_id,hash)
|
212
|
+
end
|
213
|
+
|
214
|
+
# download
|
215
|
+
path = directory + "/" +book.filename
|
216
|
+
puts "download: " + path
|
217
|
+
unless dry_run
|
218
|
+
start
|
219
|
+
@agent.download(book.url,path)
|
220
|
+
end
|
190
221
|
end
|
191
222
|
|
192
|
-
# download
|
193
|
-
start
|
194
|
-
path = directory + "/" +book.filename
|
195
|
-
puts "download: " + path
|
196
|
-
@agent.download(book.url,path)
|
197
223
|
end
|
198
224
|
|
199
225
|
def tune
|
200
226
|
opt = OptionParser.new
|
201
227
|
hash = nil
|
228
|
+
dry_run = false
|
202
229
|
opt.on('-g HASH','--group=HASH', 'group hash') do |v|
|
203
230
|
hash = v
|
204
231
|
end
|
232
|
+
opt.on('--dry-run', 'dry-run mode') do |v|
|
233
|
+
dry_run = true
|
234
|
+
end
|
205
235
|
opt.parse!(@command_options)
|
206
236
|
book_id = @command_options.shift
|
207
237
|
type = @command_options.shift
|
208
|
-
book = ask_book_id(book_id,hash)
|
209
238
|
unless type
|
210
239
|
type = ask('Enter tune type: ',TUNE_TYPES) do |q|
|
211
240
|
q.validate = /\w+/
|
@@ -213,10 +242,27 @@ module Bookscan
|
|
213
242
|
end
|
214
243
|
end
|
215
244
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
245
|
+
if book_id == "all"
|
246
|
+
tuned = @cache.tuned
|
247
|
+
bs = @cache.books
|
248
|
+
bs.each { |book|
|
249
|
+
unless @cache.tuned?(book,type)
|
250
|
+
# tune
|
251
|
+
unless dry_run
|
252
|
+
start
|
253
|
+
end
|
254
|
+
puts "tune for %s: %s" % [type, book.title] if dry_run or @agent.tune(book,type)
|
255
|
+
end
|
256
|
+
}
|
257
|
+
else
|
258
|
+
book = ask_book_id(book_id,hash)
|
259
|
+
# tune
|
260
|
+
puts "tune for %s: %s" % [type, book.title]
|
261
|
+
unless dry_run
|
262
|
+
start
|
263
|
+
@agent.tune(book,type)
|
264
|
+
end
|
265
|
+
end
|
220
266
|
end
|
221
267
|
|
222
268
|
def tuning
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Yoshihiro TAKAHARA
|
@@ -138,7 +138,7 @@ dependencies:
|
|
138
138
|
requirement: *id008
|
139
139
|
prerelease: false
|
140
140
|
name: highline
|
141
|
-
description: This is a scraper of
|
141
|
+
description: This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.
|
142
142
|
email: y.takahara@gmail.com
|
143
143
|
executables:
|
144
144
|
- bookscan
|