bookscan 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bookscan.gemspec +2 -2
- data/lib/bookscan.rb +2 -2
- data/lib/bookscan/agent.rb +29 -13
- data/lib/bookscan/book.rb +16 -6
- data/lib/bookscan/cache.rb +15 -3
- data/lib/bookscan/commands.rb +59 -13
- metadata +5 -5
data/Rakefile
CHANGED
@@ -16,7 +16,7 @@ Jeweler::Tasks.new do |gem|
|
|
16
16
|
gem.homepage = "http://github.com/tumf/bookscan"
|
17
17
|
gem.license = "MIT"
|
18
18
|
gem.summary = %Q{BookScan Scraper}
|
19
|
-
gem.description = %Q{This is a scraper of
|
19
|
+
gem.description = %Q{This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.}
|
20
20
|
gem.email = "y.takahara@gmail.com"
|
21
21
|
gem.authors = ["Yoshihiro TAKAHARA"]
|
22
22
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.2.0
|
data/bookscan.gemspec
CHANGED
@@ -5,13 +5,13 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bookscan}
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Yoshihiro TAKAHARA"]
|
12
12
|
s.date = %q{2011-01-18}
|
13
13
|
s.default_executable = %q{bookscan}
|
14
|
-
s.description = %q{This is a scraper of
|
14
|
+
s.description = %q{This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.}
|
15
15
|
s.email = %q{y.takahara@gmail.com}
|
16
16
|
s.executables = ["bookscan"]
|
17
17
|
s.extra_rdoc_files = [
|
data/lib/bookscan.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
module Bookscan
|
3
3
|
BSURL = "http://system.bookscan.co.jp"
|
4
|
-
TUNE_TYPES = ["ipad","
|
5
|
-
TUNED_PATTERN = /((
|
4
|
+
TUNE_TYPES = ["ipad","iphone4","kindle3","kindledx","android","sonyreader","nook","jpg"]
|
5
|
+
TUNED_PATTERN = /((iphone4|ipad|kindle3|kindledx|android|sonyreader|nook|jpg)_([^>%]*\.pdf))/
|
6
6
|
end
|
data/lib/bookscan/agent.rb
CHANGED
@@ -64,19 +64,35 @@ module Bookscan
|
|
64
64
|
bs
|
65
65
|
end
|
66
66
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
if
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
67
|
+
def tuning?(book,type)
|
68
|
+
@tuning = tuning unless @tuning
|
69
|
+
@tuning.each { |b|
|
70
|
+
return true if b.title == type+"_"+book.title
|
71
|
+
}
|
72
|
+
false
|
73
|
+
end
|
74
|
+
|
75
|
+
def tune(book,type,is_premium = true)
|
76
|
+
if is_premium
|
77
|
+
max_queue = 10
|
78
|
+
else
|
79
|
+
max_queue = 1
|
79
80
|
end
|
81
|
+
|
82
|
+
@tuning = tuning unless @tuning
|
83
|
+
# チューニングいっぱい
|
84
|
+
raise "tune queue is full" if @tuning.length >= max_queue
|
85
|
+
# チューニング
|
86
|
+
return false if tuning?(book,type)
|
87
|
+
# tune
|
88
|
+
getr(book.tune_url)
|
89
|
+
page.forms.first["optimize_type"] = type;
|
90
|
+
page.forms.first["cover_flg"] = "1";
|
91
|
+
page.forms.first.submit
|
92
|
+
tuned = book.clone
|
93
|
+
tuned.title = type +"_"+book.title
|
94
|
+
@tuning << tuned
|
95
|
+
tuned
|
80
96
|
end
|
81
97
|
|
82
98
|
def groups
|
@@ -131,7 +147,7 @@ module Bookscan
|
|
131
147
|
book.title = u.text.to_s
|
132
148
|
book.url = u.attributes["href"].value.to_s
|
133
149
|
book.group_url = url
|
134
|
-
books[book.
|
150
|
+
books[book.book_id] = book
|
135
151
|
end
|
136
152
|
end
|
137
153
|
books
|
data/lib/bookscan/book.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
require 'digest/md5'
|
4
|
+
require 'uri'
|
4
5
|
require 'rubygems'
|
5
6
|
require 'mutter'
|
6
7
|
|
@@ -14,7 +15,7 @@ module Bookscan
|
|
14
15
|
end
|
15
16
|
|
16
17
|
each do |b|
|
17
|
-
table << [b.
|
18
|
+
table << [b.book_id,b.title]
|
18
19
|
end
|
19
20
|
table.to_s if length > 0
|
20
21
|
end
|
@@ -22,20 +23,20 @@ module Bookscan
|
|
22
23
|
def ids
|
23
24
|
a = Array.new
|
24
25
|
each do |b|
|
25
|
-
a << b.
|
26
|
+
a << b.book_id
|
26
27
|
end
|
27
28
|
a
|
28
29
|
end
|
29
30
|
|
30
31
|
def by_id(book_id)
|
31
32
|
each do |b|
|
32
|
-
return b if b.
|
33
|
+
return b if b.book_id == book_id
|
33
34
|
end
|
34
35
|
end
|
35
36
|
|
36
37
|
def has?(book_id)
|
37
38
|
each do |b|
|
38
|
-
return true if b.
|
39
|
+
return true if b.book_id == book_id
|
39
40
|
end
|
40
41
|
false
|
41
42
|
end
|
@@ -43,6 +44,15 @@ module Bookscan
|
|
43
44
|
|
44
45
|
class Book
|
45
46
|
attr_accessor :url,:title,:group_url
|
47
|
+
def tune_url
|
48
|
+
"/bookoptimize.php?hash=%s&d=%s&filename=%s" % [hash,d,URI.encode(@title)]
|
49
|
+
end
|
50
|
+
def d
|
51
|
+
return $1 if /.*download.php\?d=([^&]+)/ =~ @url
|
52
|
+
end
|
53
|
+
def hash
|
54
|
+
return $1 if /.*bookdetail.php\?hash=(.*)/ =~ @group_url
|
55
|
+
end
|
46
56
|
|
47
57
|
def to_s
|
48
58
|
@title
|
@@ -51,7 +61,7 @@ module Bookscan
|
|
51
61
|
def filename
|
52
62
|
return @title if isbn
|
53
63
|
if /(.*)\.pdf$/ =~ @title
|
54
|
-
return $1 + "_" +
|
64
|
+
return $1 + "_" + book_id + ".pdf"
|
55
65
|
end
|
56
66
|
raise "Can't make filename"
|
57
67
|
end
|
@@ -68,7 +78,7 @@ module Bookscan
|
|
68
78
|
return $1 if /_([0-9a-zA-Z]+)\.pdf$/ =~ @title
|
69
79
|
end
|
70
80
|
|
71
|
-
def
|
81
|
+
def book_id
|
72
82
|
return isbn if isbn
|
73
83
|
title = @title
|
74
84
|
if TUNED_PATTERN =~ title
|
data/lib/bookscan/cache.rb
CHANGED
@@ -22,9 +22,21 @@ module Bookscan
|
|
22
22
|
ts
|
23
23
|
end
|
24
24
|
|
25
|
-
def
|
26
|
-
|
27
|
-
|
25
|
+
def tuned?(book,type)
|
26
|
+
tuned.has?(book.book_id) and tuned.by_id(book.book_id).tune_type == type
|
27
|
+
end
|
28
|
+
|
29
|
+
def books(group = nil)
|
30
|
+
if group
|
31
|
+
groups.each do |g|
|
32
|
+
return g.books if g.hash == group.hash
|
33
|
+
end
|
34
|
+
else
|
35
|
+
bs = Books.new
|
36
|
+
groups.each do |g|
|
37
|
+
bs += g.books
|
38
|
+
end
|
39
|
+
bs
|
28
40
|
end
|
29
41
|
end
|
30
42
|
|
data/lib/bookscan/commands.rb
CHANGED
@@ -172,6 +172,7 @@ module Bookscan
|
|
172
172
|
directory = "."
|
173
173
|
hash = nil
|
174
174
|
type = nil
|
175
|
+
dry_run = false
|
175
176
|
opt.on('-d DIR','--directory=DIR', 'download directory') do |v|
|
176
177
|
directory = v
|
177
178
|
end
|
@@ -181,31 +182,59 @@ module Bookscan
|
|
181
182
|
opt.on('-t TYPE','--tuned=TYPE', 'download tuned') do |v|
|
182
183
|
type = v
|
183
184
|
end
|
185
|
+
opt.on('--dry-run', 'dry-run mode') do |v|
|
186
|
+
dry_run = true
|
187
|
+
end
|
184
188
|
opt.parse!(@command_options)
|
185
189
|
book_id = @command_options.shift
|
186
|
-
|
187
|
-
|
190
|
+
|
191
|
+
if book_id == "all"
|
192
|
+
if type
|
193
|
+
bs = @cache.tuned
|
194
|
+
else
|
195
|
+
bs = @cache.books
|
196
|
+
end
|
197
|
+
bs.each { |book|
|
198
|
+
if Dir.glob(directory + "/**/*" + book.book_id + "*.pdf").length == 0
|
199
|
+
path = directory + "/" +book.filename
|
200
|
+
puts "download: " + path
|
201
|
+
unless dry_run
|
202
|
+
start
|
203
|
+
@agent.download(book.url,path)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
}
|
188
207
|
else
|
189
|
-
|
208
|
+
if type
|
209
|
+
book = ask_tuned_book_id(book_id,type)
|
210
|
+
else
|
211
|
+
book = ask_book_id(book_id,hash)
|
212
|
+
end
|
213
|
+
|
214
|
+
# download
|
215
|
+
path = directory + "/" +book.filename
|
216
|
+
puts "download: " + path
|
217
|
+
unless dry_run
|
218
|
+
start
|
219
|
+
@agent.download(book.url,path)
|
220
|
+
end
|
190
221
|
end
|
191
222
|
|
192
|
-
# download
|
193
|
-
start
|
194
|
-
path = directory + "/" +book.filename
|
195
|
-
puts "download: " + path
|
196
|
-
@agent.download(book.url,path)
|
197
223
|
end
|
198
224
|
|
199
225
|
def tune
|
200
226
|
opt = OptionParser.new
|
201
227
|
hash = nil
|
228
|
+
dry_run = false
|
202
229
|
opt.on('-g HASH','--group=HASH', 'group hash') do |v|
|
203
230
|
hash = v
|
204
231
|
end
|
232
|
+
opt.on('--dry-run', 'dry-run mode') do |v|
|
233
|
+
dry_run = true
|
234
|
+
end
|
205
235
|
opt.parse!(@command_options)
|
206
236
|
book_id = @command_options.shift
|
207
237
|
type = @command_options.shift
|
208
|
-
book = ask_book_id(book_id,hash)
|
209
238
|
unless type
|
210
239
|
type = ask('Enter tune type: ',TUNE_TYPES) do |q|
|
211
240
|
q.validate = /\w+/
|
@@ -213,10 +242,27 @@ module Bookscan
|
|
213
242
|
end
|
214
243
|
end
|
215
244
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
245
|
+
if book_id == "all"
|
246
|
+
tuned = @cache.tuned
|
247
|
+
bs = @cache.books
|
248
|
+
bs.each { |book|
|
249
|
+
unless @cache.tuned?(book,type)
|
250
|
+
# tune
|
251
|
+
unless dry_run
|
252
|
+
start
|
253
|
+
end
|
254
|
+
puts "tune for %s: %s" % [type, book.title] if dry_run or @agent.tune(book,type)
|
255
|
+
end
|
256
|
+
}
|
257
|
+
else
|
258
|
+
book = ask_book_id(book_id,hash)
|
259
|
+
# tune
|
260
|
+
puts "tune for %s: %s" % [type, book.title]
|
261
|
+
unless dry_run
|
262
|
+
start
|
263
|
+
@agent.tune(book,type)
|
264
|
+
end
|
265
|
+
end
|
220
266
|
end
|
221
267
|
|
222
268
|
def tuning
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
- 3
|
10
|
-
version: 0.1.3
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Yoshihiro TAKAHARA
|
@@ -138,7 +138,7 @@ dependencies:
|
|
138
138
|
requirement: *id008
|
139
139
|
prerelease: false
|
140
140
|
name: highline
|
141
|
-
description: This is a scraper of
|
141
|
+
description: This is a scraper of Bookscan (http://www.bookscan.co.jp) Service.This is *NOT* a official software of Bookscan.
|
142
142
|
email: y.takahara@gmail.com
|
143
143
|
executables:
|
144
144
|
- bookscan
|