kindai 1.9.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.9.0
1
+ 2.0.0
data/kindai.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{kindai}
8
- s.version = "1.9.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{hitode909}]
12
- s.date = %q{2011-12-23}
12
+ s.date = %q{2012-05-19}
13
13
  s.description = %q{kindai.rb is kindai digital library downloader.}
14
14
  s.email = %q{hitode909@gmail.com}
15
15
  s.executables = [%q{kindai.rb}]
data/lib/kindai/book.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  module Kindai
3
3
  class Book
4
- attr_accessor :permalink_uri
5
- attr_accessor :trimming
6
4
 
7
5
  # ----- constructor -----
8
- def self.new_from_permalink(permalink_uri, trimming = {})
9
- raise "not info:ndljp" unless permalink_uri.match(/info\:ndljp/)
6
+ def self.new_from_permalink(permalink_uri)
7
+ raise "not info:ndljp: #{permalink_uri}" unless permalink_uri.match(/info\:ndljp/)
10
8
  me = new
11
- me.permalink_uri = permalink_uri
12
- me.trimming = trimming
9
+ me.instance_eval {
10
+ @permalink_uri = permalink_uri
11
+ }
13
12
  return me
14
13
  end
15
14
 
@@ -19,52 +18,58 @@ module Kindai
19
18
  return self.new_from_permalink(permalink)
20
19
  end
21
20
 
21
+ def self.new_from_search_result_uri(search_result_uri)
22
+ raise "not iss.ndl.go.jp: #{search_result_uri}" unless search_result_uri.match(/iss\.ndl\.go\.jp/)
23
+ me = new
24
+ me.instance_eval {
25
+ @search_result_uri = search_result_uri
26
+ }
27
+ me
28
+ end
29
+
22
30
  # ----- metadata -----
23
31
 
32
+ def permalink_uri
33
+ @permalink_uri ||=
34
+ begin
35
+ get_permalink_from_search_result_uri
36
+ end
37
+ end
38
+
24
39
  def key
25
40
  permalink_uri.match(/\d+$/)[0]
26
41
  end
27
42
 
28
43
  def title
29
- title_container = control_page.at('.titlehead')
30
- subtitle_container = control_page.at('.headmenu')
31
- title_string = title_container.content.strip
32
- title_string += subtitle_container.content.strip if subtitle_container
33
- title_string
44
+ main = metadata_like 'title'
45
+
46
+ sub = metadata_like('volumeTranscription').to_i.to_s rescue nil
47
+ sub ? main + sub : main
34
48
  end
35
49
 
36
50
  def author
37
- metadata['著者標目']
51
+ metadata_like 'creator:NDLNH'
38
52
  end
39
53
 
40
54
  def total_spread
41
- self.spread_at(1).page.search('select#dlPages option').length
55
+ permalink_page.search('#sel-content-no option').length
42
56
  end
43
57
 
44
58
  def spreads
45
59
  @spreads ||= 1.upto(self.total_spread).map{|i| self.spread_at(i) }
46
60
  end
47
61
 
48
- def base_uri
49
- @base_uri ||=
50
- begin
51
- Kindai::Util.logger.debug "fetch permalink page"
52
- page_uri = URI.parse(permalink_uri) + permalink_page.at('frame[name="W_BODY"]')['src']
53
-
54
- page_base_uri = Kindai::Util.get_redirected_uri page_uri.to_s
55
- uri = page_base_uri.to_s + '&vs=10000,10000,0,0,0,0,0,0'
56
- unless self.trimming.keys.empty?
57
- %w{x y w h resize_w resize_h}.map(&:to_sym).each{ |key|
58
- self.trimming[key] ||= 0
59
- }
60
- uri += "&ref=" + [self.trimming[:x], self.trimming[:y], self.trimming[:w], self.trimming[:h], self.trimming[:resize_w], self.trimming[:resize_h], 0, 0].join(',')
61
- end
62
- uri
63
- end
64
- end
65
-
66
62
  protected
67
63
 
64
+ def metadata_like query
65
+ query_regexp = Regexp.new(Regexp.quote("(#{ query })"))
66
+ key = metadata.keys.find{ |key|
67
+ key =~ query_regexp
68
+ }
69
+ raise "metadata like #{query} not found" unless key
70
+ metadata[key]
71
+ end
72
+
68
73
  def spread_at(spread_number)
69
74
  Kindai::Spread.new_from_book_and_spread_number(self, spread_number)
70
75
  end
@@ -72,14 +77,12 @@ module Kindai
72
77
  def metadata
73
78
  @metadata ||=
74
79
  begin
75
- metadata_table = detail_page.search('table').find{ |table|
76
- table.at('td').text == 'タイトル'
80
+ dts = permalink_page.search('dl.detail-metadata-list dt').map{ |tag| tag.text }
81
+ dds = permalink_page.search('dl.detail-metadata-list dd').map{ |tag| tag.text }
82
+ dts.zip(dds).inject({ }) { |table, tupple|
83
+ table[tupple.first.strip] = tupple.last.strip
84
+ table
77
85
  }
78
- metadata_table.search('tr').inject({ }) { |prev, tr|
79
- key, _, value = *tr.search('td').map{ |elem| elem.text }
80
- prev[key] = value
81
- prev
82
- }
83
86
  end
84
87
  end
85
88
 
@@ -93,35 +96,11 @@ module Kindai
93
96
  end
94
97
  end
95
98
 
96
- def detail_uri
97
- root = URI.parse('http://kindai.ndl.go.jp/BIBibDetail.php')
98
- params = { }
99
- control_page.search('input').each{ |input|
100
- params[input['name']] = input['value'] if input['value']
101
- }
102
- path = '?' + params.each_pair.map{ |k, v| [URI.escape(k), URI.escape(v)].join('=')}.join('&')
103
- root + path
104
- end
105
-
106
- def detail_page
107
- @detail_page ||=
108
- begin
109
- Kindai::Util.logger.debug "fetch detail page"
110
- page = Kindai::Util.fetch_uri detail_uri rescue Kindai::Util.fetch_uri URI.escape(detail_uri)
111
- Nokogiri page
112
- end
113
- end
114
-
115
- def control_uri
116
- URI.parse(permalink_uri) + permalink_page.at('frame[name="W_CONTROL"]')['src']
117
- end
118
-
119
- def control_page
120
- @control_page ||=
121
- begin
122
- Kindai::Util.logger.debug "fetch permalink page"
123
- Nokogiri Kindai::Util.fetch_uri control_uri
124
- end
99
+ def get_permalink_from_search_result_uri
100
+ "search_result_uri is required" unless @search_result_uri
101
+ page = Nokogiri Kindai::Util.fetch_uri @search_result_uri
102
+ a = page.at "#reviewsites a[href^='http://kindai.da.ndl.go.jp/info:ndljp/pid/']"
103
+ a['href']
125
104
  end
126
105
 
127
106
  end
@@ -64,9 +64,14 @@ module Kindai
64
64
  end
65
65
 
66
66
  def download_spreads
67
- self.spread_downloaders.each{|dl|
67
+ is_first = true
68
+ self.spread_downloaders.each{ |dl|
69
+ next if dl.has_file?
70
+ sleep 30 unless is_first
71
+ is_first = false
68
72
  dl.download
69
73
  }
74
+
70
75
  return true
71
76
  end
72
77
  end
@@ -20,7 +20,7 @@ module Kindai
20
20
  uris = result_for(@keyword, page)
21
21
  return if uris.empty?
22
22
  uris.each{ |uri|
23
- yield Kindai::Book.new_from_permalink(uri)
23
+ yield Kindai::Book.new_from_search_result_uri(uri)
24
24
  }
25
25
  }
26
26
  end
@@ -42,9 +42,9 @@ module Kindai
42
42
  end
43
43
 
44
44
  def uri_for keyword, page = 0
45
- count = 10
45
+ count = 100
46
46
  params = { :any => keyword, :dpid => 'kindai', :idx => page * count + 1, :cnt => count}
47
- root = URI.parse("http://api.porta.ndl.go.jp/servicedp/opensearch")
47
+ root = URI.parse("http://iss.ndl.go.jp/api/opensearch")
48
48
  path = '?' + Kindai::Util.expand_params(params)
49
49
  root + path
50
50
  end
data/lib/kindai/spread.rb CHANGED
@@ -13,20 +13,16 @@ module Kindai
13
13
  end
14
14
 
15
15
  def uri
16
- book.base_uri.gsub(/koma=(\d+)/) { "koma=#{spread_number}" }
16
+ "#{book.permalink_uri}/#{spread_number.to_s}"
17
17
  end
18
18
 
19
19
  def image_uri
20
- image = page.at("img#imMain")
21
- raise "not exists" unless image
22
- image['src']
23
- end
24
-
25
-
26
- def has_local_file?
27
- end
28
-
29
- def local_file_path
20
+ params = {
21
+ :itemId => "info:ndljp/pid/#{book.key}",
22
+ :contentNo => spread_number,
23
+ :outputScale => 1,
24
+ }
25
+ "http://kindai.ndl.go.jp/view/jpegOutput?" + Kindai::Util.expand_params(params)
30
26
  end
31
27
 
32
28
  # protected
@@ -56,7 +56,7 @@ module Kindai
56
56
 
57
57
  Kindai::Util.logger.info "sleep and retry"
58
58
  failed_count += 1
59
- sleep 10
59
+ sleep 30
60
60
  retry
61
61
  end
62
62
  end
data/lib/kindai/util.rb CHANGED
@@ -54,7 +54,7 @@ module Kindai::Util
54
54
  # input: {:a => 'a', :b => 'bbb'}
55
55
  # output: 'a=a&b=bbb
56
56
  def self.expand_params(params)
57
- params.each_pair.map{ |k, v| [URI.escape(k.to_s), URI.escape(v.to_s)].join('=')}.join('&')
57
+ URI.encode_www_form(params)
58
58
  end
59
59
 
60
60
  def self.append_suffix(path, suffix)
data/spec/book_spec.rb CHANGED
@@ -1,6 +1,24 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
+ describe Kindai::Book, 'from search result uri' do
5
+ before do
6
+ @book = Kindai::Book.new_from_search_result_uri('http://iss.ndl.go.jp/books/R000000008-I000162417-00')
7
+ end
8
+
9
+ it 'is a book' do
10
+ @book.should be_kind_of Kindai::Book
11
+ end
12
+
13
+ it 'has permalink' do
14
+ @book.permalink_uri.should == 'http://kindai.da.ndl.go.jp/info:ndljp/pid/922693'
15
+ end
16
+
17
+ it 'has key' do
18
+ @book.key.should == "922693"
19
+ end
20
+ end
21
+
4
22
  describe Kindai::Book do
5
23
  before do
6
24
  @book = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693')
@@ -26,10 +44,6 @@ describe Kindai::Book do
26
44
  @book.spreads.should have_exactly(@book.total_spread).spreads
27
45
  end
28
46
 
29
- it 'has base_uri' do
30
- @book.base_uri.should == "http://kindai.da.ndl.go.jp/scrpt/ndlimageviewer-rgc.aspx?pid=info%3Andljp%2Fpid%2F922693&jp=42016454&vol=10010&koma=1&vs=10000,10000,0,0,0,0,0,0"
31
- end
32
-
33
47
  end
34
48
 
35
49
  describe Kindai::Book, 'with series' do
@@ -38,33 +52,13 @@ describe Kindai::Book, 'with series' do
38
52
  end
39
53
 
40
54
  it 'has title' do
41
- @book.title.should == '講談日露戦争記[第3冊]第3'
55
+ @book.title.should == '講談日露戦争記3'
42
56
  end
43
57
  end
44
58
 
45
- describe Kindai::Book, 'with trimming' do
46
- before do
47
-
48
- @trimming = {:x => 342, :y => 190, :w => 2829, :h => 2485, :resize_w => 900, :resize_h => 900}
49
- @book = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693', @trimming)
50
- @book_normal = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693')
51
- end
52
-
53
- it 'has trimming' do
54
- @book.trimming.should be_kind_of Hash
55
- @book.trimming.should == @trimming
59
+ describe Kindai::Book, 'from without any uri' do
60
+ it 'will die' do
61
+ empty_book = Kindai::Book.new
62
+ lambda { empty_book.permalink_uri }.should raise_error Exception
56
63
  end
57
-
58
- it 'has base uri including trimming info' do
59
- @book.base_uri.should match(/2485/)
60
- end
61
-
62
- it 'has different image uri' do
63
- @book.spreads.first.uri.should_not == @book_normal.spreads.first.uri
64
- end
65
-
66
64
  end
67
-
68
-
69
-
70
-
data/spec/spread_spec.rb CHANGED
@@ -16,11 +16,11 @@ describe Kindai::Spread do
16
16
  end
17
17
 
18
18
  it 'has uri' do
19
- @spread.uri.should == "http://kindai.da.ndl.go.jp/scrpt/ndlimageviewer-rgc.aspx?pid=info%3Andljp%2Fpid%2F922693&jp=42016454&vol=10010&koma=5&vs=10000,10000,0,0,0,0,0,0"
19
+ @spread.uri.should == 'http://kindai.ndl.go.jp/info:ndljp/pid/922693/5'
20
20
  end
21
21
 
22
22
  it 'has image_uri' do
23
- @spread.image_uri.should match /http:\/\/kindai.da.ndl.go.jp\/JPEG\/\w+\.jpg/
23
+ @spread.image_uri.should == "http://kindai.ndl.go.jp/view/jpegOutput?itemId=info%3Andljp%2Fpid%2F922693&contentNo=5&outputScale=1"
24
24
  end
25
25
 
26
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-23 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2179443420 !ruby/object:Gem::Requirement
16
+ requirement: &70335097974460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2179443420
24
+ version_requirements: *70335097974460
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rmagick
27
- requirement: &2179442720 !ruby/object:Gem::Requirement
27
+ requirement: &70335097973980 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2179442720
35
+ version_requirements: *70335097973980
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: zipruby
38
- requirement: &2179442240 !ruby/object:Gem::Requirement
38
+ requirement: &70335097973500 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2179442240
46
+ version_requirements: *70335097973500
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
- requirement: &2179441740 !ruby/object:Gem::Requirement
49
+ requirement: &70335097973020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2179441740
57
+ version_requirements: *70335097973020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &2179441040 !ruby/object:Gem::Requirement
60
+ requirement: &70335097972540 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.3.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2179441040
68
+ version_requirements: *70335097972540
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &2179440540 !ruby/object:Gem::Requirement
71
+ requirement: &70335097972060 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2179440540
79
+ version_requirements: *70335097972060
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &2179440060 !ruby/object:Gem::Requirement
82
+ requirement: &70335097971580 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 1.5.2
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2179440060
90
+ version_requirements: *70335097971580
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rcov
93
- requirement: &2179439340 !ruby/object:Gem::Requirement
93
+ requirement: &70335097971100 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,7 +98,7 @@ dependencies:
98
98
  version: '0'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *2179439340
101
+ version_requirements: *70335097971100
102
102
  description: kindai.rb is kindai digital library downloader.
103
103
  email: hitode909@gmail.com
104
104
  executables:
@@ -154,7 +154,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
154
154
  version: '0'
155
155
  segments:
156
156
  - 0
157
- hash: -1373734152558500671
157
+ hash: 1619067764939517805
158
158
  required_rubygems_version: !ruby/object:Gem::Requirement
159
159
  none: false
160
160
  requirements: