kindai 1.9.0 → 2.0.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.9.0
1
+ 2.0.0
data/kindai.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{kindai}
8
- s.version = "1.9.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{hitode909}]
12
- s.date = %q{2011-12-23}
12
+ s.date = %q{2012-05-19}
13
13
  s.description = %q{kindai.rb is kindai digital library downloader.}
14
14
  s.email = %q{hitode909@gmail.com}
15
15
  s.executables = [%q{kindai.rb}]
data/lib/kindai/book.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  module Kindai
3
3
  class Book
4
- attr_accessor :permalink_uri
5
- attr_accessor :trimming
6
4
 
7
5
  # ----- constructor -----
8
- def self.new_from_permalink(permalink_uri, trimming = {})
9
- raise "not info:ndljp" unless permalink_uri.match(/info\:ndljp/)
6
+ def self.new_from_permalink(permalink_uri)
7
+ raise "not info:ndljp: #{permalink_uri}" unless permalink_uri.match(/info\:ndljp/)
10
8
  me = new
11
- me.permalink_uri = permalink_uri
12
- me.trimming = trimming
9
+ me.instance_eval {
10
+ @permalink_uri = permalink_uri
11
+ }
13
12
  return me
14
13
  end
15
14
 
@@ -19,52 +18,58 @@ module Kindai
19
18
  return self.new_from_permalink(permalink)
20
19
  end
21
20
 
21
+ def self.new_from_search_result_uri(search_result_uri)
22
+ raise "not iss.ndl.go.jp: #{search_result_uri}" unless search_result_uri.match(/iss\.ndl\.go\.jp/)
23
+ me = new
24
+ me.instance_eval {
25
+ @search_result_uri = search_result_uri
26
+ }
27
+ me
28
+ end
29
+
22
30
  # ----- metadata -----
23
31
 
32
+ def permalink_uri
33
+ @permalink_uri ||=
34
+ begin
35
+ get_permalink_from_search_result_uri
36
+ end
37
+ end
38
+
24
39
  def key
25
40
  permalink_uri.match(/\d+$/)[0]
26
41
  end
27
42
 
28
43
  def title
29
- title_container = control_page.at('.titlehead')
30
- subtitle_container = control_page.at('.headmenu')
31
- title_string = title_container.content.strip
32
- title_string += subtitle_container.content.strip if subtitle_container
33
- title_string
44
+ main = metadata_like 'title'
45
+
46
+ sub = metadata_like('volumeTranscription').to_i.to_s rescue nil
47
+ sub ? main + sub : main
34
48
  end
35
49
 
36
50
  def author
37
- metadata['著者標目']
51
+ metadata_like 'creator:NDLNH'
38
52
  end
39
53
 
40
54
  def total_spread
41
- self.spread_at(1).page.search('select#dlPages option').length
55
+ permalink_page.search('#sel-content-no option').length
42
56
  end
43
57
 
44
58
  def spreads
45
59
  @spreads ||= 1.upto(self.total_spread).map{|i| self.spread_at(i) }
46
60
  end
47
61
 
48
- def base_uri
49
- @base_uri ||=
50
- begin
51
- Kindai::Util.logger.debug "fetch permalink page"
52
- page_uri = URI.parse(permalink_uri) + permalink_page.at('frame[name="W_BODY"]')['src']
53
-
54
- page_base_uri = Kindai::Util.get_redirected_uri page_uri.to_s
55
- uri = page_base_uri.to_s + '&vs=10000,10000,0,0,0,0,0,0'
56
- unless self.trimming.keys.empty?
57
- %w{x y w h resize_w resize_h}.map(&:to_sym).each{ |key|
58
- self.trimming[key] ||= 0
59
- }
60
- uri += "&ref=" + [self.trimming[:x], self.trimming[:y], self.trimming[:w], self.trimming[:h], self.trimming[:resize_w], self.trimming[:resize_h], 0, 0].join(',')
61
- end
62
- uri
63
- end
64
- end
65
-
66
62
  protected
67
63
 
64
+ def metadata_like query
65
+ query_regexp = Regexp.new(Regexp.quote("(#{ query })"))
66
+ key = metadata.keys.find{ |key|
67
+ key =~ query_regexp
68
+ }
69
+ raise "metadata like #{query} not found" unless key
70
+ metadata[key]
71
+ end
72
+
68
73
  def spread_at(spread_number)
69
74
  Kindai::Spread.new_from_book_and_spread_number(self, spread_number)
70
75
  end
@@ -72,14 +77,12 @@ module Kindai
72
77
  def metadata
73
78
  @metadata ||=
74
79
  begin
75
- metadata_table = detail_page.search('table').find{ |table|
76
- table.at('td').text == 'タイトル'
80
+ dts = permalink_page.search('dl.detail-metadata-list dt').map{ |tag| tag.text }
81
+ dds = permalink_page.search('dl.detail-metadata-list dd').map{ |tag| tag.text }
82
+ dts.zip(dds).inject({ }) { |table, tupple|
83
+ table[tupple.first.strip] = tupple.last.strip
84
+ table
77
85
  }
78
- metadata_table.search('tr').inject({ }) { |prev, tr|
79
- key, _, value = *tr.search('td').map{ |elem| elem.text }
80
- prev[key] = value
81
- prev
82
- }
83
86
  end
84
87
  end
85
88
 
@@ -93,35 +96,11 @@ module Kindai
93
96
  end
94
97
  end
95
98
 
96
- def detail_uri
97
- root = URI.parse('http://kindai.ndl.go.jp/BIBibDetail.php')
98
- params = { }
99
- control_page.search('input').each{ |input|
100
- params[input['name']] = input['value'] if input['value']
101
- }
102
- path = '?' + params.each_pair.map{ |k, v| [URI.escape(k), URI.escape(v)].join('=')}.join('&')
103
- root + path
104
- end
105
-
106
- def detail_page
107
- @detail_page ||=
108
- begin
109
- Kindai::Util.logger.debug "fetch detail page"
110
- page = Kindai::Util.fetch_uri detail_uri rescue Kindai::Util.fetch_uri URI.escape(detail_uri)
111
- Nokogiri page
112
- end
113
- end
114
-
115
- def control_uri
116
- URI.parse(permalink_uri) + permalink_page.at('frame[name="W_CONTROL"]')['src']
117
- end
118
-
119
- def control_page
120
- @control_page ||=
121
- begin
122
- Kindai::Util.logger.debug "fetch permalink page"
123
- Nokogiri Kindai::Util.fetch_uri control_uri
124
- end
99
+ def get_permalink_from_search_result_uri
100
+ "search_result_uri is required" unless @search_result_uri
101
+ page = Nokogiri Kindai::Util.fetch_uri @search_result_uri
102
+ a = page.at "#reviewsites a[href^='http://kindai.da.ndl.go.jp/info:ndljp/pid/']"
103
+ a['href']
125
104
  end
126
105
 
127
106
  end
@@ -64,9 +64,14 @@ module Kindai
64
64
  end
65
65
 
66
66
  def download_spreads
67
- self.spread_downloaders.each{|dl|
67
+ is_first = true
68
+ self.spread_downloaders.each{ |dl|
69
+ next if dl.has_file?
70
+ sleep 30 unless is_first
71
+ is_first = false
68
72
  dl.download
69
73
  }
74
+
70
75
  return true
71
76
  end
72
77
  end
@@ -20,7 +20,7 @@ module Kindai
20
20
  uris = result_for(@keyword, page)
21
21
  return if uris.empty?
22
22
  uris.each{ |uri|
23
- yield Kindai::Book.new_from_permalink(uri)
23
+ yield Kindai::Book.new_from_search_result_uri(uri)
24
24
  }
25
25
  }
26
26
  end
@@ -42,9 +42,9 @@ module Kindai
42
42
  end
43
43
 
44
44
  def uri_for keyword, page = 0
45
- count = 10
45
+ count = 100
46
46
  params = { :any => keyword, :dpid => 'kindai', :idx => page * count + 1, :cnt => count}
47
- root = URI.parse("http://api.porta.ndl.go.jp/servicedp/opensearch")
47
+ root = URI.parse("http://iss.ndl.go.jp/api/opensearch")
48
48
  path = '?' + Kindai::Util.expand_params(params)
49
49
  root + path
50
50
  end
data/lib/kindai/spread.rb CHANGED
@@ -13,20 +13,16 @@ module Kindai
13
13
  end
14
14
 
15
15
  def uri
16
- book.base_uri.gsub(/koma=(\d+)/) { "koma=#{spread_number}" }
16
+ "#{book.permalink_uri}/#{spread_number.to_s}"
17
17
  end
18
18
 
19
19
  def image_uri
20
- image = page.at("img#imMain")
21
- raise "not exists" unless image
22
- image['src']
23
- end
24
-
25
-
26
- def has_local_file?
27
- end
28
-
29
- def local_file_path
20
+ params = {
21
+ :itemId => "info:ndljp/pid/#{book.key}",
22
+ :contentNo => spread_number,
23
+ :outputScale => 1,
24
+ }
25
+ "http://kindai.ndl.go.jp/view/jpegOutput?" + Kindai::Util.expand_params(params)
30
26
  end
31
27
 
32
28
  # protected
@@ -56,7 +56,7 @@ module Kindai
56
56
 
57
57
  Kindai::Util.logger.info "sleep and retry"
58
58
  failed_count += 1
59
- sleep 10
59
+ sleep 30
60
60
  retry
61
61
  end
62
62
  end
data/lib/kindai/util.rb CHANGED
@@ -54,7 +54,7 @@ module Kindai::Util
54
54
  # input: {:a => 'a', :b => 'bbb'}
55
55
  # output: 'a=a&b=bbb
56
56
  def self.expand_params(params)
57
- params.each_pair.map{ |k, v| [URI.escape(k.to_s), URI.escape(v.to_s)].join('=')}.join('&')
57
+ URI.encode_www_form(params)
58
58
  end
59
59
 
60
60
  def self.append_suffix(path, suffix)
data/spec/book_spec.rb CHANGED
@@ -1,6 +1,24 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
+ describe Kindai::Book, 'from search result uri' do
5
+ before do
6
+ @book = Kindai::Book.new_from_search_result_uri('http://iss.ndl.go.jp/books/R000000008-I000162417-00')
7
+ end
8
+
9
+ it 'is a book' do
10
+ @book.should be_kind_of Kindai::Book
11
+ end
12
+
13
+ it 'has permalink' do
14
+ @book.permalink_uri.should == 'http://kindai.da.ndl.go.jp/info:ndljp/pid/922693'
15
+ end
16
+
17
+ it 'has key' do
18
+ @book.key.should == "922693"
19
+ end
20
+ end
21
+
4
22
  describe Kindai::Book do
5
23
  before do
6
24
  @book = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693')
@@ -26,10 +44,6 @@ describe Kindai::Book do
26
44
  @book.spreads.should have_exactly(@book.total_spread).spreads
27
45
  end
28
46
 
29
- it 'has base_uri' do
30
- @book.base_uri.should == "http://kindai.da.ndl.go.jp/scrpt/ndlimageviewer-rgc.aspx?pid=info%3Andljp%2Fpid%2F922693&jp=42016454&vol=10010&koma=1&vs=10000,10000,0,0,0,0,0,0"
31
- end
32
-
33
47
  end
34
48
 
35
49
  describe Kindai::Book, 'with series' do
@@ -38,33 +52,13 @@ describe Kindai::Book, 'with series' do
38
52
  end
39
53
 
40
54
  it 'has title' do
41
- @book.title.should == '講談日露戦争記[第3冊]第3'
55
+ @book.title.should == '講談日露戦争記3'
42
56
  end
43
57
  end
44
58
 
45
- describe Kindai::Book, 'with trimming' do
46
- before do
47
-
48
- @trimming = {:x => 342, :y => 190, :w => 2829, :h => 2485, :resize_w => 900, :resize_h => 900}
49
- @book = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693', @trimming)
50
- @book_normal = Kindai::Book.new_from_permalink('http://kindai.ndl.go.jp/info:ndljp/pid/922693')
51
- end
52
-
53
- it 'has trimming' do
54
- @book.trimming.should be_kind_of Hash
55
- @book.trimming.should == @trimming
59
+ describe Kindai::Book, 'from without any uri' do
60
+ it 'will die' do
61
+ empty_book = Kindai::Book.new
62
+ lambda { empty_book.permalink_uri }.should raise_error Exception
56
63
  end
57
-
58
- it 'has base uri including trimming info' do
59
- @book.base_uri.should match(/2485/)
60
- end
61
-
62
- it 'has different image uri' do
63
- @book.spreads.first.uri.should_not == @book_normal.spreads.first.uri
64
- end
65
-
66
64
  end
67
-
68
-
69
-
70
-
data/spec/spread_spec.rb CHANGED
@@ -16,11 +16,11 @@ describe Kindai::Spread do
16
16
  end
17
17
 
18
18
  it 'has uri' do
19
- @spread.uri.should == "http://kindai.da.ndl.go.jp/scrpt/ndlimageviewer-rgc.aspx?pid=info%3Andljp%2Fpid%2F922693&jp=42016454&vol=10010&koma=5&vs=10000,10000,0,0,0,0,0,0"
19
+ @spread.uri.should == 'http://kindai.ndl.go.jp/info:ndljp/pid/922693/5'
20
20
  end
21
21
 
22
22
  it 'has image_uri' do
23
- @spread.image_uri.should match /http:\/\/kindai.da.ndl.go.jp\/JPEG\/\w+\.jpg/
23
+ @spread.image_uri.should == "http://kindai.ndl.go.jp/view/jpegOutput?itemId=info%3Andljp%2Fpid%2F922693&contentNo=5&outputScale=1"
24
24
  end
25
25
 
26
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kindai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-23 00:00:00.000000000Z
12
+ date: 2012-05-19 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2179443420 !ruby/object:Gem::Requirement
16
+ requirement: &70335097974460 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2179443420
24
+ version_requirements: *70335097974460
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rmagick
27
- requirement: &2179442720 !ruby/object:Gem::Requirement
27
+ requirement: &70335097973980 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2179442720
35
+ version_requirements: *70335097973980
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: zipruby
38
- requirement: &2179442240 !ruby/object:Gem::Requirement
38
+ requirement: &70335097973500 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *2179442240
46
+ version_requirements: *70335097973500
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: json
49
- requirement: &2179441740 !ruby/object:Gem::Requirement
49
+ requirement: &70335097973020 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '1.4'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *2179441740
57
+ version_requirements: *70335097973020
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &2179441040 !ruby/object:Gem::Requirement
60
+ requirement: &70335097972540 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.3.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2179441040
68
+ version_requirements: *70335097972540
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &2179440540 !ruby/object:Gem::Requirement
71
+ requirement: &70335097972060 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2179440540
79
+ version_requirements: *70335097972060
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &2179440060 !ruby/object:Gem::Requirement
82
+ requirement: &70335097971580 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 1.5.2
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2179440060
90
+ version_requirements: *70335097971580
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rcov
93
- requirement: &2179439340 !ruby/object:Gem::Requirement
93
+ requirement: &70335097971100 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,7 +98,7 @@ dependencies:
98
98
  version: '0'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *2179439340
101
+ version_requirements: *70335097971100
102
102
  description: kindai.rb is kindai digital library downloader.
103
103
  email: hitode909@gmail.com
104
104
  executables:
@@ -154,7 +154,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
154
154
  version: '0'
155
155
  segments:
156
156
  - 0
157
- hash: -1373734152558500671
157
+ hash: 1619067764939517805
158
158
  required_rubygems_version: !ruby/object:Gem::Requirement
159
159
  none: false
160
160
  requirements: