epub-parser 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,14 @@
1
1
  require 'epub/constants'
2
2
  require 'epub/ocf'
3
3
  require 'epub/ocf/physical_container'
4
+ require 'epub/parser/metadata'
4
5
  require 'nokogiri'
5
6
 
6
7
  module EPUB
7
8
  class Parser
8
9
  class OCF
9
10
  include Utils
11
+ include Metadata
10
12
 
11
13
  DIRECTORY = 'META-INF'
12
14
 
@@ -26,9 +28,7 @@ module EPUB
26
28
  begin
27
29
  data = @container.read(File.join(DIRECTORY, "#{m}.xml"))
28
30
  @ocf.__send__ "#{m}=", __send__("parse_#{m}", data)
29
- rescue EPUB::OCF::PhysicalContainer::NoEntry, ::Errno::ENOENT, OpenURI::HTTPError
30
- rescue => error
31
- raise error unless (Object.const_defined? :Zip and ::Zip.const_defined? :Error and error.kind_of? ::Zip::Error)
31
+ rescue EPUB::OCF::PhysicalContainer::NoEntry
32
32
  end
33
33
  end
34
34
 
@@ -59,7 +59,14 @@ module EPUB
59
59
  end
60
60
 
61
61
  def parse_metadata(content)
62
- warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
62
+ doc = Nokogiri.XML(content)
63
+ unless multiple_rendition_metadata?(doc)
64
+ warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
65
+ metadata = EPUB::OCF::UnknownFormatMetadata.new
66
+ metadata.content = content
67
+ return metadata
68
+ end
69
+ super(doc.root, doc.root['unique-identifier'], 'metadata')
63
70
  end
64
71
 
65
72
  def parse_rights(content)
@@ -69,6 +76,14 @@ module EPUB
69
76
  def parse_signatures(content)
70
77
  warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
71
78
  end
79
+
80
+ private
81
+
82
+ def multiple_rendition_metadata?(doc)
83
+ doc.root &&
84
+ doc.root.name == 'metadata' &&
85
+ doc.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
86
+ end
72
87
  end
73
88
  end
74
89
  end
@@ -2,79 +2,56 @@ require 'strscan'
2
2
  require 'nokogiri'
3
3
  require 'epub/publication'
4
4
  require 'epub/constants'
5
+ require 'epub/parser/metadata'
5
6
 
6
7
  module EPUB
7
8
  class Parser
8
9
  class Publication
9
10
  include Utils
11
+ include Metadata
10
12
 
11
13
  class << self
12
14
  def parse(container, file)
13
15
  opf = container.read(Addressable::URI.unencode(file))
14
16
 
15
- new(opf, file).parse
17
+ new(opf).parse
16
18
  end
17
19
  end
18
20
 
19
- def initialize(opf, rootfile)
20
- @package = EPUB::Publication::Package.new
21
- @rootfile = Addressable::URI.parse(rootfile)
21
+ def initialize(opf, rootfile=nil)
22
+ warn "Second argument for #{self.class}.new is deprecated" if rootfile
22
23
  @doc = Nokogiri.XML(opf)
23
24
  end
24
25
 
25
26
  def parse
26
- ([:package] + EPUB::Publication::Package::CONTENT_MODELS).each do |model|
27
- __send__ "parse_#{model}"
27
+ package = parse_package(@doc)
28
+ (EPUB::Publication::Package::CONTENT_MODELS - [:bindings]).each do |model|
29
+ package.__send__ "#{model}=", __send__("parse_#{model}", @doc)
28
30
  end
31
+ package.bindings = parse_bindings(@doc, package.manifest)
29
32
 
30
- @package
33
+ package
31
34
  end
32
35
 
33
- def parse_package
34
- elem = @doc.root
36
+ def parse_package(doc)
37
+ package = EPUB::Publication::Package.new
38
+ elem = doc.root
35
39
  %w[version xml:lang dir id].each do |attr|
36
- @package.__send__ "#{attr.gsub(/\:/, '_')}=", extract_attribute(elem, attr)
40
+ package.__send__ "#{attr.gsub(/\:/, '_')}=", extract_attribute(elem, attr)
37
41
  end
38
- @unique_identifier_id = elem['unique-identifier']
39
- @package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
40
- EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
42
+ package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
43
+ EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
41
44
 
42
- @package
45
+ package
43
46
  end
44
47
 
45
- def parse_metadata
46
- metadata = @package.metadata = EPUB::Publication::Package::Metadata.new
47
- elem = @doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first
48
- id_map = {}
49
-
50
- metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
51
- identifier.scheme = extract_attribute(e, 'scheme', 'opf')
52
- metadata.unique_identifier = identifier if identifier.id == @unique_identifier_id
53
- }
54
- metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
55
- metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
56
- %w[contributor coverage creator date description format publisher relation source subject type].each do |dcmes|
57
- metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
58
- end
59
- metadata.rights = extract_model(elem, id_map, './dc:rights')
60
- metadata.metas = extract_refinee(elem, id_map, './opf:meta', :Meta, %w[property id scheme])
61
- metadata.links = extract_refinee(elem, id_map, './opf:link', :Link, %w[id media-type]) {|link, e|
62
- link.href = extract_attribute(e, 'href')
63
- link.rel = Set.new(extract_attribute(e, 'rel').split(nil))
64
- }
65
-
66
- id_map.values.each do |hsh|
67
- next unless hsh[:refiners]
68
- next unless hsh[:metadata]
69
- hsh[:refiners].each {|meta| meta.refines = hsh[:metadata]}
70
- end
71
-
72
- metadata
48
+ def parse_metadata(doc)
49
+ super(doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root['unique-identifier'], 'opf')
73
50
  end
74
51
 
75
- def parse_manifest
76
- manifest = @package.manifest = EPUB::Publication::Package::Manifest.new
77
- elem = @doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
52
+ def parse_manifest(doc)
53
+ manifest = EPUB::Publication::Package::Manifest.new
54
+ elem = doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
78
55
  manifest.id = extract_attribute(elem, 'id')
79
56
 
80
57
  fallback_map = {}
@@ -97,9 +74,9 @@ module EPUB
97
74
  manifest
98
75
  end
99
76
 
100
- def parse_spine
101
- spine = @package.spine = EPUB::Publication::Package::Spine.new
102
- elem = @doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
77
+ def parse_spine(doc)
78
+ spine = EPUB::Publication::Package::Spine.new
79
+ elem = doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
103
80
  %w[id toc page-progression-direction].each do |attr|
104
81
  spine.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(elem, attr)
105
82
  end
@@ -118,9 +95,9 @@ module EPUB
118
95
  spine
119
96
  end
120
97
 
121
- def parse_guide
122
- guide = @package.guide = EPUB::Publication::Package::Guide.new
123
- @doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES).each do |ref|
98
+ def parse_guide(doc)
99
+ guide = EPUB::Publication::Package::Guide.new
100
+ doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES).each do |ref|
124
101
  reference = EPUB::Publication::Package::Guide::Reference.new
125
102
  %w[type title].each do |attr|
126
103
  reference.__send__ "#{attr}=", extract_attribute(ref, attr)
@@ -132,12 +109,12 @@ module EPUB
132
109
  guide
133
110
  end
134
111
 
135
- def parse_bindings
136
- bindings = @package.bindings = EPUB::Publication::Package::Bindings.new
137
- @doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
112
+ def parse_bindings(doc, handler_map)
113
+ bindings = EPUB::Publication::Package::Bindings.new
114
+ doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
138
115
  media_type = EPUB::Publication::Package::Bindings::MediaType.new
139
116
  media_type.media_type = extract_attribute(elem, 'media-type')
140
- media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
117
+ media_type.handler = handler_map[extract_attribute(elem, 'handler')]
141
118
  bindings << media_type
142
119
  end
143
120
 
@@ -161,39 +138,6 @@ module EPUB
161
138
  end
162
139
  prefixes
163
140
  end
164
-
165
- def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
166
- models = elem.xpath(xpath, EPUB::NAMESPACES).collect do |e|
167
- model = EPUB::Publication::Package::Metadata.const_get(klass).new
168
- attributes.each do |attr|
169
- model.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
170
- end
171
- model.content = e.content unless klass == :Link
172
-
173
- yield model, e if block_given?
174
-
175
- model
176
- end
177
-
178
- models.each do |model|
179
- id_map[model.id] = {metadata: model} if model.respond_to?(:id) && model.id
180
- end
181
-
182
- models
183
- end
184
-
185
- def extract_refinee(elem, id_map, xpath, klass, attributes)
186
- extract_model(elem, id_map, xpath, klass, attributes) {|model, e|
187
- yield model, e if block_given?
188
- refines = extract_attribute(e, 'refines')
189
- if refines && refines[0] == '#'
190
- id = refines[1..-1]
191
- id_map[id] ||= {}
192
- id_map[id][:refiners] ||= []
193
- id_map[id][:refiners] << model
194
- end
195
- }
196
- end
197
141
  end
198
142
  end
199
143
  end
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.2.5"
3
+ VERSION = "0.2.6"
4
4
  end
5
5
  end
@@ -1,174 +1,9 @@
1
- require 'set'
1
+ require 'epub/metadata'
2
2
 
3
3
  module EPUB
4
4
  module Publication
5
5
  class Package
6
- class Metadata
7
- include Inspector::PublicationModel
8
-
9
- DC_ELEMS = [:identifiers, :titles, :languages] +
10
- [:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
11
- :relations, :rights, :sources, :subjects, :types]
12
- attr_accessor :package, :unique_identifier, :metas, :links,
13
- *(DC_ELEMS.collect {|elem| "dc_#{elem}"})
14
- DC_ELEMS.each do |elem|
15
- alias_method elem, "dc_#{elem}"
16
- alias_method "#{elem}=", "dc_#{elem}="
17
- end
18
-
19
- def initialize
20
- (DC_ELEMS + [:metas, :links]).each do |elem|
21
- __send__ "#{elem}=", []
22
- end
23
- end
24
-
25
- def title
26
- return extended_title unless extended_title.empty?
27
- compositted = titles.select {|title| title.display_seq}.sort.join("\n")
28
- return compositted unless compositted.empty?
29
- return main_title unless main_title.empty?
30
- titles.sort.join("\n")
31
- end
32
-
33
- %w[main short collection edition extended].each do |type|
34
- define_method "#{type}_title" do
35
- titles.select {|title| title.title_type.to_s == type}.sort.join(' ')
36
- end
37
- end
38
-
39
- def subtitle
40
- titles.select {|title| title.title_type.to_s == 'subtitle'}.sort.join(' ')
41
- end
42
-
43
- def description
44
- descriptions.join(' ')
45
- end
46
-
47
- def date
48
- dates.first
49
- end
50
-
51
- def language
52
- languages.first
53
- end
54
-
55
- def modified
56
- metas.find {|meta| meta.property == 'dcterms:modified'}
57
- end
58
-
59
- def to_h
60
- DC_ELEMS.inject({}) do |hsh, elem|
61
- hsh[elem] = __send__(elem)
62
- hsh
63
- end
64
- end
65
-
66
- def primary_metas
67
- metas.select {|meta| meta.primary_expression?}
68
- end
69
-
70
- module Refinee
71
- PROPERTIES = %w[alternate-script display-seq file-as group-position identifier-type meta-auth role title-type]
72
-
73
- attr_writer :refiners
74
-
75
- def refiners
76
- @refiners ||= Set.new
77
- end
78
-
79
- PROPERTIES.each do |voc|
80
- met = voc.gsub(/-/, '_')
81
- attr_writer met
82
- define_method met do
83
- refiners.find {|refiner| refiner.property == voc}
84
- end
85
- end
86
- end
87
-
88
- class DCMES
89
- include Refinee
90
-
91
- attr_accessor :content, :id, :lang, :dir
92
-
93
- def to_s
94
- content.to_s
95
- end
96
- end
97
-
98
- class Identifier < DCMES
99
- # @note This is ad-hoc
100
- # @todo Define and include OPF module for opf:scheme attribute
101
- # @todo Define general way to handle with identifier-type refiners
102
- attr_accessor :scheme
103
-
104
- # @note This is ad-hoc
105
- # @todo Define and include OPF module for opf:scheme attribute
106
- # @todo Define general way to handle with identifier-type refiners
107
- def isbn?
108
- scheme == 'ISBN' or
109
- content.to_s.downcase.start_with? 'urn:isbn' or
110
- refiners.any? {|refiner|
111
- refiner.property == 'identifier-type' and
112
- refiner.scheme == 'onix:codelist5' and
113
- %w[02 15].include? refiner.content
114
- }
115
- end
116
- end
117
-
118
- class Title < DCMES
119
- include Comparable
120
-
121
- def <=>(other)
122
- return 1 if other.display_seq.nil?
123
- return -1 if display_seq.nil?
124
- display_seq.to_s.to_i <=> other.display_seq.to_s.to_i
125
- end
126
- end
127
-
128
- class Meta
129
- include Refinee
130
-
131
- attr_accessor :property, :id, :scheme, :content
132
- attr_reader :refines
133
-
134
- def refines=(refinee)
135
- refinee.refiners << self
136
- @refines = refinee
137
- end
138
-
139
- def refines?
140
- ! refines.nil?
141
- end
142
- alias subexpression? refines?
143
-
144
- def primary_expression?
145
- ! subexpression?
146
- end
147
-
148
- def inspect
149
- ivs = instance_variables.map {|iv|
150
- [iv, instance_variable_get(iv).inspect].join('=')
151
- }.join(' ')
152
- '<#%s:%#0x %s>' % [self.class, __id__, ivs]
153
- end
154
-
155
- def to_s
156
- content.to_s
157
- end
158
- end
159
-
160
- class Link
161
- include Refinee
162
-
163
- attr_accessor :href, :rel, :id, :media_type
164
- attr_reader :refines
165
-
166
- def refines=(refinee)
167
- refinee.refiners << self
168
- @refines = refinee
169
- end
170
- end
171
- end
6
+ Metadata = EPUB::Metadata
172
7
  end
173
8
  end
174
9
  end
@@ -0,0 +1,6 @@
1
+ <metadata xmlns="http://www.idpf.org/2013/metadata"
2
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
3
+ unique-identifier="pub-id">
4
+ <dc:identifier id="pub-id">urn:uuid:A1B0D67E-2E81-4DF5-9E67-A64CBE366809</dc:identifier>
5
+ <meta property="dcterms:modified">2011-01-01T12:00:00Z</meta>
6
+ </metadata>
data/test/helper.rb CHANGED
@@ -8,4 +8,7 @@ require 'test/unit'
8
8
  require 'test/unit/rr'
9
9
  require 'test/unit/notify'
10
10
  require 'pry'
11
+ require 'pretty_backtrace'
12
+ PrettyBacktrace.enable
13
+
11
14
  require 'epub/parser'
data/test/test_epub.rb CHANGED
@@ -17,5 +17,9 @@ class TestEUPB < Test::Unit::TestCase
17
17
  assert_kind_of EPUB::Publication::Package::Manifest::Item, entry
18
18
  end
19
19
  end
20
- end
21
20
 
21
+ def test_book_has_multiple_packages
22
+ book = EPUB::Parser.parse(@file)
23
+ assert_instance_of Array, book.packages
24
+ end
25
+ end