epub-parser 0.2.5 → 0.2.6

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,12 +1,14 @@
1
1
  require 'epub/constants'
2
2
  require 'epub/ocf'
3
3
  require 'epub/ocf/physical_container'
4
+ require 'epub/parser/metadata'
4
5
  require 'nokogiri'
5
6
 
6
7
  module EPUB
7
8
  class Parser
8
9
  class OCF
9
10
  include Utils
11
+ include Metadata
10
12
 
11
13
  DIRECTORY = 'META-INF'
12
14
 
@@ -26,9 +28,7 @@ module EPUB
26
28
  begin
27
29
  data = @container.read(File.join(DIRECTORY, "#{m}.xml"))
28
30
  @ocf.__send__ "#{m}=", __send__("parse_#{m}", data)
29
- rescue EPUB::OCF::PhysicalContainer::NoEntry, ::Errno::ENOENT, OpenURI::HTTPError
30
- rescue => error
31
- raise error unless (Object.const_defined? :Zip and ::Zip.const_defined? :Error and error.kind_of? ::Zip::Error)
31
+ rescue EPUB::OCF::PhysicalContainer::NoEntry
32
32
  end
33
33
  end
34
34
 
@@ -59,7 +59,14 @@ module EPUB
59
59
  end
60
60
 
61
61
  def parse_metadata(content)
62
- warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
62
+ doc = Nokogiri.XML(content)
63
+ unless multiple_rendition_metadata?(doc)
64
+ warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
65
+ metadata = EPUB::OCF::UnknownFormatMetadata.new
66
+ metadata.content = content
67
+ return metadata
68
+ end
69
+ super(doc.root, doc.root['unique-identifier'], 'metadata')
63
70
  end
64
71
 
65
72
  def parse_rights(content)
@@ -69,6 +76,14 @@ module EPUB
69
76
  def parse_signatures(content)
70
77
  warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
71
78
  end
79
+
80
+ private
81
+
82
+ def multiple_rendition_metadata?(doc)
83
+ doc.root &&
84
+ doc.root.name == 'metadata' &&
85
+ doc.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
86
+ end
72
87
  end
73
88
  end
74
89
  end
@@ -2,79 +2,56 @@ require 'strscan'
2
2
  require 'nokogiri'
3
3
  require 'epub/publication'
4
4
  require 'epub/constants'
5
+ require 'epub/parser/metadata'
5
6
 
6
7
  module EPUB
7
8
  class Parser
8
9
  class Publication
9
10
  include Utils
11
+ include Metadata
10
12
 
11
13
  class << self
12
14
  def parse(container, file)
13
15
  opf = container.read(Addressable::URI.unencode(file))
14
16
 
15
- new(opf, file).parse
17
+ new(opf).parse
16
18
  end
17
19
  end
18
20
 
19
- def initialize(opf, rootfile)
20
- @package = EPUB::Publication::Package.new
21
- @rootfile = Addressable::URI.parse(rootfile)
21
+ def initialize(opf, rootfile=nil)
22
+ warn "Second argument for #{self.class}.new is deprecated" if rootfile
22
23
  @doc = Nokogiri.XML(opf)
23
24
  end
24
25
 
25
26
  def parse
26
- ([:package] + EPUB::Publication::Package::CONTENT_MODELS).each do |model|
27
- __send__ "parse_#{model}"
27
+ package = parse_package(@doc)
28
+ (EPUB::Publication::Package::CONTENT_MODELS - [:bindings]).each do |model|
29
+ package.__send__ "#{model}=", __send__("parse_#{model}", @doc)
28
30
  end
31
+ package.bindings = parse_bindings(@doc, package.manifest)
29
32
 
30
- @package
33
+ package
31
34
  end
32
35
 
33
- def parse_package
34
- elem = @doc.root
36
+ def parse_package(doc)
37
+ package = EPUB::Publication::Package.new
38
+ elem = doc.root
35
39
  %w[version xml:lang dir id].each do |attr|
36
- @package.__send__ "#{attr.gsub(/\:/, '_')}=", extract_attribute(elem, attr)
40
+ package.__send__ "#{attr.gsub(/\:/, '_')}=", extract_attribute(elem, attr)
37
41
  end
38
- @unique_identifier_id = elem['unique-identifier']
39
- @package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
40
- EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
42
+ package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
43
+ EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
41
44
 
42
- @package
45
+ package
43
46
  end
44
47
 
45
- def parse_metadata
46
- metadata = @package.metadata = EPUB::Publication::Package::Metadata.new
47
- elem = @doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first
48
- id_map = {}
49
-
50
- metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
51
- identifier.scheme = extract_attribute(e, 'scheme', 'opf')
52
- metadata.unique_identifier = identifier if identifier.id == @unique_identifier_id
53
- }
54
- metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
55
- metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
56
- %w[contributor coverage creator date description format publisher relation source subject type].each do |dcmes|
57
- metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
58
- end
59
- metadata.rights = extract_model(elem, id_map, './dc:rights')
60
- metadata.metas = extract_refinee(elem, id_map, './opf:meta', :Meta, %w[property id scheme])
61
- metadata.links = extract_refinee(elem, id_map, './opf:link', :Link, %w[id media-type]) {|link, e|
62
- link.href = extract_attribute(e, 'href')
63
- link.rel = Set.new(extract_attribute(e, 'rel').split(nil))
64
- }
65
-
66
- id_map.values.each do |hsh|
67
- next unless hsh[:refiners]
68
- next unless hsh[:metadata]
69
- hsh[:refiners].each {|meta| meta.refines = hsh[:metadata]}
70
- end
71
-
72
- metadata
48
+ def parse_metadata(doc)
49
+ super(doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root['unique-identifier'], 'opf')
73
50
  end
74
51
 
75
- def parse_manifest
76
- manifest = @package.manifest = EPUB::Publication::Package::Manifest.new
77
- elem = @doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
52
+ def parse_manifest(doc)
53
+ manifest = EPUB::Publication::Package::Manifest.new
54
+ elem = doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
78
55
  manifest.id = extract_attribute(elem, 'id')
79
56
 
80
57
  fallback_map = {}
@@ -97,9 +74,9 @@ module EPUB
97
74
  manifest
98
75
  end
99
76
 
100
- def parse_spine
101
- spine = @package.spine = EPUB::Publication::Package::Spine.new
102
- elem = @doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
77
+ def parse_spine(doc)
78
+ spine = EPUB::Publication::Package::Spine.new
79
+ elem = doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
103
80
  %w[id toc page-progression-direction].each do |attr|
104
81
  spine.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(elem, attr)
105
82
  end
@@ -118,9 +95,9 @@ module EPUB
118
95
  spine
119
96
  end
120
97
 
121
- def parse_guide
122
- guide = @package.guide = EPUB::Publication::Package::Guide.new
123
- @doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES).each do |ref|
98
+ def parse_guide(doc)
99
+ guide = EPUB::Publication::Package::Guide.new
100
+ doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES).each do |ref|
124
101
  reference = EPUB::Publication::Package::Guide::Reference.new
125
102
  %w[type title].each do |attr|
126
103
  reference.__send__ "#{attr}=", extract_attribute(ref, attr)
@@ -132,12 +109,12 @@ module EPUB
132
109
  guide
133
110
  end
134
111
 
135
- def parse_bindings
136
- bindings = @package.bindings = EPUB::Publication::Package::Bindings.new
137
- @doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
112
+ def parse_bindings(doc, handler_map)
113
+ bindings = EPUB::Publication::Package::Bindings.new
114
+ doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
138
115
  media_type = EPUB::Publication::Package::Bindings::MediaType.new
139
116
  media_type.media_type = extract_attribute(elem, 'media-type')
140
- media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
117
+ media_type.handler = handler_map[extract_attribute(elem, 'handler')]
141
118
  bindings << media_type
142
119
  end
143
120
 
@@ -161,39 +138,6 @@ module EPUB
161
138
  end
162
139
  prefixes
163
140
  end
164
-
165
- def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
166
- models = elem.xpath(xpath, EPUB::NAMESPACES).collect do |e|
167
- model = EPUB::Publication::Package::Metadata.const_get(klass).new
168
- attributes.each do |attr|
169
- model.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
170
- end
171
- model.content = e.content unless klass == :Link
172
-
173
- yield model, e if block_given?
174
-
175
- model
176
- end
177
-
178
- models.each do |model|
179
- id_map[model.id] = {metadata: model} if model.respond_to?(:id) && model.id
180
- end
181
-
182
- models
183
- end
184
-
185
- def extract_refinee(elem, id_map, xpath, klass, attributes)
186
- extract_model(elem, id_map, xpath, klass, attributes) {|model, e|
187
- yield model, e if block_given?
188
- refines = extract_attribute(e, 'refines')
189
- if refines && refines[0] == '#'
190
- id = refines[1..-1]
191
- id_map[id] ||= {}
192
- id_map[id][:refiners] ||= []
193
- id_map[id][:refiners] << model
194
- end
195
- }
196
- end
197
141
  end
198
142
  end
199
143
  end
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.2.5"
3
+ VERSION = "0.2.6"
4
4
  end
5
5
  end
@@ -1,174 +1,9 @@
1
- require 'set'
1
+ require 'epub/metadata'
2
2
 
3
3
  module EPUB
4
4
  module Publication
5
5
  class Package
6
- class Metadata
7
- include Inspector::PublicationModel
8
-
9
- DC_ELEMS = [:identifiers, :titles, :languages] +
10
- [:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
11
- :relations, :rights, :sources, :subjects, :types]
12
- attr_accessor :package, :unique_identifier, :metas, :links,
13
- *(DC_ELEMS.collect {|elem| "dc_#{elem}"})
14
- DC_ELEMS.each do |elem|
15
- alias_method elem, "dc_#{elem}"
16
- alias_method "#{elem}=", "dc_#{elem}="
17
- end
18
-
19
- def initialize
20
- (DC_ELEMS + [:metas, :links]).each do |elem|
21
- __send__ "#{elem}=", []
22
- end
23
- end
24
-
25
- def title
26
- return extended_title unless extended_title.empty?
27
- compositted = titles.select {|title| title.display_seq}.sort.join("\n")
28
- return compositted unless compositted.empty?
29
- return main_title unless main_title.empty?
30
- titles.sort.join("\n")
31
- end
32
-
33
- %w[main short collection edition extended].each do |type|
34
- define_method "#{type}_title" do
35
- titles.select {|title| title.title_type.to_s == type}.sort.join(' ')
36
- end
37
- end
38
-
39
- def subtitle
40
- titles.select {|title| title.title_type.to_s == 'subtitle'}.sort.join(' ')
41
- end
42
-
43
- def description
44
- descriptions.join(' ')
45
- end
46
-
47
- def date
48
- dates.first
49
- end
50
-
51
- def language
52
- languages.first
53
- end
54
-
55
- def modified
56
- metas.find {|meta| meta.property == 'dcterms:modified'}
57
- end
58
-
59
- def to_h
60
- DC_ELEMS.inject({}) do |hsh, elem|
61
- hsh[elem] = __send__(elem)
62
- hsh
63
- end
64
- end
65
-
66
- def primary_metas
67
- metas.select {|meta| meta.primary_expression?}
68
- end
69
-
70
- module Refinee
71
- PROPERTIES = %w[alternate-script display-seq file-as group-position identifier-type meta-auth role title-type]
72
-
73
- attr_writer :refiners
74
-
75
- def refiners
76
- @refiners ||= Set.new
77
- end
78
-
79
- PROPERTIES.each do |voc|
80
- met = voc.gsub(/-/, '_')
81
- attr_writer met
82
- define_method met do
83
- refiners.find {|refiner| refiner.property == voc}
84
- end
85
- end
86
- end
87
-
88
- class DCMES
89
- include Refinee
90
-
91
- attr_accessor :content, :id, :lang, :dir
92
-
93
- def to_s
94
- content.to_s
95
- end
96
- end
97
-
98
- class Identifier < DCMES
99
- # @note This is ad-hoc
100
- # @todo Define and include OPF module for opf:scheme attribute
101
- # @todo Define general way to handle with identifier-type refiners
102
- attr_accessor :scheme
103
-
104
- # @note This is ad-hoc
105
- # @todo Define and include OPF module for opf:scheme attribute
106
- # @todo Define general way to handle with identifier-type refiners
107
- def isbn?
108
- scheme == 'ISBN' or
109
- content.to_s.downcase.start_with? 'urn:isbn' or
110
- refiners.any? {|refiner|
111
- refiner.property == 'identifier-type' and
112
- refiner.scheme == 'onix:codelist5' and
113
- %w[02 15].include? refiner.content
114
- }
115
- end
116
- end
117
-
118
- class Title < DCMES
119
- include Comparable
120
-
121
- def <=>(other)
122
- return 1 if other.display_seq.nil?
123
- return -1 if display_seq.nil?
124
- display_seq.to_s.to_i <=> other.display_seq.to_s.to_i
125
- end
126
- end
127
-
128
- class Meta
129
- include Refinee
130
-
131
- attr_accessor :property, :id, :scheme, :content
132
- attr_reader :refines
133
-
134
- def refines=(refinee)
135
- refinee.refiners << self
136
- @refines = refinee
137
- end
138
-
139
- def refines?
140
- ! refines.nil?
141
- end
142
- alias subexpression? refines?
143
-
144
- def primary_expression?
145
- ! subexpression?
146
- end
147
-
148
- def inspect
149
- ivs = instance_variables.map {|iv|
150
- [iv, instance_variable_get(iv).inspect].join('=')
151
- }.join(' ')
152
- '<#%s:%#0x %s>' % [self.class, __id__, ivs]
153
- end
154
-
155
- def to_s
156
- content.to_s
157
- end
158
- end
159
-
160
- class Link
161
- include Refinee
162
-
163
- attr_accessor :href, :rel, :id, :media_type
164
- attr_reader :refines
165
-
166
- def refines=(refinee)
167
- refinee.refiners << self
168
- @refines = refinee
169
- end
170
- end
171
- end
6
+ Metadata = EPUB::Metadata
172
7
  end
173
8
  end
174
9
  end
@@ -0,0 +1,6 @@
1
+ <metadata xmlns="http://www.idpf.org/2013/metadata"
2
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
3
+ unique-identifier="pub-id">
4
+ <dc:identifier id="pub-id">urn:uuid:A1B0D67E-2E81-4DF5-9E67-A64CBE366809</dc:identifier>
5
+ <meta property="dcterms:modified">2011-01-01T12:00:00Z</meta>
6
+ </metadata>
data/test/helper.rb CHANGED
@@ -8,4 +8,7 @@ require 'test/unit'
8
8
  require 'test/unit/rr'
9
9
  require 'test/unit/notify'
10
10
  require 'pry'
11
+ require 'pretty_backtrace'
12
+ PrettyBacktrace.enable
13
+
11
14
  require 'epub/parser'
data/test/test_epub.rb CHANGED
@@ -17,5 +17,9 @@ class TestEUPB < Test::Unit::TestCase
17
17
  assert_kind_of EPUB::Publication::Package::Manifest::Item, entry
18
18
  end
19
19
  end
20
- end
21
20
 
21
+ def test_book_has_multiple_packages
22
+ book = EPUB::Parser.parse(@file)
23
+ assert_instance_of Array, book.packages
24
+ end
25
+ end