epub-parser 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -0
  3. data/.yardopts +2 -0
  4. data/CHANGELOG.markdown +18 -0
  5. data/README.markdown +40 -11
  6. data/bin/epub-open +1 -1
  7. data/bin/epubinfo +14 -14
  8. data/docs/Home.markdown +3 -2
  9. data/docs/Item.markdown +3 -3
  10. data/docs/Navigation.markdown +58 -0
  11. data/docs/Publication.markdown +54 -0
  12. data/epub-parser.gemspec +1 -2
  13. data/lib/epub.rb +5 -83
  14. data/lib/epub/book.rb +1 -1
  15. data/lib/epub/book/features.rb +85 -0
  16. data/lib/epub/constants.rb +2 -0
  17. data/lib/epub/content_document/navigation.rb +31 -4
  18. data/lib/epub/content_document/xhtml.rb +1 -1
  19. data/lib/epub/inspector.rb +9 -7
  20. data/lib/epub/parser.rb +1 -1
  21. data/lib/epub/parser/content_document.rb +5 -1
  22. data/lib/epub/parser/ocf.rb +2 -2
  23. data/lib/epub/parser/publication.rb +46 -87
  24. data/lib/epub/parser/version.rb +1 -1
  25. data/lib/epub/publication/fixed_layout.rb +2 -3
  26. data/lib/epub/publication/package/guide.rb +19 -14
  27. data/lib/epub/publication/package/manifest.rb +36 -6
  28. data/lib/epub/publication/package/metadata.rb +27 -8
  29. data/lib/epub/publication/package/spine.rb +10 -3
  30. data/test/fixtures/book/OPS/nav.xhtml +1 -1
  31. data/test/fixtures/book/OPS/ルートファイル.opf +2 -1
  32. data/test/helper.rb +1 -1
  33. data/test/test_content_document.rb +41 -2
  34. data/test/test_epub.rb +0 -7
  35. data/test/test_parser.rb +4 -4
  36. data/test/test_parser_content_document.rb +2 -0
  37. data/test/test_parser_publication.rb +4 -0
  38. data/test/test_publication.rb +60 -0
  39. metadata +55 -67
  40. data/lib/method_decorators/deprecated.rb +0 -84
data/lib/epub/book.rb CHANGED
@@ -2,6 +2,6 @@ require 'epub'
2
2
 
3
3
  module EPUB
4
4
  class Book
5
- include EPUB
5
+ include EPUB::Book::Features
6
6
  end
7
7
  end
@@ -0,0 +1,85 @@
1
+ module EPUB
2
+ class Book
3
+ module Features
4
+ modules = [:ocf, :package]
5
+ attr_reader *modules
6
+ attr_accessor :epub_file
7
+ modules.each do |mod|
8
+ define_method "#{mod}=" do |obj|
9
+ instance_variable_set "@#{mod}", obj
10
+ obj.book = self
11
+ end
12
+ end
13
+
14
+ Publication::Package::CONTENT_MODELS.each do |model|
15
+ define_method model do
16
+ package.__send__(model)
17
+ end
18
+ end
19
+
20
+ %w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |met|
21
+ define_method met do
22
+ metadata.__send__(met)
23
+ end
24
+ end
25
+
26
+ %w[nav].each do |met|
27
+ define_method met do
28
+ manifest.__send__ met
29
+ end
30
+ end
31
+
32
+ # @overload each_page_on_spine(&blk)
33
+ # iterate over items in order of spine when block given
34
+ # @yieldparam item [Publication::Package::Manifest::Item]
35
+ # @overload each_page_on_spine
36
+ # @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
37
+ def each_page_on_spine(&blk)
38
+ enum = package.spine.items
39
+ if block_given?
40
+ enum.each &blk
41
+ else
42
+ enum
43
+ end
44
+ end
45
+
46
+ def each_page_on_toc(&blk)
47
+ raise NotImplementedError
48
+ end
49
+
50
+ # @overload each_content(&blk)
51
+ # iterate over all items when block given
52
+ # @yieldparam item [Publication::Package::Manifest::Item]
53
+ # @overload each_content
54
+ # @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
55
+ def each_content(&blk)
56
+ enum = manifest.items
57
+ if block_given?
58
+ enum.each &blk
59
+ else
60
+ enum.to_enum
61
+ end
62
+ end
63
+
64
+ def other_navigation
65
+ raise NotImplementedError
66
+ end
67
+
68
+ # @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
69
+ def resources
70
+ manifest.items
71
+ end
72
+
73
+ # Syntax sugar
74
+ def rootfile_path
75
+ ocf.container.rootfile.full_path.to_s
76
+ end
77
+
78
+ # Syntax sugar
79
+ def cover_image
80
+ manifest.cover_image
81
+ end
82
+
83
+ end
84
+ end
85
+ end
@@ -12,7 +12,9 @@ module EPUB
12
12
  }
13
13
 
14
14
  module MediaType
15
+ # @deprecated Use {UnsupportedMediaType} instead
15
16
  class UnsupportedError < StandardError; end
17
+ class UnsupportedMediaType < StandardError; end
16
18
 
17
19
  EPUB = 'application/epub+zip'
18
20
  ROOTFILE = 'application/oebps-package+xml'
@@ -46,16 +46,34 @@ module EPUB
46
46
  navigations.first
47
47
  end
48
48
 
49
+ module Hidable
50
+ attr_accessor :hidden, :parent
51
+
52
+ def hidden?
53
+ if @hidden.nil?
54
+ @parent ? @parent.hidden? : false
55
+ else
56
+ true
57
+ end
58
+ end
59
+ end
60
+
49
61
  class Item
50
- attr_accessor :items, :text, :hidden,
62
+ include Hidable
63
+
64
+ attr_accessor :items, :text,
51
65
  :content_document, :href, :item
52
66
 
53
67
  def initialize
54
- @items = []
68
+ @items = ItemList.new
69
+ @items.parent = self
55
70
  end
56
71
 
57
- def hidden?
58
- !! hidden
72
+ def traverse(depth=0, &block)
73
+ block.call self, depth
74
+ items.each do |item|
75
+ item.traverse depth + 1, &block
76
+ end
59
77
  end
60
78
  end
61
79
 
@@ -72,6 +90,15 @@ module EPUB
72
90
  alias heading text
73
91
  alias heading= text=
74
92
  end
93
+
94
+ class ItemList < Array
95
+ include Hidable
96
+
97
+ def <<(item)
98
+ super
99
+ item.parent = self
100
+ end
101
+ end
75
102
  end
76
103
  end
77
104
  end
@@ -17,7 +17,7 @@ module EPUB
17
17
  # @return [String] Returns the value of title element.
18
18
  # If none, returns empty string
19
19
  def title
20
- title_elem = Nokogiri.XML(read).search('title').first
20
+ title_elem = nokogiri.search('title').first
21
21
  if title_elem
22
22
  title_elem.text
23
23
  else
@@ -1,9 +1,10 @@
1
1
  module EPUB
2
2
  module Inspector
3
3
  INSTANCE_VARIABLES_OPTION = {:exclude => []}
4
+ SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"
4
5
 
5
6
  def inspect_simply
6
- "#<%{class}:%{object_id}>" % {
7
+ SIMPLE_TEMPLATE % {
7
8
  :class => self.class,
8
9
  :object_id => inspect_object_id
9
10
  }
@@ -24,6 +25,7 @@ module EPUB
24
25
  end
25
26
 
26
27
  module PublicationModel
28
+ TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"
27
29
  class << self
28
30
  def included(mod)
29
31
  mod.__send__ :include, Inspector
@@ -31,12 +33,12 @@ module EPUB
31
33
  end
32
34
 
33
35
  def inspect
34
- "#<%{class}:%{object_id} @package=%{package} %{attributes}>" % {
35
- :class => self.class,
36
- :package => package.inspect_simply,
37
- :object_id => inspect_object_id,
38
- :attributes => inspect_instance_variables(exclude: [:@package])
39
- }
36
+ TEMPLATE % {
37
+ :class => self.class,
38
+ :package => package.inspect_simply,
39
+ :object_id => inspect_object_id,
40
+ :attributes => inspect_instance_variables(exclude: [:@package])
41
+ }
40
42
  end
41
43
  end
42
44
  end
data/lib/epub/parser.rb CHANGED
@@ -49,7 +49,7 @@ module EPUB
49
49
  def parse
50
50
  Zip::Archive.open @filepath do |zip|
51
51
  @book.ocf = OCF.parse(zip)
52
- @book.package = Publication.parse(zip, @book.rootfile_path)
52
+ @book.package = Publication.parse(zip, @book.ocf.container.rootfile.full_path.to_s)
53
53
  end
54
54
 
55
55
  @book
@@ -47,8 +47,12 @@ module EPUB
47
47
  def parse_navigation(element)
48
48
  nav = EPUB::ContentDocument::Navigation::Navigation.new
49
49
  nav.text = find_heading(element)
50
+ hidden = extract_attribute(element, 'hidden')
51
+ nav.hidden = hidden.nil? ? nil : true
50
52
  nav.type = extract_attribute(element, 'type', 'epub')
51
- nav.items = element.xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map {|elem| parse_navigation_item(elem)}
53
+ element.xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
54
+ nav.items << parse_navigation_item(elem)
55
+ end
52
56
 
53
57
  nav
54
58
  end
@@ -9,7 +9,7 @@ module EPUB
9
9
  include Utils
10
10
 
11
11
  DIRECTORY = 'META-INF'
12
- EPUB::OCF::MODULES.each {|m| self.const_set "#{m.upcase}_FILE", "#{m}.xml"}
12
+ EPUB::OCF::MODULES.each {|m| self.const_set "#{m.upcase}_FILE", "#{m}.xml"} # Deprecated
13
13
 
14
14
  class << self
15
15
  def parse(zip_archive)
@@ -25,7 +25,7 @@ module EPUB
25
25
  def parse
26
26
  EPUB::OCF::MODULES.each do |m|
27
27
  begin
28
- file = @zip.fopen(File.join(DIRECTORY, self.class.const_get("#{m.upcase}_FILE")))
28
+ file = @zip.fopen(File.join(DIRECTORY, "#{m}.xml"))
29
29
  @ocf.__send__ "#{m}=", __send__("parse_#{m}", file.read)
30
30
  rescue Zip::Error
31
31
  end
@@ -24,12 +24,9 @@ module EPUB
24
24
  end
25
25
 
26
26
  def parse
27
- parse_package
28
- parse_metadata
29
- parse_manifest
30
- parse_spine
31
- parse_guide
32
- parse_bindings
27
+ ([:package] + EPUB::Publication::Package::CONTENT_MODELS).each do |model|
28
+ __send__ "parse_#{model}"
29
+ end
33
30
 
34
31
  @package
35
32
  end
@@ -51,80 +48,21 @@ module EPUB
51
48
  elem = @doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first
52
49
  id_map = {}
53
50
 
54
- metadata.identifiers = elem.xpath('./dc:identifier', EPUB::NAMESPACES).collect do |e|
55
- identifier = EPUB::Publication::Package::Metadata::DCMES.new
56
- identifier.content = e.content
57
- identifier.id = id = extract_attribute(e, 'id')
58
- metadata.unique_identifier = identifier if id == @unique_identifier_id
59
-
60
- identifier
61
- end
62
- metadata.identifiers.each {|i| id_map[i.id] = {metadata: i} if i.respond_to?(:id) && i.id}
63
-
64
- metadata.titles = elem.xpath('./dc:title', EPUB::NAMESPACES).collect do |e|
65
- title = EPUB::Publication::Package::Metadata::Title.new
66
- %w[ id lang dir ].each do |attr|
67
- title.__send__("#{attr}=", extract_attribute(e, attr))
68
- end
69
- title.content = e.content
70
-
71
- title
72
- end
73
- metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
74
-
75
- metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
76
- language = EPUB::Publication::Package::Metadata::DCMES.new
77
- language.content = e.content
78
- language.id = e['id'] if e['id']
79
-
80
- language
81
- end
82
- metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
83
-
51
+ metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
52
+ identifier.scheme = extract_attribute(e, 'scheme', 'opf')
53
+ metadata.unique_identifier = identifier if identifier.id == @unique_identifier_id
54
+ }
55
+ metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
56
+ metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
84
57
  %w[ contributor coverage creator date description format publisher relation source subject type ].each do |dcmes|
85
- metadata.__send__ "#{dcmes}s=", collect_dcmes(elem, "./dc:#{dcmes}")
86
- metadata.__send__("#{dcmes}s").each {|d| id_map[d.id] = {metadata: d} if d.respond_to?(:id) && d.id}
58
+ metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
87
59
  end
88
-
89
- metadata.rights = collect_dcmes(elem, './dc:rights')
90
- metadata.rights.each {|r| id_map[r.id] = {metadata: r} if r.respond_to?(:id) && r.id}
91
-
92
- metadata.metas = elem.xpath('./opf:meta', EPUB::NAMESPACES).collect do |e|
93
- meta = EPUB::Publication::Package::Metadata::Meta.new
94
- %w[property id scheme].each do |attr|
95
- meta.__send__ "#{attr}=", extract_attribute(e, attr)
96
- end
97
- meta.content = e.content
98
- refines = extract_attribute(e, 'refines')
99
- if refines && refines[0] == '#'
100
- id = refines[1..-1]
101
- id_map[id] ||= {}
102
- id_map[id][:refiners] ||= []
103
- id_map[id][:refiners] << meta
104
- end
105
-
106
- meta
107
- end
108
- metadata.metas.each {|m| id_map[m.id] = {metadata: m} if m.respond_to?(:id) && m.id}
109
-
110
- metadata.links = elem.xpath('./opf:link', EPUB::NAMESPACES).collect do |e|
111
- link = EPUB::Publication::Package::Metadata::Link.new
112
- %w[ id media-type ].each do |attr|
113
- link.__send__ (attr.gsub(/-/, '_') + '='), extract_attribute(e, attr)
114
- end
60
+ metadata.rights = extract_model(elem, id_map, './dc:rights')
61
+ metadata.metas = extract_refinee(elem, id_map, './opf:meta', :Meta, %w[property id scheme])
62
+ metadata.links = extract_refinee(elem, id_map, './opf:link', :Link, %w[id media-type]) {|link, e|
115
63
  link.href = Addressable::URI.parse(extract_attribute(e, 'href'))
116
- link.rel = extract_attribute(e, 'rel').strip.split
117
- refines = extract_attribute(e, 'refines')
118
- if refines && refines[0] == '#'
119
- id = refines[1..-1]
120
- id_map[id] ||= {}
121
- id_map[id][:refiners] ||= []
122
- id_map[id][:refiners] << link
123
- end
124
-
125
- link
126
- end
127
- metadata.links.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
64
+ link.rel = Set.new(extract_attribute(e, 'rel').split(nil))
65
+ }
128
66
 
129
67
  id_map.values.each do |hsh|
130
68
  next unless hsh[:refiners]
@@ -150,7 +88,7 @@ module EPUB
150
88
  fallback = extract_attribute(e, 'fallback')
151
89
  fallback_map[fallback] = item if fallback
152
90
  properties = extract_attribute(e, 'properties')
153
- item.properties = properties ? properties.split(' ') : []
91
+ item.properties = properties.split(' ') if properties
154
92
  manifest << item
155
93
  end
156
94
  fallback_map.each_pair do |id, from|
@@ -174,7 +112,7 @@ module EPUB
174
112
  end
175
113
  itemref.linear = (extract_attribute(e, 'linear') != 'no')
176
114
  properties = extract_attribute(e, 'properties')
177
- itemref.properties = properties ? properties.split(' ') : []
115
+ itemref.properties = properties.split(' ') if properties
178
116
  spine << itemref
179
117
  end
180
118
 
@@ -225,16 +163,37 @@ module EPUB
225
163
  prefixes
226
164
  end
227
165
 
228
- def collect_dcmes(elem, selector)
229
- elem.xpath(selector, EPUB::NAMESPACES).collect do |e|
230
- md = EPUB::Publication::Package::Metadata::DCMES.new
231
- md.content = e.content
232
- %w[ id lang dir ].each do |attr|
233
- md.__send__ "#{attr}=", extract_attribute(e, attr)
166
+ def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
167
+ models = elem.xpath(xpath, EPUB::NAMESPACES).collect do |e|
168
+ model = EPUB::Publication::Package::Metadata.const_get(klass).new
169
+ attributes.each do |attr|
170
+ model.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
234
171
  end
235
- yield(md, e) if block_given?
236
- md
172
+ model.content = e.content unless klass == :Link
173
+
174
+ yield model, e if block_given?
175
+
176
+ model
237
177
  end
178
+
179
+ models.each do |model|
180
+ id_map[model.id] = {metadata: model} if model.respond_to?(:id) && model.id
181
+ end
182
+
183
+ models
184
+ end
185
+
186
+ def extract_refinee(elem, id_map, xpath, klass, attributes)
187
+ extract_model(elem, id_map, xpath, klass, attributes) {|model, e|
188
+ yield model, e if block_given?
189
+ refines = extract_attribute(e, 'refines')
190
+ if refines && refines[0] == '#'
191
+ id = refines[1..-1]
192
+ id_map[id] ||= {}
193
+ id_map[id][:refiners] ||= []
194
+ id_map[id][:refiners] << model
195
+ end
196
+ }
238
197
  end
239
198
  end
240
199
  end
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.1.5"
3
+ VERSION = "0.1.6"
4
4
  end
5
5
  end
@@ -124,8 +124,7 @@ module EPUB
124
124
  base.__send__ :define_method, :page_spread do
125
125
  property = page_spread_without_fixed_layout
126
126
  return property if property
127
- property = properties.find {|prop| prop == prefixed_page_spread_property}
128
- property ? PAGE_SPREAD_PROPERTY : nil
127
+ properties.include?(prefixed_page_spread_property) ? PAGE_SPREAD_PROPERTY : nil
129
128
  end
130
129
 
131
130
  base.__send__ :define_method, :page_spread= do |new_value|
@@ -162,7 +161,7 @@ module EPUB
162
161
  values_to_be_deleted = (values - [new_value]).map {|value| "#{rendition_property_prefix}#{value}"}
163
162
  properties.delete_if {|prop| values_to_be_deleted.include? prop}
164
163
  new_property = "#{rendition_property_prefix}#{new_value}"
165
- properties << new_property unless properties.any? {|prop| prop == new_property}
164
+ properties << new_property unless properties.include? new_property
166
165
  new_value
167
166
  end
168
167
  end