epub-parser 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/.yardopts +2 -0
- data/CHANGELOG.markdown +18 -0
- data/README.markdown +40 -11
- data/bin/epub-open +1 -1
- data/bin/epubinfo +14 -14
- data/docs/Home.markdown +3 -2
- data/docs/Item.markdown +3 -3
- data/docs/Navigation.markdown +58 -0
- data/docs/Publication.markdown +54 -0
- data/epub-parser.gemspec +1 -2
- data/lib/epub.rb +5 -83
- data/lib/epub/book.rb +1 -1
- data/lib/epub/book/features.rb +85 -0
- data/lib/epub/constants.rb +2 -0
- data/lib/epub/content_document/navigation.rb +31 -4
- data/lib/epub/content_document/xhtml.rb +1 -1
- data/lib/epub/inspector.rb +9 -7
- data/lib/epub/parser.rb +1 -1
- data/lib/epub/parser/content_document.rb +5 -1
- data/lib/epub/parser/ocf.rb +2 -2
- data/lib/epub/parser/publication.rb +46 -87
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/fixed_layout.rb +2 -3
- data/lib/epub/publication/package/guide.rb +19 -14
- data/lib/epub/publication/package/manifest.rb +36 -6
- data/lib/epub/publication/package/metadata.rb +27 -8
- data/lib/epub/publication/package/spine.rb +10 -3
- data/test/fixtures/book/OPS/nav.xhtml +1 -1
- data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +2 -1
- data/test/helper.rb +1 -1
- data/test/test_content_document.rb +41 -2
- data/test/test_epub.rb +0 -7
- data/test/test_parser.rb +4 -4
- data/test/test_parser_content_document.rb +2 -0
- data/test/test_parser_publication.rb +4 -0
- data/test/test_publication.rb +60 -0
- metadata +55 -67
- data/lib/method_decorators/deprecated.rb +0 -84
data/lib/epub/book.rb
CHANGED
data/lib/epub/book/features.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module EPUB
|
2
|
+
class Book
|
3
|
+
module Features
|
4
|
+
modules = [:ocf, :package]
|
5
|
+
attr_reader *modules
|
6
|
+
attr_accessor :epub_file
|
7
|
+
modules.each do |mod|
|
8
|
+
define_method "#{mod}=" do |obj|
|
9
|
+
instance_variable_set "@#{mod}", obj
|
10
|
+
obj.book = self
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
Publication::Package::CONTENT_MODELS.each do |model|
|
15
|
+
define_method model do
|
16
|
+
package.__send__(model)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
%w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |met|
|
21
|
+
define_method met do
|
22
|
+
metadata.__send__(met)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
%w[nav].each do |met|
|
27
|
+
define_method met do
|
28
|
+
manifest.__send__ met
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# @overload each_page_on_spine(&blk)
|
33
|
+
# iterate over items in order of spine when block given
|
34
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
35
|
+
# @overload each_page_on_spine
|
36
|
+
# @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
|
37
|
+
def each_page_on_spine(&blk)
|
38
|
+
enum = package.spine.items
|
39
|
+
if block_given?
|
40
|
+
enum.each &blk
|
41
|
+
else
|
42
|
+
enum
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def each_page_on_toc(&blk)
|
47
|
+
raise NotImplementedError
|
48
|
+
end
|
49
|
+
|
50
|
+
# @overload each_content(&blk)
|
51
|
+
# iterate all items over when block given
|
52
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
53
|
+
# @overload each_content
|
54
|
+
# @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
|
55
|
+
def each_content(&blk)
|
56
|
+
enum = manifest.items
|
57
|
+
if block_given?
|
58
|
+
enum.each &blk
|
59
|
+
else
|
60
|
+
enum.to_enum
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def other_navigation
|
65
|
+
raise NotImplementedError
|
66
|
+
end
|
67
|
+
|
68
|
+
# @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
|
69
|
+
def resources
|
70
|
+
manifest.items
|
71
|
+
end
|
72
|
+
|
73
|
+
# Syntax sugar
|
74
|
+
def rootfile_path
|
75
|
+
ocf.container.rootfile.full_path.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
# Syntax sugar
|
79
|
+
def cover_image
|
80
|
+
manifest.cover_image
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/epub/constants.rb
CHANGED
@@ -12,7 +12,9 @@ module EPUB
|
|
12
12
|
}
|
13
13
|
|
14
14
|
module MediaType
|
15
|
+
# @deprecated Use {UnsupportedMediaType} instead
|
15
16
|
class UnsupportedError < StandardError; end
|
17
|
+
class UnsupportedMediaType < StandardError; end
|
16
18
|
|
17
19
|
EPUB = 'application/epub+zip'
|
18
20
|
ROOTFILE = 'application/oebps-package+xml'
|
data/lib/epub/content_document/navigation.rb
CHANGED
@@ -46,16 +46,34 @@ module EPUB
|
|
46
46
|
navigations.first
|
47
47
|
end
|
48
48
|
|
49
|
+
module Hidable
|
50
|
+
attr_accessor :hidden, :parent
|
51
|
+
|
52
|
+
def hidden?
|
53
|
+
if @hidden.nil?
|
54
|
+
@parent ? @parent.hidden? : false
|
55
|
+
else
|
56
|
+
true
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
49
61
|
class Item
|
50
|
-
|
62
|
+
include Hidable
|
63
|
+
|
64
|
+
attr_accessor :items, :text,
|
51
65
|
:content_document, :href, :item
|
52
66
|
|
53
67
|
def initialize
|
54
|
-
@items =
|
68
|
+
@items = ItemList.new
|
69
|
+
@items.parent = self
|
55
70
|
end
|
56
71
|
|
57
|
-
def
|
58
|
-
|
72
|
+
def traverse(depth=0, &block)
|
73
|
+
block.call self, depth
|
74
|
+
items.each do |item|
|
75
|
+
item.traverse depth + 1, &block
|
76
|
+
end
|
59
77
|
end
|
60
78
|
end
|
61
79
|
|
@@ -72,6 +90,15 @@ module EPUB
|
|
72
90
|
alias heading text
|
73
91
|
alias heading= text=
|
74
92
|
end
|
93
|
+
|
94
|
+
class ItemList < Array
|
95
|
+
include Hidable
|
96
|
+
|
97
|
+
def <<(item)
|
98
|
+
super
|
99
|
+
item.parent = self
|
100
|
+
end
|
101
|
+
end
|
75
102
|
end
|
76
103
|
end
|
77
104
|
end
|
data/lib/epub/content_document/xhtml.rb
CHANGED
@@ -17,7 +17,7 @@ module EPUB
|
|
17
17
|
# @return [String] Returns the value of title element.
|
18
18
|
# If none, returns empty string
|
19
19
|
def title
|
20
|
-
title_elem =
|
20
|
+
title_elem = nokogiri.search('title').first
|
21
21
|
if title_elem
|
22
22
|
title_elem.text
|
23
23
|
else
|
data/lib/epub/inspector.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module EPUB
|
2
2
|
module Inspector
|
3
3
|
INSTANCE_VARIABLES_OPTION = {:exclude => []}
|
4
|
+
SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"
|
4
5
|
|
5
6
|
def inspect_simply
|
6
|
-
|
7
|
+
SIMPLE_TEMPLATE % {
|
7
8
|
:class => self.class,
|
8
9
|
:object_id => inspect_object_id
|
9
10
|
}
|
@@ -24,6 +25,7 @@ module EPUB
|
|
24
25
|
end
|
25
26
|
|
26
27
|
module PublicationModel
|
28
|
+
TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"
|
27
29
|
class << self
|
28
30
|
def included(mod)
|
29
31
|
mod.__send__ :include, Inspector
|
@@ -31,12 +33,12 @@ module EPUB
|
|
31
33
|
end
|
32
34
|
|
33
35
|
def inspect
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
TEMPLATE % {
|
37
|
+
:class => self.class,
|
38
|
+
:package => package.inspect_simply,
|
39
|
+
:object_id => inspect_object_id,
|
40
|
+
:attributes => inspect_instance_variables(exclude: [:@package])
|
41
|
+
}
|
40
42
|
end
|
41
43
|
end
|
42
44
|
end
|
data/lib/epub/parser.rb
CHANGED
@@ -49,7 +49,7 @@ module EPUB
|
|
49
49
|
def parse
|
50
50
|
Zip::Archive.open @filepath do |zip|
|
51
51
|
@book.ocf = OCF.parse(zip)
|
52
|
-
@book.package = Publication.parse(zip, @book.
|
52
|
+
@book.package = Publication.parse(zip, @book.ocf.container.rootfile.full_path.to_s)
|
53
53
|
end
|
54
54
|
|
55
55
|
@book
|
data/lib/epub/parser/content_document.rb
CHANGED
@@ -47,8 +47,12 @@ module EPUB
|
|
47
47
|
def parse_navigation(element)
|
48
48
|
nav = EPUB::ContentDocument::Navigation::Navigation.new
|
49
49
|
nav.text = find_heading(element)
|
50
|
+
hidden = extract_attribute(element, 'hidden')
|
51
|
+
nav.hidden = hidden.nil? ? nil : true
|
50
52
|
nav.type = extract_attribute(element, 'type', 'epub')
|
51
|
-
|
53
|
+
element.xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
|
54
|
+
nav.items << parse_navigation_item(elem)
|
55
|
+
end
|
52
56
|
|
53
57
|
nav
|
54
58
|
end
|
data/lib/epub/parser/ocf.rb
CHANGED
@@ -9,7 +9,7 @@ module EPUB
|
|
9
9
|
include Utils
|
10
10
|
|
11
11
|
DIRECTORY = 'META-INF'
|
12
|
-
EPUB::OCF::MODULES.each {|m| self.const_set "#{m.upcase}_FILE", "#{m}.xml"}
|
12
|
+
EPUB::OCF::MODULES.each {|m| self.const_set "#{m.upcase}_FILE", "#{m}.xml"} # Deprecated
|
13
13
|
|
14
14
|
class << self
|
15
15
|
def parse(zip_archive)
|
@@ -25,7 +25,7 @@ module EPUB
|
|
25
25
|
def parse
|
26
26
|
EPUB::OCF::MODULES.each do |m|
|
27
27
|
begin
|
28
|
-
file = @zip.fopen(File.join(DIRECTORY,
|
28
|
+
file = @zip.fopen(File.join(DIRECTORY, "#{m}.xml"))
|
29
29
|
@ocf.__send__ "#{m}=", __send__("parse_#{m}", file.read)
|
30
30
|
rescue Zip::Error
|
31
31
|
end
|
data/lib/epub/parser/publication.rb
CHANGED
@@ -24,12 +24,9 @@ module EPUB
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def parse
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
parse_spine
|
31
|
-
parse_guide
|
32
|
-
parse_bindings
|
27
|
+
([:package] + EPUB::Publication::Package::CONTENT_MODELS).each do |model|
|
28
|
+
__send__ "parse_#{model}"
|
29
|
+
end
|
33
30
|
|
34
31
|
@package
|
35
32
|
end
|
@@ -51,80 +48,21 @@ module EPUB
|
|
51
48
|
elem = @doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first
|
52
49
|
id_map = {}
|
53
50
|
|
54
|
-
metadata.identifiers = elem
|
55
|
-
identifier =
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
identifier
|
61
|
-
end
|
62
|
-
metadata.identifiers.each {|i| id_map[i.id] = {metadata: i} if i.respond_to?(:id) && i.id}
|
63
|
-
|
64
|
-
metadata.titles = elem.xpath('./dc:title', EPUB::NAMESPACES).collect do |e|
|
65
|
-
title = EPUB::Publication::Package::Metadata::Title.new
|
66
|
-
%w[ id lang dir ].each do |attr|
|
67
|
-
title.__send__("#{attr}=", extract_attribute(e, attr))
|
68
|
-
end
|
69
|
-
title.content = e.content
|
70
|
-
|
71
|
-
title
|
72
|
-
end
|
73
|
-
metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
|
74
|
-
|
75
|
-
metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
|
76
|
-
language = EPUB::Publication::Package::Metadata::DCMES.new
|
77
|
-
language.content = e.content
|
78
|
-
language.id = e['id'] if e['id']
|
79
|
-
|
80
|
-
language
|
81
|
-
end
|
82
|
-
metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
|
83
|
-
|
51
|
+
metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
|
52
|
+
identifier.scheme = extract_attribute(e, 'scheme', 'opf')
|
53
|
+
metadata.unique_identifier = identifier if identifier.id == @unique_identifier_id
|
54
|
+
}
|
55
|
+
metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
|
56
|
+
metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
|
84
57
|
%w[ contributor coverage creator date description format publisher relation source subject type ].each do |dcmes|
|
85
|
-
metadata.__send__ "#{dcmes}s=",
|
86
|
-
metadata.__send__("#{dcmes}s").each {|d| id_map[d.id] = {metadata: d} if d.respond_to?(:id) && d.id}
|
58
|
+
metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
|
87
59
|
end
|
88
|
-
|
89
|
-
metadata.
|
90
|
-
metadata.
|
91
|
-
|
92
|
-
metadata.metas = elem.xpath('./opf:meta', EPUB::NAMESPACES).collect do |e|
|
93
|
-
meta = EPUB::Publication::Package::Metadata::Meta.new
|
94
|
-
%w[property id scheme].each do |attr|
|
95
|
-
meta.__send__ "#{attr}=", extract_attribute(e, attr)
|
96
|
-
end
|
97
|
-
meta.content = e.content
|
98
|
-
refines = extract_attribute(e, 'refines')
|
99
|
-
if refines && refines[0] == '#'
|
100
|
-
id = refines[1..-1]
|
101
|
-
id_map[id] ||= {}
|
102
|
-
id_map[id][:refiners] ||= []
|
103
|
-
id_map[id][:refiners] << meta
|
104
|
-
end
|
105
|
-
|
106
|
-
meta
|
107
|
-
end
|
108
|
-
metadata.metas.each {|m| id_map[m.id] = {metadata: m} if m.respond_to?(:id) && m.id}
|
109
|
-
|
110
|
-
metadata.links = elem.xpath('./opf:link', EPUB::NAMESPACES).collect do |e|
|
111
|
-
link = EPUB::Publication::Package::Metadata::Link.new
|
112
|
-
%w[ id media-type ].each do |attr|
|
113
|
-
link.__send__ (attr.gsub(/-/, '_') + '='), extract_attribute(e, attr)
|
114
|
-
end
|
60
|
+
metadata.rights = extract_model(elem, id_map, './dc:rights')
|
61
|
+
metadata.metas = extract_refinee(elem, id_map, './opf:meta', :Meta, %w[property id scheme])
|
62
|
+
metadata.links = extract_refinee(elem, id_map, './opf:link', :Link, %w[id media-type]) {|link, e|
|
115
63
|
link.href = Addressable::URI.parse(extract_attribute(e, 'href'))
|
116
|
-
link.rel = extract_attribute(e, 'rel').
|
117
|
-
|
118
|
-
if refines && refines[0] == '#'
|
119
|
-
id = refines[1..-1]
|
120
|
-
id_map[id] ||= {}
|
121
|
-
id_map[id][:refiners] ||= []
|
122
|
-
id_map[id][:refiners] << link
|
123
|
-
end
|
124
|
-
|
125
|
-
link
|
126
|
-
end
|
127
|
-
metadata.links.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
|
64
|
+
link.rel = Set.new(extract_attribute(e, 'rel').split(nil))
|
65
|
+
}
|
128
66
|
|
129
67
|
id_map.values.each do |hsh|
|
130
68
|
next unless hsh[:refiners]
|
@@ -150,7 +88,7 @@ module EPUB
|
|
150
88
|
fallback = extract_attribute(e, 'fallback')
|
151
89
|
fallback_map[fallback] = item if fallback
|
152
90
|
properties = extract_attribute(e, 'properties')
|
153
|
-
item.properties = properties
|
91
|
+
item.properties = properties.split(' ') if properties
|
154
92
|
manifest << item
|
155
93
|
end
|
156
94
|
fallback_map.each_pair do |id, from|
|
@@ -174,7 +112,7 @@ module EPUB
|
|
174
112
|
end
|
175
113
|
itemref.linear = (extract_attribute(e, 'linear') != 'no')
|
176
114
|
properties = extract_attribute(e, 'properties')
|
177
|
-
itemref.properties = properties
|
115
|
+
itemref.properties = properties.split(' ') if properties
|
178
116
|
spine << itemref
|
179
117
|
end
|
180
118
|
|
@@ -225,16 +163,37 @@ module EPUB
|
|
225
163
|
prefixes
|
226
164
|
end
|
227
165
|
|
228
|
-
def
|
229
|
-
elem.xpath(
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
md.__send__ "#{attr}=", extract_attribute(e, attr)
|
166
|
+
def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
|
167
|
+
models = elem.xpath(xpath, EPUB::NAMESPACES).collect do |e|
|
168
|
+
model = EPUB::Publication::Package::Metadata.const_get(klass).new
|
169
|
+
attributes.each do |attr|
|
170
|
+
model.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
|
234
171
|
end
|
235
|
-
|
236
|
-
|
172
|
+
model.content = e.content unless klass == :Link
|
173
|
+
|
174
|
+
yield model, e if block_given?
|
175
|
+
|
176
|
+
model
|
237
177
|
end
|
178
|
+
|
179
|
+
models.each do |model|
|
180
|
+
id_map[model.id] = {metadata: model} if model.respond_to?(:id) && model.id
|
181
|
+
end
|
182
|
+
|
183
|
+
models
|
184
|
+
end
|
185
|
+
|
186
|
+
def extract_refinee(elem, id_map, xpath, klass, attributes)
|
187
|
+
extract_model(elem, id_map, xpath, klass, attributes) {|model, e|
|
188
|
+
yield model, e if block_given?
|
189
|
+
refines = extract_attribute(e, 'refines')
|
190
|
+
if refines && refines[0] == '#'
|
191
|
+
id = refines[1..-1]
|
192
|
+
id_map[id] ||= {}
|
193
|
+
id_map[id][:refiners] ||= []
|
194
|
+
id_map[id][:refiners] << model
|
195
|
+
end
|
196
|
+
}
|
238
197
|
end
|
239
198
|
end
|
240
199
|
end
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/publication/fixed_layout.rb
CHANGED
@@ -124,8 +124,7 @@ module EPUB
|
|
124
124
|
base.__send__ :define_method, :page_spread do
|
125
125
|
property = page_spread_without_fixed_layout
|
126
126
|
return property if property
|
127
|
-
|
128
|
-
property ? PAGE_SPREAD_PROPERTY : nil
|
127
|
+
properties.include?(prefixed_page_spread_property) ? PAGE_SPREAD_PROPERTY : nil
|
129
128
|
end
|
130
129
|
|
131
130
|
base.__send__ :define_method, :page_spread= do |new_value|
|
@@ -162,7 +161,7 @@ module EPUB
|
|
162
161
|
values_to_be_deleted = (values - [new_value]).map {|value| "#{rendition_property_prefix}#{value}"}
|
163
162
|
properties.delete_if {|prop| values_to_be_deleted.include? prop}
|
164
163
|
new_property = "#{rendition_property_prefix}#{new_value}"
|
165
|
-
properties << new_property unless properties.
|
164
|
+
properties << new_property unless properties.include? new_property
|
166
165
|
new_value
|
167
166
|
end
|
168
167
|
end
|