epub-parser 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/CHANGELOG.markdown +10 -0
- data/README.markdown +43 -27
- data/bin/epubinfo +22 -0
- data/docs/EpubOpen.markdown +43 -0
- data/docs/Epubinfo.markdown +37 -0
- data/docs/FixedLayout.markdown +3 -5
- data/docs/Home.markdown +30 -15
- data/docs/Item.markdown +14 -14
- data/epub-parser.gemspec +5 -2
- data/lib/epub.rb +14 -1
- data/lib/epub/content_document.rb +1 -5
- data/lib/epub/content_document/navigation.rb +3 -5
- data/lib/epub/content_document/xhtml.rb +25 -1
- data/lib/epub/inspector.rb +43 -0
- data/lib/epub/ocf/container.rb +2 -0
- data/lib/epub/parser.rb +0 -2
- data/lib/epub/parser/content_document.rb +3 -5
- data/lib/epub/parser/ocf.rb +2 -4
- data/lib/epub/parser/publication.rb +7 -7
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication.rb +1 -0
- data/lib/epub/publication/package.rb +20 -1
- data/lib/epub/publication/package/bindings.rb +5 -1
- data/lib/epub/publication/package/guide.rb +1 -0
- data/lib/epub/publication/package/manifest.rb +40 -5
- data/lib/epub/publication/package/metadata.rb +7 -10
- data/lib/epub/publication/package/spine.rb +14 -4
- data/lib/method_decorators/deprecated.rb +84 -0
- data/test/fixtures/book/OPS/nav.xhtml +2 -0
- data/test/helper.rb +4 -2
- data/test/test_content_document.rb +21 -0
- data/test/test_epub.rb +12 -0
- data/test/test_fixed_layout.rb +0 -1
- data/test/test_inspect.rb +121 -0
- data/test/test_parser_content_document.rb +3 -0
- data/test/test_parser_fixed_layout.rb +1 -1
- data/test/test_parser_ocf.rb +1 -1
- data/test/test_publication.rb +125 -4
- metadata +56 -8
data/epub-parser.gemspec
CHANGED
@@ -29,7 +29,9 @@ Gem::Specification.new do |s|
|
|
29
29
|
s.add_development_dependency 'rake'
|
30
30
|
s.add_development_dependency 'pry'
|
31
31
|
s.add_development_dependency 'pry-doc'
|
32
|
-
s.add_development_dependency 'test-unit
|
32
|
+
s.add_development_dependency 'test-unit'
|
33
|
+
s.add_development_dependency 'test-unit-rr'
|
34
|
+
s.add_development_dependency 'test-unit-notify'
|
33
35
|
s.add_development_dependency 'simplecov'
|
34
36
|
s.add_development_dependency 'thin'
|
35
37
|
s.add_development_dependency 'yard'
|
@@ -42,6 +44,7 @@ Gem::Specification.new do |s|
|
|
42
44
|
|
43
45
|
s.add_runtime_dependency 'enumerabler'
|
44
46
|
s.add_runtime_dependency 'zipruby'
|
45
|
-
s.add_runtime_dependency 'nokogiri', '1.
|
47
|
+
s.add_runtime_dependency 'nokogiri', '~> 1.6'
|
46
48
|
s.add_runtime_dependency 'addressable'
|
49
|
+
s.add_runtime_dependency 'method_decorators', '0.9.3'
|
47
50
|
end
|
data/lib/epub.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'method_decorators/deprecated'
|
2
|
+
require 'epub/inspector'
|
1
3
|
require 'epub/ocf'
|
2
4
|
require 'epub/publication'
|
3
5
|
require 'epub/content_document'
|
@@ -37,6 +39,11 @@ module EPUB
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
42
|
+
# @overload each_page_on_spine(&blk)
|
43
|
+
# iterate over items in order of spine when block given
|
44
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
45
|
+
# @overload each_page_on_spine
|
46
|
+
# @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
|
40
47
|
def each_page_on_spine(&blk)
|
41
48
|
enum = package.spine.items
|
42
49
|
if block_given?
|
@@ -50,6 +57,11 @@ module EPUB
|
|
50
57
|
raise NotImplementedError
|
51
58
|
end
|
52
59
|
|
60
|
+
# @overload each_content(&blk)
|
61
|
+
# iterate over all items when block given
|
62
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
63
|
+
# @overload each_content
|
64
|
+
# @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
|
53
65
|
def each_content(&blk)
|
54
66
|
enum = manifest.items
|
55
67
|
if block_given?
|
@@ -63,13 +75,14 @@ module EPUB
|
|
63
75
|
raise NotImplementedError
|
64
76
|
end
|
65
77
|
|
78
|
+
# @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
|
66
79
|
def resources
|
67
80
|
manifest.items
|
68
81
|
end
|
69
82
|
|
70
83
|
# Syntax sugar
|
71
84
|
def rootfile_path
|
72
|
-
ocf.container.rootfile.full_path
|
85
|
+
ocf.container.rootfile.full_path.to_s
|
73
86
|
end
|
74
87
|
|
75
88
|
# Syntax sugar
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'epub/content_document/xhtml'
|
2
|
-
|
3
1
|
module EPUB
|
4
2
|
module ContentDocument
|
5
3
|
class Navigation < XHTML
|
@@ -11,15 +9,15 @@ module EPUB
|
|
11
9
|
end
|
12
10
|
|
13
11
|
def toc
|
14
|
-
|
12
|
+
navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
|
15
13
|
end
|
16
14
|
|
17
15
|
def page_list
|
18
|
-
|
16
|
+
navigations.selector {|nav| nav.type == Navigation::Type::PAGE_LIST}.first
|
19
17
|
end
|
20
18
|
|
21
19
|
def landmarks
|
22
|
-
|
20
|
+
navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
|
23
21
|
end
|
24
22
|
|
25
23
|
# Enumerator version of toc
|
@@ -3,15 +3,39 @@ module EPUB
|
|
3
3
|
class XHTML
|
4
4
|
attr_accessor :item
|
5
5
|
|
6
|
+
# @return [String] Returns the content string.
|
6
7
|
def read
|
7
8
|
item.read
|
8
9
|
end
|
9
10
|
alias raw_document read
|
10
11
|
|
11
|
-
# referenced directly from spine
|
12
|
+
# @return [true|false] Whether referenced directly from spine or not.
|
12
13
|
def top_level?
|
13
14
|
!! item.itemref
|
14
15
|
end
|
16
|
+
|
17
|
+
# @return [String] Returns the value of title element.
|
18
|
+
# If none, returns empty string
|
19
|
+
def title
|
20
|
+
title_elem = Nokogiri.XML(read).search('title').first
|
21
|
+
if title_elem
|
22
|
+
title_elem.text
|
23
|
+
else
|
24
|
+
warn 'title element not found'
|
25
|
+
''
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# @return [REXML::Document] content as REXML::Document object
|
30
|
+
def rexml
|
31
|
+
require 'rexml/document'
|
32
|
+
@rexml ||= REXML::Document.new(raw_document)
|
33
|
+
end
|
34
|
+
|
35
|
+
# @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
|
36
|
+
def nokogiri
|
37
|
+
@nokogiri ||= Nokogiri.XML(raw_document)
|
38
|
+
end
|
15
39
|
end
|
16
40
|
end
|
17
41
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module EPUB
|
2
|
+
module Inspector
|
3
|
+
INSTANCE_VARIABLES_OPTION = {:exclude => []}
|
4
|
+
|
5
|
+
def inspect_simply
|
6
|
+
"#<%{class}:%{object_id}>" % {
|
7
|
+
:class => self.class,
|
8
|
+
:object_id => inspect_object_id
|
9
|
+
}
|
10
|
+
end
|
11
|
+
|
12
|
+
def inspect_object_id
|
13
|
+
(__id__ << 1).to_s(16)
|
14
|
+
end
|
15
|
+
|
16
|
+
def inspect_instance_variables(options={})
|
17
|
+
options = INSTANCE_VARIABLES_OPTION.merge(options)
|
18
|
+
exclude = options[:exclude]
|
19
|
+
|
20
|
+
(instance_variables - exclude).map {|name|
|
21
|
+
value = instance_variable_get(name)
|
22
|
+
"#{name}=#{value.inspect}"
|
23
|
+
}.join(' ')
|
24
|
+
end
|
25
|
+
|
26
|
+
module PublicationModel
|
27
|
+
class << self
|
28
|
+
def included(mod)
|
29
|
+
mod.__send__ :include, Inspector
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def inspect
|
34
|
+
"#<%{class}:%{object_id} @package=%{package} %{attributes}>" % {
|
35
|
+
:class => self.class,
|
36
|
+
:package => package.inspect_simply,
|
37
|
+
:object_id => inspect_object_id,
|
38
|
+
:attributes => inspect_instance_variables(exclude: [:@package])
|
39
|
+
}
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
data/lib/epub/ocf/container.rb
CHANGED
@@ -17,6 +17,8 @@ module EPUB
|
|
17
17
|
class Rootfile
|
18
18
|
attr_accessor :full_path, :media_type
|
19
19
|
|
20
|
+
# @param full_path [Addressable::URI|nil]
|
21
|
+
# @param media_type [String]
|
20
22
|
def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
|
21
23
|
@full_path, @media_type = full_path, media_type
|
22
24
|
end
|
data/lib/epub/parser.rb
CHANGED
@@ -32,8 +32,6 @@ module EPUB
|
|
32
32
|
# parse_content_document(document)
|
33
33
|
if @item.nav?
|
34
34
|
content_document.navigations = parse_navigations(document)
|
35
|
-
else
|
36
|
-
raise NotImplementedError
|
37
35
|
end
|
38
36
|
content_document
|
39
37
|
end
|
@@ -71,12 +69,12 @@ module EPUB
|
|
71
69
|
when 'canvas'
|
72
70
|
when 'embed'
|
73
71
|
when 'iframe'
|
74
|
-
item.text =
|
72
|
+
item.text = extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')
|
75
73
|
when 'img'
|
76
|
-
item.text = extract_attribute(embedded_content, 'alt')
|
74
|
+
item.text = extract_attribute(embedded_content, 'alt')
|
77
75
|
when 'math'
|
78
76
|
when 'object'
|
79
|
-
item.text = extract_attribute(embedded_content, 'name')
|
77
|
+
item.text = extract_attribute(embedded_content, 'name')
|
80
78
|
when 'svg'
|
81
79
|
when 'video'
|
82
80
|
else
|
data/lib/epub/parser/ocf.rb
CHANGED
@@ -39,10 +39,8 @@ module EPUB
|
|
39
39
|
doc = Nokogiri.XML(xml)
|
40
40
|
doc.xpath('/ocf:container/ocf:rootfiles/ocf:rootfile', EPUB::NAMESPACES).each do |elem|
|
41
41
|
rootfile = EPUB::OCF::Container::Rootfile.new
|
42
|
-
|
43
|
-
|
44
|
-
rootfile.__send__(attr.gsub(/-/, '_') + '=', value)
|
45
|
-
end
|
42
|
+
rootfile.full_path = Addressable::URI.parse(extract_attribute(elem, 'full-path'))
|
43
|
+
rootfile.media_type = extract_attribute(elem, 'media-type')
|
46
44
|
container.rootfiles << rootfile
|
47
45
|
end
|
48
46
|
|
@@ -41,10 +41,7 @@ module EPUB
|
|
41
41
|
end
|
42
42
|
@unique_identifier_id = elem['unique-identifier']
|
43
43
|
@package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
|
44
|
-
if @package.prefix.key?
|
45
|
-
require 'epub/publication/fixed_layout'
|
46
|
-
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout
|
47
|
-
end
|
44
|
+
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
|
48
45
|
|
49
46
|
@package
|
50
47
|
end
|
@@ -76,7 +73,11 @@ module EPUB
|
|
76
73
|
metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
|
77
74
|
|
78
75
|
metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
|
79
|
-
|
76
|
+
language = EPUB::Publication::Package::Metadata::DCMES.new
|
77
|
+
language.content = e.content
|
78
|
+
language.id = e['id'] if e['id']
|
79
|
+
|
80
|
+
language
|
80
81
|
end
|
81
82
|
metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
|
82
83
|
|
@@ -199,8 +200,7 @@ module EPUB
|
|
199
200
|
@doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
|
200
201
|
media_type = EPUB::Publication::Package::Bindings::MediaType.new
|
201
202
|
media_type.media_type = extract_attribute(elem, 'media-type')
|
202
|
-
|
203
|
-
media_type.handler = items.detect {|item| item.id == extract_attribute(elem, 'handler')}
|
203
|
+
media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
|
204
204
|
bindings << media_type
|
205
205
|
end
|
206
206
|
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/publication.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module EPUB
|
2
2
|
module Publication
|
3
3
|
class Package
|
4
|
+
include Inspector
|
5
|
+
|
4
6
|
CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
|
5
7
|
RESERVED_VOCABULARY_PREFIXES = {
|
6
8
|
'' => 'http://idpf.org/epub/vocab/package/#',
|
@@ -23,7 +25,7 @@ module EPUB
|
|
23
25
|
end
|
24
26
|
end
|
25
27
|
|
26
|
-
attr_accessor :book,
|
28
|
+
attr_accessor :book,
|
27
29
|
:version, :prefix, :xml_lang, :dir, :id
|
28
30
|
attr_reader *CONTENT_MODELS
|
29
31
|
alias lang xml_lang
|
@@ -40,6 +42,23 @@ module EPUB
|
|
40
42
|
def unique_identifier
|
41
43
|
@metadata.unique_identifier
|
42
44
|
end
|
45
|
+
|
46
|
+
def inspect
|
47
|
+
"#<%{class}:%{object_id} %{attributes} %{models}>" % {
|
48
|
+
:class => self.class,
|
49
|
+
:object_id => inspect_object_id,
|
50
|
+
:attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
|
51
|
+
:models => inspect_models
|
52
|
+
}
|
53
|
+
end
|
54
|
+
|
55
|
+
def inspect_models
|
56
|
+
CONTENT_MODELS.map {|name|
|
57
|
+
model = __send__(name)
|
58
|
+
representation = model.nil? ? model.inspect : model.inspect_simply
|
59
|
+
"@#{name}=#{representation}"
|
60
|
+
}.join(' ')
|
61
|
+
end
|
43
62
|
end
|
44
63
|
end
|
45
64
|
end
|
@@ -2,10 +2,14 @@ module EPUB
|
|
2
2
|
module Publication
|
3
3
|
class Package
|
4
4
|
class Bindings
|
5
|
+
include Inspector::PublicationModel
|
5
6
|
attr_accessor :package
|
6
7
|
|
8
|
+
def initialize
|
9
|
+
@media_types = {}
|
10
|
+
end
|
11
|
+
|
7
12
|
def <<(media_type)
|
8
|
-
@media_types ||= {}
|
9
13
|
@media_types[media_type.media_type] = media_type
|
10
14
|
end
|
11
15
|
|
@@ -6,12 +6,17 @@ module EPUB
|
|
6
6
|
module Publication
|
7
7
|
class Package
|
8
8
|
class Manifest
|
9
|
+
include Inspector::PublicationModel
|
10
|
+
|
9
11
|
attr_accessor :package,
|
10
12
|
:id
|
11
13
|
|
14
|
+
def initialize
|
15
|
+
@items = {}
|
16
|
+
end
|
17
|
+
|
12
18
|
# @return self
|
13
19
|
def <<(item)
|
14
|
-
@items ||= {}
|
15
20
|
item.manifest = self
|
16
21
|
@items[item.id] = item
|
17
22
|
self
|
@@ -29,6 +34,12 @@ module EPUB
|
|
29
34
|
items.selector {|i| i.properties.include? 'cover-image'}.first
|
30
35
|
end
|
31
36
|
|
37
|
+
def each_item
|
38
|
+
@items.each_value do |item|
|
39
|
+
yield item
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
32
43
|
def items
|
33
44
|
@items.values
|
34
45
|
end
|
@@ -38,12 +49,15 @@ module EPUB
|
|
38
49
|
end
|
39
50
|
|
40
51
|
class Item
|
52
|
+
include Inspector
|
53
|
+
|
41
54
|
# @!attribute [rw] manifest
|
42
55
|
# @return [Manifest] Returns the value of manifest
|
43
56
|
# @!attribute [rw] id
|
44
57
|
# @return [String] Returns the value of id
|
45
58
|
# @!attribute [rw] href
|
46
|
-
# @return [Addressable::URI] Returns the value of href
|
59
|
+
# @return [Addressable::URI] Returns the value of href,
|
60
|
+
# which is a relative path from the rootfile (OPF file)
|
47
61
|
# @!attribute [rw] media_type
|
48
62
|
# @return [String] Returns the value of media_type
|
49
63
|
# @!attribute [rw] properties
|
@@ -55,19 +69,31 @@ module EPUB
|
|
55
69
|
attr_accessor :manifest,
|
56
70
|
:id, :href, :media_type, :fallback, :properties, :media_overlay
|
57
71
|
|
72
|
+
def initialize
|
73
|
+
@properties = []
|
74
|
+
end
|
75
|
+
|
58
76
|
# @todo Handle circular fallback chain
|
59
77
|
def fallback_chain
|
60
78
|
@fallback_chain ||= traverse_fallback_chain([])
|
61
79
|
end
|
62
80
|
|
81
|
+
# full path in archive
|
82
|
+
def entry_name
|
83
|
+
rootfile = manifest.package.book.ocf.container.rootfile.full_path
|
84
|
+
Addressable::URI.unescape(rootfile + href.normalize.request_uri)
|
85
|
+
end
|
86
|
+
|
63
87
|
def read
|
64
|
-
rootfile = Addressable::URI.parse(manifest.package.book.ocf.container.rootfile.full_path)
|
65
88
|
Zip::Archive.open(manifest.package.book.epub_file) {|zip|
|
66
|
-
|
67
|
-
zip.fopen(path).read
|
89
|
+
zip.fopen(entry_name).read
|
68
90
|
}
|
69
91
|
end
|
70
92
|
|
93
|
+
def xhtml?
|
94
|
+
media_type == 'application/xhtml+xml'
|
95
|
+
end
|
96
|
+
|
71
97
|
def nav?
|
72
98
|
properties.include? 'nav'
|
73
99
|
end
|
@@ -101,6 +127,15 @@ module EPUB
|
|
101
127
|
manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
|
102
128
|
end
|
103
129
|
|
130
|
+
def inspect
|
131
|
+
"#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
|
132
|
+
:class => self.class,
|
133
|
+
:object_id => inspect_object_id,
|
134
|
+
:manifest => "@manifest=#{@manifest.inspect_simply}",
|
135
|
+
:attributes => inspect_instance_variables(exclude: [:@manifest])
|
136
|
+
}
|
137
|
+
end
|
138
|
+
|
104
139
|
protected
|
105
140
|
|
106
141
|
def traverse_fallback_chain(chain)
|