epub-parser 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/CHANGELOG.markdown +10 -0
- data/README.markdown +43 -27
- data/bin/epubinfo +22 -0
- data/docs/EpubOpen.markdown +43 -0
- data/docs/Epubinfo.markdown +37 -0
- data/docs/FixedLayout.markdown +3 -5
- data/docs/Home.markdown +30 -15
- data/docs/Item.markdown +14 -14
- data/epub-parser.gemspec +5 -2
- data/lib/epub.rb +14 -1
- data/lib/epub/content_document.rb +1 -5
- data/lib/epub/content_document/navigation.rb +3 -5
- data/lib/epub/content_document/xhtml.rb +25 -1
- data/lib/epub/inspector.rb +43 -0
- data/lib/epub/ocf/container.rb +2 -0
- data/lib/epub/parser.rb +0 -2
- data/lib/epub/parser/content_document.rb +3 -5
- data/lib/epub/parser/ocf.rb +2 -4
- data/lib/epub/parser/publication.rb +7 -7
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication.rb +1 -0
- data/lib/epub/publication/package.rb +20 -1
- data/lib/epub/publication/package/bindings.rb +5 -1
- data/lib/epub/publication/package/guide.rb +1 -0
- data/lib/epub/publication/package/manifest.rb +40 -5
- data/lib/epub/publication/package/metadata.rb +7 -10
- data/lib/epub/publication/package/spine.rb +14 -4
- data/lib/method_decorators/deprecated.rb +84 -0
- data/test/fixtures/book/OPS/nav.xhtml +2 -0
- data/test/helper.rb +4 -2
- data/test/test_content_document.rb +21 -0
- data/test/test_epub.rb +12 -0
- data/test/test_fixed_layout.rb +0 -1
- data/test/test_inspect.rb +121 -0
- data/test/test_parser_content_document.rb +3 -0
- data/test/test_parser_fixed_layout.rb +1 -1
- data/test/test_parser_ocf.rb +1 -1
- data/test/test_publication.rb +125 -4
- metadata +56 -8
data/epub-parser.gemspec
CHANGED
@@ -29,7 +29,9 @@ Gem::Specification.new do |s|
|
|
29
29
|
s.add_development_dependency 'rake'
|
30
30
|
s.add_development_dependency 'pry'
|
31
31
|
s.add_development_dependency 'pry-doc'
|
32
|
-
s.add_development_dependency 'test-unit
|
32
|
+
s.add_development_dependency 'test-unit'
|
33
|
+
s.add_development_dependency 'test-unit-rr'
|
34
|
+
s.add_development_dependency 'test-unit-notify'
|
33
35
|
s.add_development_dependency 'simplecov'
|
34
36
|
s.add_development_dependency 'thin'
|
35
37
|
s.add_development_dependency 'yard'
|
@@ -42,6 +44,7 @@ Gem::Specification.new do |s|
|
|
42
44
|
|
43
45
|
s.add_runtime_dependency 'enumerabler'
|
44
46
|
s.add_runtime_dependency 'zipruby'
|
45
|
-
s.add_runtime_dependency 'nokogiri', '1.
|
47
|
+
s.add_runtime_dependency 'nokogiri', '~> 1.6'
|
46
48
|
s.add_runtime_dependency 'addressable'
|
49
|
+
s.add_runtime_dependency 'method_decorators', '0.9.3'
|
47
50
|
end
|
data/lib/epub.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'method_decorators/deprecated'
|
2
|
+
require 'epub/inspector'
|
1
3
|
require 'epub/ocf'
|
2
4
|
require 'epub/publication'
|
3
5
|
require 'epub/content_document'
|
@@ -37,6 +39,11 @@ module EPUB
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
42
|
+
# @overload each_page_on_spine(&blk)
|
43
|
+
# iterate over items in order of spine when block given
|
44
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
45
|
+
# @overload each_page_on_spine
|
46
|
+
# @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
|
40
47
|
def each_page_on_spine(&blk)
|
41
48
|
enum = package.spine.items
|
42
49
|
if block_given?
|
@@ -50,6 +57,11 @@ module EPUB
|
|
50
57
|
raise NotImplementedError
|
51
58
|
end
|
52
59
|
|
60
|
+
# @overload each_content(&blk)
|
61
|
+
# iterate over all items when block given
|
62
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
63
|
+
# @overload each_content
|
64
|
+
# @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
|
53
65
|
def each_content(&blk)
|
54
66
|
enum = manifest.items
|
55
67
|
if block_given?
|
@@ -63,13 +75,14 @@ module EPUB
|
|
63
75
|
raise NotImplementedError
|
64
76
|
end
|
65
77
|
|
78
|
+
# @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
|
66
79
|
def resources
|
67
80
|
manifest.items
|
68
81
|
end
|
69
82
|
|
70
83
|
# Syntax sugar
|
71
84
|
def rootfile_path
|
72
|
-
ocf.container.rootfile.full_path
|
85
|
+
ocf.container.rootfile.full_path.to_s
|
73
86
|
end
|
74
87
|
|
75
88
|
# Syntax sugar
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'epub/content_document/xhtml'
|
2
|
-
|
3
1
|
module EPUB
|
4
2
|
module ContentDocument
|
5
3
|
class Navigation < XHTML
|
@@ -11,15 +9,15 @@ module EPUB
|
|
11
9
|
end
|
12
10
|
|
13
11
|
def toc
|
14
|
-
|
12
|
+
navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
|
15
13
|
end
|
16
14
|
|
17
15
|
# Looks up the page-list navigation of this navigation document.
#
# Takes the first entry of +navigations+ whose +type+ equals
# Navigation::Type::PAGE_LIST, mirroring how +toc+ and +landmarks+ look up
# their own types. The constant used to be misspelled as "Nagivation",
# which raised NameError as soon as the block was evaluated.
#
# @return the first matching navigation (presumably nil when none matches)
def page_list
  navigations.selector {|nav| nav.type == Navigation::Type::PAGE_LIST}.first
end
|
20
18
|
|
21
19
|
def landmarks
|
22
|
-
|
20
|
+
navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
|
23
21
|
end
|
24
22
|
|
25
23
|
# Enumerator version of toc
|
@@ -3,15 +3,39 @@ module EPUB
|
|
3
3
|
class XHTML
|
4
4
|
attr_accessor :item
|
5
5
|
|
6
|
+
# @return [String] Returns the content string.
|
6
7
|
def read
|
7
8
|
item.read
|
8
9
|
end
|
9
10
|
alias raw_document read
|
10
11
|
|
11
|
-
# referenced directly from spine
|
12
|
+
# @return [true|false] Whether referenced directly from spine or not.
|
12
13
|
def top_level?
|
13
14
|
!! item.itemref
|
14
15
|
end
|
16
|
+
|
17
|
+
# @return [String] text of the first title element found in the content,
#   or an empty string (after emitting a warning) when there is none
def title
  first_title = Nokogiri.XML(read).search('title').first
  return first_title.text if first_title

  warn 'title element not found'
  ''
end
|
28
|
+
|
29
|
+
# @return [REXML::Document] content as REXML::Document object
|
30
|
+
def rexml
|
31
|
+
require 'rexml/document'
|
32
|
+
@rexml ||= REXML::Document.new(raw_document)
|
33
|
+
end
|
34
|
+
|
35
|
+
# @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
|
36
|
+
def nokogiri
|
37
|
+
@nokogiri ||= Nokogiri.XML(raw_document)
|
38
|
+
end
|
15
39
|
end
|
16
40
|
end
|
17
41
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module EPUB
  # Helpers for building compact #inspect strings.
  #
  # Publication objects reference each other (package <-> models), so
  # the default #inspect output would nest through those references.
  # These helpers let a class print a short form of related objects and
  # leave selected instance variables out of its own dump.
  module Inspector
    # Default options for #inspect_instance_variables. Frozen because it is
    # only ever read (merged into a new hash), never modified in place.
    INSTANCE_VARIABLES_OPTION = {:exclude => [].freeze}.freeze

    # @return [String] short form containing only class name and object id
    def inspect_simply
      "#<%{class}:%{object_id}>" % {
        :class => self.class,
        :object_id => inspect_object_id
      }
    end

    # @return [String] hexadecimal text derived from +__id__+ (shifted left
    #   by one bit)
    def inspect_object_id
      (__id__ << 1).to_s(16)
    end

    # Renders instance variables as space-separated "@name=value.inspect".
    #
    # @param options [Hash]
    # @option options [Array<Symbol>] :exclude names (with leading "@") of
    #   instance variables to leave out; defaults to none
    # @return [String]
    def inspect_instance_variables(options={})
      options = INSTANCE_VARIABLES_OPTION.merge(options)
      exclude = options[:exclude]

      (instance_variables - exclude).map {|name|
        value = instance_variable_get(name)
        "#{name}=#{value.inspect}"
      }.join(' ')
    end

    # Mixin for classes that respond to +package+. Including it also
    # includes Inspector itself.
    module PublicationModel
      class << self
        def included(mod)
          mod.__send__ :include, Inspector
        end
      end

      # @return [String] class, object id, short form of the package and the
      #   remaining instance variables. A model with no package yet shows
      #   "@package=nil" instead of raising NoMethodError.
      def inspect
        owner = package
        "#<%{class}:%{object_id} @package=%{package} %{attributes}>" % {
          :class => self.class,
          :package => owner.nil? ? owner.inspect : owner.inspect_simply,
          :object_id => inspect_object_id,
          :attributes => inspect_instance_variables(exclude: [:@package])
        }
      end
    end
  end
end
|
data/lib/epub/ocf/container.rb
CHANGED
@@ -17,6 +17,8 @@ module EPUB
|
|
17
17
|
class Rootfile
|
18
18
|
attr_accessor :full_path, :media_type
|
19
19
|
|
20
|
+
# @param full_path [Addressable::URI|nil]
|
21
|
+
# @param media_type [String]
|
20
22
|
def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
|
21
23
|
@full_path, @media_type = full_path, media_type
|
22
24
|
end
|
data/lib/epub/parser.rb
CHANGED
@@ -32,8 +32,6 @@ module EPUB
|
|
32
32
|
# parse_content_document(document)
|
33
33
|
if @item.nav?
|
34
34
|
content_document.navigations = parse_navigations(document)
|
35
|
-
else
|
36
|
-
raise NotImplementedError
|
37
35
|
end
|
38
36
|
content_document
|
39
37
|
end
|
@@ -71,12 +69,12 @@ module EPUB
|
|
71
69
|
when 'canvas'
|
72
70
|
when 'embed'
|
73
71
|
when 'iframe'
|
74
|
-
item.text =
|
72
|
+
item.text = extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')
|
75
73
|
when 'img'
|
76
|
-
item.text = extract_attribute(embedded_content, 'alt')
|
74
|
+
item.text = extract_attribute(embedded_content, 'alt')
|
77
75
|
when 'math'
|
78
76
|
when 'object'
|
79
|
-
item.text = extract_attribute(embedded_content, 'name')
|
77
|
+
item.text = extract_attribute(embedded_content, 'name')
|
80
78
|
when 'svg'
|
81
79
|
when 'video'
|
82
80
|
else
|
data/lib/epub/parser/ocf.rb
CHANGED
@@ -39,10 +39,8 @@ module EPUB
|
|
39
39
|
doc = Nokogiri.XML(xml)
|
40
40
|
doc.xpath('/ocf:container/ocf:rootfiles/ocf:rootfile', EPUB::NAMESPACES).each do |elem|
|
41
41
|
rootfile = EPUB::OCF::Container::Rootfile.new
|
42
|
-
|
43
|
-
|
44
|
-
rootfile.__send__(attr.gsub(/-/, '_') + '=', value)
|
45
|
-
end
|
42
|
+
rootfile.full_path = Addressable::URI.parse(extract_attribute(elem, 'full-path'))
|
43
|
+
rootfile.media_type = extract_attribute(elem, 'media-type')
|
46
44
|
container.rootfiles << rootfile
|
47
45
|
end
|
48
46
|
|
@@ -41,10 +41,7 @@ module EPUB
|
|
41
41
|
end
|
42
42
|
@unique_identifier_id = elem['unique-identifier']
|
43
43
|
@package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
|
44
|
-
if @package.prefix.key?
|
45
|
-
require 'epub/publication/fixed_layout'
|
46
|
-
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout
|
47
|
-
end
|
44
|
+
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
|
48
45
|
|
49
46
|
@package
|
50
47
|
end
|
@@ -76,7 +73,11 @@ module EPUB
|
|
76
73
|
metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
|
77
74
|
|
78
75
|
metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
|
79
|
-
|
76
|
+
language = EPUB::Publication::Package::Metadata::DCMES.new
|
77
|
+
language.content = e.content
|
78
|
+
language.id = e['id'] if e['id']
|
79
|
+
|
80
|
+
language
|
80
81
|
end
|
81
82
|
metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
|
82
83
|
|
@@ -199,8 +200,7 @@ module EPUB
|
|
199
200
|
@doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
|
200
201
|
media_type = EPUB::Publication::Package::Bindings::MediaType.new
|
201
202
|
media_type.media_type = extract_attribute(elem, 'media-type')
|
202
|
-
|
203
|
-
media_type.handler = items.detect {|item| item.id == extract_attribute(elem, 'handler')}
|
203
|
+
media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
|
204
204
|
bindings << media_type
|
205
205
|
end
|
206
206
|
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/publication.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module EPUB
|
2
2
|
module Publication
|
3
3
|
class Package
|
4
|
+
include Inspector
|
5
|
+
|
4
6
|
CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
|
5
7
|
RESERVED_VOCABULARY_PREFIXES = {
|
6
8
|
'' => 'http://idpf.org/epub/vocab/package/#',
|
@@ -23,7 +25,7 @@ module EPUB
|
|
23
25
|
end
|
24
26
|
end
|
25
27
|
|
26
|
-
attr_accessor :book,
|
28
|
+
attr_accessor :book,
|
27
29
|
:version, :prefix, :xml_lang, :dir, :id
|
28
30
|
attr_reader *CONTENT_MODELS
|
29
31
|
alias lang xml_lang
|
@@ -40,6 +42,23 @@ module EPUB
|
|
40
42
|
def unique_identifier
|
41
43
|
@metadata.unique_identifier
|
42
44
|
end
|
45
|
+
|
46
|
+
# @return [String] class name, object id, the instance variables other
#   than the content models, then the content models in short form
def inspect
  model_ivars = CONTENT_MODELS.map {|model| :"@#{model}"}
  fields = {
    :class => self.class,
    :object_id => inspect_object_id,
    :attributes => inspect_instance_variables(exclude: model_ivars),
    :models => inspect_models
  }
  "#<%{class}:%{object_id} %{attributes} %{models}>" % fields
end
|
54
|
+
|
55
|
+
# @return [String] space-separated "@name=..." entries, one per name in
#   CONTENT_MODELS; a nil model is shown as "nil", any other through
#   its inspect_simply form
def inspect_models
  entries = CONTENT_MODELS.map do |model_name|
    content_model = __send__(model_name)
    shown = content_model.nil? ? content_model.inspect : content_model.inspect_simply
    "@#{model_name}=#{shown}"
  end
  entries.join(' ')
end
|
43
62
|
end
|
44
63
|
end
|
45
64
|
end
|
@@ -2,10 +2,14 @@ module EPUB
|
|
2
2
|
module Publication
|
3
3
|
class Package
|
4
4
|
class Bindings
|
5
|
+
include Inspector::PublicationModel
|
5
6
|
attr_accessor :package
|
6
7
|
|
8
|
+
def initialize
|
9
|
+
@media_types = {}
|
10
|
+
end
|
11
|
+
|
7
12
|
def <<(media_type)
|
8
|
-
@media_types ||= {}
|
9
13
|
@media_types[media_type.media_type] = media_type
|
10
14
|
end
|
11
15
|
|
@@ -6,12 +6,17 @@ module EPUB
|
|
6
6
|
module Publication
|
7
7
|
class Package
|
8
8
|
class Manifest
|
9
|
+
include Inspector::PublicationModel
|
10
|
+
|
9
11
|
attr_accessor :package,
|
10
12
|
:id
|
11
13
|
|
14
|
+
def initialize
|
15
|
+
@items = {}
|
16
|
+
end
|
17
|
+
|
12
18
|
# @return self
|
13
19
|
def <<(item)
|
14
|
-
@items ||= {}
|
15
20
|
item.manifest = self
|
16
21
|
@items[item.id] = item
|
17
22
|
self
|
@@ -29,6 +34,12 @@ module EPUB
|
|
29
34
|
items.selector {|i| i.properties.include? 'cover-image'}.first
|
30
35
|
end
|
31
36
|
|
37
|
+
# Iterates over every item stored in this manifest.
#
# @overload each_item
#   @yieldparam item each value stored in @items
# @overload each_item
#   @return [Enumerator] over the same items when no block is given
#     (previously a blockless call raised LocalJumpError at the yield)
def each_item
  return enum_for(:each_item) unless block_given?

  @items.each_value do |item|
    yield item
  end
end
|
42
|
+
|
32
43
|
def items
|
33
44
|
@items.values
|
34
45
|
end
|
@@ -38,12 +49,15 @@ module EPUB
|
|
38
49
|
end
|
39
50
|
|
40
51
|
class Item
|
52
|
+
include Inspector
|
53
|
+
|
41
54
|
# @!attribute [rw] manifest
|
42
55
|
# @return [Manifest] Returns the value of manifest
|
43
56
|
# @!attribute [rw] id
|
44
57
|
# @return [String] Returns the value of id
|
45
58
|
# @!attribute [rw] href
|
46
|
-
# @return [Addressable::URI] Returns the value of href
|
59
|
+
# @return [Addressable::URI] Returns the value of href,
|
60
|
+
# which is relative path from rootfile(OPF file)
|
47
61
|
# @!attribute [rw] media_type
|
48
62
|
# @return [String] Returns the value of media_type
|
49
63
|
# @!attribute [rw] properties
|
@@ -55,19 +69,31 @@ module EPUB
|
|
55
69
|
attr_accessor :manifest,
|
56
70
|
:id, :href, :media_type, :fallback, :properties, :media_overlay
|
57
71
|
|
72
|
+
def initialize
|
73
|
+
@properties = []
|
74
|
+
end
|
75
|
+
|
58
76
|
# @todo Handle circular fallback chain
|
59
77
|
def fallback_chain
|
60
78
|
@fallback_chain ||= traverse_fallback_chain([])
|
61
79
|
end
|
62
80
|
|
81
|
+
# full path in archive
|
82
|
+
def entry_name
|
83
|
+
rootfile = manifest.package.book.ocf.container.rootfile.full_path
|
84
|
+
Addressable::URI.unescape(rootfile + href.normalize.request_uri)
|
85
|
+
end
|
86
|
+
|
63
87
|
def read
|
64
|
-
rootfile = Addressable::URI.parse(manifest.package.book.ocf.container.rootfile.full_path)
|
65
88
|
Zip::Archive.open(manifest.package.book.epub_file) {|zip|
|
66
|
-
|
67
|
-
zip.fopen(path).read
|
89
|
+
zip.fopen(entry_name).read
|
68
90
|
}
|
69
91
|
end
|
70
92
|
|
93
|
+
def xhtml?
|
94
|
+
media_type == 'application/xhtml+xml'
|
95
|
+
end
|
96
|
+
|
71
97
|
def nav?
|
72
98
|
properties.include? 'nav'
|
73
99
|
end
|
@@ -101,6 +127,15 @@ module EPUB
|
|
101
127
|
manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
|
102
128
|
end
|
103
129
|
|
130
|
+
# Builds a debugging representation of the item: class name, object id,
# an abbreviated @manifest, then the remaining instance variables.
#
# The manifest is rendered through inspect_simply and excluded from the
# instance-variable list, so it appears only once and in short form. An
# item with no manifest assigned shows "@manifest=nil" instead of
# raising NoMethodError.
#
# @return [String]
def inspect
  manifest_repr = @manifest.nil? ? @manifest.inspect : @manifest.inspect_simply
  "#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
    :class => self.class,
    :object_id => inspect_object_id,
    :manifest => "@manifest=#{manifest_repr}",
    :attributes => inspect_instance_variables(exclude: [:@manifest])
  }
end
|
138
|
+
|
104
139
|
protected
|
105
140
|
|
106
141
|
def traverse_fallback_chain(chain)
|