epub-parser 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/.yardopts +2 -0
- data/CHANGELOG.markdown +18 -0
- data/README.markdown +40 -11
- data/bin/epub-open +1 -1
- data/bin/epubinfo +14 -14
- data/docs/Home.markdown +3 -2
- data/docs/Item.markdown +3 -3
- data/docs/Navigation.markdown +58 -0
- data/docs/Publication.markdown +54 -0
- data/epub-parser.gemspec +1 -2
- data/lib/epub.rb +5 -83
- data/lib/epub/book.rb +1 -1
- data/lib/epub/book/features.rb +85 -0
- data/lib/epub/constants.rb +2 -0
- data/lib/epub/content_document/navigation.rb +31 -4
- data/lib/epub/content_document/xhtml.rb +1 -1
- data/lib/epub/inspector.rb +9 -7
- data/lib/epub/parser.rb +1 -1
- data/lib/epub/parser/content_document.rb +5 -1
- data/lib/epub/parser/ocf.rb +2 -2
- data/lib/epub/parser/publication.rb +46 -87
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/fixed_layout.rb +2 -3
- data/lib/epub/publication/package/guide.rb +19 -14
- data/lib/epub/publication/package/manifest.rb +36 -6
- data/lib/epub/publication/package/metadata.rb +27 -8
- data/lib/epub/publication/package/spine.rb +10 -3
- data/test/fixtures/book/OPS/nav.xhtml +1 -1
- data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +2 -1
- data/test/helper.rb +1 -1
- data/test/test_content_document.rb +41 -2
- data/test/test_epub.rb +0 -7
- data/test/test_parser.rb +4 -4
- data/test/test_parser_content_document.rb +2 -0
- data/test/test_parser_publication.rb +4 -0
- data/test/test_publication.rb +60 -0
- metadata +55 -67
- data/lib/method_decorators/deprecated.rb +0 -84
@@ -5,10 +5,14 @@ module EPUB
|
|
5
5
|
class Package
|
6
6
|
class Guide
|
7
7
|
include Inspector::PublicationModel
|
8
|
-
attr_accessor :package
|
8
|
+
attr_accessor :package, :references
|
9
9
|
|
10
|
-
def
|
11
|
-
|
10
|
+
def initialize
|
11
|
+
Reference::TYPES.each do |type|
|
12
|
+
variable_name = '@' + type.gsub('-', '_')
|
13
|
+
instance_variable_set variable_name, nil
|
14
|
+
end
|
15
|
+
@references = []
|
12
16
|
end
|
13
17
|
|
14
18
|
def <<(reference)
|
@@ -16,18 +20,8 @@ module EPUB
|
|
16
20
|
references << reference
|
17
21
|
end
|
18
22
|
|
19
|
-
%w[cover title-page toc index glossary acknowledgements bibliography colophon copyright-page dedication epigraph foreword loi lot notes preface text].each do |type|
|
20
|
-
method_name = type.gsub('-', '_')
|
21
|
-
define_method method_name do
|
22
|
-
var = instance_variable_get "@#{method_name}"
|
23
|
-
return var if var
|
24
|
-
|
25
|
-
var = references.selector {|ref| ref.type == type}.first
|
26
|
-
instance_variable_set "@#{method_name}", var
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
23
|
class Reference
|
24
|
+
TYPES = %w[cover title-page toc index glossary acknowledgements bibliography colophon copyright-page dedication epigraph foreword loi lot notes preface text]
|
31
25
|
attr_accessor :guide,
|
32
26
|
:type, :title, :href
|
33
27
|
|
@@ -40,6 +34,17 @@ module EPUB
|
|
40
34
|
end.first
|
41
35
|
end
|
42
36
|
end
|
37
|
+
|
38
|
+
Reference::TYPES.each do |type|
|
39
|
+
method_name = type.gsub('-', '_')
|
40
|
+
define_method method_name do
|
41
|
+
var = instance_variable_get "@#{method_name}"
|
42
|
+
return var if var
|
43
|
+
|
44
|
+
var = references.selector {|ref| ref.type == type}.first
|
45
|
+
instance_variable_set "@#{method_name}", var
|
46
|
+
end
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
45
50
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'set'
|
1
2
|
require 'enumerabler'
|
2
3
|
require 'epub/constants'
|
3
4
|
require 'epub/parser/content_document'
|
@@ -31,7 +32,7 @@ module EPUB
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def cover_image
|
34
|
-
items.selector
|
35
|
+
items.selector(&:cover_image?).first
|
35
36
|
end
|
36
37
|
|
37
38
|
def each_item
|
@@ -57,20 +58,25 @@ module EPUB
|
|
57
58
|
# @return [String] Returns the value of id
|
58
59
|
# @!attribute [rw] href
|
59
60
|
# @return [Addressable::URI] Returns the value of href,
|
60
|
-
# which is relative
|
61
|
+
# which is relative IRI from rootfile(OPF file)
|
61
62
|
# @!attribute [rw] media_type
|
62
63
|
# @return [String] Returns the value of media_type
|
63
64
|
# @!attribute [rw] properties
|
64
|
-
# @return [
|
65
|
+
# @return [Set<String>] Returns the value of properties
|
65
66
|
# @!attribute [rw] media_overlay
|
66
67
|
# @return [String] Returns the value of media_overlay
|
67
68
|
# @!attribute [rw] fallback
|
68
69
|
# @return [Item] Returns the value of attribute fallback
|
69
70
|
attr_accessor :manifest,
|
70
|
-
:id, :href, :media_type, :fallback, :
|
71
|
+
:id, :href, :media_type, :fallback, :media_overlay
|
72
|
+
attr_reader :properties
|
71
73
|
|
72
74
|
def initialize
|
73
|
-
@properties =
|
75
|
+
@properties = Set.new
|
76
|
+
end
|
77
|
+
|
78
|
+
def properties=(props)
|
79
|
+
@properties = props.kind_of?(Set) ? props : Set.new(props)
|
74
80
|
end
|
75
81
|
|
76
82
|
# @todo Handle circular fallback chain
|
@@ -98,6 +104,10 @@ module EPUB
|
|
98
104
|
properties.include? 'nav'
|
99
105
|
end
|
100
106
|
|
107
|
+
def cover_image?
|
108
|
+
properties.include? 'cover-image'
|
109
|
+
end
|
110
|
+
|
101
111
|
# @todo Handle circular fallback chain
|
102
112
|
def use_fallback_chain(options = {})
|
103
113
|
supported = EPUB::MediaType::CORE
|
@@ -113,7 +123,7 @@ module EPUB
|
|
113
123
|
return yield binding_media_type.handler
|
114
124
|
end
|
115
125
|
return fallback.use_fallback_chain(options) {|fb| yield fb} if fallback
|
116
|
-
raise EPUB::MediaType::
|
126
|
+
raise EPUB::MediaType::UnsupportedMediaType
|
117
127
|
end
|
118
128
|
|
119
129
|
def content_document
|
@@ -127,6 +137,26 @@ module EPUB
|
|
127
137
|
manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
|
128
138
|
end
|
129
139
|
|
140
|
+
# @param iri [Addressable::URI] relative iri
|
141
|
+
# @return [Item]
|
142
|
+
# @return [nil] when item not found
|
143
|
+
# @raise ArgumentError when +iri+ is not relative
|
144
|
+
# @raise ArgumentError when +iri+ starts with "/"(slash)
|
145
|
+
# @note Algorithm stolen form Rack::Utils#clean_path_info
|
146
|
+
def find_item_by_relative_iri(iri)
|
147
|
+
raise ArgumentError, "Not relative: #{iri.inspect}" unless iri.relative?
|
148
|
+
raise ArgumentError, "Start with slash: #{iri.inspect}" if iri.to_s.start_with? Addressable::URI::SLASH
|
149
|
+
target_href = href + iri
|
150
|
+
segments = target_href.to_s.split(Addressable::URI::SLASH)
|
151
|
+
clean_segments = []
|
152
|
+
segments.each do |segment|
|
153
|
+
next if segment.empty? || segment == '.'
|
154
|
+
segment == '..' ? clean_segments.pop : clean_segments << segment
|
155
|
+
end
|
156
|
+
target_iri = Addressable::URI.parse(clean_segments.join(Addressable::URI::SLASH))
|
157
|
+
manifest.items.find { |item| item.href == target_iri}
|
158
|
+
end
|
159
|
+
|
130
160
|
def inspect
|
131
161
|
"#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
|
132
162
|
:class => self.class,
|
@@ -1,10 +1,10 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module EPUB
|
2
4
|
module Publication
|
3
5
|
class Package
|
4
6
|
class Metadata
|
5
7
|
include Inspector::PublicationModel
|
6
|
-
include MethodDecorators
|
7
|
-
extend MethodDecorators
|
8
8
|
|
9
9
|
DC_ELEMS = [:identifiers, :titles, :languages] +
|
10
10
|
[:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
|
@@ -48,6 +48,10 @@ module EPUB
|
|
48
48
|
dates.first
|
49
49
|
end
|
50
50
|
|
51
|
+
def language
|
52
|
+
languages.first
|
53
|
+
end
|
54
|
+
|
51
55
|
def to_h
|
52
56
|
DC_ELEMS.inject({}) do |hsh, elem|
|
53
57
|
hsh[elem] = __send__(elem)
|
@@ -55,11 +59,6 @@ module EPUB
|
|
55
59
|
end
|
56
60
|
end
|
57
61
|
|
58
|
-
+Deprecated.new {|klass, method| "#{klass}##{method} is deprecated. Use #to_h instead."}
|
59
|
-
def to_hash
|
60
|
-
to_h
|
61
|
-
end
|
62
|
-
|
63
62
|
def primary_metas
|
64
63
|
metas.select {|meta| meta.primary_expression?}
|
65
64
|
end
|
@@ -70,7 +69,7 @@ module EPUB
|
|
70
69
|
attr_writer :refiners
|
71
70
|
|
72
71
|
def refiners
|
73
|
-
@refiners ||=
|
72
|
+
@refiners ||= Set.new
|
74
73
|
end
|
75
74
|
|
76
75
|
PROPERTIES.each do |voc|
|
@@ -92,6 +91,26 @@ module EPUB
|
|
92
91
|
end
|
93
92
|
end
|
94
93
|
|
94
|
+
class Identifier < DCMES
|
95
|
+
# @note This is ad-hoc
|
96
|
+
# @todo Define and include OPF module for opf:scheme attribute
|
97
|
+
# @todo Define generale way to handle with identifier-type refiners
|
98
|
+
attr_accessor :scheme
|
99
|
+
|
100
|
+
# @note This is ad-hoc
|
101
|
+
# @todo Define and include OPF module for opf:scheme attribute
|
102
|
+
# @todo Define generale way to handle with identifier-type refiners
|
103
|
+
def isbn?
|
104
|
+
refiners.any? {|refiner|
|
105
|
+
refiner.property == 'identifier-type' and
|
106
|
+
refiner.scheme == 'onix:codelist5' and
|
107
|
+
%w[02 15].include? refiner.content
|
108
|
+
} or
|
109
|
+
scheme == 'ISBN' or
|
110
|
+
content.to_s.downcase.start_with? 'urn:isbn'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
95
114
|
class Title < DCMES
|
96
115
|
include Comparable
|
97
116
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module EPUB
|
2
4
|
module Publication
|
3
5
|
class Package
|
@@ -42,10 +44,15 @@ module EPUB
|
|
42
44
|
PAGE_SPREAD_PREFIX = 'page-spread-'.freeze
|
43
45
|
|
44
46
|
attr_accessor :spine,
|
45
|
-
:idref, :linear, :id
|
47
|
+
:idref, :linear, :id
|
48
|
+
attr_reader :properties
|
46
49
|
|
47
50
|
def initialize
|
48
|
-
@properties =
|
51
|
+
@properties = Set.new
|
52
|
+
end
|
53
|
+
|
54
|
+
def properties=(props)
|
55
|
+
@properties = props.kind_of?(Set) ? props : Set.new(props)
|
49
56
|
end
|
50
57
|
|
51
58
|
# @return [true|false]
|
@@ -68,7 +75,7 @@ module EPUB
|
|
68
75
|
self.__send__(meth) == other.__send__(meth)
|
69
76
|
} and
|
70
77
|
(linear? == other.linear?) and
|
71
|
-
(
|
78
|
+
(properties == other.properties)
|
72
79
|
end
|
73
80
|
|
74
81
|
# @return ["left", "right", nil]
|
@@ -4,8 +4,9 @@
|
|
4
4
|
xmlns="http://www.idpf.org/2007/opf"
|
5
5
|
prefix=" foaf: http://xmlns.com/foaf/spec/
|
6
6
|
dbp: http://dbpedia.org/ontology/ ">
|
7
|
-
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
7
|
+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
|
8
8
|
<dc:identifier id="pub-id">da265185-8da8-462d-a146-17dd388f61fc</dc:identifier>
|
9
|
+
<dc:identifier opf:scheme="ISBN">0000000000000</dc:identifier>
|
9
10
|
|
10
11
|
<dc:title id="t1" xml:lang="fr">Mon premier guide de cuisson, un Mémoire</dc:title>
|
11
12
|
<meta refines="#t1" property="title-type">main</meta>
|
data/test/helper.rb
CHANGED
@@ -41,13 +41,52 @@ class TestContentDocument < Test::Unit::TestCase
|
|
41
41
|
|
42
42
|
def test_title_returns_value_of_title_element
|
43
43
|
content_doc = XHTML.new
|
44
|
-
stub(content_doc).
|
44
|
+
stub(content_doc).raw_document {File.read(File.join(File.dirname(__FILE__), 'fixtures', 'book', 'OPS', '日本語.xhtml'))}
|
45
45
|
assert_equal '日本語', content_doc.title
|
46
46
|
end
|
47
47
|
|
48
48
|
def test_title_returns_empty_string_when_title_element_not_exist
|
49
49
|
content_doc = XHTML.new
|
50
|
-
stub(content_doc).
|
50
|
+
stub(content_doc).raw_document {'content'}
|
51
51
|
assert_equal '', content_doc.title
|
52
52
|
end
|
53
|
+
|
54
|
+
class TestNavigationDocument < self
|
55
|
+
def test_item_hidden_returns_true_when_it_has_some_value
|
56
|
+
item = Navigation::Item.new.tap {|item| item.hidden = ''}
|
57
|
+
assert_true item.hidden?
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_item_hidden_returns_false_when_no_parent_and_no_value
|
61
|
+
item = Navigation::Item.new
|
62
|
+
assert_false item.hidden?
|
63
|
+
end
|
64
|
+
|
65
|
+
def test_item_hidden_cascade_parent_item
|
66
|
+
parent = Navigation::Item.new.tap {|item| item.hidden = true}
|
67
|
+
child = Navigation::Item.new.tap {|item| item.hidden = nil}
|
68
|
+
parent.items << child
|
69
|
+
assert_true parent.items.hidden?
|
70
|
+
assert_true child.hidden?
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_item_is_traversable
|
74
|
+
parent = Navigation::Item.new
|
75
|
+
child = Navigation::Navigation.new
|
76
|
+
grandchild = Navigation::Item.new
|
77
|
+
parent.items << child
|
78
|
+
child.items << grandchild
|
79
|
+
|
80
|
+
parent.traverse do |item, deps|
|
81
|
+
case deps
|
82
|
+
when 0
|
83
|
+
assert_equal item, parent
|
84
|
+
when 1
|
85
|
+
assert_equal item, child
|
86
|
+
when 2
|
87
|
+
assert_equal item, grandchild
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
53
92
|
end
|
data/test/test_epub.rb
CHANGED
@@ -6,13 +6,6 @@ class TestEUPB < Test::Unit::TestCase
|
|
6
6
|
@file = 'test/fixtures/book.epub'
|
7
7
|
end
|
8
8
|
|
9
|
-
def test_parse
|
10
|
-
book = EPUB::Book.new
|
11
|
-
assert_nothing_raised do
|
12
|
-
book.parse @file
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
9
|
def test_each_page_on_spine_returns_enumerator_when_block_not_given
|
17
10
|
book = EPUB::Parser.parse(@file)
|
18
11
|
assert_kind_of Enumerator, book.each_page_on_spine
|
data/test/test_parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require File.expand_path 'helper', File.dirname(__FILE__)
|
2
2
|
|
3
3
|
class MyBook
|
4
|
-
include EPUB
|
4
|
+
include EPUB::Book::Features
|
5
5
|
end
|
6
6
|
|
7
7
|
class TestParser < Test::Unit::TestCase
|
@@ -13,16 +13,16 @@ class TestParser < Test::Unit::TestCase
|
|
13
13
|
assert_instance_of EPUB::Book, @parser.parse
|
14
14
|
|
15
15
|
book = Object.new
|
16
|
-
book.extend EPUB
|
16
|
+
book.extend EPUB::Book::Features
|
17
17
|
assert_nothing_raised do
|
18
18
|
EPUB::Parser.parse('test/fixtures/book.epub', book: book)
|
19
19
|
end
|
20
|
-
assert_kind_of EPUB, EPUB::Parser.parse('test/fixtures/book.epub', book: book)
|
20
|
+
assert_kind_of EPUB::Book::Features, EPUB::Parser.parse('test/fixtures/book.epub', book: book)
|
21
21
|
|
22
22
|
assert_nothing_raised do
|
23
23
|
EPUB::Parser.parse('test/fixtures/book.epub', class: MyBook)
|
24
24
|
end
|
25
|
-
assert_kind_of EPUB, EPUB::Parser.parse('test/fixtures/book.epub', class: MyBook)
|
25
|
+
assert_kind_of EPUB::Book::Features, EPUB::Parser.parse('test/fixtures/book.epub', class: MyBook)
|
26
26
|
end
|
27
27
|
|
28
28
|
class TestBook < TestParser
|
@@ -47,6 +47,10 @@ class TestParserPublication < Test::Unit::TestCase
|
|
47
47
|
assert_equal 'da265185-8da8-462d-a146-17dd388f61fc', @metadata.identifiers.first.content
|
48
48
|
end
|
49
49
|
|
50
|
+
def test_identifier_has_scheme_when_qualified_by_attribute
|
51
|
+
assert_equal 'ISBN', @metadata.identifiers[1].scheme
|
52
|
+
end
|
53
|
+
|
50
54
|
def test_has_unique_identifier
|
51
55
|
assert_equal 'da265185-8da8-462d-a146-17dd388f61fc', @metadata.unique_identifier.to_s
|
52
56
|
end
|
data/test/test_publication.rb
CHANGED
@@ -134,6 +134,51 @@ class TestPublication < Test::Unit::TestCase
|
|
134
134
|
|
135
135
|
assert_true meta.subexpression?
|
136
136
|
end
|
137
|
+
|
138
|
+
class TestIdentifier < self
|
139
|
+
def setup
|
140
|
+
@identifier = Package::Metadata::Identifier.new
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_is_isbn_when_refined_by_onix_scheme
|
144
|
+
meta = Package::Metadata::Meta.new
|
145
|
+
meta.property = 'identifier-type'
|
146
|
+
meta.scheme = 'onix:codelist5'
|
147
|
+
meta.content = '02'
|
148
|
+
meta.refines = @identifier
|
149
|
+
|
150
|
+
assert_true @identifier.isbn?
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_is_isbn_when_qualified_by_attribute
|
154
|
+
@identifier.content = '0000000000'
|
155
|
+
@identifier.scheme = 'ISBN'
|
156
|
+
|
157
|
+
assert_true @identifier.isbn?
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_is_isbn_when_content_is_isbn_urn
|
161
|
+
@identifier.content = 'urn:isbn:0000000000'
|
162
|
+
|
163
|
+
assert_true @identifier.isbn?
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_is_not_isbn_when_no_refiner_nor_scheme
|
167
|
+
assert_false @identifier.isbn?
|
168
|
+
end
|
169
|
+
|
170
|
+
def test_refiner_take_precedence_over_scheme_for_isbn
|
171
|
+
@identifier.content = '0000000000000'
|
172
|
+
@identifier.scheme = 'something'
|
173
|
+
meta = Package::Metadata::Meta.new
|
174
|
+
meta.property = 'identifier-type'
|
175
|
+
meta.scheme = 'onix:codelist5'
|
176
|
+
meta.content = '15'
|
177
|
+
meta.refines = @identifier
|
178
|
+
|
179
|
+
assert_true @identifier.isbn?
|
180
|
+
end
|
181
|
+
end
|
137
182
|
end
|
138
183
|
|
139
184
|
class TestManifest < TestPublication
|
@@ -179,6 +224,21 @@ class TestPublication < Test::Unit::TestCase
|
|
179
224
|
|
180
225
|
assert_false item.xhtml?
|
181
226
|
end
|
227
|
+
|
228
|
+
def test_find_item_by_relative_iri_returns_item_which_has_resolved_iri_as_href
|
229
|
+
manifest = Package::Manifest.new
|
230
|
+
manifest << xhtml_item = Package::Manifest::Item.new.tap {|item| item.href = Addressable::URI.parse('text/01.xhtml')}
|
231
|
+
manifest << image_item = Package::Manifest::Item.new.tap {|item| item.href = Addressable::URI.parse('image/01.png')}
|
232
|
+
|
233
|
+
assert_equal image_item, xhtml_item.find_item_by_relative_iri(Addressable::URI.parse('../image/01.png'))
|
234
|
+
end
|
235
|
+
|
236
|
+
def test_find_item_by_relative_iri_returns_nil_when_no_item_found
|
237
|
+
manifest = Package::Manifest.new
|
238
|
+
manifest << xhtml_item = Package::Manifest::Item.new.tap {|item| item.href = Addressable::URI.parse('text/01.xhtml')}
|
239
|
+
|
240
|
+
assert_nil xhtml_item.find_item_by_relative_iri(Addressable::URI.parse('../image/01.png'))
|
241
|
+
end
|
182
242
|
end
|
183
243
|
end
|
184
244
|
|