bb-epub 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,14 @@
1
+ # BbEPUB
2
+
3
+
4
+ EPUB 2 and EPUB 3 package support for Bookbinder.
5
+
6
+
7
+ ## HOWTO
8
+
9
+ $ bundle
10
+
11
+ $ bundle exec irb -rbookbinder -rbb-epub
12
+
13
+ >> puts Bookbinder::Operations.map('tmp/book.epub')
14
+
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env rake
2
+ require 'bundler/gem_tasks'
3
+ require 'rake/testtask'
4
+
5
# Unit-test task: runs every file matching test/unit/**/test*.rb with the
# 'test' directory on the load path.
Rake::TestTask.new do |test_task|
  test_task.libs << 'test'
  test_task.test_files = FileList['test/unit/**/test*.rb']
  test_task.verbose = true
end

# Running rake without arguments runs the test task.
desc 'Run tests'
task :default => :test
@@ -0,0 +1,64 @@
1
+ class BbEPUB::Package < Bookbinder::Package
2
+
3
+ require_transforms(File.join(File.dirname(__FILE__), 'transform'))
4
+
5
+ DEFAULT_TRANSFORMS = [
6
+ BbEPUB::Transform::PackageIdentifier,
7
+ BbEPUB::Transform::Title,
8
+ BbEPUB::Transform::Creator,
9
+ BbEPUB::Transform::Contributor,
10
+ BbEPUB::Transform::Language,
11
+ BbEPUB::Transform::CoverImage,
12
+ BbEPUB::Transform::Description,
13
+ BbEPUB::Transform::Version,
14
+ BbEPUB::Transform::Spine,
15
+ BbEPUB::Transform::Resources,
16
+ BbEPUB::Transform::NavToc,
17
+ BbEPUB::Transform::CoverPage,
18
+ BbEPUB::Transform::Rendition,
19
+ BbEPUB::Transform::AudioOverlay,
20
+ Bookbinder::Transform::Organizer,
21
+ Bookbinder::Transform::Generator
22
+ ]
23
+
24
+ DEFAULT_CONTENT_ROOT = 'EPUB'
25
+
26
+
27
# Decides whether +path+ looks like an EPUB.
#
# Returns true when the path has an '.epub' extension (compared after
# downcasing), or when it is a directory containing a 'META-INF'
# subdirectory; false otherwise.
#
def self.recognize(path)
  extension = File.extname(path).downcase
  return true if extension == '.epub'
  File.directory?(File.join(path, 'META-INF'))
end
33
+
34
+
35
# The transforms applied by this package class. Memoized in a class-level
# instance variable; falls back to DEFAULT_TRANSFORMS (the same array
# object, not a copy) the first time it is asked for.
#
def self.transforms
  @transforms = DEFAULT_TRANSFORMS unless @transforms
  @transforms
end
38
+
39
+
40
# Derives an identifier from a file path by replacing every non-word
# character (anything other than letters, digits and underscore) with '-'.
#
def make_id(path)
  non_word_char = /[^\w]/
  path.gsub(non_word_char, '-')
end
43
+
44
+
45
# Converts an href into a file path by URL-decoding it with CGI.unescape.
# Note that CGI.unescape decodes '+' as a space as well as '%XX' sequences.
#
def make_path(href)
  decoded = CGI.unescape(href)
  decoded
end
48
+
49
+
50
# Converts a file path into an href suitable for the package documents.
#
# Each path segment is percent-encoded separately so that the '/'
# separators survive (CGI.escape on the whole path would turn them into
# '%2F'). Spaces are written as '%20' rather than '+', because '+' is a
# literal character in a URI path. The result still decodes back to the
# original path with CGI.unescape, as used by make_path.
#
def make_href(path)
  # The -1 limit keeps empty trailing segments, so 'dir/' stays 'dir/'.
  path.split('/', -1).map { |segment|
    CGI.escape(segment).gsub('+', '%20')
  }.join('/')
end
53
+
54
+
55
+ protected
56
+
57
# Overrides the superclass method to inject EPUB's 'mimetype' file into
# the destination before the inherited write takes place.
#
def write_to_file_system(dest_file_system)
  mimetype_filename = 'mimetype'
  mimetype_content = 'application/epub+zip'
  dest_file_system.write(mimetype_filename, mimetype_content)
  super
end
63
+
64
+ end
@@ -0,0 +1,227 @@
1
+ # EPUB 3 only. Spec:
2
+ #
3
+ # http://www.idpf.org/epub/301/spec/epub-mediaoverlays.html#sec-package-metadata
4
+ #
5
+ # Each spine item's manifest item may have a media-overlay attribute, which
6
+ # is an idref pointing at a SMIL manifest item.
7
+ #
8
+ # Other properties to manage:
9
+ #
10
+ # <meta property="media:active-class">-epub-media-overlay-active</meta>
11
+ # <meta property="media:playback-active-class">-epub-media-overlay-playing</meta>
12
+ # <meta property="media:duration" refines="#ch1_audio">0:32:29</meta>
13
+ # <meta property="media:duration">1:36:20</meta>
14
+ # <meta property="media:narrator">Bill Speaker</meta>
15
+ #
16
+ # Note: "The Package Document must include the duration of each
17
+ # Media Overlay as well as of the entire Publication."
18
+ #
19
+ class BbEPUB::Transform::AudioOverlay < Bookbinder::Transform
20
+
21
# Transforms this transform lists as its dependencies: Metadata,
# Resources and Spine (to_map reads the 'metadata', 'resources' and
# 'spine' entries of the package map).
#
def dependencies
  transforms = BbEPUB::Transform
  [transforms::Metadata, transforms::Resources, transforms::Spine]
end
28
+
29
+
30
# Moves Media Overlay metadata out of the generic metadata map and into
# dedicated 'audio-overlay-*' properties.
#
# Publication-wide values (meta entries without a 'refines' key) are stored
# on package.map. For each manifest item with a media-overlay attribute:
#
# - find the map['spine'] item with the same id
# - find the map['resources'] item whose id matches the media-overlay idref
# - set 'audio-overlay' on the spine item to the resource path
# - set 'audio-overlay-duration' / 'audio-overlay-narrator' from the meta
#   entries that refine the overlay item
#
# Durations are converted from SMIL clock values to seconds. Manifest items
# whose spine component or overlay resource is missing from the map are
# skipped with a warning instead of raising.
#
def to_map(package)
  meta_to_prop(package, 'media:active-class', 'audio-overlay-active-class')
  meta_to_prop(
    package,
    'media:playback-active-class',
    'audio-overlay-playback-active-class'
  )
  meta_to_prop(package, 'media:duration', 'audio-overlay-duration') { |hashes|
    hashes.detect { |hash|
      unless hash.has_key?('refines')
        hash['@'] = smil_clock_value_to_seconds(hash['@'])
        true
      end
    }
  }
  # TODO: check that book does have a duration if there are any overlays.
  meta_to_prop(package, 'media:narrator', 'audio-overlay-narrator') { |hashes|
    hashes.detect { |hash| !hash.has_key?('refines') }
  }

  opf_doc = package.file(:opf).document('r')
  opf_doc.each('opf|manifest > opf|item[media-overlay]') { |cmpt_item|
    cmpt_id = cmpt_item['id']
    ao_id = cmpt_item['media-overlay']

    cmpt = package.map['spine'].detect { |c| c['id'] == cmpt_id }
    unless cmpt
      package.warn("No spine item for media-overlay manifest item '#{cmpt_id}'")
      next
    end

    rsrc = package.map['resources'].detect { |r| r['id'] == ao_id }
    unless rsrc
      package.warn("No resource for media-overlay '#{ao_id}' on '#{cmpt_id}'")
      next
    end

    cmpt['audio-overlay'] = rsrc['path']

    # True for meta hashes that refine this component's overlay item.
    # Entries without 'refines' (publication-wide leftovers) never match.
    refines_overlay = lambda { |hash|
      refines = hash['refines']
      refines && refines['@'] == '#'+ao_id
    }

    # TODO: warn when an overlay has no refined duration (the spec
    # requires one for each Media Overlay).
    meta_to_prop(
      package,
      'media:duration',
      'audio-overlay-duration',
      cmpt
    ) { |hashes|
      hashes.detect { |hash|
        if refines_overlay.call(hash)
          hash['@'] = smil_clock_value_to_seconds(hash['@'])
          true
        end
      }
    }
    meta_to_prop(
      package,
      'media:narrator',
      'audio-overlay-narrator',
      cmpt
    ) { |hashes|
      hashes.detect { |hash| refines_overlay.call(hash) }
    }
  }
end
102
+
103
+
104
# Writes the 'audio-overlay-*' map properties back into the OPF.
#
# Creates a publication-wide meta tag for:
#   audio-overlay-active-class => media:active-class
#   audio-overlay-playback-active-class => media:playback-active-class
#   audio-overlay-duration => media:duration
#   audio-overlay-narrator => media:narrator
#
# For each spine item with an 'audio-overlay' key:
#
#   Find the map resource whose path matches it.
#   Find the manifest item for the component and set its 'media-overlay'
#   attribute to the resource id.
#   Create meta tags for the component's 'audio-overlay-duration' and
#   'audio-overlay-narrator' (when present), each refining '#' + resource id.
#
# Components whose overlay resource is missing are skipped with a warning;
# a missing manifest item or a missing duration no longer raises.
#
def from_map(package)
  opf_doc = package.file(:opf).document
  metadata_tag = opf_doc.find('opf|metadata')

  overlay_props = [
    ['audio-overlay-active-class', 'media:active-class'],
    ['audio-overlay-playback-active-class', 'media:playback-active-class'],
    ['audio-overlay-duration', 'media:duration'],
    ['audio-overlay-narrator', 'media:narrator']
  ]
  overlay_props.each { |prop_name, meta_name|
    prop_to_meta(package.map, metadata_tag, prop_name, meta_name)
  }

  # Only duration and narrator may also be set per component.
  refinable_props = overlay_props.last(2)

  package.map['spine'].each { |cmpt|
    next unless cmpt['audio-overlay']
    rsrc = package.map['resources'].detect { |r|
      r['path'] == cmpt['audio-overlay']
    }
    unless rsrc
      package.warn("No resource for audio overlay '#{cmpt['audio-overlay']}'")
      next
    end

    cmpt_manifest_item = opf_doc.find("opf|manifest > opf|item##{cmpt['id']}")
    if cmpt_manifest_item
      cmpt_manifest_item['media-overlay'] = rsrc['id']
    else
      package.warn("No manifest item for spine item '#{cmpt['id']}'")
    end

    refinable_props.each { |prop_name, meta_name|
      meta_tag = prop_to_meta(cmpt, metadata_tag, prop_name, meta_name)
      # prop_to_meta returns nil when the component lacks the property.
      meta_tag['refines'] = '#'+rsrc['id'] if meta_tag
    }
  }
end
169
+
170
+
171
+ protected
172
+
173
# Moves one entry from package.map['metadata'][meta_name] into a property.
#
# The entry is the one returned by the block (which receives the array of
# entries), or the first entry when no block is given. The chosen entry is
# removed from the array, the meta_name key is dropped from the metadata
# once the array is empty, and the entry's '@' value is stored under
# prop_name on target (package.map when target is nil).
#
# Returns the stored value, or nil when there is no metadata, no entries,
# or no entry was chosen.
#
def meta_to_prop(package, meta_name, prop_name, target = nil)
  return unless package.map['metadata']
  candidates = package.map['metadata'][meta_name]
  return unless candidates && candidates.any?

  chosen =
    if block_given?
      yield(candidates)
    else
      candidates.first
    end
  return unless chosen

  candidates.delete(chosen)
  package.map['metadata'].delete(meta_name) if candidates.empty?
  destination = target || package.map
  destination[prop_name] = chosen['@']
end
184
+
185
+
186
# Converts a SMIL clock value into a number of seconds (Float).
#
#   5:34:31.396 = 5 hours, 34 minutes, 31 seconds and 396 milliseconds
#   124:59:36 = 124 hours, 59 minutes and 36 seconds
#   0:05:01.2 = 5 minutes, 1 second and 200 milliseconds
#   0:00:04 = 4 seconds
#   09:58 = 9 minutes and 58 seconds
#   00:56.78 = 56 seconds and 780 milliseconds
#   76.2s = 76.2 seconds = 76 seconds and 200 milliseconds
#   7.75h = 7.75 hours = 7 hours and 45 minutes
#   13min = 13 minutes
#   2345ms = 2345 milliseconds
#   12.345 = 12 seconds and 345 milliseconds
#
# Numeric input is returned as a Float unchanged. Surrounding whitespace is
# ignored. The whole value must match one of the forms above; anything else
# (including a clock value with extra text around it) returns nil.
#
def smil_clock_value_to_seconds(clock)
  return clock.to_f if clock.kind_of?(Numeric)
  # Text pulled out of XML often carries stray whitespace or newlines.
  clock = clock.to_s.strip
  if match = clock.match(/\A(?:(\d+):)?(\d+):(\d+\.?\d*)\z/)
    # match[1] is nil for the mm:ss form; nil.to_f is 0.0.
    h = match[1].to_f
    m = match[2].to_f
    s = match[3].to_f
    h*60*60 + m*60 + s
  elsif match = clock.match(/\A(\d+\.?\d*)h\z/)
    match[1].to_f*60*60
  elsif match = clock.match(/\A(\d+\.?\d*)min\z/)
    match[1].to_f*60
  elsif match = clock.match(/\A(\d+\.?\d*)s?\z/)
    match[1].to_f
  elsif match = clock.match(/\A(\d+\.?\d*)ms\z/)
    match[1].to_f / 1000.0
  end
end
215
+
216
+
217
# Appends a meta element to metadata_tag for one property of scope.
#
# When scope[prop_name] is set, a new 'meta' node is created with its
# 'property' attribute set to meta_name and its content set to the
# property value, added as a child of metadata_tag, and returned.
# Returns nil (and creates nothing) when the property is not set.
#
def prop_to_meta(scope, metadata_tag, prop_name, meta_name)
  return unless scope[prop_name]
  meta_tag = Nokogiri::XML::Node.new('meta', metadata_tag)
  meta_tag['property'] = meta_name
  meta_tag.content = scope[prop_name]
  metadata_tag.add_child(meta_tag)
  meta_tag
end
226
+
227
+ end
@@ -0,0 +1,73 @@
1
+ # For the specification, see the iBooks Asset Guide, specifically the
2
+ # section titled "Ambient Soundtrack" in version 5.1:
3
+ #
4
+ # https://itunesconnect.apple.com/docs/iBooksAssetGuide5.1Revision2.pdf
5
+ #
6
+ #
7
+ class BbEPUB::Transform::AudioSoundtrack < Bookbinder::Transform
8
+
9
# This transform lists the Spine transform as its only dependency
# (to_map and from_map both iterate over map['spine']).
#
def dependencies
  spine_transform = BbEPUB::Transform::Spine
  [spine_transform]
end
12
+
13
+
14
# Iterates through each spine item, looking in its file for an audio
# element whose epub:type is "ibooks:soundtrack". The lookup itself, and
# recording the result on the spine item, is done by
# find_soundtrack_in_component.
#
def to_map(package)
  package.map['spine'].each do |component|
    component_file = package.file(component['path'])
    find_soundtrack_in_component(component, component_file)
  end
end
23
+
24
+
25
# Iterates through each spine item that has an 'audio-soundtrack' value,
# adding an audio tag to components that don't have one, or setting the
# src if the audio tag exists. Spine items without a soundtrack are left
# untouched.
#
def from_map(package)
  package.map['spine'].each do |component|
    next unless component['audio-soundtrack']
    component_file = package.file(component['path'])
    add_soundtrack_to_component(component, component_file)
  end
end
35
+
36
+
37
+ protected
38
+
39
# Records the component's soundtrack source on the spine item.
#
# Sets cmpt['audio-soundtrack'] to the 'src' of the soundtrack tag found
# in the component's document, or to nil when there is no such tag (the
# key is set either way). Returns the value stored.
#
def find_soundtrack_in_component(cmpt, cmpt_file)
  soundtrack_tag = soundtrack_tag_in_document(cmpt_file.document)
  soundtrack_src = soundtrack_tag['src'] if soundtrack_tag
  cmpt['audio-soundtrack'] = soundtrack_src
end
43
+
44
+
45
# Points the component's soundtrack tag at cmpt['audio-soundtrack'].
#
# When the document has no soundtrack tag yet, one is created: the 'epub'
# namespace and 'ibooks' prefix are added to the document, an audio node
# with epub:type "ibooks:soundtrack" is appended to the body, and a style
# node (id BB_HIDE_AUDIO_SOUNDTRACK) that positions the audio element off
# the top of the page is appended to the head. In both cases the tag's
# 'src' is then set from the spine item.
#
def add_soundtrack_to_component(cmpt, cmpt_file)
  cmpt_doc = cmpt_file.document
  soundtrack_tag = soundtrack_tag_in_document(cmpt_doc)

  unless soundtrack_tag
    cmpt_doc.add_namespace('epub')
    cmpt_doc.add_prefix('ibooks', 'epub:prefix')

    soundtrack_tag = cmpt_doc.new_node('audio', :append => 'body')
    soundtrack_tag['epub:type'] = 'ibooks:soundtrack'

    hide_rule = [
      'audio[epub|type="ibooks:soundtrack"] {',
      'position: absolute;',
      'top: -100px;',
      '}'
    ].join
    cmpt_doc.new_node('style', :append => 'head') do |style_tag|
      style_tag['type'] = 'text/css'
      style_tag['id'] = 'BB_HIDE_AUDIO_SOUNDTRACK'
      style_tag.content = hide_rule
    end
  end

  soundtrack_tag['src'] = cmpt['audio-soundtrack']
end
67
+
68
+
69
# Looks up the audio element whose epub:type is "ibooks:soundtrack" in
# the given component document; returns whatever the document's find
# returns for that selector.
#
def soundtrack_tag_in_document(cmpt_doc)
  soundtrack_selector = 'audio[epub|type="ibooks:soundtrack"]'
  cmpt_doc.find(soundtrack_selector)
end
72
+
73
+ end
@@ -0,0 +1,11 @@
1
+ require(File.join(File.dirname(__FILE__), 'creator.rb'))
2
+
3
+ class BbEPUB::Transform::Contributor < BbEPUB::Transform::Creator
4
+
5
+ protected
6
+
7
# The kind of actor this transform handles. Overrides the value inherited
# from the Creator transform so that it works on 'contributor' entries.
# NOTE(review): how Creator uses this value is not visible here.
#
def actor_type
  'contributor'
end
10
+
11
+ end
@@ -0,0 +1,123 @@
1
+ # The best source of information about wading through the EPUB
2
+ # cover image quagmire has always been Keith's article on the
3
+ # Threepress blog:
4
+ #
5
+ # http://blog.safaribooksonline.com/2009/11/20/best-practices-in-epub-cover-images/
6
+ #
7
+ # He added an update for EPUB3, which follows the spec but is
8
+ # a bit easier to grok:
9
+ #
10
+ # http://blog.safaribooksonline.com/2011/05/26/covers-in-epub3/
11
+ #
12
+ class BbEPUB::Transform::CoverImage < Bookbinder::Transform
13
+
14
# Transforms this transform lists as its dependencies: Metadata (the
# cover meta entry is read from map['metadata']) and CoverPage (the
# cover landmark is read from map['nav']['landmarks']).
#
def dependencies
  transforms = BbEPUB::Transform
  [transforms::Metadata, transforms::CoverPage]
end
20
+
21
+
22
# Locates the cover image and records it as map['cover']['front'].
#
# If it's EPUB3, the cover will be in the 'properties' attribute
# of the manifest item: 'cover-image'.
#
# Otherwise, look for a manifest item with an 'id' of 'cover-image'.
#
# Or, look for a meta tag with a 'name' of 'cover', then find the
# manifest item that has the 'id' that matches meta's 'content'.
#
# Failing that, fall back to the cover page landmark.
#
# map['cover'] is always reset to an empty hash first. When a matching
# resource is found it is removed from map['resources'] and stored under
# the 'front' key.
#
def to_map(package)
  package.map['cover'] = {}
  opf_doc = package.file(:opf).document('r')

  cover_item =
    cover_item_from_manifest(package, opf_doc) ||
    cover_item_from_metadata(package, opf_doc) ||
    cover_item_from_cover_page(package)

  cover_resource = cover_resource_from_item(package, cover_item)
  return unless cover_resource

  package.map['resources'].delete(cover_resource)
  package.map['cover'].update("front" => cover_resource)
end
43
+
44
+
45
# Writes the front cover image (map['cover']['front']) into the OPF.
#
# Belt and braces: give the manifest item a property of
# 'cover-image', an 'id' of 'cover-image' (updating any
# idrefs) and create a meta tag with 'name'='cover' and
# 'content'='cover-image'.
#
# Does nothing when the map has no front cover.
#
def from_map(package)
  return unless package.map['cover'] && cover = package.map['cover']['front']
  opf_doc = package.file(:opf).document
  cover_item_id = 'cover-image'

  opf_doc.new_node('item', :append => 'opf|manifest') { |manifest_item_tag|
    manifest_item_tag['href'] = package.make_href(cover['path'])
    manifest_item_tag['media-type'] = cover['media-type']
    manifest_item_tag['id'] = cover_item_id
    manifest_item_tag['properties'] = 'cover-image'
  }

  # Anything that referred to the cover by its path-derived id must now
  # refer to the new manifest item's id instead.
  path_derived_id = package.make_id(cover['path'])
  opf_doc.each('[idref="'+path_derived_id+'"]') { |referrer|
    referrer['idref'] = cover_item_id
  }

  opf_doc.new_node('meta', :append => 'opf|metadata') { |cover_meta_tag|
    cover_meta_tag['name'] = 'cover'
    cover_meta_tag['content'] = cover_item_id
  }
end
71
+
72
+
73
# Searches the OPF manifest for the cover image item, trying in order:
# an item whose 'properties' include 'cover-image', an item with id
# 'cover-image', and an item with id 'cover_image'. Returns the first
# hit, or the result of the last lookup when none of them match.
# The package argument is not used.
#
def cover_item_from_manifest(package, opf_doc)
  selectors = [
    'opf|manifest > opf|item[properties~="cover-image"]',
    'opf|manifest > opf|item[id="cover-image"]',
    'opf|manifest > opf|item[id="cover_image"]'
  ]
  found = nil
  selectors.each do |selector|
    found = opf_doc.find(selector)
    break if found
  end
  found
end
78
+
79
+
80
# Finds the cover image manifest item via the 'cover' metadata entry.
#
# The 'cover' entry is removed from map['metadata'] (when there is a
# metadata map). If it held any entries, the first one's content value is
# taken as the manifest item id and that item is looked up in the OPF.
# Returns nil when there is no usable 'cover' entry.
#
def cover_item_from_metadata(package, opf_doc)
  metadata = package.map['metadata'] || {}
  cover_meta_props = metadata.delete('cover')
  return unless cover_meta_props && cover_meta_props.any?

  cover_image_id = cover_meta_props.first['content']['@']
  opf_doc.find('opf|manifest > opf|item[id="'+cover_image_id+'"]')
end
87
+
88
+
89
# Finds the spine item for the cover page landmark.
#
# Looks in map['nav']['landmarks'] for the first landmark whose 'type' is
# 'cover' and returns the map['spine'] item with the same 'path'. Returns
# nil when there is no nav, no landmarks, no cover landmark, or no spine
# item with that path.
#
def cover_item_from_cover_page(package)
  nav = package.map['nav']
  return unless nav
  landmarks = nav['landmarks']
  return unless landmarks

  cover_landmark = landmarks.detect { |landmark| landmark['type'] == 'cover' }
  return unless cover_landmark

  package.map['spine'].detect { |cmpt| cmpt['path'] == cover_landmark['path'] }
end
96
+
97
+
98
# Finds the manifest item for the first image in a cover page component.
#
# Opens the component's file, takes the first 'body img' element in its
# document, and looks up the manifest item whose href equals the image's
# 'src'. Returns nil when the document or image is missing, and also when
# anything raises along the way (the rescue is deliberately broad).
#
# TODO: support SVG images too.
# NOTE(review): the image src is compared to the manifest href verbatim;
# confirm both are relative to the same directory.
#
def cover_item_from_component(package, cmpt)
  package.if_file(cmpt['path']) do |cmpt_file|
    cmpt_doc = cmpt_file.document('r')
    return unless cmpt_doc
    img_tag = cmpt_doc.find('body img')
    return unless img_tag
    opf_doc = package.file(:opf).document('r')
    opf_doc.find('opf|manifest > opf|item[href="'+img_tag['src']+'"]')
  end
rescue
  nil
end
110
+
111
+
112
# Resolves a cover item to the map['resources'] entry for the image.
#
# When the item's id belongs to a spine component (that is, the item is a
# cover page rather than an image), the image is looked up inside that
# component first; if none is found a warning is issued and nil returned.
# The resource with the (possibly replaced) item's id is then returned.
# Returns nil when cover_item is nil or has no 'id'.
#
def cover_resource_from_item(package, cover_item)
  return nil unless cover_item && cover_item['id']

  spine_cmpt = package.map['spine'].detect { |c| c['id'] == cover_item['id'] }
  if spine_cmpt
    cover_item = cover_item_from_component(package, spine_cmpt)
    unless cover_item
      package.warn("Did not discover cover image in #{spine_cmpt['path']}. SVG?")
      return nil
    end
  end

  package.map['resources'].detect { |rsrc| rsrc['id'] == cover_item['id'] }
end
122
+
123
+ end
@@ -0,0 +1,158 @@
1
+ class BbEPUB::Transform::CoverPage < Bookbinder::Transform
2
+
3
# This transform lists the Spine transform as its only dependency
# (to_map reads and updates map['spine']).
#
def dependencies
  spine_transform = BbEPUB::Transform::Spine
  [spine_transform]
end
6
+
7
+
8
# Discovers the cover page and records it as a landmark. The sources are
# tried in this order, stopping at the first that yields an item:
#
# A: the Nav document (if it exists): a landmark with an epub:type of
#    'cover'.
#
# B: the OPF guide: a reference with type "cover".
#
# C: a manifest item whose id starts with 'cover'.
#
# D: the first spine item:
#    - does it have /cover/ in the filename?
#    - no? does it have an image or an svg, and no body text?
#
# -> If found, the item is put at the front of map['nav']['landmarks']
#    (creating 'nav' and 'landmarks' as needed), and every spine item
#    with the same path gets 'linear' set to false.
#
def to_map(package)
  cover_page_item =
    cover_page_item_from_nav(package) ||
    cover_page_item_from_opf_guide(package) ||
    cover_page_item_from_id(package) ||
    cover_page_item_from_first_spine_item(package)
  return unless cover_page_item

  package.map['nav'] ||= {}
  package.map['nav']['landmarks'] ||= []
  package.map['nav']['landmarks'].unshift(cover_page_item)

  package.map['spine'].each do |spine_item|
    spine_item['linear'] = false if spine_item['path'] == cover_page_item['path']
  end
end
40
+
41
+
42
# Writes the cover page into the OPF guide.
#
# Does nothing unless map['nav']['landmarks'] has an item of type 'cover'.
#
# The Nav landmark itself is not written here -- that is left to the
# landmarks feature.
#
# In the OPF, the guide element is created if it doesn't exist, and a
# reference element with type "cover", the cover page's href and its
# title is created within it.
#
def from_map(package)
  nav = package.map['nav']
  return unless nav && package.map['nav']['landmarks']

  cover_page_item = package.map['nav']['landmarks'].detect do |landmark|
    landmark['type'] == 'cover'
  end
  return unless cover_page_item

  opf_doc = package.file(:opf).document
  guide_tag =
    opf_doc.find('opf|guide') ||
    opf_doc.new_node('guide', :append => opf_doc.root)

  opf_doc.new_node('reference', :append => guide_tag) do |ref_tag|
    ref_tag['type'] = 'cover'
    ref_tag['href'] = package.make_href(cover_page_item['path'])
    ref_tag['title'] = cover_page_item['title']
  end
end
70
+
71
+
72
+ protected
73
+
74
# Look for an EPUB3 landmark with type 'cover'.
#
# In an EPUB 3 Navigation Document the landmarks nav has an epub:type of
# "landmarks", and each entry's epub:type and href sit on the anchor
# inside the list item, so that is what is searched for first. The
# original selector (nav type "landmark", attributes on the li) is kept
# as a fallback for documents written that way.
#
# Returns the cover page item, or nil when there is no nav file, no cover
# landmark, or the landmark has no href.
#
def cover_page_item_from_nav(package)
  return unless nav_file = package.file(:nav)
  nav_doc = nav_file.document('r')
  # epub:type is a space-separated list, hence ~= rather than =.
  cover_link =
    nav_doc.find('nav[epub|type~="landmarks"] a[epub|type~="cover"]') ||
    nav_doc.find('nav[epub|type="landmark"] li[epub|type="cover"]')
  return unless cover_link && cover_link['href']
  href_to_cover_page_item(
    package,
    cover_link['href'],
    cover_link['title'] || cover_link.content.strip
  )
end
87
+
88
+
89
# Look for a guide reference with type 'cover' in the OPF, and build the
# cover page item from its href. Returns nil when there is no such
# reference.
#
def cover_page_item_from_opf_guide(package)
  opf_doc = package.file(:opf).document('r')
  guide_ref_tag = opf_doc.find('opf|guide > opf|reference[type="cover"]')
  return unless guide_ref_tag
  href_to_cover_page_item(package, guide_ref_tag['href'])
end
97
+
98
+
99
# Look for a manifest item whose id starts with 'cover' and whose
# media-type ends in 'ml', and build the cover page item from its href.
# Returns nil when there is no such item or its media-type does not match.
#
def cover_page_item_from_id(package)
  opf_doc = package.file(:opf).document('r')
  manifest_tag = opf_doc.find('opf|manifest > opf|item[id^="cover"]')
  return unless manifest_tag
  return unless manifest_tag['media-type'].match(/ml$/)
  href_to_cover_page_item(package, manifest_tag['href'])
end
107
+
108
+
109
# Investigate whether the first spine item is a cover page.
#
# It is treated as one when its path contains 'cover', or when its
# document body has no non-blank text and contains an img or an svg.
# Otherwise the method returns whatever package.if_file returns (the
# block itself yields nil in that case); nil when the spine is empty or
# the first item has no path.
#
def cover_page_item_from_first_spine_item(package)
  spine = package.map['spine']
  return unless spine.any?
  file_path = package.map['spine'].first['path']
  return unless file_path

  file_href = package.make_href(file_path)
  return href_to_cover_page_item(package, file_href) if file_path.match(/cover/)

  package.if_file(file_path) do |cmpt_file|
    cmpt_doc = cmpt_file.document('r')
    next unless cmpt_doc
    body = cmpt_doc.find('body')
    text_nodes = body.xpath('.//text()[normalize-space()]')
    # Only a body without text can be an image-only cover page.
    next unless text_nodes.empty?
    next unless cmpt_doc.find('body img') || cmpt_doc.find('body svg')
    return href_to_cover_page_item(package, file_href)
  end
end
134
+
135
+
136
# Given a href for a cover page, create the cover page item
# that will go into the landmarks array in the package map.
#
# Any fragment ('#...') is dropped from the href before it is converted to
# a path. The title is page_title when given and non-empty; otherwise the
# document's head title, or 'Cover page' when it has none.
#
# Returns a hash with 'type', 'path', 'title' and 'media-type' keys.
# Returns nil when cover_href is nil or the file has no document; when
# the file does not exist, returns whatever package.if_file returns.
#
def href_to_cover_page_item(package, cover_href, page_title = nil)
  return unless cover_href
  cover_href = cover_href.sub(/#.*$/, '')
  cover_page_path = package.make_path(cover_href)
  package.if_file(cover_page_path) { |cover_page_file|
    # Load the document once and reuse it for the title lookup.
    return unless doc = cover_page_file.document('r')
    if page_title.nil? || page_title.empty?
      title_tag = doc.find('head > title')
      page_title = title_tag ? title_tag.content.strip : 'Cover page'
    end
    {
      'type' => 'cover',
      'path' => cover_page_path,
      'title' => page_title,
      # FIXME: acquire the real media-type from manifest item?
      'media-type' => cover_page_file.media_type
    }
  }
end
157
+
158
+ end