peregrin 1.1.1 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -146,3 +146,8 @@ it's being kept as simple as possible.
146
146
 
147
147
  All this rhyming on "ook" put me in mind of the Took family. There is no
148
148
  deeper meaning.
149
+
150
+
151
+ ## History
152
+
153
+ * 1.1.4 - Basic EPUB3 and EPUB fixed-layout read support (thanks @klacointe!)
data/lib/formats/epub.rb CHANGED
@@ -7,7 +7,8 @@ class Peregrin::Epub
7
7
  :opf => { 'opf' => 'http://www.idpf.org/2007/opf' },
8
8
  :dc => { 'dc' => 'http://purl.org/dc/elements/1.1/' },
9
9
  :ncx => { 'ncx' => 'http://www.daisy.org/z3986/2005/ncx/' },
10
- :svg => { 'svg' => 'http://www.w3.org/2000/svg' }
10
+ :svg => { 'svg' => 'http://www.w3.org/2000/svg' },
11
+ :nav => { 'nav' => 'http://www.w3.org/1999/xhtml'}
11
12
  }
12
13
  OCF_PATH = "META-INF/container.xml"
13
14
  HTML5_TAGNAMES = %w[section nav article aside hgroup header footer figure figcaption] # FIXME: Which to divify? Which to leave as-is?
@@ -85,13 +86,13 @@ class Peregrin::Epub
85
86
  docs = load_config_documents(zipfile)
86
87
  extract_properties(docs[:opf])
87
88
  extract_components(zipfile, docs[:opf], docs[:opf_root])
88
- extract_chapters(zipfile, docs[:ncx])
89
+ extract_chapters(zipfile, {:ncx => docs[:ncx], :nav => docs[:nav]})
89
90
  extract_cover(zipfile, docs)
90
91
  }
91
92
  @book.read_resource_proc = lambda { |resource|
92
93
  media_path = from_opf_root(docs[:opf_root], resource.src)
93
94
  media_path = URI.unescape(media_path)
94
- Zip::Archive.open(epub_path) { |zipfile| zipfile.read(media_path) }
95
+ Zip::Archive.open(epub_path) { |zipfile| zipfile.content(media_path) }
95
96
  }
96
97
  end
97
98
 
@@ -99,7 +100,8 @@ class Peregrin::Epub
99
100
  def load_config_documents(zipfile)
100
101
  # The OCF file.
101
102
  begin
102
- docs = { :ocf => Nokogiri::XML::Document.parse(zipfile.read(OCF_PATH)) }
103
+ ocf_content = zipfile.content(OCF_PATH)
104
+ docs = { :ocf => Nokogiri::XML::Document.parse(ocf_content) }
103
105
  rescue
104
106
  raise FailureLoadingOCF
105
107
  end
@@ -111,12 +113,17 @@ class Peregrin::Epub
111
113
  NAMESPACES[:ocf]
112
114
  )['full-path']
113
115
  docs[:opf_root] = File.dirname(docs[:opf_path])
114
- docs[:opf] = Nokogiri::XML::Document.parse(zipfile.read(docs[:opf_path]))
116
+ opf_content = zipfile.content(docs[:opf_path])
117
+ docs[:opf] = Nokogiri::XML::Document.parse(opf_content)
115
118
  rescue
116
119
  raise FailureLoadingOPF
117
120
  end
118
121
 
122
+ # Extract Epub version
123
+ @book.version = docs[:opf].at_xpath('//opf:package', NAMESPACES[:opf])['version'].to_f
124
+
119
125
  # The NCX file.
126
+ # Must be present only with Ebook < 3.0 but can be used for forward compatibility
120
127
  begin
121
128
  spine = docs[:opf].at_xpath('//opf:spine', NAMESPACES[:opf])
122
129
  ncx_id = spine['toc'] ? spine['toc'] : 'ncx'
@@ -126,10 +133,25 @@ class Peregrin::Epub
126
133
  )
127
134
 
128
135
  docs[:ncx_path] = from_opf_root(docs[:opf_root], item['href'])
129
- ncx_content = zipfile.read(docs[:ncx_path])
136
+ ncx_content = zipfile.content(docs[:ncx_path])
130
137
  docs[:ncx] = Nokogiri::XML::Document.parse(ncx_content)
131
138
  rescue => e
132
- raise FailureLoadingNCX
139
+ # Only raise an exception for Ebook with version lower than 3.0
140
+ raise FailureLoadingNCX if @book.version < 3
141
+ end
142
+
143
+ # The NAV file. (Epub3 only)
144
+ if @book.version >= 3
145
+ begin
146
+ docs[:nav_path] = from_opf_root(
147
+ docs[:opf_root],
148
+ docs[:opf].at_xpath("//opf:manifest/opf:item[contains(concat(' ', normalize-space(@properties), ' '), ' nav ')]", NAMESPACES[:opf])['href']
149
+ )
150
+ nav_content = zipfile.content(docs[:nav_path])
151
+ docs[:nav] = Nokogiri::XML::Document.parse(nav_content)
152
+ rescue => e
153
+ raise FailureLoadingNAV
154
+ end
133
155
  end
134
156
 
135
157
  docs
@@ -153,10 +175,14 @@ class Peregrin::Epub
153
175
  end
154
176
  atts = elem.attributes.inject({}) { |acc, pair|
155
177
  key, attr = pair
156
- acc[key] = attr.value unless ["name", "content"].include?(key)
178
+ if !["name", "content", "property"].include?(key)
179
+ acc[key] = attr.value
180
+ elsif key == "property"
181
+ @book.add_property(attr.value, elem.text)
182
+ end
157
183
  acc
158
184
  }
159
- @book.add_property(name, content, atts)
185
+ @book.add_property(name, content, atts) unless name.nil?
160
186
  }
161
187
  end
162
188
 
@@ -175,10 +201,10 @@ class Peregrin::Epub
175
201
  href = item['href']
176
202
  linear = iref['linear'] != 'no'
177
203
  begin
178
- content = zipfile.read(from_opf_root(opf_root, href))
204
+ content = zipfile.content(from_opf_root(opf_root, href))
179
205
  rescue
180
206
  href = URI.unescape(href)
181
- content = zipfile.read(from_opf_root(opf_root, href))
207
+ content = zipfile.content(from_opf_root(opf_root, href))
182
208
  end
183
209
  @book.add_component(
184
210
  href,
@@ -205,8 +231,16 @@ class Peregrin::Epub
205
231
  }
206
232
  end
207
233
 
234
+ def extract_chapters(zipfile, docs)
235
+ if @book.version >= 3 && !docs[:nav].nil?
236
+ extract_nav_chapters(zipfile, docs[:nav])
237
+ else
238
+ extract_ncx_chapters(zipfile, docs[:ncx])
239
+ end
240
+ end
208
241
 
209
- def extract_chapters(zipfile, ncx_doc)
242
+ # Epub < 3.0 only
243
+ def extract_ncx_chapters(zipfile, ncx_doc)
210
244
  curse = lambda { |point|
211
245
  chp = Peregrin::Chapter.new(
212
246
  point.at_xpath('.//ncx:text', NAMESPACES[:ncx]).content,
@@ -225,6 +259,32 @@ class Peregrin::Epub
225
259
  }
226
260
  end
227
261
 
262
+ # Epub >= 3.0 only
263
+ def extract_nav_chapters(zipfile, nav_doc)
264
+ curse = lambda { |point, position|
265
+ chp = Peregrin::Chapter.new(
266
+ point.at_xpath('.//nav:a', NAMESPACES[:nav]).content,
267
+ position,
268
+ point.at_xpath('.//nav:a', NAMESPACES[:nav])['href']
269
+ )
270
+ ol = point.at_xpath('.//nav:ol', NAMESPACES[:nav])
271
+ ol.children.each { |pt|
272
+ next unless pt.element? && pt.name == "li"
273
+ position += 1
274
+ position, chapter = curse.call(pt, position)
275
+ chp.children.push chapter
276
+ } if ol
277
+ [position, chp]
278
+ }
279
+ position = 0
280
+ nav_doc.at_xpath("//nav:nav/nav:ol", NAMESPACES[:nav]).children.each { |pt|
281
+ next unless pt.element? && pt.name == "li"
282
+ position += 1
283
+ position, chapter = curse.call(pt, position)
284
+ @book.chapters.push chapter
285
+ }
286
+ end
287
+
228
288
 
229
289
  def extract_cover(zipfile, docs)
230
290
  @book.cover = nil
@@ -252,7 +312,7 @@ class Peregrin::Epub
252
312
  else
253
313
  path = from_opf_root(docs[:opf_root], res.src)
254
314
  begin
255
- doc = Nokogiri::XML::Document.parse(zipfile.read(path))
315
+ doc = Nokogiri::XML::Document.parse(zipfile.content(path))
256
316
  src = nil
257
317
  if img = doc.at_css('img')
258
318
  src = img['src']
@@ -549,5 +609,6 @@ class Peregrin::Epub
549
609
  class FailureLoadingOCF < ValidationError; end
550
610
  class FailureLoadingOPF < ValidationError; end
551
611
  class FailureLoadingNCX < ValidationError; end
612
+ class FailureLoadingNAV < ValidationError; end
552
613
 
553
614
  end
data/lib/formats/zhook.rb CHANGED
@@ -28,7 +28,7 @@ class Peregrin::Zhook
28
28
  raise MissingCoverPNG.new(path)
29
29
  end
30
30
 
31
- doc = Nokogiri::HTML::Document.parse(zf.read(INDEX_PATH), nil, 'UTF-8')
31
+ doc = Nokogiri::HTML::Document.parse(zf.content(INDEX_PATH), nil, 'UTF-8')
32
32
  raise IndexHTMLRootHasId.new(path) if doc.root['id']
33
33
 
34
34
  ensure
@@ -42,16 +42,14 @@ class Peregrin::Zhook
42
42
  validate(path)
43
43
  book = Peregrin::Book.new
44
44
  Zip::Archive.open(path) { |zf|
45
- book.add_component(INDEX_PATH, zf.read(INDEX_PATH))
45
+ book.add_component(INDEX_PATH, zf.content(INDEX_PATH))
46
46
  zf.each { |entry|
47
47
  ze = entry.name
48
48
  book.add_resource(ze) unless ze == INDEX_PATH || entry.directory?
49
49
  }
50
50
  }
51
51
  book.read_resource_proc = lambda { |resource|
52
- Zip::Archive.open(path) { |zipfile|
53
- zipfile.read(resource.src)
54
- }
52
+ Zip::Archive.open(path) { |zipfile| zipfile.content(resource.src) }
55
53
  }
56
54
 
57
55
  extract_properties_from_index(book)
data/lib/peregrin/book.rb CHANGED
@@ -19,6 +19,10 @@ class Peregrin::Book
19
19
  # A Resource that is used for the book cover.
20
20
  attr_accessor :cover
21
21
 
22
+ # The current version of document specifications
23
+ # Only used for Epub for now
24
+ attr_accessor :version
25
+
22
26
  # A proc that copies a resource to the given destination.
23
27
  attr_writer :read_resource_proc
24
28
 
@@ -1,5 +1,5 @@
1
1
  module Peregrin
2
2
 
3
- VERSION = "1.1.1"
3
+ VERSION = "1.1.4"
4
4
 
5
5
  end
@@ -1,9 +1,10 @@
1
1
  class Zip::Archive
2
2
 
3
- def read(path)
3
+ def content(path)
4
4
  fopen(path) { |f| f.read }
5
5
  end
6
6
 
7
+
7
8
  def find(path)
8
9
  detect { |f| f.name == path }
9
10
  end
@@ -89,6 +89,91 @@ class Peregrin::Tests::EpubTest < Test::Unit::TestCase
89
89
  assert_equal("cover.png", epub.to_book.cover.src)
90
90
  end
91
91
 
92
+ def test_extracting_epub3_fixed_layout_properties
93
+ epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
94
+ book = epub.to_book
95
+ assert_equal("2012-05-09T08:58:00Z", book.property_for('dcterms:modified'))
96
+ assert_equal("pre-paginated", book.property_for('rendition:layout'))
97
+ assert_equal("auto", book.property_for('rendition:orientation'))
98
+ assert_equal("both", book.property_for('rendition:spread'))
99
+ end
100
+
101
+ def test_extracting_version
102
+ epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
103
+ assert_equal(3.0, epub.to_book.version)
104
+
105
+ epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
106
+ assert_equal(2.0, epub.to_book.version)
107
+ end
108
+
109
+ def test_extracting_chapters_from_ocx
110
+ epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
111
+ assert_equal(9, epub.to_book.chapters.count)
112
+ assert_equal("Title", epub.to_book.chapters.first.title)
113
+ assert_equal("title.xml", epub.to_book.chapters.first.src)
114
+ assert_equal(1, epub.to_book.chapters.first.position)
115
+ assert_equal("Recommendations", epub.to_book.chapters.last.title)
116
+ assert_equal("similar.xml", epub.to_book.chapters.last.src)
117
+ assert_equal(27, epub.to_book.chapters.last.position)
118
+ end
119
+
120
+ def test_extracting_chapters_from_nav
121
+ epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
122
+ assert_equal(3, epub.to_book.chapters.count)
123
+ assert_equal("Images and Text", epub.to_book.chapters.first.title)
124
+ assert_equal("page01.xhtml", epub.to_book.chapters.first.src)
125
+ assert_equal(1, epub.to_book.chapters.first.position)
126
+ assert_equal("Dragons", epub.to_book.chapters.last.title)
127
+ assert_equal("page04.xhtml", epub.to_book.chapters.last.src)
128
+ assert_equal(3, epub.to_book.chapters.last.position)
129
+ end
130
+
131
+ def test_extracting_nested_chapters_from_nav
132
+ epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_nested_nav.epub")
133
+ assert_equal(11, epub.to_book.chapters.count)
134
+ assert_equal(
135
+ ["EPUB 3.0 Specification",
136
+ "EPUB 3 Specifications - Table of Contents",
137
+ "Terminology",
138
+ "EPUB 3 Overview",
139
+ "EPUB Publications 3.0",
140
+ "EPUB Content Documents 3.0",
141
+ "EPUB Media Overlays 3.0",
142
+ "Acknowledgements and Contributors",
143
+ "References",
144
+ "EPUB Open Container Format (OCF) 3.0",
145
+ "EPUB 3 Changes from EPUB 2.0.1"],
146
+ epub.to_book.chapters.map(&:title)
147
+ )
148
+ assert_equal(
149
+ [1, 2, 3, 4, 30, 85, 184, 230, 231, 232, 265],
150
+ epub.to_book.chapters.map(&:position)
151
+ )
152
+ assert_equal(
153
+ ["1. Introduction",
154
+ "2. Features",
155
+ "3. Global Language Support",
156
+ "4. Accessibility"],
157
+ epub.to_book.chapters[3].children.map(&:title)
158
+ )
159
+ assert_equal(
160
+ [5, 8, 22, 29],
161
+ epub.to_book.chapters[3].children.map(&:position)
162
+ )
163
+ assert_equal(
164
+ ["3.1. Metadata",
165
+ "3.2. Content Documents",
166
+ "3.3. CSS",
167
+ "3.4. Fonts",
168
+ "3.5. Text-to-speech",
169
+ "3.6. Container"],
170
+ epub.to_book.chapters[3].children[2].children.map(&:title)
171
+ )
172
+ assert_equal(
173
+ [23, 24, 25, 26, 27, 28],
174
+ epub.to_book.chapters[3].children[2].children.map(&:position)
175
+ )
176
+ end
92
177
 
93
178
  def test_read_epub_to_write_epub
94
179
  epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: peregrin
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 27
5
+ prerelease:
5
6
  segments:
6
7
  - 1
7
8
  - 1
8
- - 1
9
- version: 1.1.1
9
+ - 4
10
+ version: 1.1.4
10
11
  platform: ruby
11
12
  authors:
12
13
  - Joseph Pearson
@@ -14,16 +15,17 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-06-28 00:00:00 +10:00
18
- default_executable:
18
+ date: 2012-05-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
22
22
  prerelease: false
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
24
25
  requirements:
25
26
  - - ">="
26
27
  - !ruby/object:Gem::Version
28
+ hash: 3
27
29
  segments:
28
30
  - 0
29
31
  version: "0"
@@ -33,9 +35,11 @@ dependencies:
33
35
  name: zipruby
34
36
  prerelease: false
35
37
  requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
36
39
  requirements:
37
40
  - - ">="
38
41
  - !ruby/object:Gem::Version
42
+ hash: 3
39
43
  segments:
40
44
  - 0
41
45
  version: "0"
@@ -45,9 +49,11 @@ dependencies:
45
49
  name: mime-types
46
50
  prerelease: false
47
51
  requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
48
53
  requirements:
49
54
  - - ">="
50
55
  - !ruby/object:Gem::Version
56
+ hash: 3
51
57
  segments:
52
58
  - 0
53
59
  version: "0"
@@ -57,9 +63,11 @@ dependencies:
57
63
  name: rake
58
64
  prerelease: false
59
65
  requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
60
67
  requirements:
61
68
  - - ">="
62
69
  - !ruby/object:Gem::Version
70
+ hash: 3
63
71
  segments:
64
72
  - 0
65
73
  version: "0"
@@ -98,7 +106,6 @@ files:
98
106
  - test/utils/outliner_test.rb
99
107
  - README.md
100
108
  - MIT-LICENSE
101
- has_rdoc: true
102
109
  homepage: http://ochook.org/peregrin
103
110
  licenses: []
104
111
 
@@ -111,23 +118,27 @@ rdoc_options:
111
118
  require_paths:
112
119
  - lib
113
120
  required_ruby_version: !ruby/object:Gem::Requirement
121
+ none: false
114
122
  requirements:
115
123
  - - ">="
116
124
  - !ruby/object:Gem::Version
125
+ hash: 3
117
126
  segments:
118
127
  - 0
119
128
  version: "0"
120
129
  required_rubygems_version: !ruby/object:Gem::Requirement
130
+ none: false
121
131
  requirements:
122
132
  - - ">="
123
133
  - !ruby/object:Gem::Version
134
+ hash: 3
124
135
  segments:
125
136
  - 0
126
137
  version: "0"
127
138
  requirements: []
128
139
 
129
140
  rubyforge_project: nowarning
130
- rubygems_version: 1.3.6
141
+ rubygems_version: 1.8.24
131
142
  signing_key:
132
143
  specification_version: 3
133
144
  summary: Peregrin - ebook conversion