epub-parser 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitlab-ci.yml +4 -6
- data/CHANGELOG.markdown +7 -0
- data/README.markdown +9 -3
- data/Rakefile +2 -1
- data/docs/Home.markdown +22 -0
- data/epub-parser.gemspec +3 -5
- data/lib/epub/content_document/xhtml.rb +3 -2
- data/lib/epub/ocf/physical_container.rb +0 -1
- data/lib/epub/parser.rb +3 -1
- data/lib/epub/parser/publication.rb +1 -2
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +24 -11
- data/lib/epub/searcher/publication.rb +0 -1
- data/test/test_publication.rb +8 -2
- metadata +16 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e95e303cf2c03479fdc655b8c4ea3c0b668b421f9d63082c40d984542312804b
|
4
|
+
data.tar.gz: 680b503b44bf23e82374da6afc18439762289eeaf87d58c6b0db5588d6ad59b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc201c61dc61f7f56e4690c4b669bf0bee8e098f6025ceea17bc8604fb8b0b4a49a9937a4c34972d5b4584c83064fdf78d9b1831c75706c7f94ad01eebb23a8f
|
7
|
+
data.tar.gz: 7d08504f668f371dbef455f1498214b3f7bec305f3095638986441a2d6d8800932c329b65c7c62913f4ec126b02db892bbe10a3c22e0a316933f613d5cf7a8d7
|
data/.gitlab-ci.yml
CHANGED
@@ -1,15 +1,9 @@
|
|
1
1
|
before_script:
|
2
|
-
- apt-get update -qq && apt-get install -y zip
|
3
2
|
- ruby -v
|
4
3
|
- which ruby
|
5
4
|
- gem install bundler --no-document
|
6
5
|
- bundle install --jobs=$(nproc) "${FLAGS[@]}"
|
7
6
|
|
8
|
-
test:2.2:
|
9
|
-
image: ruby:2.2
|
10
|
-
script:
|
11
|
-
- bundle exec rake test
|
12
|
-
|
13
7
|
test:2.3:
|
14
8
|
image: ruby:2.3
|
15
9
|
script: bundle exec rake test
|
@@ -17,3 +11,7 @@ test:2.3:
|
|
17
11
|
test:2.4:
|
18
12
|
image: ruby:2.4
|
19
13
|
script: bundle exec rake test
|
14
|
+
|
15
|
+
test:2.5:
|
16
|
+
image: ruby:2.5
|
17
|
+
script: bundle exec rake test
|
data/CHANGELOG.markdown
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
CHANGELOG
|
2
2
|
=========
|
3
3
|
|
4
|
+
0.3.3
|
5
|
+
-----
|
6
|
+
|
7
|
+
* [BUG FIX]Use UnpackedURI adapter for URI
|
8
|
+
* [BREAKING CHANGE]Remove deprecated second argument from `EPUB::Parser::Publication#initialize`
|
9
|
+
* Add `detect_encoding` keyword argument to `Publication::Package::Manifest::Item#read` and `ContentDocument::XHTML#read`
|
10
|
+
|
4
11
|
0.3.2
|
5
12
|
-----
|
6
13
|
|
data/README.markdown
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
EPUB Parser
|
2
2
|
===========
|
3
|
-
[![Build Status](https://secure.travis-ci.org/KitaitiMakoto/epub-parser.png?branch=master)](http://travis-ci.org/KitaitiMakoto/epub-parser)
|
4
3
|
[![build status](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
5
4
|
[![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
|
6
5
|
[![Gem Version](https://badge.fury.io/rb/epub-parser.svg)](http://badge.fury.io/rb/epub-parser)
|
6
|
+
[![coverage report](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/coverage.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
7
7
|
|
8
8
|
INSTALLATION
|
9
9
|
-------
|
@@ -134,11 +134,11 @@ Then documentation will be available in `doc` directory.
|
|
134
134
|
|
135
135
|
REQUIREMENTS
|
136
136
|
------------
|
137
|
-
* Ruby 2.
|
137
|
+
* Ruby 2.3.0 or later
|
138
138
|
* `patch` command to install Nokogiri
|
139
139
|
* C compiler to compile Nokogiri
|
140
140
|
|
141
|
-
|
141
|
+
SIMILAR EFFORTS
|
142
142
|
---------------
|
143
143
|
* [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
|
144
144
|
* [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
|
@@ -153,6 +153,12 @@ If you find other gems, please tell me or request a pull request.
|
|
153
153
|
RECENT CHANGES
|
154
154
|
--------------
|
155
155
|
|
156
|
+
### 0.3.3
|
157
|
+
|
158
|
+
* [BUG FIX]Use UnpackedURI adapter for URI
|
159
|
+
* [BREAKING CHANGE]Remove deprecated second argument from `EPUB::Parser::Publication#initialize`
|
160
|
+
* Add `detect_encoding` keyword argument to `Publication::Package::Manifest::Item#read` and `ContentDocument::XHTML#read`
|
161
|
+
|
156
162
|
### 0.3.2
|
157
163
|
|
158
164
|
* Use epub-cfi gem for EPUB CFI
|
data/Rakefile
CHANGED
@@ -5,6 +5,7 @@ require 'yard'
|
|
5
5
|
require 'rdoc/task'
|
6
6
|
require 'epub/parser/version'
|
7
7
|
require 'zipruby'
|
8
|
+
require 'epub/maker'
|
8
9
|
|
9
10
|
task :default => :test
|
10
11
|
task :test => 'test:default'
|
@@ -18,7 +19,7 @@ namespace :test do
|
|
18
19
|
desc 'Build test fixture EPUB file'
|
19
20
|
task :build => :clean do
|
20
21
|
input_dir = 'test/fixtures/book'
|
21
|
-
|
22
|
+
EPUB::Maker.archive input_dir
|
22
23
|
small_file = File.read("#{input_dir}/OPS/case-sensitive.xhtml")
|
23
24
|
Zip::Archive.open "#{input_dir}.epub" do |archive|
|
24
25
|
archive.add_buffer 'OPS/CASE-SENSITIVE.xhtml', small_file.sub('small file name', 'LARGE FILE NAME')
|
data/docs/Home.markdown
CHANGED
@@ -90,6 +90,23 @@ You are also able to find YourBook object for the first:
|
|
90
90
|
ret == book # => true; this API is not good I feel... Welcome suggestion!
|
91
91
|
# do something with your book
|
92
92
|
|
93
|
+
#### Switching ZIP library
|
94
|
+
|
95
|
+
EPUB Parser uses [Archive::Zip][], a pure Ruby ZIP library, by default. You can use [Zip/Ruby][], Ruby bindings for [libzip][], if you have already installed the Zip/Ruby gem via RubyGems or Bundler.
|
96
|
+
|
97
|
+
Globally:
|
98
|
+
|
99
|
+
EPUB::OCF::PhysicalContainer.adapter = :Zipruby
|
100
|
+
book = EPUB::Parser.parse("path/to/book.epub")
|
101
|
+
|
102
|
+
For each EPUB book:
|
103
|
+
|
104
|
+
book = EPUB::Parser.parse("path/to/book.epub", container_adapter: :Zipruby)
|
105
|
+
|
106
|
+
[Archive::Zip]: https://github.com/javanthropus/archive-zip
|
107
|
+
[Zip/Ruby]: https://bitbucket.org/winebarrel/zip-ruby/wiki/Home
|
108
|
+
[libzip]: https://nih.at/libzip/
|
109
|
+
|
93
110
|
Documentation
|
94
111
|
-------------
|
95
112
|
|
@@ -146,6 +163,11 @@ Requirements
|
|
146
163
|
* `patch` command to install Nokogiri
|
147
164
|
* C compiler to compile Zip/Ruby and Nokogiri
|
148
165
|
|
166
|
+
History
|
167
|
+
-------
|
168
|
+
|
169
|
+
See {file:CHANGELOG.markdown}.
|
170
|
+
|
149
171
|
Note
|
150
172
|
----
|
151
173
|
|
data/epub-parser.gemspec
CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.summary = %q{EPUB 3 Parser}
|
12
12
|
s.description = %q{Parse EPUB 3 book loosely}
|
13
13
|
s.license = 'MIT'
|
14
|
-
s.required_ruby_version = '
|
14
|
+
s.required_ruby_version = '>= 2.3'
|
15
15
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
.push('test/fixtures/book/OPS/ルートファイル.opf')
|
@@ -36,13 +36,11 @@ Gem::Specification.new do |s|
|
|
36
36
|
s.add_development_dependency 'yard'
|
37
37
|
s.add_development_dependency 'gem-man'
|
38
38
|
s.add_development_dependency 'ronn'
|
39
|
-
s.add_development_dependency 'epzip'
|
40
|
-
s.add_development_dependency 'racc'
|
41
|
-
s.add_development_dependency 'nokogiri-diff'
|
42
39
|
s.add_development_dependency 'pretty_backtrace'
|
40
|
+
s.add_development_dependency 'epub-maker'
|
43
41
|
|
44
42
|
s.add_runtime_dependency 'archive-zip'
|
45
|
-
s.add_runtime_dependency 'nokogiri', '
|
43
|
+
s.add_runtime_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
|
46
44
|
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
47
45
|
s.add_runtime_dependency 'rchardet', '>= 1.6.1'
|
48
46
|
s.add_runtime_dependency 'epub-cfi'
|
@@ -3,9 +3,10 @@ module EPUB
|
|
3
3
|
class XHTML
|
4
4
|
attr_accessor :item
|
5
5
|
|
6
|
+
# @param [Boolean] detect_encoding See {Publication::Package::Manifest::Item#read}
|
6
7
|
# @return [String] Returns the content string.
|
7
|
-
def read
|
8
|
-
item.read
|
8
|
+
def read(detect_encoding: true)
|
9
|
+
item.read(detect_encoding: detect_encoding)
|
9
10
|
end
|
10
11
|
alias raw_document read
|
11
12
|
|
data/lib/epub/parser.rb
CHANGED
@@ -51,7 +51,9 @@ module EPUB
|
|
51
51
|
|
52
52
|
@filepath = path_is_uri ? filepath : File.realpath(filepath)
|
53
53
|
@book = create_book(options)
|
54
|
-
if
|
54
|
+
if path_is_uri
|
55
|
+
@book.container_adapter = :UnpackedURI
|
56
|
+
elsif File.directory? @filepath
|
55
57
|
@book.container_adapter = :UnpackedDirectory
|
56
58
|
end
|
57
59
|
@book.epub_file = @filepath
|
data/lib/epub/parser/version.rb
CHANGED
@@ -135,7 +135,16 @@ module EPUB
|
|
135
135
|
Addressable::URI.unencode(full_path)
|
136
136
|
end
|
137
137
|
|
138
|
-
|
138
|
+
# Read content from EPUB archive
|
139
|
+
#
|
140
|
+
# @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value will become +false+ in the near future.
|
141
|
+
# @return [String] Content with encoding:
|
142
|
+
# US-ASCII when the content is not in text format, such as images.
|
143
|
+
# UTF-8 when the content is in text format and +detect_encoding+ is +false+.
|
144
|
+
# auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
|
145
|
+
def read(detect_encoding: true)
|
146
|
+
warn "[#{self.class}##{__method__}]Autodetection of character encoding is deprecated. Pass keyword argument detect_encoding with true explicitly." if detect_encoding
|
147
|
+
|
139
148
|
raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
|
140
149
|
|
141
150
|
unless media_type.start_with?('text/') or
|
@@ -143,17 +152,21 @@ module EPUB
|
|
143
152
|
['application/json', 'application/javascript', 'application/ecmascript', 'application/xml-dtd'].include?(media_type)
|
144
153
|
return raw_content
|
145
154
|
end
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
155
|
+
if detect_encoding
|
156
|
+
# CharDet.detect doesn't raise Encoding::CompatibilityError
|
157
|
+
# that is caused when trying to compare CharDet's internal
|
158
|
+
# ASCII-8BIT RegExp with a String with other encoding
|
159
|
+
# because Zip::File#read returns a String with encoding ASCII-8BIT.
|
160
|
+
# So, no need to rescue the error here.
|
161
|
+
encoding = CharDet.detect(raw_content)['encoding']
|
162
|
+
if encoding
|
163
|
+
raw_content.force_encoding(encoding)
|
164
|
+
else
|
165
|
+
warn "No encoding detected for #{entry_name}. Set to ASCII-8BIT" if $DEBUG || $VERBOSE
|
166
|
+
raw_content
|
167
|
+
end
|
154
168
|
else
|
155
|
-
|
156
|
-
raw_content
|
169
|
+
raw_content.force_encoding("UTF-8");
|
157
170
|
end
|
158
171
|
end
|
159
172
|
|
@@ -82,7 +82,6 @@ module EPUB
|
|
82
82
|
# @param [EPUB::CFI] cfi
|
83
83
|
# @return [Array] Path in EPUB Rendition
|
84
84
|
def search_by_cfi(cfi)
|
85
|
-
# steal from pirka's find_item_and_element
|
86
85
|
path_in_package = cfi.paths.first
|
87
86
|
spine = @package.spine
|
88
87
|
model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
|
data/test/test_publication.rb
CHANGED
@@ -337,14 +337,20 @@ class TestPublication < Test::Unit::TestCase
|
|
337
337
|
end
|
338
338
|
end
|
339
339
|
|
340
|
+
def test_read_without_detect_encoding_resturns_utf_8_string
|
341
|
+
epub = EPUB::Parser.parse("test/fixtures/book.epub")
|
342
|
+
item = epub.package.manifest["utf-8-encoded"]
|
343
|
+
assert_equal Encoding::UTF_8, item.read(detect_encoding: false).encoding
|
344
|
+
end
|
345
|
+
|
340
346
|
data('UTF-8' => [Encoding::UTF_8, 'utf-8-encoded'],
|
341
347
|
'EUC-JP' => [Encoding::EUC_JP, 'euc-jp-encoded'],
|
342
348
|
'Shift-JIS' => [Encoding::Shift_JIS, 'shift_jis-encoded'])
|
343
|
-
def
|
349
|
+
def test_read_with_detect_encoding_detects_encoding(data)
|
344
350
|
encoding, id = data
|
345
351
|
epub = EPUB::Parser.parse('test/fixtures/book.epub')
|
346
352
|
item = epub.package.manifest[id]
|
347
|
-
assert_equal encoding, item.read.encoding
|
353
|
+
assert_equal encoding, item.read(detect_encoding: true).encoding
|
348
354
|
end
|
349
355
|
|
350
356
|
def test_entry_name_returns_normalized_iri
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -179,7 +179,7 @@ dependencies:
|
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
|
-
name:
|
182
|
+
name: pretty_backtrace
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
185
|
- - ">="
|
@@ -193,7 +193,7 @@ dependencies:
|
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0'
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
|
-
name:
|
196
|
+
name: epub-maker
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - ">="
|
@@ -207,13 +207,13 @@ dependencies:
|
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '0'
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
|
-
name:
|
210
|
+
name: archive-zip
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
212
212
|
requirements:
|
213
213
|
- - ">="
|
214
214
|
- !ruby/object:Gem::Version
|
215
215
|
version: '0'
|
216
|
-
type: :
|
216
|
+
type: :runtime
|
217
217
|
prerelease: false
|
218
218
|
version_requirements: !ruby/object:Gem::Requirement
|
219
219
|
requirements:
|
@@ -221,47 +221,25 @@ dependencies:
|
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: '0'
|
223
223
|
- !ruby/object:Gem::Dependency
|
224
|
-
name:
|
224
|
+
name: nokogiri
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
226
226
|
requirements:
|
227
227
|
- - ">="
|
228
228
|
- !ruby/object:Gem::Version
|
229
|
-
version:
|
230
|
-
|
231
|
-
prerelease: false
|
232
|
-
version_requirements: !ruby/object:Gem::Requirement
|
233
|
-
requirements:
|
234
|
-
- - ">="
|
229
|
+
version: 1.6.0
|
230
|
+
- - "<"
|
235
231
|
- !ruby/object:Gem::Version
|
236
|
-
version: '
|
237
|
-
- !ruby/object:Gem::Dependency
|
238
|
-
name: archive-zip
|
239
|
-
requirement: !ruby/object:Gem::Requirement
|
240
|
-
requirements:
|
241
|
-
- - ">="
|
242
|
-
- !ruby/object:Gem::Version
|
243
|
-
version: '0'
|
232
|
+
version: '1.9'
|
244
233
|
type: :runtime
|
245
234
|
prerelease: false
|
246
235
|
version_requirements: !ruby/object:Gem::Requirement
|
247
236
|
requirements:
|
248
237
|
- - ">="
|
249
238
|
- !ruby/object:Gem::Version
|
250
|
-
version:
|
251
|
-
-
|
252
|
-
name: nokogiri
|
253
|
-
requirement: !ruby/object:Gem::Requirement
|
254
|
-
requirements:
|
255
|
-
- - "~>"
|
239
|
+
version: 1.6.0
|
240
|
+
- - "<"
|
256
241
|
- !ruby/object:Gem::Version
|
257
|
-
version: '1.
|
258
|
-
type: :runtime
|
259
|
-
prerelease: false
|
260
|
-
version_requirements: !ruby/object:Gem::Requirement
|
261
|
-
requirements:
|
262
|
-
- - "~>"
|
263
|
-
- !ruby/object:Gem::Version
|
264
|
-
version: '1.6'
|
242
|
+
version: '1.9'
|
265
243
|
- !ruby/object:Gem::Dependency
|
266
244
|
name: addressable
|
267
245
|
requirement: !ruby/object:Gem::Requirement
|
@@ -416,9 +394,9 @@ require_paths:
|
|
416
394
|
- lib
|
417
395
|
required_ruby_version: !ruby/object:Gem::Requirement
|
418
396
|
requirements:
|
419
|
-
- - "
|
397
|
+
- - ">="
|
420
398
|
- !ruby/object:Gem::Version
|
421
|
-
version: '2'
|
399
|
+
version: '2.3'
|
422
400
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
423
401
|
requirements:
|
424
402
|
- - ">="
|
@@ -426,7 +404,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
426
404
|
version: '0'
|
427
405
|
requirements: []
|
428
406
|
rubyforge_project:
|
429
|
-
rubygems_version: 2.
|
407
|
+
rubygems_version: 2.7.4
|
430
408
|
signing_key:
|
431
409
|
specification_version: 4
|
432
410
|
summary: EPUB 3 Parser
|