epub-parser 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitlab-ci.yml +4 -6
- data/CHANGELOG.markdown +7 -0
- data/README.markdown +9 -3
- data/Rakefile +2 -1
- data/docs/Home.markdown +22 -0
- data/epub-parser.gemspec +3 -5
- data/lib/epub/content_document/xhtml.rb +3 -2
- data/lib/epub/ocf/physical_container.rb +0 -1
- data/lib/epub/parser.rb +3 -1
- data/lib/epub/parser/publication.rb +1 -2
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +24 -11
- data/lib/epub/searcher/publication.rb +0 -1
- data/test/test_publication.rb +8 -2
- metadata +16 -38
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: e95e303cf2c03479fdc655b8c4ea3c0b668b421f9d63082c40d984542312804b
|
|
4
|
+
data.tar.gz: 680b503b44bf23e82374da6afc18439762289eeaf87d58c6b0db5588d6ad59b5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dc201c61dc61f7f56e4690c4b669bf0bee8e098f6025ceea17bc8604fb8b0b4a49a9937a4c34972d5b4584c83064fdf78d9b1831c75706c7f94ad01eebb23a8f
|
|
7
|
+
data.tar.gz: 7d08504f668f371dbef455f1498214b3f7bec305f3095638986441a2d6d8800932c329b65c7c62913f4ec126b02db892bbe10a3c22e0a316933f613d5cf7a8d7
|
data/.gitlab-ci.yml
CHANGED
|
@@ -1,15 +1,9 @@
|
|
|
1
1
|
before_script:
|
|
2
|
-
- apt-get update -qq && apt-get install -y zip
|
|
3
2
|
- ruby -v
|
|
4
3
|
- which ruby
|
|
5
4
|
- gem install bundler --no-document
|
|
6
5
|
- bundle install --jobs=$(nproc) "${FLAGS[@]}"
|
|
7
6
|
|
|
8
|
-
test:2.2:
|
|
9
|
-
image: ruby:2.2
|
|
10
|
-
script:
|
|
11
|
-
- bundle exec rake test
|
|
12
|
-
|
|
13
7
|
test:2.3:
|
|
14
8
|
image: ruby:2.3
|
|
15
9
|
script: bundle exec rake test
|
|
@@ -17,3 +11,7 @@ test:2.3:
|
|
|
17
11
|
test:2.4:
|
|
18
12
|
image: ruby:2.4
|
|
19
13
|
script: bundle exec rake test
|
|
14
|
+
|
|
15
|
+
test:2.5:
|
|
16
|
+
image: ruby:2.5
|
|
17
|
+
script: bundle exec rake test
|
data/CHANGELOG.markdown
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
CHANGELOG
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
0.3.3
|
|
5
|
+
-----
|
|
6
|
+
|
|
7
|
+
* [BUG FIX]Use UnpackedURI adapter for URI
|
|
8
|
+
* [BREAKING CHANGE]Remove deprecated second argument from `EPUB::Parser::Publication#initialize`
|
|
9
|
+
* Add `detect_encoding` keyword argument to `Publication::Package::Manifest::Item#read` and `ContentDocument::XHTML#read`
|
|
10
|
+
|
|
4
11
|
0.3.2
|
|
5
12
|
-----
|
|
6
13
|
|
data/README.markdown
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
EPUB Parser
|
|
2
2
|
===========
|
|
3
|
-
[](http://travis-ci.org/KitaitiMakoto/epub-parser)
|
|
4
3
|
[](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
|
5
4
|
[](https://gemnasium.com/KitaitiMakoto/epub-parser)
|
|
6
5
|
[](http://badge.fury.io/rb/epub-parser)
|
|
6
|
+
[](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
|
7
7
|
|
|
8
8
|
INSTALLATION
|
|
9
9
|
-------
|
|
@@ -134,11 +134,11 @@ Then documentation will be available in `doc` directory.
|
|
|
134
134
|
|
|
135
135
|
REQUIREMENTS
|
|
136
136
|
------------
|
|
137
|
-
* Ruby 2.
|
|
137
|
+
* Ruby 2.3.0 or later
|
|
138
138
|
* `patch` command to install Nokogiri
|
|
139
139
|
* C compiler to compile Nokogiri
|
|
140
140
|
|
|
141
|
-
|
|
141
|
+
SIMILAR EFFORTS
|
|
142
142
|
---------------
|
|
143
143
|
* [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
|
|
144
144
|
* [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
|
|
@@ -153,6 +153,12 @@ If you find other gems, please tell me or request a pull request.
|
|
|
153
153
|
RECENT CHANGES
|
|
154
154
|
--------------
|
|
155
155
|
|
|
156
|
+
### 0.3.3
|
|
157
|
+
|
|
158
|
+
* [BUG FIX]Use UnpackedURI adapter for URI
|
|
159
|
+
* [BREAKING CHANGE]Remove deprecated second argument from `EPUB::Parser::Publication#initialize`
|
|
160
|
+
* Add `detect_encoding` keyword argument to `Publication::Package::Manifest::Item#read` and `ContentDocument::XHTML#read`
|
|
161
|
+
|
|
156
162
|
### 0.3.2
|
|
157
163
|
|
|
158
164
|
* Use epub-cfi gem for EPUB CFI
|
data/Rakefile
CHANGED
|
@@ -5,6 +5,7 @@ require 'yard'
|
|
|
5
5
|
require 'rdoc/task'
|
|
6
6
|
require 'epub/parser/version'
|
|
7
7
|
require 'zipruby'
|
|
8
|
+
require 'epub/maker'
|
|
8
9
|
|
|
9
10
|
task :default => :test
|
|
10
11
|
task :test => 'test:default'
|
|
@@ -18,7 +19,7 @@ namespace :test do
|
|
|
18
19
|
desc 'Build test fixture EPUB file'
|
|
19
20
|
task :build => :clean do
|
|
20
21
|
input_dir = 'test/fixtures/book'
|
|
21
|
-
|
|
22
|
+
EPUB::Maker.archive input_dir
|
|
22
23
|
small_file = File.read("#{input_dir}/OPS/case-sensitive.xhtml")
|
|
23
24
|
Zip::Archive.open "#{input_dir}.epub" do |archive|
|
|
24
25
|
archive.add_buffer 'OPS/CASE-SENSITIVE.xhtml', small_file.sub('small file name', 'LARGE FILE NAME')
|
data/docs/Home.markdown
CHANGED
|
@@ -90,6 +90,23 @@ You are also able to find YourBook object for the first:
|
|
|
90
90
|
ret == book # => true; this API is not good I feel... Welcome suggestion!
|
|
91
91
|
# do something with your book
|
|
92
92
|
|
|
93
|
+
#### Switching ZIP library
|
|
94
|
+
|
|
95
|
+
EPUB Parser uses [Archive::Zip][], a pure Ruby ZIP library, by default. You can use [Zip/Ruby][], Ruby bindings for [libzip][], if you have already installed the Zip/Ruby gem via RubyGems or Bundler.
|
|
96
|
+
|
|
97
|
+
Globally:
|
|
98
|
+
|
|
99
|
+
EPUB::OCF::PhysicalContainer.adapter = :Zipruby
|
|
100
|
+
book = EPUB::Parser.parse("path/to/book.epub")
|
|
101
|
+
|
|
102
|
+
For each EPUB book:
|
|
103
|
+
|
|
104
|
+
book = EPUB::Parser.parse("path/to/book.epub", container_adapter: :Zipruby)
|
|
105
|
+
|
|
106
|
+
[Archive::Zip]: https://github.com/javanthropus/archive-zip
|
|
107
|
+
[Zip/Ruby]: https://bitbucket.org/winebarrel/zip-ruby/wiki/Home
|
|
108
|
+
[libzip]: https://nih.at/libzip/
|
|
109
|
+
|
|
93
110
|
Documentation
|
|
94
111
|
-------------
|
|
95
112
|
|
|
@@ -146,6 +163,11 @@ Requirements
|
|
|
146
163
|
* `patch` command to install Nokogiri
|
|
147
164
|
* C compiler to compile Zip/Ruby and Nokogiri
|
|
148
165
|
|
|
166
|
+
History
|
|
167
|
+
-------
|
|
168
|
+
|
|
169
|
+
See {file:CHANGELOG.markdown}.
|
|
170
|
+
|
|
149
171
|
Note
|
|
150
172
|
----
|
|
151
173
|
|
data/epub-parser.gemspec
CHANGED
|
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
|
|
|
11
11
|
s.summary = %q{EPUB 3 Parser}
|
|
12
12
|
s.description = %q{Parse EPUB 3 book loosely}
|
|
13
13
|
s.license = 'MIT'
|
|
14
|
-
s.required_ruby_version = '
|
|
14
|
+
s.required_ruby_version = '>= 2.3'
|
|
15
15
|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
|
17
17
|
.push('test/fixtures/book/OPS/ルートファイル.opf')
|
|
@@ -36,13 +36,11 @@ Gem::Specification.new do |s|
|
|
|
36
36
|
s.add_development_dependency 'yard'
|
|
37
37
|
s.add_development_dependency 'gem-man'
|
|
38
38
|
s.add_development_dependency 'ronn'
|
|
39
|
-
s.add_development_dependency 'epzip'
|
|
40
|
-
s.add_development_dependency 'racc'
|
|
41
|
-
s.add_development_dependency 'nokogiri-diff'
|
|
42
39
|
s.add_development_dependency 'pretty_backtrace'
|
|
40
|
+
s.add_development_dependency 'epub-maker'
|
|
43
41
|
|
|
44
42
|
s.add_runtime_dependency 'archive-zip'
|
|
45
|
-
s.add_runtime_dependency 'nokogiri', '
|
|
43
|
+
s.add_runtime_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
|
|
46
44
|
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
|
47
45
|
s.add_runtime_dependency 'rchardet', '>= 1.6.1'
|
|
48
46
|
s.add_runtime_dependency 'epub-cfi'
|
|
@@ -3,9 +3,10 @@ module EPUB
|
|
|
3
3
|
class XHTML
|
|
4
4
|
attr_accessor :item
|
|
5
5
|
|
|
6
|
+
# @param [Boolean] detect_encoding See {Publication::Package::Manifest::Item#read}
|
|
6
7
|
# @return [String] Returns the content string.
|
|
7
|
-
def read
|
|
8
|
-
item.read
|
|
8
|
+
def read(detect_encoding: true)
|
|
9
|
+
item.read(detect_encoding: detect_encoding)
|
|
9
10
|
end
|
|
10
11
|
alias raw_document read
|
|
11
12
|
|
data/lib/epub/parser.rb
CHANGED
|
@@ -51,7 +51,9 @@ module EPUB
|
|
|
51
51
|
|
|
52
52
|
@filepath = path_is_uri ? filepath : File.realpath(filepath)
|
|
53
53
|
@book = create_book(options)
|
|
54
|
-
if
|
|
54
|
+
if path_is_uri
|
|
55
|
+
@book.container_adapter = :UnpackedURI
|
|
56
|
+
elsif File.directory? @filepath
|
|
55
57
|
@book.container_adapter = :UnpackedDirectory
|
|
56
58
|
end
|
|
57
59
|
@book.epub_file = @filepath
|
data/lib/epub/parser/version.rb
CHANGED
|
@@ -135,7 +135,16 @@ module EPUB
|
|
|
135
135
|
Addressable::URI.unencode(full_path)
|
|
136
136
|
end
|
|
137
137
|
|
|
138
|
-
|
|
138
|
+
# Read content from EPUB archive
|
|
139
|
+
#
|
|
140
|
+
# @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value will become +false+ in the near future.
|
|
141
|
+
# @return [String] Content with encoding:
|
|
142
|
+
# US-ASCII when the content is not in text format, such as images.
|
|
143
|
+
# UTF-8 when the content is in text format and +detect_encoding+ is +false+.
|
|
144
|
+
# auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
|
|
145
|
+
def read(detect_encoding: true)
|
|
146
|
+
warn "[#{self.class}##{__method__}]Autodetection of character encoding is deprecated. Pass keyword argument detect_encoding with true explicitly." if detect_encoding
|
|
147
|
+
|
|
139
148
|
raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
|
|
140
149
|
|
|
141
150
|
unless media_type.start_with?('text/') or
|
|
@@ -143,17 +152,21 @@ module EPUB
|
|
|
143
152
|
['application/json', 'application/javascript', 'application/ecmascript', 'application/xml-dtd'].include?(media_type)
|
|
144
153
|
return raw_content
|
|
145
154
|
end
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
155
|
+
if detect_encoding
|
|
156
|
+
# CharDet.detect doesn't raise Encoding::CompatibilityError
|
|
157
|
+
# that is caused when trying to compare CharDet's internal
|
|
158
|
+
# ASCII-8BIT RegExp with a String with other encoding
|
|
159
|
+
# because Zip::File#read returns a String with encoding ASCII-8BIT.
|
|
160
|
+
# So, no need to rescue the error here.
|
|
161
|
+
encoding = CharDet.detect(raw_content)['encoding']
|
|
162
|
+
if encoding
|
|
163
|
+
raw_content.force_encoding(encoding)
|
|
164
|
+
else
|
|
165
|
+
warn "No encoding detected for #{entry_name}. Set to ASCII-8BIT" if $DEBUG || $VERBOSE
|
|
166
|
+
raw_content
|
|
167
|
+
end
|
|
154
168
|
else
|
|
155
|
-
|
|
156
|
-
raw_content
|
|
169
|
+
raw_content.force_encoding("UTF-8");
|
|
157
170
|
end
|
|
158
171
|
end
|
|
159
172
|
|
|
@@ -82,7 +82,6 @@ module EPUB
|
|
|
82
82
|
# @param [EPUB::CFI] cfi
|
|
83
83
|
# @return [Array] Path in EPUB Rendition
|
|
84
84
|
def search_by_cfi(cfi)
|
|
85
|
-
# steal from pirka's find_item_and_element
|
|
86
85
|
path_in_package = cfi.paths.first
|
|
87
86
|
spine = @package.spine
|
|
88
87
|
model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
|
data/test/test_publication.rb
CHANGED
|
@@ -337,14 +337,20 @@ class TestPublication < Test::Unit::TestCase
|
|
|
337
337
|
end
|
|
338
338
|
end
|
|
339
339
|
|
|
340
|
+
def test_read_without_detect_encoding_resturns_utf_8_string
|
|
341
|
+
epub = EPUB::Parser.parse("test/fixtures/book.epub")
|
|
342
|
+
item = epub.package.manifest["utf-8-encoded"]
|
|
343
|
+
assert_equal Encoding::UTF_8, item.read(detect_encoding: false).encoding
|
|
344
|
+
end
|
|
345
|
+
|
|
340
346
|
data('UTF-8' => [Encoding::UTF_8, 'utf-8-encoded'],
|
|
341
347
|
'EUC-JP' => [Encoding::EUC_JP, 'euc-jp-encoded'],
|
|
342
348
|
'Shift-JIS' => [Encoding::Shift_JIS, 'shift_jis-encoded'])
|
|
343
|
-
def
|
|
349
|
+
def test_read_with_detect_encoding_detects_encoding(data)
|
|
344
350
|
encoding, id = data
|
|
345
351
|
epub = EPUB::Parser.parse('test/fixtures/book.epub')
|
|
346
352
|
item = epub.package.manifest[id]
|
|
347
|
-
assert_equal encoding, item.read.encoding
|
|
353
|
+
assert_equal encoding, item.read(detect_encoding: true).encoding
|
|
348
354
|
end
|
|
349
355
|
|
|
350
356
|
def test_entry_name_returns_normalized_iri
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: epub-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- KITAITI Makoto
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2018-03-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -179,7 +179,7 @@ dependencies:
|
|
|
179
179
|
- !ruby/object:Gem::Version
|
|
180
180
|
version: '0'
|
|
181
181
|
- !ruby/object:Gem::Dependency
|
|
182
|
-
name:
|
|
182
|
+
name: pretty_backtrace
|
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
|
184
184
|
requirements:
|
|
185
185
|
- - ">="
|
|
@@ -193,7 +193,7 @@ dependencies:
|
|
|
193
193
|
- !ruby/object:Gem::Version
|
|
194
194
|
version: '0'
|
|
195
195
|
- !ruby/object:Gem::Dependency
|
|
196
|
-
name:
|
|
196
|
+
name: epub-maker
|
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
|
198
198
|
requirements:
|
|
199
199
|
- - ">="
|
|
@@ -207,13 +207,13 @@ dependencies:
|
|
|
207
207
|
- !ruby/object:Gem::Version
|
|
208
208
|
version: '0'
|
|
209
209
|
- !ruby/object:Gem::Dependency
|
|
210
|
-
name:
|
|
210
|
+
name: archive-zip
|
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
|
212
212
|
requirements:
|
|
213
213
|
- - ">="
|
|
214
214
|
- !ruby/object:Gem::Version
|
|
215
215
|
version: '0'
|
|
216
|
-
type: :
|
|
216
|
+
type: :runtime
|
|
217
217
|
prerelease: false
|
|
218
218
|
version_requirements: !ruby/object:Gem::Requirement
|
|
219
219
|
requirements:
|
|
@@ -221,47 +221,25 @@ dependencies:
|
|
|
221
221
|
- !ruby/object:Gem::Version
|
|
222
222
|
version: '0'
|
|
223
223
|
- !ruby/object:Gem::Dependency
|
|
224
|
-
name:
|
|
224
|
+
name: nokogiri
|
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
|
226
226
|
requirements:
|
|
227
227
|
- - ">="
|
|
228
228
|
- !ruby/object:Gem::Version
|
|
229
|
-
version:
|
|
230
|
-
|
|
231
|
-
prerelease: false
|
|
232
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
233
|
-
requirements:
|
|
234
|
-
- - ">="
|
|
229
|
+
version: 1.6.0
|
|
230
|
+
- - "<"
|
|
235
231
|
- !ruby/object:Gem::Version
|
|
236
|
-
version: '
|
|
237
|
-
- !ruby/object:Gem::Dependency
|
|
238
|
-
name: archive-zip
|
|
239
|
-
requirement: !ruby/object:Gem::Requirement
|
|
240
|
-
requirements:
|
|
241
|
-
- - ">="
|
|
242
|
-
- !ruby/object:Gem::Version
|
|
243
|
-
version: '0'
|
|
232
|
+
version: '1.9'
|
|
244
233
|
type: :runtime
|
|
245
234
|
prerelease: false
|
|
246
235
|
version_requirements: !ruby/object:Gem::Requirement
|
|
247
236
|
requirements:
|
|
248
237
|
- - ">="
|
|
249
238
|
- !ruby/object:Gem::Version
|
|
250
|
-
version:
|
|
251
|
-
-
|
|
252
|
-
name: nokogiri
|
|
253
|
-
requirement: !ruby/object:Gem::Requirement
|
|
254
|
-
requirements:
|
|
255
|
-
- - "~>"
|
|
239
|
+
version: 1.6.0
|
|
240
|
+
- - "<"
|
|
256
241
|
- !ruby/object:Gem::Version
|
|
257
|
-
version: '1.
|
|
258
|
-
type: :runtime
|
|
259
|
-
prerelease: false
|
|
260
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
261
|
-
requirements:
|
|
262
|
-
- - "~>"
|
|
263
|
-
- !ruby/object:Gem::Version
|
|
264
|
-
version: '1.6'
|
|
242
|
+
version: '1.9'
|
|
265
243
|
- !ruby/object:Gem::Dependency
|
|
266
244
|
name: addressable
|
|
267
245
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -416,9 +394,9 @@ require_paths:
|
|
|
416
394
|
- lib
|
|
417
395
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
418
396
|
requirements:
|
|
419
|
-
- - "
|
|
397
|
+
- - ">="
|
|
420
398
|
- !ruby/object:Gem::Version
|
|
421
|
-
version: '2'
|
|
399
|
+
version: '2.3'
|
|
422
400
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
423
401
|
requirements:
|
|
424
402
|
- - ">="
|
|
@@ -426,7 +404,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
426
404
|
version: '0'
|
|
427
405
|
requirements: []
|
|
428
406
|
rubyforge_project:
|
|
429
|
-
rubygems_version: 2.
|
|
407
|
+
rubygems_version: 2.7.4
|
|
430
408
|
signing_key:
|
|
431
409
|
specification_version: 4
|
|
432
410
|
summary: EPUB 3 Parser
|