epub-parser-io 0.1.6a
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/.gitignore +12 -0
- data/.gitmodules +3 -0
- data/.travis.yml +4 -0
- data/.yardopts +10 -0
- data/CHANGELOG.markdown +61 -0
- data/Gemfile +2 -0
- data/MIT-LICENSE +7 -0
- data/README.markdown +174 -0
- data/Rakefile +68 -0
- data/bin/epub-open +25 -0
- data/bin/epubinfo +64 -0
- data/docs/EpubOpen.markdown +43 -0
- data/docs/Epubinfo.markdown +37 -0
- data/docs/FixedLayout.markdown +96 -0
- data/docs/Home.markdown +128 -0
- data/docs/Item.markdown +80 -0
- data/docs/Navigation.markdown +58 -0
- data/docs/Publication.markdown +54 -0
- data/epub-parser.gemspec +49 -0
- data/features/epubinfo.feature +6 -0
- data/features/step_definitions/epubinfo_steps.rb +5 -0
- data/features/support/env.rb +1 -0
- data/lib/epub/book/features.rb +85 -0
- data/lib/epub/book.rb +7 -0
- data/lib/epub/constants.rb +48 -0
- data/lib/epub/content_document/navigation.rb +104 -0
- data/lib/epub/content_document/xhtml.rb +41 -0
- data/lib/epub/content_document.rb +2 -0
- data/lib/epub/inspector.rb +45 -0
- data/lib/epub/ocf/container.rb +28 -0
- data/lib/epub/ocf/encryption.rb +7 -0
- data/lib/epub/ocf/manifest.rb +6 -0
- data/lib/epub/ocf/metadata.rb +6 -0
- data/lib/epub/ocf/rights.rb +6 -0
- data/lib/epub/ocf/signatures.rb +6 -0
- data/lib/epub/ocf.rb +8 -0
- data/lib/epub/parser/content_document.rb +111 -0
- data/lib/epub/parser/ocf.rb +73 -0
- data/lib/epub/parser/publication.rb +200 -0
- data/lib/epub/parser/utils.rb +20 -0
- data/lib/epub/parser/version.rb +5 -0
- data/lib/epub/parser.rb +103 -0
- data/lib/epub/publication/fixed_layout.rb +208 -0
- data/lib/epub/publication/package/bindings.rb +31 -0
- data/lib/epub/publication/package/guide.rb +51 -0
- data/lib/epub/publication/package/manifest.rb +180 -0
- data/lib/epub/publication/package/metadata.rb +170 -0
- data/lib/epub/publication/package/spine.rb +106 -0
- data/lib/epub/publication/package.rb +68 -0
- data/lib/epub/publication.rb +2 -0
- data/lib/epub.rb +14 -0
- data/man/epubinfo.1.ronn +19 -0
- data/schemas/epub-nav-30.rnc +10 -0
- data/schemas/epub-nav-30.sch +72 -0
- data/schemas/epub-xhtml-30.sch +377 -0
- data/schemas/ocf-container-30.rnc +16 -0
- data/test/fixtures/book/META-INF/container.xml +6 -0
- data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
- data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
- data/test/fixtures/book/OPS/containing space.xhtml +10 -0
- data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
- data/test/fixtures/book/OPS/nav.xhtml +28 -0
- data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
- data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
- data/test/fixtures/book/mimetype +1 -0
- data/test/helper.rb +9 -0
- data/test/test_content_document.rb +92 -0
- data/test/test_epub.rb +21 -0
- data/test/test_fixed_layout.rb +257 -0
- data/test/test_inspect.rb +121 -0
- data/test/test_parser.rb +60 -0
- data/test/test_parser_content_document.rb +36 -0
- data/test/test_parser_fixed_layout.rb +16 -0
- data/test/test_parser_ocf.rb +38 -0
- data/test/test_parser_publication.rb +247 -0
- data/test/test_publication.rb +324 -0
- metadata +445 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
{file:docs/Home.markdown} > **{file:docs/Publication.markdow}**
|
2
|
+
|
3
|
+
Publication(Information about EPUB book)
|
4
|
+
========================================
|
5
|
+
|
6
|
+
EPUB Publications is information about EPUB books.
|
7
|
+
|
8
|
+
EPUB Parser represents it as {EPUB::Publication} module and classes under the namespace and you can access them such like `EPUB::Parser.parse("path/to/book.epub").package`
|
9
|
+
|
10
|
+
Let
|
11
|
+
|
12
|
+
book = EPUB::Parser.parse("path/to/book.epub")
|
13
|
+
|
14
|
+
for continuing.
|
15
|
+
|
16
|
+
Five Models
|
17
|
+
-----------
|
18
|
+
|
19
|
+
`book.package` is a package document, a root of information tree about the book, and it has attributes to access five major models of the publication; {EPUB::Publication::Package::Metadata Metadata}, {EPUB::Publication::Package::Manifest Manifest}, {EPUB::Publication::Package::Spine Spine}, {EPUB::Publication::Package::Guide Guide} and {EPUB::Publication::Package::Bindings Bindings}.
|
20
|
+
|
21
|
+
Each of them has information the book in the way its own.
|
22
|
+
|
23
|
+
Metadata
|
24
|
+
--------
|
25
|
+
|
26
|
+
{EPUB::Publication::Package::Metadata Metadata} is literally metadata of the book, including identifiers, titles, languages, links and so on.
|
27
|
+
|
28
|
+
You can access them by:
|
29
|
+
|
30
|
+
md = book.package.metadata # => EPUB::Publication::Package::Metadata
|
31
|
+
md.titles # => [#<EPUB::Publication::Package::Metadata::Title...>, #<EPUB::Publication::Package::Metadata::Title...>, ...]
|
32
|
+
# ...
|
33
|
+
|
34
|
+
Manifest
|
35
|
+
--------
|
36
|
+
|
37
|
+
Spine
|
38
|
+
-----
|
39
|
+
|
40
|
+
Guide
|
41
|
+
-----
|
42
|
+
|
43
|
+
Bindings
|
44
|
+
--------
|
45
|
+
|
46
|
+
Package
|
47
|
+
-------
|
48
|
+
|
49
|
+
References
|
50
|
+
----------
|
51
|
+
|
52
|
+
* [EPUB Publications 3.0][publications] on IDPF site
|
53
|
+
|
54
|
+
[publications]: http://www.idpf.org/epub/30/spec/epub30-publications.html
|
data/epub-parser.gemspec
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "epub/parser/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "epub-parser-io"
|
7
|
+
s.version = EPUB::Parser::VERSION
|
8
|
+
s.authors = ["KITAITI Makoto, Brian Glusman"]
|
9
|
+
s.email = ["KitaitiMakoto@gmail.com", "brian@glusman.me"]
|
10
|
+
s.homepage = "https://github.com/bglusman/epub-parser"
|
11
|
+
s.summary = %q{EPUB 3 Parser}
|
12
|
+
s.description = %q{Parse EPUB 3 book loosely}
|
13
|
+
s.license = 'MIT'
|
14
|
+
|
15
|
+
# s.rubyforge_project = "epub-parser"
|
16
|
+
|
17
|
+
s.files = `git ls-files`.split("\n")
|
18
|
+
.push('test/fixtures/book/OPS/ルートファイル.opf')
|
19
|
+
.push('test/fixtures/book/OPS/日本語.xhtml')
|
20
|
+
.push(Dir['docs/*.md'])
|
21
|
+
s.files.reject! do |fn|
|
22
|
+
['"test/fixtures/book/OPS/\343\203\253\343\203\274\343\203\210\343\203\225\343\202\241\343\202\244\343\203\253.opf"', '"test/fixtures/book/OPS/\346\227\245\346\234\254\350\252\236.xhtml"'].include? fn
|
23
|
+
end
|
24
|
+
s.test_files = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
|
25
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
26
|
+
s.require_paths = ["lib"]
|
27
|
+
s.has_rdoc = 'yard'
|
28
|
+
|
29
|
+
s.add_development_dependency 'rake'
|
30
|
+
s.add_development_dependency 'pry'
|
31
|
+
s.add_development_dependency 'pry-doc'
|
32
|
+
s.add_development_dependency 'test-unit'
|
33
|
+
s.add_development_dependency 'test-unit-rr'
|
34
|
+
s.add_development_dependency 'test-unit-notify'
|
35
|
+
s.add_development_dependency 'simplecov'
|
36
|
+
s.add_development_dependency 'thin'
|
37
|
+
s.add_development_dependency 'yard'
|
38
|
+
s.add_development_dependency 'gem-man'
|
39
|
+
s.add_development_dependency 'ronn'
|
40
|
+
s.add_development_dependency 'epzip'
|
41
|
+
s.add_development_dependency 'epubcheck'
|
42
|
+
s.add_development_dependency 'epub_validator'
|
43
|
+
s.add_development_dependency 'aruba'
|
44
|
+
|
45
|
+
s.add_runtime_dependency 'enumerabler'
|
46
|
+
s.add_runtime_dependency 'zipruby'
|
47
|
+
s.add_runtime_dependency 'nokogiri', '~> 1.6'
|
48
|
+
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
49
|
+
end
|
@@ -0,0 +1,6 @@
|
|
1
|
+
Feature: We can see information about EPUB file
|
2
|
+
|
3
|
+
Scenario: See info about existing EPUB file
|
4
|
+
Given the file "test/fixtures/book.epub" exists
|
5
|
+
When I successfully run `bundle exec epubinfo /home/ikeda/ruby/projects/epub-parser/test/fixtures/book.epub`
|
6
|
+
Then the stdout should contain "The New French Cuisine Masters"
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'aruba/cucumber'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module EPUB
|
2
|
+
class Book
|
3
|
+
module Features
|
4
|
+
modules = [:ocf, :package]
|
5
|
+
attr_reader *modules
|
6
|
+
attr_accessor :epub_file
|
7
|
+
modules.each do |mod|
|
8
|
+
define_method "#{mod}=" do |obj|
|
9
|
+
instance_variable_set "@#{mod}", obj
|
10
|
+
obj.book = self
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
Publication::Package::CONTENT_MODELS.each do |model|
|
15
|
+
define_method model do
|
16
|
+
package.__send__(model)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
%w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |met|
|
21
|
+
define_method met do
|
22
|
+
metadata.__send__(met)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
%w[nav].each do |met|
|
27
|
+
define_method met do
|
28
|
+
manifest.__send__ met
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# @overload each_page_on_spine(&blk)
|
33
|
+
# iterate over items in order of spine when block given
|
34
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
35
|
+
# @overload each_page_on_spine
|
36
|
+
# @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
|
37
|
+
def each_page_on_spine(&blk)
|
38
|
+
enum = package.spine.items
|
39
|
+
if block_given?
|
40
|
+
enum.each &blk
|
41
|
+
else
|
42
|
+
enum
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def each_page_on_toc(&blk)
|
47
|
+
raise NotImplementedError
|
48
|
+
end
|
49
|
+
|
50
|
+
# @overload each_content(&blk)
|
51
|
+
# iterate all items over when block given
|
52
|
+
# @yieldparam item [Publication::Package::Manifest::Item]
|
53
|
+
# @overload each_content
|
54
|
+
# @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
|
55
|
+
def each_content(&blk)
|
56
|
+
enum = manifest.items
|
57
|
+
if block_given?
|
58
|
+
enum.each &blk
|
59
|
+
else
|
60
|
+
enum.to_enum
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def other_navigation
|
65
|
+
raise NotImplementedError
|
66
|
+
end
|
67
|
+
|
68
|
+
# @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
|
69
|
+
def resources
|
70
|
+
manifest.items
|
71
|
+
end
|
72
|
+
|
73
|
+
# Syntax sugar
|
74
|
+
def rootfile_path
|
75
|
+
ocf.container.rootfile.full_path.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
# Syntax sugar
|
79
|
+
def cover_image
|
80
|
+
manifest.cover_image
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
data/lib/epub/book.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
module EPUB
|
2
|
+
module Constants
|
3
|
+
NAMESPACES = {
|
4
|
+
'dc' => 'http://purl.org/dc/elements/1.1/',
|
5
|
+
'ocf' => 'urn:oasis:names:tc:opendocument:xmlns:container',
|
6
|
+
'opf' => 'http://www.idpf.org/2007/opf',
|
7
|
+
'xhtml' => 'http://www.w3.org/1999/xhtml',
|
8
|
+
'epub' => 'http://www.idpf.org/2007/ops',
|
9
|
+
'm' => 'http://www.w3.org/1998/Math/MathML',
|
10
|
+
'svg' => 'http://www.w3.org/2000/svg',
|
11
|
+
'smil' => 'http://www.w3.org/ns/SMIL'
|
12
|
+
}
|
13
|
+
|
14
|
+
module MediaType
|
15
|
+
# @deprecated Use {UnsupportedMediaType} instead
|
16
|
+
class UnsupportedError < StandardError; end
|
17
|
+
class UnsupportedMediaType < StandardError; end
|
18
|
+
|
19
|
+
EPUB = 'application/epub+zip'
|
20
|
+
ROOTFILE = 'application/oebps-package+xml'
|
21
|
+
IMAGE = %w[
|
22
|
+
image/gif
|
23
|
+
image/jpeg
|
24
|
+
image/png
|
25
|
+
image/svg+xml
|
26
|
+
]
|
27
|
+
APPLICATION = %w[
|
28
|
+
application/xhtml+xml
|
29
|
+
application/x-dtbncx+xml
|
30
|
+
application/vnd.ms-opentype
|
31
|
+
application/font-woff
|
32
|
+
application/smil+xml
|
33
|
+
application/pls+xml
|
34
|
+
]
|
35
|
+
AUDIO = %w[
|
36
|
+
audio/mpeg
|
37
|
+
audio/mp4
|
38
|
+
]
|
39
|
+
TEXT = %w[
|
40
|
+
text/css
|
41
|
+
text/javascript
|
42
|
+
]
|
43
|
+
CORE = IMAGE + APPLICATION + AUDIO + TEXT
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
include Constants
|
48
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module EPUB
|
2
|
+
module ContentDocument
|
3
|
+
class Navigation < XHTML
|
4
|
+
attr_accessor :navigations
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@navigations = []
|
8
|
+
super
|
9
|
+
end
|
10
|
+
|
11
|
+
def toc
|
12
|
+
navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
|
13
|
+
end
|
14
|
+
|
15
|
+
def page_list
|
16
|
+
navigations.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
|
17
|
+
end
|
18
|
+
|
19
|
+
def landmarks
|
20
|
+
navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
|
21
|
+
end
|
22
|
+
|
23
|
+
# Enumerator version of toc
|
24
|
+
# Usage: nagivation.enum_for(:contents)
|
25
|
+
def contents
|
26
|
+
end
|
27
|
+
|
28
|
+
# Enumerator version of page_list
|
29
|
+
# Usage: navigation.enum_for(:pages)
|
30
|
+
def pages
|
31
|
+
end
|
32
|
+
|
33
|
+
# iterator for #toc
|
34
|
+
def each_content
|
35
|
+
end
|
36
|
+
|
37
|
+
# iterator for #page_list
|
38
|
+
def each_page
|
39
|
+
end
|
40
|
+
|
41
|
+
# iterator for #landmark
|
42
|
+
def each_landmark
|
43
|
+
end
|
44
|
+
|
45
|
+
def navigation
|
46
|
+
navigations.first
|
47
|
+
end
|
48
|
+
|
49
|
+
module Hidable
|
50
|
+
attr_accessor :hidden, :parent
|
51
|
+
|
52
|
+
def hidden?
|
53
|
+
if @hidden.nil?
|
54
|
+
@parent ? @parent.hidden? : false
|
55
|
+
else
|
56
|
+
true
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
class Item
|
62
|
+
include Hidable
|
63
|
+
|
64
|
+
attr_accessor :items, :text,
|
65
|
+
:content_document, :href, :item
|
66
|
+
|
67
|
+
def initialize
|
68
|
+
@items = ItemList.new
|
69
|
+
@items.parent = self
|
70
|
+
end
|
71
|
+
|
72
|
+
def traverse(depth=0, &block)
|
73
|
+
block.call self, depth
|
74
|
+
items.each do |item|
|
75
|
+
item.traverse depth + 1, &block
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
class Navigation < Item
|
81
|
+
module Type
|
82
|
+
TOC = 'toc'
|
83
|
+
PAGE_LIST = 'page_list'
|
84
|
+
LANDMARKS = 'landmarks'
|
85
|
+
end
|
86
|
+
|
87
|
+
attr_accessor :type
|
88
|
+
alias navigations items
|
89
|
+
alias navigations= items=
|
90
|
+
alias heading text
|
91
|
+
alias heading= text=
|
92
|
+
end
|
93
|
+
|
94
|
+
class ItemList < Array
|
95
|
+
include Hidable
|
96
|
+
|
97
|
+
def <<(item)
|
98
|
+
super
|
99
|
+
item.parent = self
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module EPUB
|
2
|
+
module ContentDocument
|
3
|
+
class XHTML
|
4
|
+
attr_accessor :item
|
5
|
+
|
6
|
+
# @return [String] Returns the content string.
|
7
|
+
def read
|
8
|
+
item.read
|
9
|
+
end
|
10
|
+
alias raw_document read
|
11
|
+
|
12
|
+
# @return [true|false] Whether referenced directly from spine or not.
|
13
|
+
def top_level?
|
14
|
+
!! item.itemref
|
15
|
+
end
|
16
|
+
|
17
|
+
# @return [String] Returns the value of title element.
|
18
|
+
# If none, returns empty string
|
19
|
+
def title
|
20
|
+
title_elem = nokogiri.search('title').first
|
21
|
+
if title_elem
|
22
|
+
title_elem.text
|
23
|
+
else
|
24
|
+
warn 'title element not found'
|
25
|
+
''
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# @return [REXML::Document] content as REXML::Document object
|
30
|
+
def rexml
|
31
|
+
require 'rexml/document'
|
32
|
+
@rexml ||= REXML::Document.new(raw_document)
|
33
|
+
end
|
34
|
+
|
35
|
+
# @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
|
36
|
+
def nokogiri
|
37
|
+
@nokogiri ||= Nokogiri.XML(raw_document)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module EPUB
|
2
|
+
module Inspector
|
3
|
+
INSTANCE_VARIABLES_OPTION = {:exclude => []}
|
4
|
+
SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"
|
5
|
+
|
6
|
+
def inspect_simply
|
7
|
+
SIMPLE_TEMPLATE % {
|
8
|
+
:class => self.class,
|
9
|
+
:object_id => inspect_object_id
|
10
|
+
}
|
11
|
+
end
|
12
|
+
|
13
|
+
def inspect_object_id
|
14
|
+
(__id__ << 1).to_s(16)
|
15
|
+
end
|
16
|
+
|
17
|
+
def inspect_instance_variables(options={})
|
18
|
+
options = INSTANCE_VARIABLES_OPTION.merge(options)
|
19
|
+
exclude = options[:exclude]
|
20
|
+
|
21
|
+
(instance_variables - exclude).map {|name|
|
22
|
+
value = instance_variable_get(name)
|
23
|
+
"#{name}=#{value.inspect}"
|
24
|
+
}.join(' ')
|
25
|
+
end
|
26
|
+
|
27
|
+
module PublicationModel
|
28
|
+
TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"
|
29
|
+
class << self
|
30
|
+
def included(mod)
|
31
|
+
mod.__send__ :include, Inspector
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def inspect
|
36
|
+
TEMPLATE % {
|
37
|
+
:class => self.class,
|
38
|
+
:package => package.inspect_simply,
|
39
|
+
:object_id => inspect_object_id,
|
40
|
+
:attributes => inspect_instance_variables(exclude: [:@package])
|
41
|
+
}
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|