epub-parser-io 0.1.6a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.gitignore +12 -0
- data/.gitmodules +3 -0
- data/.travis.yml +4 -0
- data/.yardopts +10 -0
- data/CHANGELOG.markdown +61 -0
- data/Gemfile +2 -0
- data/MIT-LICENSE +7 -0
- data/README.markdown +174 -0
- data/Rakefile +68 -0
- data/bin/epub-open +25 -0
- data/bin/epubinfo +64 -0
- data/docs/EpubOpen.markdown +43 -0
- data/docs/Epubinfo.markdown +37 -0
- data/docs/FixedLayout.markdown +96 -0
- data/docs/Home.markdown +128 -0
- data/docs/Item.markdown +80 -0
- data/docs/Navigation.markdown +58 -0
- data/docs/Publication.markdown +54 -0
- data/epub-parser.gemspec +49 -0
- data/features/epubinfo.feature +6 -0
- data/features/step_definitions/epubinfo_steps.rb +5 -0
- data/features/support/env.rb +1 -0
- data/lib/epub/book/features.rb +85 -0
- data/lib/epub/book.rb +7 -0
- data/lib/epub/constants.rb +48 -0
- data/lib/epub/content_document/navigation.rb +104 -0
- data/lib/epub/content_document/xhtml.rb +41 -0
- data/lib/epub/content_document.rb +2 -0
- data/lib/epub/inspector.rb +45 -0
- data/lib/epub/ocf/container.rb +28 -0
- data/lib/epub/ocf/encryption.rb +7 -0
- data/lib/epub/ocf/manifest.rb +6 -0
- data/lib/epub/ocf/metadata.rb +6 -0
- data/lib/epub/ocf/rights.rb +6 -0
- data/lib/epub/ocf/signatures.rb +6 -0
- data/lib/epub/ocf.rb +8 -0
- data/lib/epub/parser/content_document.rb +111 -0
- data/lib/epub/parser/ocf.rb +73 -0
- data/lib/epub/parser/publication.rb +200 -0
- data/lib/epub/parser/utils.rb +20 -0
- data/lib/epub/parser/version.rb +5 -0
- data/lib/epub/parser.rb +103 -0
- data/lib/epub/publication/fixed_layout.rb +208 -0
- data/lib/epub/publication/package/bindings.rb +31 -0
- data/lib/epub/publication/package/guide.rb +51 -0
- data/lib/epub/publication/package/manifest.rb +180 -0
- data/lib/epub/publication/package/metadata.rb +170 -0
- data/lib/epub/publication/package/spine.rb +106 -0
- data/lib/epub/publication/package.rb +68 -0
- data/lib/epub/publication.rb +2 -0
- data/lib/epub.rb +14 -0
- data/man/epubinfo.1.ronn +19 -0
- data/schemas/epub-nav-30.rnc +10 -0
- data/schemas/epub-nav-30.sch +72 -0
- data/schemas/epub-xhtml-30.sch +377 -0
- data/schemas/ocf-container-30.rnc +16 -0
- data/test/fixtures/book/META-INF/container.xml +6 -0
- data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
- data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
- data/test/fixtures/book/OPS/containing space.xhtml +10 -0
- data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
- data/test/fixtures/book/OPS/nav.xhtml +28 -0
- data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
- data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
- data/test/fixtures/book/mimetype +1 -0
- data/test/helper.rb +9 -0
- data/test/test_content_document.rb +92 -0
- data/test/test_epub.rb +21 -0
- data/test/test_fixed_layout.rb +257 -0
- data/test/test_inspect.rb +121 -0
- data/test/test_parser.rb +60 -0
- data/test/test_parser_content_document.rb +36 -0
- data/test/test_parser_fixed_layout.rb +16 -0
- data/test/test_parser_ocf.rb +38 -0
- data/test/test_parser_publication.rb +247 -0
- data/test/test_publication.rb +324 -0
- metadata +445 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
{file:docs/Home.markdown} > **{file:docs/Publication.markdown}**
|
2
|
+
|
3
|
+
Publication(Information about EPUB book)
|
4
|
+
========================================
|
5
|
+
|
6
|
+
EPUB Publications is information about EPUB books.
|
7
|
+
|
8
|
+
EPUB Parser represents it as the {EPUB::Publication} module and the classes under that namespace, and you can access them like `EPUB::Parser.parse("path/to/book.epub").package`
|
9
|
+
|
10
|
+
Let
|
11
|
+
|
12
|
+
book = EPUB::Parser.parse("path/to/book.epub")
|
13
|
+
|
14
|
+
for continuing.
|
15
|
+
|
16
|
+
Five Models
|
17
|
+
-----------
|
18
|
+
|
19
|
+
`book.package` is a package document, a root of information tree about the book, and it has attributes to access five major models of the publication; {EPUB::Publication::Package::Metadata Metadata}, {EPUB::Publication::Package::Manifest Manifest}, {EPUB::Publication::Package::Spine Spine}, {EPUB::Publication::Package::Guide Guide} and {EPUB::Publication::Package::Bindings Bindings}.
|
20
|
+
|
21
|
+
Each of them holds information about the book in its own way.
|
22
|
+
|
23
|
+
Metadata
|
24
|
+
--------
|
25
|
+
|
26
|
+
{EPUB::Publication::Package::Metadata Metadata} is literally metadata of the book, including identifiers, titles, languages, links and so on.
|
27
|
+
|
28
|
+
You can access them by:
|
29
|
+
|
30
|
+
md = book.package.metadata # => EPUB::Publication::Package::Metadata
|
31
|
+
md.titles # => [#<EPUB::Publication::Package::Metadata::Title...>, #<EPUB::Publication::Package::Metadata::Title...>, ...]
|
32
|
+
# ...
|
33
|
+
|
34
|
+
Manifest
|
35
|
+
--------
|
36
|
+
|
37
|
+
Spine
|
38
|
+
-----
|
39
|
+
|
40
|
+
Guide
|
41
|
+
-----
|
42
|
+
|
43
|
+
Bindings
|
44
|
+
--------
|
45
|
+
|
46
|
+
Package
|
47
|
+
-------
|
48
|
+
|
49
|
+
References
|
50
|
+
----------
|
51
|
+
|
52
|
+
* [EPUB Publications 3.0][publications] on IDPF site
|
53
|
+
|
54
|
+
[publications]: http://www.idpf.org/epub/30/spec/epub30-publications.html
|
data/epub-parser.gemspec
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for epub-parser-io.
$:.push File.expand_path("../lib", __FILE__)
require "epub/parser/version"

Gem::Specification.new do |s|
  s.name        = "epub-parser-io"
  s.version     = EPUB::Parser::VERSION
  # One entry per author, parallel to the two addresses in s.email.
  s.authors     = ["KITAITI Makoto", "Brian Glusman"]
  s.email       = ["KitaitiMakoto@gmail.com", "brian@glusman.me"]
  s.homepage    = "https://github.com/bglusman/epub-parser"
  s.summary     = %q{EPUB 3 Parser}
  s.description = %q{Parse EPUB 3 book loosely}
  s.license     = 'MIT'

  # s.rubyforge_project = "epub-parser"

  # Start from the files tracked by git, then add the two fixtures under their
  # plain UTF-8 names and whatever the docs glob matches.
  # NOTE(review): the docs shipped in this release are named *.markdown, so the
  # 'docs/*.md' glob may match nothing -- confirm the intended pattern.
  s.files         = `git ls-files`.split("\n")
                                  .push('test/fixtures/book/OPS/ルートファイル.opf')
                                  .push('test/fixtures/book/OPS/日本語.xhtml')
                                  .concat(Dir['docs/*.md'])
  # Drop the quoted, octal-escaped spellings of the same two fixture paths
  # (presumably how `git ls-files` prints non-ASCII names -- confirm).
  s.files.reject! do |fn|
    ['"test/fixtures/book/OPS/\343\203\253\343\203\274\343\203\210\343\203\225\343\202\241\343\202\244\343\203\253.opf"', '"test/fixtures/book/OPS/\346\227\245\346\234\254\350\252\236.xhtml"'].include? fn
  end
  s.test_files    = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.has_rdoc = 'yard'

  s.add_development_dependency 'rake'
  s.add_development_dependency 'pry'
  s.add_development_dependency 'pry-doc'
  s.add_development_dependency 'test-unit'
  s.add_development_dependency 'test-unit-rr'
  s.add_development_dependency 'test-unit-notify'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'thin'
  s.add_development_dependency 'yard'
  s.add_development_dependency 'gem-man'
  s.add_development_dependency 'ronn'
  s.add_development_dependency 'epzip'
  s.add_development_dependency 'epubcheck'
  s.add_development_dependency 'epub_validator'
  s.add_development_dependency 'aruba'

  s.add_runtime_dependency 'enumerabler'
  s.add_runtime_dependency 'zipruby'
  s.add_runtime_dependency 'nokogiri', '~> 1.6'
  s.add_runtime_dependency 'addressable', '>= 2.3.5'
end
|
@@ -0,0 +1,6 @@
|
|
1
|
+
Feature: We can see information about EPUB file
|
2
|
+
|
3
|
+
Scenario: See info about existing EPUB file
|
4
|
+
Given the file "test/fixtures/book.epub" exists
|
5
|
+
When I successfully run `bundle exec epubinfo /home/ikeda/ruby/projects/epub-parser/test/fixtures/book.epub`
|
6
|
+
Then the stdout should contain "The New French Cuisine Masters"
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'aruba/cucumber'
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module EPUB
  class Book
    # Readers, writers and iterators that forward to a book's +ocf+ and
    # +package+ objects.
    module Features
      component_names = [:ocf, :package]
      attr_reader(*component_names)
      attr_accessor :epub_file

      # Writers for each component: store the object in the matching instance
      # variable, then point the object's +book+ back at the receiver.
      component_names.each do |component|
        define_method "#{component}=" do |obj|
          instance_variable_set "@#{component}", obj
          obj.book = self
        end
      end

      # One reader per package content model, forwarded to #package.
      Publication::Package::CONTENT_MODELS.each do |content_model|
        define_method(content_model) { package.__send__(content_model) }
      end

      # Readers forwarded to #metadata.
      %w[
        title main_title subtitle short_title collection_title
        edition_title extended_title description date unique_identifier
      ].each do |reader|
        define_method(reader) { metadata.__send__(reader) }
      end

      # Forwarded to #manifest.
      def nav
        manifest.__send__('nav')
      end

      # @overload each_page_on_spine(&blk)
      #   yields every element of +package.spine.items+ to the block
      # @overload each_page_on_spine
      #   @return the +package.spine.items+ collection itself when no block is given
      def each_page_on_spine(&blk)
        spine_items = package.spine.items
        return spine_items unless block_given?

        spine_items.each(&blk)
      end

      # Not implemented yet.
      # @raise [NotImplementedError] always
      def each_page_on_toc(&blk)
        raise NotImplementedError
      end

      # @overload each_content(&blk)
      #   yields every element of +manifest.items+ to the block
      # @overload each_content
      #   @return [Enumerator] over +manifest.items+ when no block is given
      def each_content(&blk)
        manifest_items = manifest.items
        return manifest_items.to_enum unless block_given?

        manifest_items.each(&blk)
      end

      # Not implemented yet.
      # @raise [NotImplementedError] always
      def other_navigation
        raise NotImplementedError
      end

      # @return whatever +manifest.items+ returns
      def resources
        manifest.items
      end

      # Shortcut for the rootfile's full path as a String
      def rootfile_path
        rootfile = ocf.container.rootfile
        rootfile.full_path.to_s
      end

      # Shortcut for the manifest's cover image
      def cover_image
        manifest.cover_image
      end

    end
  end
end
|
data/lib/epub/book.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
module EPUB
  # Shared lookup tables: XML namespace URIs and media type names.
  # The hash and the arrays are frozen so they cannot be modified in place
  # through one of the many places that share them.
  module Constants
    # Namespace prefix => namespace URI
    NAMESPACES = {
      'dc'    => 'http://purl.org/dc/elements/1.1/',
      'ocf'   => 'urn:oasis:names:tc:opendocument:xmlns:container',
      'opf'   => 'http://www.idpf.org/2007/opf',
      'xhtml' => 'http://www.w3.org/1999/xhtml',
      'epub'  => 'http://www.idpf.org/2007/ops',
      'm'     => 'http://www.w3.org/1998/Math/MathML',
      'svg'   => 'http://www.w3.org/2000/svg',
      'smil'  => 'http://www.w3.org/ns/SMIL'
    }.freeze

    module MediaType
      # @deprecated Use {UnsupportedMediaType} instead
      class UnsupportedError < StandardError; end
      class UnsupportedMediaType < StandardError; end

      EPUB = 'application/epub+zip'
      ROOTFILE = 'application/oebps-package+xml'
      IMAGE = %w[
        image/gif
        image/jpeg
        image/png
        image/svg+xml
      ].freeze
      APPLICATION = %w[
        application/xhtml+xml
        application/x-dtbncx+xml
        application/vnd.ms-opentype
        application/font-woff
        application/smil+xml
        application/pls+xml
      ].freeze
      AUDIO = %w[
        audio/mpeg
        audio/mp4
      ].freeze
      TEXT = %w[
        text/css
        text/javascript
      ].freeze
      # Concatenation of the four lists above, in that order
      CORE = (IMAGE + APPLICATION + AUDIO + TEXT).freeze
    end
  end

  # Makes the constants reachable directly as EPUB::NAMESPACES, EPUB::MediaType, ...
  include Constants
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module EPUB
  module ContentDocument
    # XHTML content document that carries a list of navigations.
    class Navigation < XHTML
      attr_accessor :navigations

      def initialize
        @navigations = []
        super
      end

      # @return [Navigation, nil] first navigation whose type is {Navigation::Type::TOC}, nil when there is none
      def toc
        navigations.find {|nav| nav.type == Navigation::Type::TOC}
      end

      # @return [Navigation, nil] first navigation whose type is {Navigation::Type::PAGE_LIST}, nil when there is none
      def page_list
        navigations.find {|nav| nav.type == Navigation::Type::PAGE_LIST}
      end

      # @return [Navigation, nil] first navigation whose type is {Navigation::Type::LANDMARKS}, nil when there is none
      def landmarks
        navigations.find {|nav| nav.type == Navigation::Type::LANDMARKS}
      end

      # Enumerator version of toc
      #  Usage: navigation.enum_for(:contents)
      # Empty placeholder for now: returns nil.
      def contents
      end

      # Enumerator version of page_list
      #  Usage: navigation.enum_for(:pages)
      # Empty placeholder for now: returns nil.
      def pages
      end

      # iterator for #toc
      # Empty placeholder for now: returns nil.
      def each_content
      end

      # iterator for #page_list
      # Empty placeholder for now: returns nil.
      def each_page
      end

      # iterator for #landmark
      # Empty placeholder for now: returns nil.
      def each_landmark
      end

      # @return [Navigation, nil] the first navigation, nil when the list is empty
      def navigation
        navigations.first
      end

      # Hidden state that falls back to the parent when not set locally.
      module Hidable
        attr_accessor :hidden, :parent

        # @return [true|false] when +hidden+ is nil the answer comes from the
        #   parent (false without a parent); otherwise true.
        # NOTE(review): any non-nil value counts as hidden, including an
        # explicit +false+ -- confirm this is intended.
        def hidden?
          if @hidden.nil?
            @parent ? @parent.hidden? : false
          else
            true
          end
        end
      end

      # Node of a navigation tree; its children live in an {ItemList}.
      class Item
        include Hidable

        attr_accessor :items, :text,
                      :content_document, :href, :item

        def initialize
          @items = ItemList.new
          @items.parent = self
        end

        # Calls the block with this item and its depth, then recurses into
        # each child with depth + 1 (pre-order). A block is required.
        def traverse(depth=0, &block)
          block.call self, depth
          items.each do |item|
            item.traverse depth + 1, &block
          end
        end
      end

      # An {Item} with a type; +navigations+ and +heading+ are aliases of
      # +items+ and +text+.
      class Navigation < Item
        module Type
          TOC       = 'toc'
          # NOTE(review): underscore spelling kept as found -- confirm it
          # matches the type values the parser assigns.
          PAGE_LIST = 'page_list'
          LANDMARKS = 'landmarks'
        end

        attr_accessor :type
        alias navigations items
        alias navigations= items=
        alias heading text
        alias heading= text=
      end

      # Array of items that sets itself as the parent of every item appended with <<.
      class ItemList < Array
        include Hidable

        # Appends the item and makes this list its parent.
        # @return [ItemList] self
        def <<(item)
          super
          item.parent = self
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module EPUB
  module ContentDocument
    # Content document backed by an +item+ that supplies the raw content.
    class XHTML
      attr_accessor :item

      # @return [String] whatever +item.read+ returns, i.e. the content string
      def read
        item.read
      end
      alias raw_document read

      # @return [true|false] true when the item has an itemref
      def top_level?
        item.itemref ? true : false
      end

      # @return [String] text of the first title element;
      #   an empty string (after a warning) when there is none
      def title
        first_title = nokogiri.search('title').first
        return first_title.text if first_title

        warn 'title element not found'
        ''
      end

      # Parsed lazily and memoized; rexml is loaded on first use.
      # @return [REXML::Document] content as REXML::Document object
      def rexml
        require 'rexml/document'
        @rexml ||= REXML::Document.new(raw_document)
      end

      # Parsed lazily and memoized.
      # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
      def nokogiri
        @nokogiri ||= Nokogiri.XML(raw_document)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module EPUB
  # Helpers for building compact #inspect strings.
  module Inspector
    INSTANCE_VARIABLES_OPTION = {:exclude => []}
    SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"

    # @return [String] class name and object id only, e.g. "#<Foo:3fd2a0>"
    def inspect_simply
      fields = {:class => self.class, :object_id => inspect_object_id}
      SIMPLE_TEMPLATE % fields
    end

    # @return [String] __id__ shifted left by one bit, in hexadecimal
    def inspect_object_id
      shifted = __id__ << 1
      shifted.to_s(16)
    end

    # @param options [Hash] :exclude => names of instance variables to leave out
    # @return [String] space-separated "@name=value.inspect" pairs
    def inspect_instance_variables(options={})
      excluded = INSTANCE_VARIABLES_OPTION.merge(options)[:exclude]
      shown = instance_variables - excluded

      shown.map {|ivar| [ivar, instance_variable_get(ivar).inspect].join('=') }.join(' ')
    end

    # #inspect for objects that respond to +package+: the package is shown
    # in its short form instead of being inspected in full.
    module PublicationModel
      TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"

      # Including PublicationModel also includes Inspector.
      def self.included(mod)
        mod.__send__ :include, Inspector
      end

      def inspect
        fields = {
          :class      => self.class,
          :package    => package.inspect_simply,
          :object_id  => inspect_object_id,
          :attributes => inspect_instance_variables(exclude: [:@package])
        }
        TEMPLATE % fields
      end
    end
  end
end
|