epub-parser 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +3 -0
- data/.yardopts +3 -0
- data/Gemfile +1 -1
- data/MIT-LICENSE +1 -1
- data/README.markdown +64 -11
- data/Rakefile +16 -6
- data/bin/epub-open +25 -0
- data/bin/epubinfo +1 -1
- data/epub-parser.gemspec +1 -2
- data/lib/epub.rb +1 -2
- data/lib/epub/constants.rb +3 -1
- data/lib/epub/content_document/navigation.rb +31 -42
- data/lib/epub/content_document/xhtml.rb +12 -0
- data/lib/epub/parser.rb +36 -6
- data/lib/epub/parser/content_document.rb +74 -6
- data/lib/epub/parser/ocf.rb +4 -1
- data/lib/epub/parser/publication.rb +34 -26
- data/lib/epub/parser/utils.rb +20 -0
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +12 -3
- data/lib/epub/publication/package/metadata.rb +29 -1
- data/test/helper.rb +1 -0
- data/test/test_parser.rb +0 -1
- data/test/test_parser_content_document.rb +17 -4
- data/test/test_parser_ocf.rb +0 -1
- data/test/test_parser_publication.rb +0 -1
- data/test/test_publication.rb +20 -0
- metadata +140 -101
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6b83027d3e13f05cfbfae7040371f72608502d9b
|
4
|
+
data.tar.gz: 4aced6978cc17c4301c7f478e1197b6701d909b1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fcba9ccf2de4b1ab26f3443836e10ead323fcf9139164abd28385cc08e0838339fd3a6287b358b968b83f44a3fa5277c785c13000494581bc874975a84f82e27
|
7
|
+
data.tar.gz: f1109a47200e5e0c94d67fecda3dbd464e170883fff35812bea44fb44cd780b66229f94a3dd9bdde162d8a82473596a1b76b595bfc2c11de3196db9c3ba3b977
|
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
CHANGED
data/MIT-LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2011, 2012 KITAITIMAKOTO <KitaitiMakoto@gmail.com>
|
1
|
+
Copyright (c) 2011, 2012, 2013 KITAITIMAKOTO <KitaitiMakoto@gmail.com>
|
2
2
|
|
3
3
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
4
|
|
data/README.markdown
CHANGED
@@ -9,19 +9,11 @@ INSTALLATION
|
|
9
9
|
USAGE
|
10
10
|
-----
|
11
11
|
|
12
|
-
### As a command line tool
|
13
|
-
|
14
|
-
epubinfo path/to/book.epub
|
15
|
-
|
16
|
-
For more info:
|
17
|
-
|
18
|
-
epubinfo -h
|
19
|
-
|
20
12
|
### As a library
|
21
13
|
|
22
14
|
require 'epub/parser'
|
23
15
|
|
24
|
-
book = EPUB::Parser.parse
|
16
|
+
book = EPUB::Parser.parse('book.epub')
|
25
17
|
book.each_page_on_spine do |page|
|
26
18
|
# do something...
|
27
19
|
end
|
@@ -31,25 +23,86 @@ See the [wiki][] or [API Documentation][rubydoc] for more info.
|
|
31
23
|
[wiki]: https://github.com/KitaitiMakoto/epub-parser/wiki
|
32
24
|
[rubydoc]: http://rubydoc.info/gems/epub-parser/frames
|
33
25
|
|
26
|
+
### `epubinfo` command-line tool
|
27
|
+
|
28
|
+
The `epubinfo` tool extracts and shows the metadata of the specified EPUB book.
|
29
|
+
|
30
|
+
epubinfo path/to/book.epub
|
31
|
+
|
32
|
+
For more info:
|
33
|
+
|
34
|
+
epubinfo -h
|
35
|
+
|
36
|
+
### `epub-open` command-line tool
|
37
|
+
|
38
|
+
The `epub-open` tool provides an interactive shell (IRB) which helps you explore an EPUB book.
|
39
|
+
|
40
|
+
epub-open path/to/book.epub
|
41
|
+
|
42
|
+
IRB starts. `self` becomes the EPUB book, so you can call the methods of `EPUB` directly.
|
43
|
+
|
44
|
+
title
|
45
|
+
=> "Title of the book"
|
46
|
+
metadata.creators
|
47
|
+
=> [Author 1, Author2, ...]
|
48
|
+
resources.first.properties
|
49
|
+
=> ["nav"] # This tells you that the first resource of this book is the nav document
|
50
|
+
nav = resources.first
|
51
|
+
=> ...
|
52
|
+
nav.href
|
53
|
+
=> #<Addressable::URI:0x15ce350 URI:nav.xhtml>
|
54
|
+
nav.media_type
|
55
|
+
=> "application/xhtml+xml"
|
56
|
+
puts nav.read
|
57
|
+
<?xml version="1.0"?>
|
58
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
59
|
+
:
|
60
|
+
:
|
61
|
+
:
|
62
|
+
</html>
|
63
|
+
=> nil
|
64
|
+
exit # Enter "exit" to end the session
|
65
|
+
|
66
|
+
For command-line options:
|
67
|
+
|
68
|
+
epub-open -h
|
69
|
+
|
70
|
+
Development of this tool is still in progress.
|
71
|
+
Comments and suggestions are welcome!
|
72
|
+
|
34
73
|
REQUIREMENTS
|
35
74
|
------------
|
36
75
|
* libxml2 and libxslt for Nokogiri gem
|
76
|
+
* C compiler to compile Zip/Ruby and Nokogiri
|
37
77
|
|
38
78
|
CHANGELOG
|
39
79
|
---------
|
80
|
+
### 0.1.3
|
81
|
+
* Add `EPUB::Parser::Utils` module
|
82
|
+
* Add a command-line tool `epub-open`
|
83
|
+
* Add support for XHTML Navigation Document
|
84
|
+
* Make `EPUB::Publication::Package::Metadata#to_hash` obsolete. Use `#to_h` instead
|
85
|
+
* Add utility methods `EPUB#description`, `EPUB#date` and `EPUB#unique_identifier`
|
86
|
+
|
40
87
|
### 0.1.2
|
41
|
-
* Fix a bug that `Item#read` couldn't read file when `href` is percent-encoded(Thanks, gambhiro!)
|
88
|
+
* Fix a bug where `Item#read` couldn't read a file when `href` is percent-encoded (Thanks, [gambhiro][]!)
|
89
|
+
|
90
|
+
[gambhiro]: https://github.com/gambhiro
|
42
91
|
|
43
92
|
### 0.1.1
|
44
93
|
* Parse package@prefix and attach it as `Package#prefix`
|
45
|
-
* `Manifest::Item#iri`
|
94
|
+
* `Manifest::Item#iri` was removed. It existed for files in unzipped EPUB books, but EPUB Parser now retrieves files from the zip archive directly. `#href` now returns an `Addressable::URI` object.
|
46
95
|
* `Metadata::Link#iri`: ditto.
|
47
96
|
* `Guide::Reference#iri`: ditto.
|
48
97
|
|
49
98
|
TODOS
|
50
99
|
-----
|
100
|
+
* Simple inspect for `epub-open` tool
|
101
|
+
* Help features for `epub-open` tool
|
51
102
|
* Vocabulary Association Mechanisms
|
52
103
|
* Implementing navigation document and so on
|
104
|
+
* Media Overlays
|
105
|
+
* Content Document
|
53
106
|
* Fixed Layout
|
54
107
|
* Digital Signature
|
55
108
|
* Using SAX on parsing
|
data/Rakefile
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require 'bundler/gem_helper'
|
2
|
-
require 'rake/testtask'
|
3
2
|
require 'rake/clean'
|
3
|
+
require 'rake/testtask'
|
4
4
|
require 'yard'
|
5
|
+
require 'rdoc/task'
|
5
6
|
require 'cucumber'
|
6
7
|
require 'cucumber/rake/task'
|
7
8
|
require 'epub/parser/version'
|
@@ -30,24 +31,33 @@ namespace :test do
|
|
30
31
|
Cucumber::Rake::Task.new
|
31
32
|
end
|
32
33
|
|
34
|
+
task :doc => 'doc:default'
|
33
35
|
|
34
|
-
|
35
|
-
task
|
36
|
+
namespace :doc do
|
37
|
+
task :default => [:yard, :rdoc]
|
38
|
+
|
39
|
+
YARD::Rake::YardocTask.new
|
40
|
+
Rake::RDocTask.new do |rdoc|
|
41
|
+
rdoc.rdoc_files = FileList['lib/**/*.rb']
|
42
|
+
rdoc.rdoc_files.include 'README.markdown'
|
43
|
+
rdoc.rdoc_files.include 'MIT-LICENSE'
|
44
|
+
rdoc.rdoc_files.include 'wiki/**/*.md'
|
45
|
+
end
|
36
46
|
end
|
37
47
|
|
38
48
|
namespace :gem do
|
39
49
|
desc "Build epub-parser-#{EPUB::Parser::VERSION}.gem into the pkg directory."
|
40
|
-
task :build
|
50
|
+
task :build do
|
41
51
|
Bundler::GemHelper.new.build_gem
|
42
52
|
end
|
43
53
|
|
44
54
|
desc "Build and install epub-parser-#{EPUB::Parser::VERSION}.gem into system gems."
|
45
|
-
task :install
|
55
|
+
task :install do
|
46
56
|
Bundler::GemHelper.new.install_gem
|
47
57
|
end
|
48
58
|
|
49
59
|
desc "Create tag v#{EPUB::Parser::VERSION} and build and push epub-parser-#{EPUB::Parser::VERSION}.gem to Rubygems"
|
50
|
-
task :release
|
60
|
+
task :release do
|
51
61
|
Bundler::GemHelper.new.release_gem
|
52
62
|
end
|
53
63
|
end
|
data/bin/epub-open
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'English'
|
2
|
+
require 'optparse'
|
3
|
+
require 'irb'
|
4
|
+
require 'epub/parser'
|
5
|
+
|
6
|
+
shell = IRB
|
7
|
+
|
8
|
+
OptionParser.new {|opt|
|
9
|
+
opt.banner = <<EOB
|
10
|
+
Open EPUB file in IRB
|
11
|
+
|
12
|
+
Usage: #{File.basename($PROGRAM_NAME)} EPUBFILE
|
13
|
+
|
14
|
+
EOB
|
15
|
+
opt.on '--pry', 'Use Pry instead of IRB as shell' do
|
16
|
+
require 'pry'
|
17
|
+
shell = Pry
|
18
|
+
end
|
19
|
+
}.parse!
|
20
|
+
|
21
|
+
$0 = File.basename($PROGRAM_NAME)
|
22
|
+
include EPUB
|
23
|
+
EPUB::Parser.parse(ARGV.shift, :book => self)
|
24
|
+
$stderr.puts "Enter \"exit\" to exit #{shell}"
|
25
|
+
shell.start
|
data/bin/epubinfo
CHANGED
@@ -28,7 +28,7 @@ end
|
|
28
28
|
|
29
29
|
book = EPUB::Parser.parse(file)
|
30
30
|
data = {'Title' => [book.title]}
|
31
|
-
data.merge!(book.metadata.
|
31
|
+
data.merge!(book.metadata.to_h)
|
32
32
|
data['Unique identifier'] = [book.metadata.unique_identifier]
|
33
33
|
data['EPUB Version'] = [book.package.version]
|
34
34
|
if options[:format] == :line
|
data/epub-parser.gemspec
CHANGED
@@ -18,7 +18,6 @@ Gem::Specification.new do |s|
|
|
18
18
|
.push('test/fixtures/book/OPS/ルートファイル.opf')
|
19
19
|
.push('test/fixtures/book/OPS/日本語.xhtml')
|
20
20
|
.push(Dir['wiki/*.md'])
|
21
|
-
.push(Dir['doc/*'])
|
22
21
|
s.files.reject! do |fn|
|
23
22
|
['"test/fixtures/book/OPS/\343\203\253\343\203\274\343\203\210\343\203\225\343\202\241\343\202\244\343\203\253.opf"', '"test/fixtures/book/OPS/\346\227\245\346\234\254\350\252\236.xhtml"'].include? fn
|
24
23
|
end
|
@@ -44,6 +43,6 @@ Gem::Specification.new do |s|
|
|
44
43
|
|
45
44
|
s.add_runtime_dependency 'enumerabler'
|
46
45
|
s.add_runtime_dependency 'zipruby'
|
47
|
-
s.add_runtime_dependency 'nokogiri'
|
46
|
+
s.add_runtime_dependency 'nokogiri', '1.5.8'
|
48
47
|
s.add_runtime_dependency 'addressable'
|
49
48
|
end
|
data/lib/epub.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'epub/ocf'
|
2
2
|
require 'epub/publication'
|
3
3
|
require 'epub/content_document'
|
4
|
-
require 'epub/parser'
|
5
4
|
|
6
5
|
module EPUB
|
7
6
|
modules = [ :ocf, :package, :content_document ]
|
@@ -26,7 +25,7 @@ module EPUB
|
|
26
25
|
end
|
27
26
|
end
|
28
27
|
|
29
|
-
%w[ title main_title subtitle short_title collection_title edition_title extended_title ].each do |met|
|
28
|
+
%w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |met|
|
30
29
|
define_method met do
|
31
30
|
metadata.__send__(met)
|
32
31
|
end
|
data/lib/epub/constants.rb
CHANGED
@@ -7,12 +7,14 @@ module EPUB
|
|
7
7
|
'xhtml' => 'http://www.w3.org/1999/xhtml',
|
8
8
|
'epub' => 'http://www.idpf.org/2007/ops',
|
9
9
|
'm' => 'http://www.w3.org/1998/Math/MathML',
|
10
|
-
'svg' => 'http://www.w3.org/2000/svg'
|
10
|
+
'svg' => 'http://www.w3.org/2000/svg',
|
11
|
+
'smil' => 'http://www.w3.org/ns/SMIL'
|
11
12
|
}
|
12
13
|
|
13
14
|
module MediaType
|
14
15
|
class UnsupportedError < StandardError; end
|
15
16
|
|
17
|
+
EPUB = 'application/epub+zip'
|
16
18
|
ROOTFILE = 'application/oebps-package+xml'
|
17
19
|
IMAGE = %w[
|
18
20
|
image/gif
|
@@ -1,26 +1,25 @@
|
|
1
|
+
require 'epub/content_document/xhtml'
|
2
|
+
|
1
3
|
module EPUB
|
2
4
|
module ContentDocument
|
3
|
-
class Navigation
|
4
|
-
|
5
|
-
TOC = 'toc'
|
6
|
-
PAGE_LIST = 'page_list'
|
7
|
-
LANDMARKS = 'landmarks'
|
8
|
-
end
|
5
|
+
class Navigation < XHTML
|
6
|
+
attr_accessor :navigations
|
9
7
|
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
def initialize
|
9
|
+
@navigations = []
|
10
|
+
super
|
11
|
+
end
|
13
12
|
|
14
13
|
def toc
|
15
|
-
|
14
|
+
items.selector {|nav| nav.type == Navigation::Type::TOC}.first
|
16
15
|
end
|
17
16
|
|
18
17
|
def page_list
|
19
|
-
|
18
|
+
items.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
|
20
19
|
end
|
21
20
|
|
22
21
|
def landmarks
|
23
|
-
|
22
|
+
items.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
|
24
23
|
end
|
25
24
|
|
26
25
|
# Enumerator version of toc
|
@@ -45,41 +44,31 @@ module EPUB
|
|
45
44
|
def each_landmark
|
46
45
|
end
|
47
46
|
|
48
|
-
class
|
49
|
-
attr_accessor :
|
50
|
-
:
|
51
|
-
:type, # toc, page-list, landmarks or other
|
52
|
-
:hidden
|
47
|
+
class Item
|
48
|
+
attr_accessor :items, :text, :hidden,
|
49
|
+
:content_document, :href, :item
|
53
50
|
|
54
|
-
|
55
|
-
|
56
|
-
def hidden?
|
51
|
+
def initialize
|
52
|
+
@items = []
|
57
53
|
end
|
58
54
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def hidden?
|
64
|
-
end
|
65
|
-
|
66
|
-
# may be followed by ol or be a leaf node
|
67
|
-
class A
|
68
|
-
attr_accessor :ol, # optional
|
69
|
-
:hidden
|
70
|
-
|
71
|
-
def hidden?
|
72
|
-
end
|
73
|
-
end
|
55
|
+
def hidden?
|
56
|
+
!! hidden
|
57
|
+
end
|
58
|
+
end
|
74
59
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
end
|
81
|
-
end
|
60
|
+
class Navigation < Item
|
61
|
+
module Type
|
62
|
+
TOC = 'toc'
|
63
|
+
PAGE_LIST = 'page_list'
|
64
|
+
LANDMARKS = 'landmarks'
|
82
65
|
end
|
66
|
+
|
67
|
+
attr_accessor :type
|
68
|
+
alias navigations items
|
69
|
+
alias navigations= items=
|
70
|
+
alias heading text
|
71
|
+
alias heading= text=
|
83
72
|
end
|
84
73
|
end
|
85
74
|
end
|
data/lib/epub/parser.rb
CHANGED
@@ -1,16 +1,40 @@
|
|
1
|
+
require 'epub'
|
1
2
|
require 'epub/constants'
|
2
|
-
require 'epub/parser/version'
|
3
|
-
require 'epub/parser/ocf'
|
4
|
-
require 'epub/parser/publication'
|
5
|
-
require 'epub/parser/content_document'
|
6
3
|
require 'zipruby'
|
7
4
|
require 'nokogiri'
|
8
5
|
|
9
6
|
module EPUB
|
10
7
|
class Parser
|
11
8
|
class << self
|
12
|
-
|
13
|
-
|
9
|
+
# Parse an EPUB file
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# EPUB::Parser.parse('path/to/book.epub') # => EPUB::Book object
|
13
|
+
#
|
14
|
+
# @example
|
15
|
+
# class MyBook
|
16
|
+
# include EPUB
|
17
|
+
# end
|
18
|
+
# book = MyBook.new
|
19
|
+
# parsed_book = EPUB::Parser.parse('path/to/book.epub', :book => book) # => #<MyBook:0x000000019760e8 @epub_file=..>
|
20
|
+
# parsed_book.equal? book # => true
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# book = EPUB::Parser.parse('path/to/book.epub', :class => MyBook) # => #<MyBook:0x000000019b0568 @epub_file=...>
|
24
|
+
# book.instance_of? MyBook # => true
|
25
|
+
#
|
26
|
+
# @param [String] filepath
|
27
|
+
# @param [Hash] options the type of return is specified by this argument.
|
28
|
+
# If no options, returns {EPUB::Book} object.
|
29
|
+
# For details of options, see below.
|
30
|
+
# @option options [EPUB] :book instance of class which includes {EPUB} module
|
31
|
+
# @option options [Class] :class class which includes {EPUB} module
|
32
|
+
# @return [EPUB] object which is an instance of class including {EPUB} module.
|
33
|
+
# When option :book passed, returns the same object whose attributes about EPUB are set.
|
34
|
+
# When option :class passed, returns the instance of the class.
|
35
|
+
# Otherwise returns {EPUB::Book} object.
|
36
|
+
def parse(filepath, options = {})
|
37
|
+
new(filepath, options).parse
|
14
38
|
end
|
15
39
|
end
|
16
40
|
|
@@ -48,3 +72,9 @@ module EPUB
|
|
48
72
|
end
|
49
73
|
end
|
50
74
|
end
|
75
|
+
|
76
|
+
require 'epub/parser/version'
|
77
|
+
require 'epub/parser/utils'
|
78
|
+
require 'epub/parser/ocf'
|
79
|
+
require 'epub/parser/publication'
|
80
|
+
require 'epub/parser/content_document'
|