epub-parser-io 0.1.6a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/.gemtest +0 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +4 -0
  5. data/.yardopts +10 -0
  6. data/CHANGELOG.markdown +61 -0
  7. data/Gemfile +2 -0
  8. data/MIT-LICENSE +7 -0
  9. data/README.markdown +174 -0
  10. data/Rakefile +68 -0
  11. data/bin/epub-open +25 -0
  12. data/bin/epubinfo +64 -0
  13. data/docs/EpubOpen.markdown +43 -0
  14. data/docs/Epubinfo.markdown +37 -0
  15. data/docs/FixedLayout.markdown +96 -0
  16. data/docs/Home.markdown +128 -0
  17. data/docs/Item.markdown +80 -0
  18. data/docs/Navigation.markdown +58 -0
  19. data/docs/Publication.markdown +54 -0
  20. data/epub-parser.gemspec +49 -0
  21. data/features/epubinfo.feature +6 -0
  22. data/features/step_definitions/epubinfo_steps.rb +5 -0
  23. data/features/support/env.rb +1 -0
  24. data/lib/epub/book/features.rb +85 -0
  25. data/lib/epub/book.rb +7 -0
  26. data/lib/epub/constants.rb +48 -0
  27. data/lib/epub/content_document/navigation.rb +104 -0
  28. data/lib/epub/content_document/xhtml.rb +41 -0
  29. data/lib/epub/content_document.rb +2 -0
  30. data/lib/epub/inspector.rb +45 -0
  31. data/lib/epub/ocf/container.rb +28 -0
  32. data/lib/epub/ocf/encryption.rb +7 -0
  33. data/lib/epub/ocf/manifest.rb +6 -0
  34. data/lib/epub/ocf/metadata.rb +6 -0
  35. data/lib/epub/ocf/rights.rb +6 -0
  36. data/lib/epub/ocf/signatures.rb +6 -0
  37. data/lib/epub/ocf.rb +8 -0
  38. data/lib/epub/parser/content_document.rb +111 -0
  39. data/lib/epub/parser/ocf.rb +73 -0
  40. data/lib/epub/parser/publication.rb +200 -0
  41. data/lib/epub/parser/utils.rb +20 -0
  42. data/lib/epub/parser/version.rb +5 -0
  43. data/lib/epub/parser.rb +103 -0
  44. data/lib/epub/publication/fixed_layout.rb +208 -0
  45. data/lib/epub/publication/package/bindings.rb +31 -0
  46. data/lib/epub/publication/package/guide.rb +51 -0
  47. data/lib/epub/publication/package/manifest.rb +180 -0
  48. data/lib/epub/publication/package/metadata.rb +170 -0
  49. data/lib/epub/publication/package/spine.rb +106 -0
  50. data/lib/epub/publication/package.rb +68 -0
  51. data/lib/epub/publication.rb +2 -0
  52. data/lib/epub.rb +14 -0
  53. data/man/epubinfo.1.ronn +19 -0
  54. data/schemas/epub-nav-30.rnc +10 -0
  55. data/schemas/epub-nav-30.sch +72 -0
  56. data/schemas/epub-xhtml-30.sch +377 -0
  57. data/schemas/ocf-container-30.rnc +16 -0
  58. data/test/fixtures/book/META-INF/container.xml +6 -0
  59. data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
  60. data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
  61. data/test/fixtures/book/OPS/containing space.xhtml +10 -0
  62. data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
  63. data/test/fixtures/book/OPS/nav.xhtml +28 -0
  64. data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
  65. data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
  66. data/test/fixtures/book/mimetype +1 -0
  67. data/test/helper.rb +9 -0
  68. data/test/test_content_document.rb +92 -0
  69. data/test/test_epub.rb +21 -0
  70. data/test/test_fixed_layout.rb +257 -0
  71. data/test/test_inspect.rb +121 -0
  72. data/test/test_parser.rb +60 -0
  73. data/test/test_parser_content_document.rb +36 -0
  74. data/test/test_parser_fixed_layout.rb +16 -0
  75. data/test/test_parser_ocf.rb +38 -0
  76. data/test/test_parser_publication.rb +247 -0
  77. data/test/test_publication.rb +324 -0
  78. metadata +445 -0
@@ -0,0 +1,54 @@
1
+ {file:docs/Home.markdown} > **{file:docs/Publication.markdown}**
2
+
3
+ Publication(Information about EPUB book)
4
+ ========================================
5
+
6
+ EPUB Publications is information about EPUB books.
7
+
8
+ EPUB Parser represents it as the {EPUB::Publication} module and the classes under that namespace, and you can access them like `EPUB::Parser.parse("path/to/book.epub").package`
9
+
10
+ Let
11
+
12
+ book = EPUB::Parser.parse("path/to/book.epub")
13
+
14
+ for the examples that follow.
15
+
16
+ Five Models
17
+ -----------
18
+
19
+ `book.package` is a package document, a root of information tree about the book, and it has attributes to access five major models of the publication; {EPUB::Publication::Package::Metadata Metadata}, {EPUB::Publication::Package::Manifest Manifest}, {EPUB::Publication::Package::Spine Spine}, {EPUB::Publication::Package::Guide Guide} and {EPUB::Publication::Package::Bindings Bindings}.
20
+
21
+ Each of them holds information about the book in its own way.
22
+
23
+ Metadata
24
+ --------
25
+
26
+ {EPUB::Publication::Package::Metadata Metadata} is literally metadata of the book, including identifiers, titles, languages, links and so on.
27
+
28
+ You can access them by:
29
+
30
+ md = book.package.metadata # => EPUB::Publication::Package::Metadata
31
+ md.titles # => [#<EPUB::Publication::Package::Metadata::Title...>, #<EPUB::Publication::Package::Metadata::Title...>, ...]
32
+ # ...
33
+
34
+ Manifest
35
+ --------
36
+
37
+ Spine
38
+ -----
39
+
40
+ Guide
41
+ -----
42
+
43
+ Bindings
44
+ --------
45
+
46
+ Package
47
+ -------
48
+
49
+ References
50
+ ----------
51
+
52
+ * [EPUB Publications 3.0][publications] on IDPF site
53
+
54
+ [publications]: http://www.idpf.org/epub/30/spec/epub30-publications.html
@@ -0,0 +1,49 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "epub/parser/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "epub-parser-io"
7
+ s.version = EPUB::Parser::VERSION
8
+ s.authors = ["KITAITI Makoto, Brian Glusman"]
9
+ s.email = ["KitaitiMakoto@gmail.com", "brian@glusman.me"]
10
+ s.homepage = "https://github.com/bglusman/epub-parser"
11
+ s.summary = %q{EPUB 3 Parser}
12
+ s.description = %q{Parse EPUB 3 book loosely}
13
+ s.license = 'MIT'
14
+
15
+ # s.rubyforge_project = "epub-parser"
16
+
17
+ s.files = `git ls-files`.split("\n")
18
+ .push('test/fixtures/book/OPS/ルートファイル.opf')
19
+ .push('test/fixtures/book/OPS/日本語.xhtml')
20
+ .push(Dir['docs/*.md'])
21
+ s.files.reject! do |fn|
22
+ ['"test/fixtures/book/OPS/\343\203\253\343\203\274\343\203\210\343\203\225\343\202\241\343\202\244\343\203\253.opf"', '"test/fixtures/book/OPS/\346\227\245\346\234\254\350\252\236.xhtml"'].include? fn
23
+ end
24
+ s.test_files = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
25
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
26
+ s.require_paths = ["lib"]
27
+ s.has_rdoc = 'yard'
28
+
29
+ s.add_development_dependency 'rake'
30
+ s.add_development_dependency 'pry'
31
+ s.add_development_dependency 'pry-doc'
32
+ s.add_development_dependency 'test-unit'
33
+ s.add_development_dependency 'test-unit-rr'
34
+ s.add_development_dependency 'test-unit-notify'
35
+ s.add_development_dependency 'simplecov'
36
+ s.add_development_dependency 'thin'
37
+ s.add_development_dependency 'yard'
38
+ s.add_development_dependency 'gem-man'
39
+ s.add_development_dependency 'ronn'
40
+ s.add_development_dependency 'epzip'
41
+ s.add_development_dependency 'epubcheck'
42
+ s.add_development_dependency 'epub_validator'
43
+ s.add_development_dependency 'aruba'
44
+
45
+ s.add_runtime_dependency 'enumerabler'
46
+ s.add_runtime_dependency 'zipruby'
47
+ s.add_runtime_dependency 'nokogiri', '~> 1.6'
48
+ s.add_runtime_dependency 'addressable', '>= 2.3.5'
49
+ end
@@ -0,0 +1,6 @@
1
+ Feature: We can see information about EPUB file
2
+
3
+ Scenario: See info about existing EPUB file
4
+ Given the file "test/fixtures/book.epub" exists
5
+ When I successfully run `bundle exec epubinfo /home/ikeda/ruby/projects/epub-parser/test/fixtures/book.epub`
6
+ Then the stdout should contain "The New French Cuisine Masters"
@@ -0,0 +1,5 @@
1
+ Given /^the file "([^"]*)" exists$/ do |file_name|
2
+ unless File.exist? File.join(File.dirname(__FILE__), '..', '..', file_name)
3
+ raise "File #{file_name} does not exist"
4
+ end
5
+ end
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,85 @@
1
+ module EPUB
2
+ class Book
3
+ module Features
4
+ modules = [:ocf, :package]
5
+ attr_reader *modules
6
+ attr_accessor :epub_file
7
+ modules.each do |mod|
8
+ define_method "#{mod}=" do |obj|
9
+ instance_variable_set "@#{mod}", obj
10
+ obj.book = self
11
+ end
12
+ end
13
+
14
+ Publication::Package::CONTENT_MODELS.each do |model|
15
+ define_method model do
16
+ package.__send__(model)
17
+ end
18
+ end
19
+
20
+ %w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |met|
21
+ define_method met do
22
+ metadata.__send__(met)
23
+ end
24
+ end
25
+
26
+ %w[nav].each do |met|
27
+ define_method met do
28
+ manifest.__send__ met
29
+ end
30
+ end
31
+
32
+ # @overload each_page_on_spine(&blk)
33
+ # iterate over items in order of spine when block given
34
+ # @yieldparam item [Publication::Package::Manifest::Item]
35
+ # @overload each_page_on_spine
36
+ # @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
37
+ def each_page_on_spine(&blk)
38
+ enum = package.spine.items
39
+ if block_given?
40
+ enum.each &blk
41
+ else
42
+ enum
43
+ end
44
+ end
45
+
46
+ def each_page_on_toc(&blk)
47
+ raise NotImplementedError
48
+ end
49
+
50
+ # @overload each_content(&blk)
51
+ # iterate all items over when block given
52
+ # @yieldparam item [Publication::Package::Manifest::Item]
53
+ # @overload each_content
54
+ # @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
55
+ def each_content(&blk)
56
+ enum = manifest.items
57
+ if block_given?
58
+ enum.each &blk
59
+ else
60
+ enum.to_enum
61
+ end
62
+ end
63
+
64
+ def other_navigation
65
+ raise NotImplementedError
66
+ end
67
+
68
+ # @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
69
+ def resources
70
+ manifest.items
71
+ end
72
+
73
+ # Syntax sugar
74
+ def rootfile_path
75
+ ocf.container.rootfile.full_path.to_s
76
+ end
77
+
78
+ # Syntax sugar
79
+ def cover_image
80
+ manifest.cover_image
81
+ end
82
+
83
+ end
84
+ end
85
+ end
data/lib/epub/book.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'epub'
2
+
3
+ module EPUB
4
+ class Book
5
+ include EPUB::Book::Features
6
+ end
7
+ end
@@ -0,0 +1,48 @@
1
+ module EPUB
2
+ module Constants
3
+ NAMESPACES = {
4
+ 'dc' => 'http://purl.org/dc/elements/1.1/',
5
+ 'ocf' => 'urn:oasis:names:tc:opendocument:xmlns:container',
6
+ 'opf' => 'http://www.idpf.org/2007/opf',
7
+ 'xhtml' => 'http://www.w3.org/1999/xhtml',
8
+ 'epub' => 'http://www.idpf.org/2007/ops',
9
+ 'm' => 'http://www.w3.org/1998/Math/MathML',
10
+ 'svg' => 'http://www.w3.org/2000/svg',
11
+ 'smil' => 'http://www.w3.org/ns/SMIL'
12
+ }
13
+
14
+ module MediaType
15
+ # @deprecated Use {UnsupportedMediaType} instead
16
+ class UnsupportedError < StandardError; end
17
+ class UnsupportedMediaType < StandardError; end
18
+
19
+ EPUB = 'application/epub+zip'
20
+ ROOTFILE = 'application/oebps-package+xml'
21
+ IMAGE = %w[
22
+ image/gif
23
+ image/jpeg
24
+ image/png
25
+ image/svg+xml
26
+ ]
27
+ APPLICATION = %w[
28
+ application/xhtml+xml
29
+ application/x-dtbncx+xml
30
+ application/vnd.ms-opentype
31
+ application/font-woff
32
+ application/smil+xml
33
+ application/pls+xml
34
+ ]
35
+ AUDIO = %w[
36
+ audio/mpeg
37
+ audio/mp4
38
+ ]
39
+ TEXT = %w[
40
+ text/css
41
+ text/javascript
42
+ ]
43
+ CORE = IMAGE + APPLICATION + AUDIO + TEXT
44
+ end
45
+ end
46
+
47
+ include Constants
48
+ end
@@ -0,0 +1,104 @@
1
+ module EPUB
2
+ module ContentDocument
3
+ class Navigation < XHTML
4
+ attr_accessor :navigations
5
+
6
+ def initialize
7
+ @navigations = []
8
+ super
9
+ end
10
+
11
+ def toc
12
+ navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
13
+ end
14
+
15
+ def page_list
16
+ navigations.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
17
+ end
18
+
19
+ def landmarks
20
+ navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
21
+ end
22
+
23
+ # Enumerator version of toc
24
+ # Usage: navigation.enum_for(:contents)
25
+ def contents
26
+ end
27
+
28
+ # Enumerator version of page_list
29
+ # Usage: navigation.enum_for(:pages)
30
+ def pages
31
+ end
32
+
33
+ # iterator for #toc
34
+ def each_content
35
+ end
36
+
37
+ # iterator for #page_list
38
+ def each_page
39
+ end
40
+
41
+ # iterator for #landmarks
42
+ def each_landmark
43
+ end
44
+
45
+ def navigation
46
+ navigations.first
47
+ end
48
+
49
+ module Hidable
50
+ attr_accessor :hidden, :parent
51
+
52
+ def hidden?
53
+ if @hidden.nil?
54
+ @parent ? @parent.hidden? : false
55
+ else
56
+ true
57
+ end
58
+ end
59
+ end
60
+
61
+ class Item
62
+ include Hidable
63
+
64
+ attr_accessor :items, :text,
65
+ :content_document, :href, :item
66
+
67
+ def initialize
68
+ @items = ItemList.new
69
+ @items.parent = self
70
+ end
71
+
72
+ def traverse(depth=0, &block)
73
+ block.call self, depth
74
+ items.each do |item|
75
+ item.traverse depth + 1, &block
76
+ end
77
+ end
78
+ end
79
+
80
+ class Navigation < Item
81
+ module Type
82
+ TOC = 'toc'
83
+ PAGE_LIST = 'page_list'
84
+ LANDMARKS = 'landmarks'
85
+ end
86
+
87
+ attr_accessor :type
88
+ alias navigations items
89
+ alias navigations= items=
90
+ alias heading text
91
+ alias heading= text=
92
+ end
93
+
94
+ class ItemList < Array
95
+ include Hidable
96
+
97
+ def <<(item)
98
+ super
99
+ item.parent = self
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,41 @@
1
+ module EPUB
2
+ module ContentDocument
3
+ class XHTML
4
+ attr_accessor :item
5
+
6
+ # @return [String] Returns the content string.
7
+ def read
8
+ item.read
9
+ end
10
+ alias raw_document read
11
+
12
+ # @return [true|false] Whether referenced directly from spine or not.
13
+ def top_level?
14
+ !! item.itemref
15
+ end
16
+
17
+ # @return [String] Returns the value of title element.
18
+ # If none, returns empty string
19
+ def title
20
+ title_elem = nokogiri.search('title').first
21
+ if title_elem
22
+ title_elem.text
23
+ else
24
+ warn 'title element not found'
25
+ ''
26
+ end
27
+ end
28
+
29
+ # @return [REXML::Document] content as REXML::Document object
30
+ def rexml
31
+ require 'rexml/document'
32
+ @rexml ||= REXML::Document.new(raw_document)
33
+ end
34
+
35
+ # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
36
+ def nokogiri
37
+ @nokogiri ||= Nokogiri.XML(raw_document)
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,2 @@
1
+ require 'epub/content_document/xhtml'
2
+ require 'epub/content_document/navigation'
@@ -0,0 +1,45 @@
1
+ module EPUB
2
+ module Inspector
3
+ INSTANCE_VARIABLES_OPTION = {:exclude => []}
4
+ SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"
5
+
6
+ def inspect_simply
7
+ SIMPLE_TEMPLATE % {
8
+ :class => self.class,
9
+ :object_id => inspect_object_id
10
+ }
11
+ end
12
+
13
+ def inspect_object_id
14
+ (__id__ << 1).to_s(16)
15
+ end
16
+
17
+ def inspect_instance_variables(options={})
18
+ options = INSTANCE_VARIABLES_OPTION.merge(options)
19
+ exclude = options[:exclude]
20
+
21
+ (instance_variables - exclude).map {|name|
22
+ value = instance_variable_get(name)
23
+ "#{name}=#{value.inspect}"
24
+ }.join(' ')
25
+ end
26
+
27
+ module PublicationModel
28
+ TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"
29
+ class << self
30
+ def included(mod)
31
+ mod.__send__ :include, Inspector
32
+ end
33
+ end
34
+
35
+ def inspect
36
+ TEMPLATE % {
37
+ :class => self.class,
38
+ :package => package.inspect_simply,
39
+ :object_id => inspect_object_id,
40
+ :attributes => inspect_instance_variables(exclude: [:@package])
41
+ }
42
+ end
43
+ end
44
+ end
45
+ end