epub-parser-io 0.1.6a

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gemtest +0 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +4 -0
  5. data/.yardopts +10 -0
  6. data/CHANGELOG.markdown +61 -0
  7. data/Gemfile +2 -0
  8. data/MIT-LICENSE +7 -0
  9. data/README.markdown +174 -0
  10. data/Rakefile +68 -0
  11. data/bin/epub-open +25 -0
  12. data/bin/epubinfo +64 -0
  13. data/docs/EpubOpen.markdown +43 -0
  14. data/docs/Epubinfo.markdown +37 -0
  15. data/docs/FixedLayout.markdown +96 -0
  16. data/docs/Home.markdown +128 -0
  17. data/docs/Item.markdown +80 -0
  18. data/docs/Navigation.markdown +58 -0
  19. data/docs/Publication.markdown +54 -0
  20. data/epub-parser.gemspec +49 -0
  21. data/features/epubinfo.feature +6 -0
  22. data/features/step_definitions/epubinfo_steps.rb +5 -0
  23. data/features/support/env.rb +1 -0
  24. data/lib/epub/book/features.rb +85 -0
  25. data/lib/epub/book.rb +7 -0
  26. data/lib/epub/constants.rb +48 -0
  27. data/lib/epub/content_document/navigation.rb +104 -0
  28. data/lib/epub/content_document/xhtml.rb +41 -0
  29. data/lib/epub/content_document.rb +2 -0
  30. data/lib/epub/inspector.rb +45 -0
  31. data/lib/epub/ocf/container.rb +28 -0
  32. data/lib/epub/ocf/encryption.rb +7 -0
  33. data/lib/epub/ocf/manifest.rb +6 -0
  34. data/lib/epub/ocf/metadata.rb +6 -0
  35. data/lib/epub/ocf/rights.rb +6 -0
  36. data/lib/epub/ocf/signatures.rb +6 -0
  37. data/lib/epub/ocf.rb +8 -0
  38. data/lib/epub/parser/content_document.rb +111 -0
  39. data/lib/epub/parser/ocf.rb +73 -0
  40. data/lib/epub/parser/publication.rb +200 -0
  41. data/lib/epub/parser/utils.rb +20 -0
  42. data/lib/epub/parser/version.rb +5 -0
  43. data/lib/epub/parser.rb +103 -0
  44. data/lib/epub/publication/fixed_layout.rb +208 -0
  45. data/lib/epub/publication/package/bindings.rb +31 -0
  46. data/lib/epub/publication/package/guide.rb +51 -0
  47. data/lib/epub/publication/package/manifest.rb +180 -0
  48. data/lib/epub/publication/package/metadata.rb +170 -0
  49. data/lib/epub/publication/package/spine.rb +106 -0
  50. data/lib/epub/publication/package.rb +68 -0
  51. data/lib/epub/publication.rb +2 -0
  52. data/lib/epub.rb +14 -0
  53. data/man/epubinfo.1.ronn +19 -0
  54. data/schemas/epub-nav-30.rnc +10 -0
  55. data/schemas/epub-nav-30.sch +72 -0
  56. data/schemas/epub-xhtml-30.sch +377 -0
  57. data/schemas/ocf-container-30.rnc +16 -0
  58. data/test/fixtures/book/META-INF/container.xml +6 -0
  59. data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
  60. data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
  61. data/test/fixtures/book/OPS/containing space.xhtml +10 -0
  62. data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
  63. data/test/fixtures/book/OPS/nav.xhtml +28 -0
  64. data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
  65. data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
  66. data/test/fixtures/book/mimetype +1 -0
  67. data/test/helper.rb +9 -0
  68. data/test/test_content_document.rb +92 -0
  69. data/test/test_epub.rb +21 -0
  70. data/test/test_fixed_layout.rb +257 -0
  71. data/test/test_inspect.rb +121 -0
  72. data/test/test_parser.rb +60 -0
  73. data/test/test_parser_content_document.rb +36 -0
  74. data/test/test_parser_fixed_layout.rb +16 -0
  75. data/test/test_parser_ocf.rb +38 -0
  76. data/test/test_parser_publication.rb +247 -0
  77. data/test/test_publication.rb +324 -0
  78. metadata +445 -0
@@ -0,0 +1,54 @@
1
+ {file:docs/Home.markdown} > **{file:docs/Publication.markdown}**
2
+
3
+ Publication(Information about EPUB book)
4
+ ========================================
5
+
6
+ An EPUB Publication is the information about an EPUB book.
7
+
8
+ EPUB Parser represents it as the {EPUB::Publication} module and the classes under that namespace, and you can access them like `EPUB::Parser.parse("path/to/book.epub").package`
9
+
10
+ Let
11
+
12
+ book = EPUB::Parser.parse("path/to/book.epub")
13
+
14
+ for continuing.
15
+
16
+ Five Models
17
+ -----------
18
+
19
+ `book.package` is a package document, the root of the information tree about the book, and it has attributes to access the five major models of the publication: {EPUB::Publication::Package::Metadata Metadata}, {EPUB::Publication::Package::Manifest Manifest}, {EPUB::Publication::Package::Spine Spine}, {EPUB::Publication::Package::Guide Guide} and {EPUB::Publication::Package::Bindings Bindings}.
20
+
21
+ Each of them holds information about the book in its own way.
22
+
23
+ Metadata
24
+ --------
25
+
26
+ {EPUB::Publication::Package::Metadata Metadata} is literally metadata of the book, including identifiers, titles, languages, links and so on.
27
+
28
+ You can access them by:
29
+
30
+ md = book.package.metadata # => EPUB::Publication::Package::Metadata
31
+ md.titles # => [#<EPUB::Publication::Package::Metadata::Title...>, #<EPUB::Publication::Package::Metadata::Title...>, ...]
32
+ # ...
33
+
34
+ Manifest
35
+ --------
36
+
37
+ Spine
38
+ -----
39
+
40
+ Guide
41
+ -----
42
+
43
+ Bindings
44
+ --------
45
+
46
+ Package
47
+ -------
48
+
49
+ References
50
+ ----------
51
+
52
+ * [EPUB Publications 3.0][publications] on IDPF site
53
+
54
+ [publications]: http://www.idpf.org/epub/30/spec/epub30-publications.html
@@ -0,0 +1,49 @@
1
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "epub/parser/version"

# Gem specification for epub-parser-io.
Gem::Specification.new do |s|
  s.name        = "epub-parser-io"
  s.version     = EPUB::Parser::VERSION
  # One array entry per author; a single comma-joined string would be
  # recorded as one author.
  s.authors     = ["KITAITI Makoto", "Brian Glusman"]
  s.email       = ["KitaitiMakoto@gmail.com", "brian@glusman.me"]
  s.homepage    = "https://github.com/bglusman/epub-parser"
  s.summary     = %q{EPUB 3 Parser}
  s.description = %q{Parse EPUB 3 book loosely}
  s.license     = 'MIT'

  # s.rubyforge_project = "epub-parser"

  # Entries dropped from the file list. They look like the quoted,
  # octal-escaped form in which `git ls-files` reports the two non-ASCII
  # fixture names (TODO confirm); the literal UTF-8 names are pushed
  # explicitly below instead.
  escaped_fixture_names = [
    '"test/fixtures/book/OPS/\343\203\253\343\203\274\343\203\210\343\203\225\343\202\241\343\202\244\343\203\253.opf"',
    '"test/fixtures/book/OPS/\346\227\245\346\234\254\350\252\236.xhtml"'
  ]
  # `concat` appends the globbed paths themselves (`push` would append the
  # whole array as a single nested element). The glob accepts both .md and
  # .markdown because the docs in this gem use the .markdown extension.
  s.files         = `git ls-files`.split("\n")
                      .push('test/fixtures/book/OPS/ルートファイル.opf')
                      .push('test/fixtures/book/OPS/日本語.xhtml')
                      .concat(Dir['docs/*.{md,markdown}'])
                      .reject { |fn| escaped_fixture_names.include?(fn) }
  s.test_files    = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.has_rdoc = 'yard'

  s.add_development_dependency 'rake'
  s.add_development_dependency 'pry'
  s.add_development_dependency 'pry-doc'
  s.add_development_dependency 'test-unit'
  s.add_development_dependency 'test-unit-rr'
  s.add_development_dependency 'test-unit-notify'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'thin'
  s.add_development_dependency 'yard'
  s.add_development_dependency 'gem-man'
  s.add_development_dependency 'ronn'
  s.add_development_dependency 'epzip'
  s.add_development_dependency 'epubcheck'
  s.add_development_dependency 'epub_validator'
  s.add_development_dependency 'aruba'

  s.add_runtime_dependency 'enumerabler'
  s.add_runtime_dependency 'zipruby'
  s.add_runtime_dependency 'nokogiri', '~> 1.6'
  s.add_runtime_dependency 'addressable', '>= 2.3.5'
end
@@ -0,0 +1,6 @@
1
Feature: We can see information about EPUB file

  Scenario: See info about existing EPUB file
    Given the file "test/fixtures/book.epub" exists
    # The path is relative so the scenario does not depend on one developer's
    # home directory. It assumes Aruba runs the command from its default
    # scratch directory (tmp/aruba under the project root) - TODO confirm.
    When I successfully run `bundle exec epubinfo ../../test/fixtures/book.epub`
    Then the stdout should contain "The New French Cuisine Masters"
@@ -0,0 +1,5 @@
1
# Step: raises unless +file_name+, resolved against the directory two levels
# above this step-definition file, exists.
Given /^the file "([^"]*)" exists$/ do |file_name|
  base_dir = File.join(File.dirname(__FILE__), '..', '..')
  candidate = File.join(base_dir, file_name)
  raise "File #{file_name} does not exist" unless File.exist?(candidate)
end
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,85 @@
1
module EPUB
  class Book
    # Mixin that gives a book its +ocf+ and +package+ attributes and a set of
    # shortcut methods forwarding to the package, its metadata and its manifest.
    module Features
      attr_reader :ocf, :package
      attr_accessor :epub_file

      # Stores +obj+ as the OCF and points its +book+ back-reference at this book.
      def ocf=(obj)
        @ocf = obj
        obj.book = self
      end

      # Stores +obj+ as the package and points its +book+ back-reference at this book.
      def package=(obj)
        @package = obj
        obj.book = self
      end

      # One reader per content model, each forwarding to the package.
      Publication::Package::CONTENT_MODELS.each do |content_model|
        define_method(content_model) { package.__send__(content_model) }
      end

      # Shortcuts forwarding to the metadata object.
      %w[ title main_title subtitle short_title collection_title edition_title extended_title description date unique_identifier ].each do |name|
        define_method(name) { metadata.__send__(name) }
      end

      # Shortcuts forwarding to the manifest object.
      %w[nav].each do |name|
        define_method(name) { manifest.__send__ name }
      end

      # @overload each_page_on_spine(&blk)
      #   calls the block with each item, in spine order
      #   @yieldparam item [Publication::Package::Manifest::Item]
      # @overload each_page_on_spine
      #   @return [Enumerator] over {Publication::Package::Manifest::Item}s in spine order when no block is given
      def each_page_on_spine(&blk)
        spine_items = package.spine.items
        return spine_items unless block_given?

        spine_items.each(&blk)
      end

      def each_page_on_toc(&blk)
        raise NotImplementedError
      end

      # @overload each_content(&blk)
      #   calls the block with every item
      #   @yieldparam item [Publication::Package::Manifest::Item]
      # @overload each_content
      #   @return [Enumerator] over all {Publication::Package::Manifest::Item}s in the EPUB package when no block is given
      def each_content(&blk)
        all_items = manifest.items
        block_given? ? all_items.each(&blk) : all_items.to_enum
      end

      def other_navigation
        raise NotImplementedError
      end

      # @return [Array<Publication::Package::Manifest::Item>] every {Publication::Package::Manifest::Item} in the EPUB package
      def resources
        manifest.items
      end

      # Shortcut: full path of the container's rootfile, as a String
      def rootfile_path
        rootfile = ocf.container.rootfile
        rootfile.full_path.to_s
      end

      # Shortcut for the manifest's cover image
      def cover_image
        manifest.cover_image
      end

    end
  end
end
data/lib/epub/book.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'epub'
2
+
3
module EPUB
  # A book object; all of its behavior is mixed in from Book::Features.
  class Book
    include Features
  end
end
+ end
@@ -0,0 +1,48 @@
1
module EPUB
  # Constant tables shared across the library; mixed into EPUB itself below,
  # so the same constants are also reachable as EPUB::NAMESPACES etc.
  module Constants
    # XML namespace prefix => namespace URI
    NAMESPACES = {
      'dc'    => 'http://purl.org/dc/elements/1.1/',
      'ocf'   => 'urn:oasis:names:tc:opendocument:xmlns:container',
      'opf'   => 'http://www.idpf.org/2007/opf',
      'xhtml' => 'http://www.w3.org/1999/xhtml',
      'epub'  => 'http://www.idpf.org/2007/ops',
      'm'     => 'http://www.w3.org/1998/Math/MathML',
      'svg'   => 'http://www.w3.org/2000/svg',
      'smil'  => 'http://www.w3.org/ns/SMIL'
    }

    # Media type strings, grouped by top-level type.
    module MediaType
      # @deprecated Use {UnsupportedMediaType} instead
      class UnsupportedError < StandardError
      end

      class UnsupportedMediaType < StandardError
      end

      EPUB     = 'application/epub+zip'
      ROOTFILE = 'application/oebps-package+xml'

      IMAGE = [
        'image/gif',
        'image/jpeg',
        'image/png',
        'image/svg+xml'
      ]
      APPLICATION = [
        'application/xhtml+xml',
        'application/x-dtbncx+xml',
        'application/vnd.ms-opentype',
        'application/font-woff',
        'application/smil+xml',
        'application/pls+xml'
      ]
      AUDIO = [
        'audio/mpeg',
        'audio/mp4'
      ]
      TEXT = [
        'text/css',
        'text/javascript'
      ]

      # The four lists above joined in that order.
      CORE = [IMAGE, APPLICATION, AUDIO, TEXT].flatten
    end
  end

  include Constants
end
@@ -0,0 +1,104 @@
1
module EPUB
  module ContentDocument
    # An XHTML content document that additionally holds a list of navigation
    # trees in +navigations+.
    class Navigation < XHTML
      attr_accessor :navigations

      def initialize
        @navigations = []
        super
      end

      # @return [Navigation, nil] first navigation whose type is Type::TOC, or nil when there is none
      def toc
        navigations.find {|nav| nav.type == Navigation::Type::TOC}
      end

      # @return [Navigation, nil] first navigation whose type is Type::PAGE_LIST, or nil when there is none
      def page_list
        navigations.find {|nav| nav.type == Navigation::Type::PAGE_LIST}
      end

      # @return [Navigation, nil] first navigation whose type is Type::LANDMARKS, or nil when there is none
      def landmarks
        navigations.find {|nav| nav.type == Navigation::Type::LANDMARKS}
      end

      # Enumerator version of toc
      # Usage: navigation.enum_for(:contents)
      # Not implemented yet: the body is empty and returns nil.
      def contents
      end

      # Enumerator version of page_list
      # Usage: navigation.enum_for(:pages)
      # Not implemented yet: the body is empty and returns nil.
      def pages
      end

      # iterator for #toc
      # Not implemented yet: the body is empty and returns nil.
      def each_content
      end

      # iterator for #page_list
      # Not implemented yet: the body is empty and returns nil.
      def each_page
      end

      # iterator for #landmark
      # Not implemented yet: the body is empty and returns nil.
      def each_landmark
      end

      # @return [Navigation, nil] the first navigation, or nil when there is none
      def navigation
        navigations.first
      end

      # Hidden-state shared by items and item lists. A node whose own
      # +hidden+ is nil takes its state from +parent+.
      module Hidable
        attr_accessor :hidden, :parent

        # @return [true|false] the node's own +hidden+ value coerced to a
        #   boolean when it has been set (so an explicit false means visible);
        #   otherwise the parent's answer, or false when there is no parent
        def hidden?
          return !!@hidden unless @hidden.nil?

          @parent ? @parent.hidden? : false
        end
      end

      # A node of a navigation tree; its children live in +items+.
      class Item
        include Hidable

        attr_accessor :items, :text,
                      :content_document, :href, :item

        def initialize
          @items = ItemList.new
          @items.parent = self
        end

        # Calls the block with this item and its depth, then recurses into
        # each child with depth + 1 (pre-order).
        #
        # @param depth [Integer] depth reported for this item
        # @yieldparam item [Item]
        # @yieldparam depth [Integer]
        def traverse(depth=0, &block)
          block.call self, depth
          items.each do |item|
            item.traverse depth + 1, &block
          end
        end
      end

      # Root item of one navigation tree; +type+ tells which kind it is.
      class Navigation < Item
        module Type
          TOC       = 'toc'
          PAGE_LIST = 'page_list'
          LANDMARKS = 'landmarks'
        end

        attr_accessor :type
        alias navigations items
        alias navigations= items=
        alias heading text
        alias heading= text=
      end

      # Array of items that sets itself as the parent of everything appended
      # with <<.
      class ItemList < Array
        include Hidable

        def <<(item)
          super
          item.parent = self
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module EPUB
  module ContentDocument
    # Content document wrapping an +item+ object, which supplies the raw
    # content through its #read method.
    class XHTML
      attr_accessor :item

      # @return [String] the content string, as returned by the item
      def read
        item.read
      end
      alias_method :raw_document, :read

      # @return [true|false] Whether the item has an itemref, i.e. is referenced directly from spine.
      def top_level?
        item.itemref ? true : false
      end

      # @return [String] text of the first title element;
      #   an empty string (after a warning) when there is none
      def title
        title_elem = nokogiri.search('title').first
        return title_elem.text if title_elem

        warn 'title element not found'
        ''
      end

      # @return [REXML::Document] content parsed as a REXML::Document, memoized
      def rexml
        # loaded on first use
        require 'rexml/document'
        return @rexml if @rexml

        @rexml = REXML::Document.new(raw_document)
      end

      # @return [Nokogiri::XML::Document] content parsed as a Nokogiri::XML::Document, memoized
      def nokogiri
        return @nokogiri if @nokogiri

        @nokogiri = Nokogiri.XML(raw_document)
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'epub/content_document/xhtml'
2
+ require 'epub/content_document/navigation'
@@ -0,0 +1,45 @@
1
module EPUB
  # Helpers for building compact #inspect strings.
  module Inspector
    INSTANCE_VARIABLES_OPTION = {:exclude => []}
    SIMPLE_TEMPLATE = "#<%{class}:%{object_id}>"

    # @return [String] "#<Class:id>" with no instance variables
    def inspect_simply
      fields = {:class => self.class, :object_id => inspect_object_id}
      SIMPLE_TEMPLATE % fields
    end

    # @return [String] __id__ shifted left by one bit, in hexadecimal
    def inspect_object_id
      shifted = __id__ << 1
      shifted.to_s(16)
    end

    # @param options [Hash] :exclude lists instance variable names (Symbols) to leave out
    # @return [String] space-separated "@name=value.inspect" pairs
    def inspect_instance_variables(options={})
      excluded = INSTANCE_VARIABLES_OPTION.merge(options)[:exclude]
      shown = instance_variables - excluded

      pairs = shown.map do |ivar|
        "#{ivar}=#{instance_variable_get(ivar).inspect}"
      end
      pairs.join(' ')
    end

    # #inspect for objects that expose a +package+: the package is printed in
    # its short form and left out of the attribute list.
    module PublicationModel
      TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"

      # Including PublicationModel also includes Inspector into the same module.
      def self.included(mod)
        mod.__send__ :include, Inspector
      end

      def inspect
        fields = {
          :class => self.class,
          :package => package.inspect_simply,
          :object_id => inspect_object_id,
          :attributes => inspect_instance_variables(exclude: [:@package])
        }
        TEMPLATE % fields
      end
    end
  end
end