epub-parser 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/CHANGELOG.markdown +10 -0
  4. data/README.markdown +43 -27
  5. data/bin/epubinfo +22 -0
  6. data/docs/EpubOpen.markdown +43 -0
  7. data/docs/Epubinfo.markdown +37 -0
  8. data/docs/FixedLayout.markdown +3 -5
  9. data/docs/Home.markdown +30 -15
  10. data/docs/Item.markdown +14 -14
  11. data/epub-parser.gemspec +5 -2
  12. data/lib/epub.rb +14 -1
  13. data/lib/epub/content_document.rb +1 -5
  14. data/lib/epub/content_document/navigation.rb +3 -5
  15. data/lib/epub/content_document/xhtml.rb +25 -1
  16. data/lib/epub/inspector.rb +43 -0
  17. data/lib/epub/ocf/container.rb +2 -0
  18. data/lib/epub/parser.rb +0 -2
  19. data/lib/epub/parser/content_document.rb +3 -5
  20. data/lib/epub/parser/ocf.rb +2 -4
  21. data/lib/epub/parser/publication.rb +7 -7
  22. data/lib/epub/parser/version.rb +1 -1
  23. data/lib/epub/publication.rb +1 -0
  24. data/lib/epub/publication/package.rb +20 -1
  25. data/lib/epub/publication/package/bindings.rb +5 -1
  26. data/lib/epub/publication/package/guide.rb +1 -0
  27. data/lib/epub/publication/package/manifest.rb +40 -5
  28. data/lib/epub/publication/package/metadata.rb +7 -10
  29. data/lib/epub/publication/package/spine.rb +14 -4
  30. data/lib/method_decorators/deprecated.rb +84 -0
  31. data/test/fixtures/book/OPS/nav.xhtml +2 -0
  32. data/test/helper.rb +4 -2
  33. data/test/test_content_document.rb +21 -0
  34. data/test/test_epub.rb +12 -0
  35. data/test/test_fixed_layout.rb +0 -1
  36. data/test/test_inspect.rb +121 -0
  37. data/test/test_parser_content_document.rb +3 -0
  38. data/test/test_parser_fixed_layout.rb +1 -1
  39. data/test/test_parser_ocf.rb +1 -1
  40. data/test/test_publication.rb +125 -4
  41. metadata +56 -8
data/epub-parser.gemspec CHANGED
@@ -29,7 +29,9 @@ Gem::Specification.new do |s|
29
29
  s.add_development_dependency 'rake'
30
30
  s.add_development_dependency 'pry'
31
31
  s.add_development_dependency 'pry-doc'
32
- s.add_development_dependency 'test-unit-full'
32
+ s.add_development_dependency 'test-unit'
33
+ s.add_development_dependency 'test-unit-rr'
34
+ s.add_development_dependency 'test-unit-notify'
33
35
  s.add_development_dependency 'simplecov'
34
36
  s.add_development_dependency 'thin'
35
37
  s.add_development_dependency 'yard'
@@ -42,6 +44,7 @@ Gem::Specification.new do |s|
42
44
 
43
45
  s.add_runtime_dependency 'enumerabler'
44
46
  s.add_runtime_dependency 'zipruby'
45
- s.add_runtime_dependency 'nokogiri', '1.5.9'
47
+ s.add_runtime_dependency 'nokogiri', '~> 1.6'
46
48
  s.add_runtime_dependency 'addressable'
49
+ s.add_runtime_dependency 'method_decorators', '0.9.3'
47
50
  end
data/lib/epub.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'method_decorators/deprecated'
2
+ require 'epub/inspector'
1
3
  require 'epub/ocf'
2
4
  require 'epub/publication'
3
5
  require 'epub/content_document'
@@ -37,6 +39,11 @@ module EPUB
37
39
  end
38
40
  end
39
41
 
42
+ # @overload each_page_on_spine(&blk)
43
+ # iterate over items in order of spine when block given
44
+ # @yieldparam item [Publication::Package::Manifest::Item]
45
+ # @overload each_page_on_spine
46
+ # @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
40
47
  def each_page_on_spine(&blk)
41
48
  enum = package.spine.items
42
49
  if block_given?
@@ -50,6 +57,11 @@ module EPUB
50
57
  raise NotImplementedError
51
58
  end
52
59
 
60
+ # @overload each_content(&blk)
61
+ # iterate all items over when block given
62
+ # @yieldparam item [Publication::Package::Manifest::Item]
63
+ # @overload each_content
64
+ # @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
53
65
  def each_content(&blk)
54
66
  enum = manifest.items
55
67
  if block_given?
@@ -63,13 +75,14 @@ module EPUB
63
75
  raise NotImplementedError
64
76
  end
65
77
 
78
+ # @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
66
79
  def resources
67
80
  manifest.items
68
81
  end
69
82
 
70
83
  # Syntax sugar
71
84
  def rootfile_path
72
- ocf.container.rootfile.full_path
85
+ ocf.container.rootfile.full_path.to_s
73
86
  end
74
87
 
75
88
  # Syntax sugar
data/lib/epub/content_document.rb CHANGED
@@ -1,6 +1,2 @@
1
+ require 'epub/content_document/xhtml'
1
2
  require 'epub/content_document/navigation'
2
-
3
- module EPUB
4
- module ContentDocument
5
- end
6
- end
data/lib/epub/content_document/navigation.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'epub/content_document/xhtml'
2
-
3
1
  module EPUB
4
2
  module ContentDocument
5
3
  class Navigation < XHTML
@@ -11,15 +9,15 @@ module EPUB
11
9
  end
12
10
 
13
11
  def toc
14
- items.selector {|nav| nav.type == Navigation::Type::TOC}.first
12
+ navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
15
13
  end
16
14
 
17
15
  def page_list
18
- items.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
16
+ navigations.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
19
17
  end
20
18
 
21
19
  def landmarks
22
- items.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
20
+ navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
23
21
  end
24
22
 
25
23
  # Enumerator version of toc
data/lib/epub/content_document/xhtml.rb CHANGED
@@ -3,15 +3,39 @@ module EPUB
3
3
  class XHTML
4
4
  attr_accessor :item
5
5
 
6
+ # @return [String] Returns the content string.
6
7
  def read
7
8
  item.read
8
9
  end
9
10
  alias raw_document read
10
11
 
11
- # referenced directly from spine?
12
+ # @return [true|false] Whether referenced directly from spine or not.
12
13
  def top_level?
13
14
  !! item.itemref
14
15
  end
16
+
17
+ # @return [String] Returns the value of title element.
18
+ # If none, returns empty string
19
+ def title
20
+ title_elem = Nokogiri.XML(read).search('title').first
21
+ if title_elem
22
+ title_elem.text
23
+ else
24
+ warn 'title element not found'
25
+ ''
26
+ end
27
+ end
28
+
29
+ # @return [REXML::Document] content as REXML::Document object
30
+ def rexml
31
+ require 'rexml/document'
32
+ @rexml ||= REXML::Document.new(raw_document)
33
+ end
34
+
35
+ # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
36
+ def nokogiri
37
+ @nokogiri ||= Nokogiri.XML(raw_document)
38
+ end
15
39
  end
16
40
  end
17
41
  end
data/lib/epub/inspector.rb ADDED
@@ -0,0 +1,43 @@
1
+ module EPUB
2
+ module Inspector
3
+ INSTANCE_VARIABLES_OPTION = {:exclude => []}
4
+
5
+ def inspect_simply
6
+ "#<%{class}:%{object_id}>" % {
7
+ :class => self.class,
8
+ :object_id => inspect_object_id
9
+ }
10
+ end
11
+
12
+ def inspect_object_id
13
+ (__id__ << 1).to_s(16)
14
+ end
15
+
16
+ def inspect_instance_variables(options={})
17
+ options = INSTANCE_VARIABLES_OPTION.merge(options)
18
+ exclude = options[:exclude]
19
+
20
+ (instance_variables - exclude).map {|name|
21
+ value = instance_variable_get(name)
22
+ "#{name}=#{value.inspect}"
23
+ }.join(' ')
24
+ end
25
+
26
+ module PublicationModel
27
+ class << self
28
+ def included(mod)
29
+ mod.__send__ :include, Inspector
30
+ end
31
+ end
32
+
33
+ def inspect
34
+ "#<%{class}:%{object_id} @package=%{package} %{attributes}>" % {
35
+ :class => self.class,
36
+ :package => package.inspect_simply,
37
+ :object_id => inspect_object_id,
38
+ :attributes => inspect_instance_variables(exclude: [:@package])
39
+ }
40
+ end
41
+ end
42
+ end
43
+ end
data/lib/epub/ocf/container.rb CHANGED
@@ -17,6 +17,8 @@ module EPUB
17
17
  class Rootfile
18
18
  attr_accessor :full_path, :media_type
19
19
 
20
+ # @param full_path [Addressable::URI|nil]
21
+ # @param media_type [String]
20
22
  def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
21
23
  @full_path, @media_type = full_path, media_type
22
24
  end
data/lib/epub/parser.rb CHANGED
@@ -50,8 +50,6 @@ module EPUB
50
50
  Zip::Archive.open @filepath do |zip|
51
51
  @book.ocf = OCF.parse(zip)
52
52
  @book.package = Publication.parse(zip, @book.rootfile_path)
53
- # @book.content_document =??? parse_content_document
54
- # ...
55
53
  end
56
54
 
57
55
  @book
data/lib/epub/parser/content_document.rb CHANGED
@@ -32,8 +32,6 @@ module EPUB
32
32
  # parse_content_document(document)
33
33
  if @item.nav?
34
34
  content_document.navigations = parse_navigations(document)
35
- else
36
- raise NotImplementedError
37
35
  end
38
36
  content_document
39
37
  end
@@ -71,12 +69,12 @@ module EPUB
71
69
  when 'canvas'
72
70
  when 'embed'
73
71
  when 'iframe'
74
- item.text = (extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')).to_s
72
+ item.text = extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')
75
73
  when 'img'
76
- item.text = extract_attribute(embedded_content, 'alt').to_s
74
+ item.text = extract_attribute(embedded_content, 'alt')
77
75
  when 'math'
78
76
  when 'object'
79
- item.text = extract_attribute(embedded_content, 'name').to_s
77
+ item.text = extract_attribute(embedded_content, 'name')
80
78
  when 'svg'
81
79
  when 'video'
82
80
  else
data/lib/epub/parser/ocf.rb CHANGED
@@ -39,10 +39,8 @@ module EPUB
39
39
  doc = Nokogiri.XML(xml)
40
40
  doc.xpath('/ocf:container/ocf:rootfiles/ocf:rootfile', EPUB::NAMESPACES).each do |elem|
41
41
  rootfile = EPUB::OCF::Container::Rootfile.new
42
- %w[full-path media-type].each do |attr|
43
- value = extract_attribute(elem, attr)
44
- rootfile.__send__(attr.gsub(/-/, '_') + '=', value)
45
- end
42
+ rootfile.full_path = Addressable::URI.parse(extract_attribute(elem, 'full-path'))
43
+ rootfile.media_type = extract_attribute(elem, 'media-type')
46
44
  container.rootfiles << rootfile
47
45
  end
48
46
 
data/lib/epub/parser/publication.rb CHANGED
@@ -41,10 +41,7 @@ module EPUB
41
41
  end
42
42
  @unique_identifier_id = elem['unique-identifier']
43
43
  @package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
44
- if @package.prefix.key? 'rendition'
45
- require 'epub/publication/fixed_layout'
46
- EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout
47
- end
44
+ EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
48
45
 
49
46
  @package
50
47
  end
@@ -76,7 +73,11 @@ module EPUB
76
73
  metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
77
74
 
78
75
  metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
79
- e.content
76
+ language = EPUB::Publication::Package::Metadata::DCMES.new
77
+ language.content = e.content
78
+ language.id = e['id'] if e['id']
79
+
80
+ language
80
81
  end
81
82
  metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
82
83
 
@@ -199,8 +200,7 @@ module EPUB
199
200
  @doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
200
201
  media_type = EPUB::Publication::Package::Bindings::MediaType.new
201
202
  media_type.media_type = extract_attribute(elem, 'media-type')
202
- items = @package.manifest.items
203
- media_type.handler = items.detect {|item| item.id == extract_attribute(elem, 'handler')}
203
+ media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
204
204
  bindings << media_type
205
205
  end
206
206
 
data/lib/epub/parser/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.1.4"
3
+ VERSION = "0.1.5"
4
4
  end
5
5
  end
data/lib/epub/publication.rb CHANGED
@@ -1 +1,2 @@
1
1
  require 'epub/publication/package'
2
+ require 'epub/publication/fixed_layout'
data/lib/epub/publication/package.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  module EPUB
2
2
  module Publication
3
3
  class Package
4
+ include Inspector
5
+
4
6
  CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
5
7
  RESERVED_VOCABULARY_PREFIXES = {
6
8
  '' => 'http://idpf.org/epub/vocab/package/#',
@@ -23,7 +25,7 @@ module EPUB
23
25
  end
24
26
  end
25
27
 
26
- attr_accessor :book,
28
+ attr_accessor :book,
27
29
  :version, :prefix, :xml_lang, :dir, :id
28
30
  attr_reader *CONTENT_MODELS
29
31
  alias lang xml_lang
@@ -40,6 +42,23 @@ module EPUB
40
42
  def unique_identifier
41
43
  @metadata.unique_identifier
42
44
  end
45
+
46
+ def inspect
47
+ "#<%{class}:%{object_id} %{attributes} %{models}>" % {
48
+ :class => self.class,
49
+ :object_id => inspect_object_id,
50
+ :attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
51
+ :models => inspect_models
52
+ }
53
+ end
54
+
55
+ def inspect_models
56
+ CONTENT_MODELS.map {|name|
57
+ model = __send__(name)
58
+ representation = model.nil? ? model.inspect : model.inspect_simply
59
+ "@#{name}=#{representation}"
60
+ }.join(' ')
61
+ end
43
62
  end
44
63
  end
45
64
  end
data/lib/epub/publication/package/bindings.rb CHANGED
@@ -2,10 +2,14 @@ module EPUB
2
2
  module Publication
3
3
  class Package
4
4
  class Bindings
5
+ include Inspector::PublicationModel
5
6
  attr_accessor :package
6
7
 
8
+ def initialize
9
+ @media_types = {}
10
+ end
11
+
7
12
  def <<(media_type)
8
- @media_types ||= {}
9
13
  @media_types[media_type.media_type] = media_type
10
14
  end
11
15
 
data/lib/epub/publication/package/guide.rb CHANGED
@@ -4,6 +4,7 @@ module EPUB
4
4
  module Publication
5
5
  class Package
6
6
  class Guide
7
+ include Inspector::PublicationModel
7
8
  attr_accessor :package
8
9
 
9
10
  def references
data/lib/epub/publication/package/manifest.rb CHANGED
@@ -6,12 +6,17 @@ module EPUB
6
6
  module Publication
7
7
  class Package
8
8
  class Manifest
9
+ include Inspector::PublicationModel
10
+
9
11
  attr_accessor :package,
10
12
  :id
11
13
 
14
+ def initialize
15
+ @items = {}
16
+ end
17
+
12
18
  # @return self
13
19
  def <<(item)
14
- @items ||= {}
15
20
  item.manifest = self
16
21
  @items[item.id] = item
17
22
  self
@@ -29,6 +34,12 @@ module EPUB
29
34
  items.selector {|i| i.properties.include? 'cover-image'}.first
30
35
  end
31
36
 
37
+ def each_item
38
+ @items.each_value do |item|
39
+ yield item
40
+ end
41
+ end
42
+
32
43
  def items
33
44
  @items.values
34
45
  end
@@ -38,12 +49,15 @@ module EPUB
38
49
  end
39
50
 
40
51
  class Item
52
+ include Inspector
53
+
41
54
  # @!attribute [rw] manifest
42
55
  # @return [Manifest] Returns the value of manifest
43
56
  # @!attribute [rw] id
44
57
  # @return [String] Returns the value of id
45
58
  # @!attribute [rw] href
46
- # @return [Addressable::URI] Returns the value of href
59
+ # @return [Addressable::URI] Returns the value of href,
60
+ # which is relative path from rootfile(OPF file)
47
61
  # @!attribute [rw] media_type
48
62
  # @return [String] Returns the value of media_type
49
63
  # @!attribute [rw] properties
@@ -55,19 +69,31 @@ module EPUB
55
69
  attr_accessor :manifest,
56
70
  :id, :href, :media_type, :fallback, :properties, :media_overlay
57
71
 
72
+ def initialize
73
+ @properties = []
74
+ end
75
+
58
76
  # @todo Handle circular fallback chain
59
77
  def fallback_chain
60
78
  @fallback_chain ||= traverse_fallback_chain([])
61
79
  end
62
80
 
81
+ # full path in archive
82
+ def entry_name
83
+ rootfile = manifest.package.book.ocf.container.rootfile.full_path
84
+ Addressable::URI.unescape(rootfile + href.normalize.request_uri)
85
+ end
86
+
63
87
  def read
64
- rootfile = Addressable::URI.parse(manifest.package.book.ocf.container.rootfile.full_path)
65
88
  Zip::Archive.open(manifest.package.book.epub_file) {|zip|
66
- path = Addressable::URI.unescape(rootfile + href.normalize.request_uri)
67
- zip.fopen(path).read
89
+ zip.fopen(entry_name).read
68
90
  }
69
91
  end
70
92
 
93
+ def xhtml?
94
+ media_type == 'application/xhtml+xml'
95
+ end
96
+
71
97
  def nav?
72
98
  properties.include? 'nav'
73
99
  end
@@ -101,6 +127,15 @@ module EPUB
101
127
  manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
102
128
  end
103
129
 
130
+ def inspect
131
+ "#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
132
+ :class => self.class,
133
+ :object_id => inspect_object_id,
134
+ :manifest => "@manifest=#{@manifest.inspect_simply}",
135
+ :attributes => inspect_instance_variables(exclude: [:@manifest])
136
+ }
137
+ end
138
+
104
139
  protected
105
140
 
106
141
  def traverse_fallback_chain(chain)