epub-parser 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/CHANGELOG.markdown +10 -0
  4. data/README.markdown +43 -27
  5. data/bin/epubinfo +22 -0
  6. data/docs/EpubOpen.markdown +43 -0
  7. data/docs/Epubinfo.markdown +37 -0
  8. data/docs/FixedLayout.markdown +3 -5
  9. data/docs/Home.markdown +30 -15
  10. data/docs/Item.markdown +14 -14
  11. data/epub-parser.gemspec +5 -2
  12. data/lib/epub.rb +14 -1
  13. data/lib/epub/content_document.rb +1 -5
  14. data/lib/epub/content_document/navigation.rb +3 -5
  15. data/lib/epub/content_document/xhtml.rb +25 -1
  16. data/lib/epub/inspector.rb +43 -0
  17. data/lib/epub/ocf/container.rb +2 -0
  18. data/lib/epub/parser.rb +0 -2
  19. data/lib/epub/parser/content_document.rb +3 -5
  20. data/lib/epub/parser/ocf.rb +2 -4
  21. data/lib/epub/parser/publication.rb +7 -7
  22. data/lib/epub/parser/version.rb +1 -1
  23. data/lib/epub/publication.rb +1 -0
  24. data/lib/epub/publication/package.rb +20 -1
  25. data/lib/epub/publication/package/bindings.rb +5 -1
  26. data/lib/epub/publication/package/guide.rb +1 -0
  27. data/lib/epub/publication/package/manifest.rb +40 -5
  28. data/lib/epub/publication/package/metadata.rb +7 -10
  29. data/lib/epub/publication/package/spine.rb +14 -4
  30. data/lib/method_decorators/deprecated.rb +84 -0
  31. data/test/fixtures/book/OPS/nav.xhtml +2 -0
  32. data/test/helper.rb +4 -2
  33. data/test/test_content_document.rb +21 -0
  34. data/test/test_epub.rb +12 -0
  35. data/test/test_fixed_layout.rb +0 -1
  36. data/test/test_inspect.rb +121 -0
  37. data/test/test_parser_content_document.rb +3 -0
  38. data/test/test_parser_fixed_layout.rb +1 -1
  39. data/test/test_parser_ocf.rb +1 -1
  40. data/test/test_publication.rb +125 -4
  41. metadata +56 -8
data/epub-parser.gemspec CHANGED
@@ -29,7 +29,9 @@ Gem::Specification.new do |s|
29
29
  s.add_development_dependency 'rake'
30
30
  s.add_development_dependency 'pry'
31
31
  s.add_development_dependency 'pry-doc'
32
- s.add_development_dependency 'test-unit-full'
32
+ s.add_development_dependency 'test-unit'
33
+ s.add_development_dependency 'test-unit-rr'
34
+ s.add_development_dependency 'test-unit-notify'
33
35
  s.add_development_dependency 'simplecov'
34
36
  s.add_development_dependency 'thin'
35
37
  s.add_development_dependency 'yard'
@@ -42,6 +44,7 @@ Gem::Specification.new do |s|
42
44
 
43
45
  s.add_runtime_dependency 'enumerabler'
44
46
  s.add_runtime_dependency 'zipruby'
45
- s.add_runtime_dependency 'nokogiri', '1.5.9'
47
+ s.add_runtime_dependency 'nokogiri', '~> 1.6'
46
48
  s.add_runtime_dependency 'addressable'
49
+ s.add_runtime_dependency 'method_decorators', '0.9.3'
47
50
  end
data/lib/epub.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'method_decorators/deprecated'
2
+ require 'epub/inspector'
1
3
  require 'epub/ocf'
2
4
  require 'epub/publication'
3
5
  require 'epub/content_document'
@@ -37,6 +39,11 @@ module EPUB
37
39
  end
38
40
  end
39
41
 
42
+ # @overload each_page_on_spine(&blk)
43
+ # iterate over items in order of spine when block given
44
+ # @yieldparam item [Publication::Package::Manifest::Item]
45
+ # @overload each_page_on_spine
46
+ # @return [Enumerator] which iterates over {Publication::Package::Manifest::Item}s in order of spine when block not given
40
47
  def each_page_on_spine(&blk)
41
48
  enum = package.spine.items
42
49
  if block_given?
@@ -50,6 +57,11 @@ module EPUB
50
57
  raise NotImplementedError
51
58
  end
52
59
 
60
+ # @overload each_content(&blk)
61
+ # iterate all items over when block given
62
+ # @yieldparam item [Publication::Package::Manifest::Item]
63
+ # @overload each_content
64
+ # @return [Enumerator] which iterates over all {Publication::Package::Manifest::Item}s in EPUB package when block not given
53
65
  def each_content(&blk)
54
66
  enum = manifest.items
55
67
  if block_given?
@@ -63,13 +75,14 @@ module EPUB
63
75
  raise NotImplementedError
64
76
  end
65
77
 
78
+ # @return [Array<Publication::Package::Manifest::Item>] All {Publication::Package::Manifest::Item}s in EPUB package
66
79
  def resources
67
80
  manifest.items
68
81
  end
69
82
 
70
83
  # Syntax sugar
71
84
  def rootfile_path
72
- ocf.container.rootfile.full_path
85
+ ocf.container.rootfile.full_path.to_s
73
86
  end
74
87
 
75
88
  # Syntax sugar
data/lib/epub/content_document.rb CHANGED
@@ -1,6 +1,2 @@
1
+ require 'epub/content_document/xhtml'
1
2
  require 'epub/content_document/navigation'
2
-
3
- module EPUB
4
- module ContentDocument
5
- end
6
- end
data/lib/epub/content_document/navigation.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'epub/content_document/xhtml'
2
-
3
1
  module EPUB
4
2
  module ContentDocument
5
3
  class Navigation < XHTML
@@ -11,15 +9,15 @@ module EPUB
11
9
  end
12
10
 
13
11
  def toc
14
- items.selector {|nav| nav.type == Navigation::Type::TOC}.first
12
+ navigations.selector {|nav| nav.type == Navigation::Type::TOC}.first
15
13
  end
16
14
 
17
15
  def page_list
18
- items.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
16
+ navigations.selector {|nav| nav.type == Nagivation::Type::PAGE_LIST}.first
19
17
  end
20
18
 
21
19
  def landmarks
22
- items.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
20
+ navigations.selector {|nav| nav.type == Navigation::Type::LANDMARKS}.first
23
21
  end
24
22
 
25
23
  # Enumerator version of toc
data/lib/epub/content_document/xhtml.rb CHANGED
@@ -3,15 +3,39 @@ module EPUB
3
3
  class XHTML
4
4
  attr_accessor :item
5
5
 
6
+ # @return [String] Returns the content string.
6
7
  def read
7
8
  item.read
8
9
  end
9
10
  alias raw_document read
10
11
 
11
- # referenced directly from spine?
12
+ # @return [true|false] Whether referenced directly from spine or not.
12
13
  def top_level?
13
14
  !! item.itemref
14
15
  end
16
+
17
+ # @return [String] Returns the value of title element.
18
+ # If none, returns empty string
19
+ def title
20
+ title_elem = Nokogiri.XML(read).search('title').first
21
+ if title_elem
22
+ title_elem.text
23
+ else
24
+ warn 'title element not found'
25
+ ''
26
+ end
27
+ end
28
+
29
+ # @return [REXML::Document] content as REXML::Document object
30
+ def rexml
31
+ require 'rexml/document'
32
+ @rexml ||= REXML::Document.new(raw_document)
33
+ end
34
+
35
+ # @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
36
+ def nokogiri
37
+ @nokogiri ||= Nokogiri.XML(raw_document)
38
+ end
15
39
  end
16
40
  end
17
41
  end
data/lib/epub/inspector.rb ADDED
@@ -0,0 +1,43 @@
1
+ module EPUB
2
+ module Inspector
3
+ INSTANCE_VARIABLES_OPTION = {:exclude => []}
4
+
5
+ def inspect_simply
6
+ "#<%{class}:%{object_id}>" % {
7
+ :class => self.class,
8
+ :object_id => inspect_object_id
9
+ }
10
+ end
11
+
12
+ def inspect_object_id
13
+ (__id__ << 1).to_s(16)
14
+ end
15
+
16
+ def inspect_instance_variables(options={})
17
+ options = INSTANCE_VARIABLES_OPTION.merge(options)
18
+ exclude = options[:exclude]
19
+
20
+ (instance_variables - exclude).map {|name|
21
+ value = instance_variable_get(name)
22
+ "#{name}=#{value.inspect}"
23
+ }.join(' ')
24
+ end
25
+
26
+ module PublicationModel
27
+ class << self
28
+ def included(mod)
29
+ mod.__send__ :include, Inspector
30
+ end
31
+ end
32
+
33
+ def inspect
34
+ "#<%{class}:%{object_id} @package=%{package} %{attributes}>" % {
35
+ :class => self.class,
36
+ :package => package.inspect_simply,
37
+ :object_id => inspect_object_id,
38
+ :attributes => inspect_instance_variables(exclude: [:@package])
39
+ }
40
+ end
41
+ end
42
+ end
43
+ end
data/lib/epub/ocf/container.rb CHANGED
@@ -17,6 +17,8 @@ module EPUB
17
17
  class Rootfile
18
18
  attr_accessor :full_path, :media_type
19
19
 
20
+ # @param full_path [Addressable::URI|nil]
21
+ # @param media_type [String]
20
22
  def initialize(full_path=nil, media_type=EPUB::MediaType::ROOTFILE)
21
23
  @full_path, @media_type = full_path, media_type
22
24
  end
data/lib/epub/parser.rb CHANGED
@@ -50,8 +50,6 @@ module EPUB
50
50
  Zip::Archive.open @filepath do |zip|
51
51
  @book.ocf = OCF.parse(zip)
52
52
  @book.package = Publication.parse(zip, @book.rootfile_path)
53
- # @book.content_document =??? parse_content_document
54
- # ...
55
53
  end
56
54
 
57
55
  @book
data/lib/epub/parser/content_document.rb CHANGED
@@ -32,8 +32,6 @@ module EPUB
32
32
  # parse_content_document(document)
33
33
  if @item.nav?
34
34
  content_document.navigations = parse_navigations(document)
35
- else
36
- raise NotImplementedError
37
35
  end
38
36
  content_document
39
37
  end
@@ -71,12 +69,12 @@ module EPUB
71
69
  when 'canvas'
72
70
  when 'embed'
73
71
  when 'iframe'
74
- item.text = (extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')).to_s
72
+ item.text = extract_attribute(embedded_content, 'name') || extract_attribute(embedded_content, 'srcdoc')
75
73
  when 'img'
76
- item.text = extract_attribute(embedded_content, 'alt').to_s
74
+ item.text = extract_attribute(embedded_content, 'alt')
77
75
  when 'math'
78
76
  when 'object'
79
- item.text = extract_attribute(embedded_content, 'name').to_s
77
+ item.text = extract_attribute(embedded_content, 'name')
80
78
  when 'svg'
81
79
  when 'video'
82
80
  else
data/lib/epub/parser/ocf.rb CHANGED
@@ -39,10 +39,8 @@ module EPUB
39
39
  doc = Nokogiri.XML(xml)
40
40
  doc.xpath('/ocf:container/ocf:rootfiles/ocf:rootfile', EPUB::NAMESPACES).each do |elem|
41
41
  rootfile = EPUB::OCF::Container::Rootfile.new
42
- %w[full-path media-type].each do |attr|
43
- value = extract_attribute(elem, attr)
44
- rootfile.__send__(attr.gsub(/-/, '_') + '=', value)
45
- end
42
+ rootfile.full_path = Addressable::URI.parse(extract_attribute(elem, 'full-path'))
43
+ rootfile.media_type = extract_attribute(elem, 'media-type')
46
44
  container.rootfiles << rootfile
47
45
  end
48
46
 
data/lib/epub/parser/publication.rb CHANGED
@@ -41,10 +41,7 @@ module EPUB
41
41
  end
42
42
  @unique_identifier_id = elem['unique-identifier']
43
43
  @package.prefix = parse_prefix(extract_attribute(elem, 'prefix'))
44
- if @package.prefix.key? 'rendition'
45
- require 'epub/publication/fixed_layout'
46
- EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout
47
- end
44
+ EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if @package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
48
45
 
49
46
  @package
50
47
  end
@@ -76,7 +73,11 @@ module EPUB
76
73
  metadata.titles.each {|t| id_map[t.id] = {metadata: t} if t.respond_to?(:id) && t.id}
77
74
 
78
75
  metadata.languages = elem.xpath('./dc:language', EPUB::NAMESPACES).collect do |e|
79
- e.content
76
+ language = EPUB::Publication::Package::Metadata::DCMES.new
77
+ language.content = e.content
78
+ language.id = e['id'] if e['id']
79
+
80
+ language
80
81
  end
81
82
  metadata.languages.each {|l| id_map[l.id] = {metadata: l} if l.respond_to?(:id) && l.id}
82
83
 
@@ -199,8 +200,7 @@ module EPUB
199
200
  @doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES).each do |elem|
200
201
  media_type = EPUB::Publication::Package::Bindings::MediaType.new
201
202
  media_type.media_type = extract_attribute(elem, 'media-type')
202
- items = @package.manifest.items
203
- media_type.handler = items.detect {|item| item.id == extract_attribute(elem, 'handler')}
203
+ media_type.handler = @package.manifest[extract_attribute(elem, 'handler')]
204
204
  bindings << media_type
205
205
  end
206
206
 
data/lib/epub/parser/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.1.4"
3
+ VERSION = "0.1.5"
4
4
  end
5
5
  end
data/lib/epub/publication.rb CHANGED
@@ -1 +1,2 @@
1
1
  require 'epub/publication/package'
2
+ require 'epub/publication/fixed_layout'
data/lib/epub/publication/package.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  module EPUB
2
2
  module Publication
3
3
  class Package
4
+ include Inspector
5
+
4
6
  CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
5
7
  RESERVED_VOCABULARY_PREFIXES = {
6
8
  '' => 'http://idpf.org/epub/vocab/package/#',
@@ -23,7 +25,7 @@ module EPUB
23
25
  end
24
26
  end
25
27
 
26
- attr_accessor :book,
28
+ attr_accessor :book,
27
29
  :version, :prefix, :xml_lang, :dir, :id
28
30
  attr_reader *CONTENT_MODELS
29
31
  alias lang xml_lang
@@ -40,6 +42,23 @@ module EPUB
40
42
  def unique_identifier
41
43
  @metadata.unique_identifier
42
44
  end
45
+
46
+ def inspect
47
+ "#<%{class}:%{object_id} %{attributes} %{models}>" % {
48
+ :class => self.class,
49
+ :object_id => inspect_object_id,
50
+ :attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
51
+ :models => inspect_models
52
+ }
53
+ end
54
+
55
+ def inspect_models
56
+ CONTENT_MODELS.map {|name|
57
+ model = __send__(name)
58
+ representation = model.nil? ? model.inspect : model.inspect_simply
59
+ "@#{name}=#{representation}"
60
+ }.join(' ')
61
+ end
43
62
  end
44
63
  end
45
64
  end
data/lib/epub/publication/package/bindings.rb CHANGED
@@ -2,10 +2,14 @@ module EPUB
2
2
  module Publication
3
3
  class Package
4
4
  class Bindings
5
+ include Inspector::PublicationModel
5
6
  attr_accessor :package
6
7
 
8
+ def initialize
9
+ @media_types = {}
10
+ end
11
+
7
12
  def <<(media_type)
8
- @media_types ||= {}
9
13
  @media_types[media_type.media_type] = media_type
10
14
  end
11
15
 
data/lib/epub/publication/package/guide.rb CHANGED
@@ -4,6 +4,7 @@ module EPUB
4
4
  module Publication
5
5
  class Package
6
6
  class Guide
7
+ include Inspector::PublicationModel
7
8
  attr_accessor :package
8
9
 
9
10
  def references
data/lib/epub/publication/package/manifest.rb CHANGED
@@ -6,12 +6,17 @@ module EPUB
6
6
  module Publication
7
7
  class Package
8
8
  class Manifest
9
+ include Inspector::PublicationModel
10
+
9
11
  attr_accessor :package,
10
12
  :id
11
13
 
14
+ def initialize
15
+ @items = {}
16
+ end
17
+
12
18
  # @return self
13
19
  def <<(item)
14
- @items ||= {}
15
20
  item.manifest = self
16
21
  @items[item.id] = item
17
22
  self
@@ -29,6 +34,12 @@ module EPUB
29
34
  items.selector {|i| i.properties.include? 'cover-image'}.first
30
35
  end
31
36
 
37
+ def each_item
38
+ @items.each_value do |item|
39
+ yield item
40
+ end
41
+ end
42
+
32
43
  def items
33
44
  @items.values
34
45
  end
@@ -38,12 +49,15 @@ module EPUB
38
49
  end
39
50
 
40
51
  class Item
52
+ include Inspector
53
+
41
54
  # @!attribute [rw] manifest
42
55
  # @return [Manifest] Returns the value of manifest
43
56
  # @!attribute [rw] id
44
57
  # @return [String] Returns the value of id
45
58
  # @!attribute [rw] href
46
- # @return [Addressable::URI] Returns the value of href
59
+ # @return [Addressable::URI] Returns the value of href,
60
+ # which is relative path from rootfile(OPF file)
47
61
  # @!attribute [rw] media_type
48
62
  # @return [String] Returns the value of media_type
49
63
  # @!attribute [rw] properties
@@ -55,19 +69,31 @@ module EPUB
55
69
  attr_accessor :manifest,
56
70
  :id, :href, :media_type, :fallback, :properties, :media_overlay
57
71
 
72
+ def initialize
73
+ @properties = []
74
+ end
75
+
58
76
  # @todo Handle circular fallback chain
59
77
  def fallback_chain
60
78
  @fallback_chain ||= traverse_fallback_chain([])
61
79
  end
62
80
 
81
+ # full path in archive
82
+ def entry_name
83
+ rootfile = manifest.package.book.ocf.container.rootfile.full_path
84
+ Addressable::URI.unescape(rootfile + href.normalize.request_uri)
85
+ end
86
+
63
87
  def read
64
- rootfile = Addressable::URI.parse(manifest.package.book.ocf.container.rootfile.full_path)
65
88
  Zip::Archive.open(manifest.package.book.epub_file) {|zip|
66
- path = Addressable::URI.unescape(rootfile + href.normalize.request_uri)
67
- zip.fopen(path).read
89
+ zip.fopen(entry_name).read
68
90
  }
69
91
  end
70
92
 
93
+ def xhtml?
94
+ media_type == 'application/xhtml+xml'
95
+ end
96
+
71
97
  def nav?
72
98
  properties.include? 'nav'
73
99
  end
@@ -101,6 +127,15 @@ module EPUB
101
127
  manifest.package.spine.itemrefs.find {|itemref| itemref.idref == id}
102
128
  end
103
129
 
130
+ def inspect
131
+ "#<%{class}:%{object_id} %{manifest} %{attributes}>" % {
132
+ :class => self.class,
133
+ :object_id => inspect_object_id,
134
+ :manifest => "@manifest=#{@manifest.inspect_simply}",
135
+ :attributes => inspect_instance_variables(exclude: [:@manifest])
136
+ }
137
+ end
138
+
104
139
  protected
105
140
 
106
141
  def traverse_fallback_chain(chain)