epub-parser-io 0.1.6a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/.gemtest +0 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +4 -0
  5. data/.yardopts +10 -0
  6. data/CHANGELOG.markdown +61 -0
  7. data/Gemfile +2 -0
  8. data/MIT-LICENSE +7 -0
  9. data/README.markdown +174 -0
  10. data/Rakefile +68 -0
  11. data/bin/epub-open +25 -0
  12. data/bin/epubinfo +64 -0
  13. data/docs/EpubOpen.markdown +43 -0
  14. data/docs/Epubinfo.markdown +37 -0
  15. data/docs/FixedLayout.markdown +96 -0
  16. data/docs/Home.markdown +128 -0
  17. data/docs/Item.markdown +80 -0
  18. data/docs/Navigation.markdown +58 -0
  19. data/docs/Publication.markdown +54 -0
  20. data/epub-parser.gemspec +49 -0
  21. data/features/epubinfo.feature +6 -0
  22. data/features/step_definitions/epubinfo_steps.rb +5 -0
  23. data/features/support/env.rb +1 -0
  24. data/lib/epub/book/features.rb +85 -0
  25. data/lib/epub/book.rb +7 -0
  26. data/lib/epub/constants.rb +48 -0
  27. data/lib/epub/content_document/navigation.rb +104 -0
  28. data/lib/epub/content_document/xhtml.rb +41 -0
  29. data/lib/epub/content_document.rb +2 -0
  30. data/lib/epub/inspector.rb +45 -0
  31. data/lib/epub/ocf/container.rb +28 -0
  32. data/lib/epub/ocf/encryption.rb +7 -0
  33. data/lib/epub/ocf/manifest.rb +6 -0
  34. data/lib/epub/ocf/metadata.rb +6 -0
  35. data/lib/epub/ocf/rights.rb +6 -0
  36. data/lib/epub/ocf/signatures.rb +6 -0
  37. data/lib/epub/ocf.rb +8 -0
  38. data/lib/epub/parser/content_document.rb +111 -0
  39. data/lib/epub/parser/ocf.rb +73 -0
  40. data/lib/epub/parser/publication.rb +200 -0
  41. data/lib/epub/parser/utils.rb +20 -0
  42. data/lib/epub/parser/version.rb +5 -0
  43. data/lib/epub/parser.rb +103 -0
  44. data/lib/epub/publication/fixed_layout.rb +208 -0
  45. data/lib/epub/publication/package/bindings.rb +31 -0
  46. data/lib/epub/publication/package/guide.rb +51 -0
  47. data/lib/epub/publication/package/manifest.rb +180 -0
  48. data/lib/epub/publication/package/metadata.rb +170 -0
  49. data/lib/epub/publication/package/spine.rb +106 -0
  50. data/lib/epub/publication/package.rb +68 -0
  51. data/lib/epub/publication.rb +2 -0
  52. data/lib/epub.rb +14 -0
  53. data/man/epubinfo.1.ronn +19 -0
  54. data/schemas/epub-nav-30.rnc +10 -0
  55. data/schemas/epub-nav-30.sch +72 -0
  56. data/schemas/epub-xhtml-30.sch +377 -0
  57. data/schemas/ocf-container-30.rnc +16 -0
  58. data/test/fixtures/book/META-INF/container.xml +6 -0
  59. data/test/fixtures/book/OPS/%E6%97%A5%E6%9C%AC%E8%AA%9E.xhtml +10 -0
  60. data/test/fixtures/book/OPS/case-sensitive.xhtml +9 -0
  61. data/test/fixtures/book/OPS/containing space.xhtml +10 -0
  62. data/test/fixtures/book/OPS/containing%20space.xhtml +10 -0
  63. data/test/fixtures/book/OPS/nav.xhtml +28 -0
  64. data/test/fixtures/book/OPS//343/203/253/343/203/274/343/203/210/343/203/225/343/202/241/343/202/244/343/203/253.opf +119 -0
  65. data/test/fixtures/book/OPS//346/227/245/346/234/254/350/252/236.xhtml +10 -0
  66. data/test/fixtures/book/mimetype +1 -0
  67. data/test/helper.rb +9 -0
  68. data/test/test_content_document.rb +92 -0
  69. data/test/test_epub.rb +21 -0
  70. data/test/test_fixed_layout.rb +257 -0
  71. data/test/test_inspect.rb +121 -0
  72. data/test/test_parser.rb +60 -0
  73. data/test/test_parser_content_document.rb +36 -0
  74. data/test/test_parser_fixed_layout.rb +16 -0
  75. data/test/test_parser_ocf.rb +38 -0
  76. data/test/test_parser_publication.rb +247 -0
  77. data/test/test_publication.rb +324 -0
  78. metadata +445 -0
@@ -0,0 +1,170 @@
1
+ require 'set'
2
+
3
+ module EPUB
4
+ module Publication
5
+ class Package
6
+ class Metadata
7
+ include Inspector::PublicationModel
8
+
9
+ DC_ELEMS = [:identifiers, :titles, :languages] +
10
+ [:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
11
+ :relations, :rights, :sources, :subjects, :types]
12
+ attr_accessor :package, :unique_identifier, :metas, :links,
13
+ *(DC_ELEMS.collect {|elem| "dc_#{elem}"})
14
+ DC_ELEMS.each do |elem|
15
+ alias_method elem, "dc_#{elem}"
16
+ alias_method "#{elem}=", "dc_#{elem}="
17
+ end
18
+
19
+ def initialize
20
+ (DC_ELEMS + [:metas, :links]).each do |elem|
21
+ __send__ "#{elem}=", []
22
+ end
23
+ end
24
+
25
+ def title
26
+ return extended_title unless extended_title.empty?
27
+ compositted = titles.select {|title| title.display_seq}.sort.join("\n")
28
+ return compositted unless compositted.empty?
29
+ return main_title unless main_title.empty?
30
+ titles.sort.join("\n")
31
+ end
32
+
33
+ %w[ main short collection edition extended ].each do |type|
34
+ define_method "#{type}_title" do
35
+ titles.select {|title| title.title_type.to_s == type}.sort.join(' ')
36
+ end
37
+ end
38
+
39
+ def subtitle
40
+ titles.select {|title| title.title_type.to_s == 'subtitle'}.sort.join(' ')
41
+ end
42
+
43
+ def description
44
+ descriptions.join ' '
45
+ end
46
+
47
+ def date
48
+ dates.first
49
+ end
50
+
51
+ def language
52
+ languages.first
53
+ end
54
+
55
+ def to_h
56
+ DC_ELEMS.inject({}) do |hsh, elem|
57
+ hsh[elem] = __send__(elem)
58
+ hsh
59
+ end
60
+ end
61
+
62
+ def primary_metas
63
+ metas.select {|meta| meta.primary_expression?}
64
+ end
65
+
# Mixin for metadata elements that can be the target of refining
# expressions. Objects that refine the includer register themselves in
# {#refiners}; one reader per entry of PROPERTIES looks a refiner up by
# its +property+ value.
module Refinee
  # Refining property names; each gets a reader/writer pair whose method
  # name replaces "-" with "_" (e.g. "display-seq" => #display_seq).
  PROPERTIES = %w[ alternate-script display-seq file-as group-position identifier-type meta-auth role title-type ]

  attr_writer :refiners

  # @return [Set] objects refining this one; created lazily on first access
  def refiners
    @refiners ||= Set.new
  end

  PROPERTIES.each do |voc|
    met = voc.gsub(/-/, '_')
    # NOTE(review): the writer only stores an instance variable; the reader
    # below never consults it and always searches #refiners.
    attr_writer met
    # @return [Object, nil] the first refiner whose +property+ equals the
    #   vocabulary term, or nil when none is registered
    define_method met do
      # Set has no #selector method; Enumerable#find returns the first match.
      refiners.find {|refiner| refiner.property == voc}
    end
  end
end
83
+
84
+ class DCMES
85
+ include Refinee
86
+
87
+ attr_accessor :content, :id, :lang, :dir
88
+
89
+ def to_s
90
+ content.to_s
91
+ end
92
+ end
93
+
94
+ class Identifier < DCMES
95
+ # @note This is ad-hoc
96
+ # @todo Define and include OPF module for opf:scheme attribute
97
+ # @todo Define a general way to handle identifier-type refiners
98
+ attr_accessor :scheme
99
+
100
+ # @note This is ad-hoc
101
+ # @todo Define and include OPF module for opf:scheme attribute
102
+ # @todo Define a general way to handle identifier-type refiners
103
+ def isbn?
104
+ refiners.any? {|refiner|
105
+ refiner.property == 'identifier-type' and
106
+ refiner.scheme == 'onix:codelist5' and
107
+ %w[02 15].include? refiner.content
108
+ } or
109
+ scheme == 'ISBN' or
110
+ content.to_s.downcase.start_with? 'urn:isbn'
111
+ end
112
+ end
113
+
114
+ class Title < DCMES
115
+ include Comparable
116
+
117
+ def <=>(other)
118
+ return 1 if other.display_seq.nil?
119
+ return -1 if display_seq.nil?
120
+ display_seq.to_s.to_i <=> other.display_seq.to_s.to_i
121
+ end
122
+ end
123
+
124
+ class Meta
125
+ include Refinee
126
+
127
+ attr_accessor :property, :id, :scheme, :content
128
+ attr_reader :refines
129
+
130
+ def refines=(refinee)
131
+ @refines = refinee
132
+ refinee.refiners << self
133
+ end
134
+
135
+ def refines?
136
+ ! refines.nil?
137
+ end
138
+ alias subexpression? refines?
139
+
140
+ def primary_expression?
141
+ ! subexpression?
142
+ end
143
+
144
+ def inspect
145
+ ivs = instance_variables.map {|iv|
146
+ [iv, instance_variable_get(iv).inspect].join('=')
147
+ }.join(' ')
148
+ '<#%s:%#0x %s>' % [self.class, __id__, ivs]
149
+ end
150
+
151
+ def to_s
152
+ content.to_s
153
+ end
154
+ end
155
+
156
+ class Link
157
+ include Refinee
158
+
159
+ attr_accessor :href, :rel, :id, :media_type
160
+ attr_reader :refines
161
+
162
+ def refines=(refinee)
163
+ @refines = refinee
164
+ refinee.refiners << self
165
+ end
166
+ end
167
+ end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,106 @@
1
+ require 'set'
2
+
3
+ module EPUB
4
+ module Publication
5
+ class Package
6
+ class Spine
7
+ include Inspector::PublicationModel
8
+ attr_accessor :package,
9
+ :id, :toc, :page_progression_direction
10
+ attr_reader :itemrefs
11
+
12
+ def initialize
13
+ @itemrefs = []
14
+ end
15
+
16
+ # @return self
17
+ def <<(itemref)
18
+ itemref.spine = self
19
+ @itemrefs << itemref
20
+ self
21
+ end
22
+
23
+ # @yield [itemref]
24
+ # @yieldparam [Itemref] itemref
25
+ # @yieldreturn [Object] returns the last value of block
26
+ # @return [Object, Enumerator]
27
+ # returns the last value of block when block given, Enumerator when not
28
+ def each_itemref
29
+ if block_given?
30
+ itemrefs.each {|itemref| yield itemref}
31
+ else
32
+ enum_for :each_itemref
33
+ end
34
+ end
35
+
# Collects the item referred to by each of {#itemrefs}, in spine order.
#
# @return [Array] the result of calling +item+ on every itemref
def items
  # Array has no #collector method; map builds the list of referred items.
  itemrefs.map {|itemref| itemref.item}
end
41
+
# One entry of the spine. It points at an item by +idref+ and carries a
# set of property strings, among them the optional "page-spread-*" hint.
class Itemref
  PAGE_SPREAD_PROPERTIES = ['left'.freeze, 'right'.freeze].freeze
  PAGE_SPREAD_PREFIX = 'page-spread-'.freeze

  attr_accessor :spine,
                :idref, :linear, :id
  attr_reader :properties

  def initialize
    @properties = Set.new
  end

  # @param props [Set, Enumerable] stored as-is when already a Set,
  #   otherwise copied into a new Set
  def properties=(props)
    @properties = props.kind_of?(Set) ? props : Set.new(props)
  end

  # @return [true|false]
  def linear?
    !! linear
  end

  # Looks the item up through the owning spine's package manifest on first
  # use and memoizes a non-nil result.
  #
  # @return [Package::Manifest::Item] item referred by this object
  def item
    @item ||= @spine.package.manifest[idref]
  end

  # Points this itemref at +item+.
  #
  # Besides copying the item's id into +idref+, the memoized item is
  # replaced; otherwise {#item} would keep returning the previously cached
  # object after a reassignment.
  # NOTE(review): assigning +idref+ directly still leaves a cached item in
  # place.
  #
  # @param item [#id]
  def item=(item)
    self.idref = item.id
    @item = item
  end

  # Two itemrefs are equal when spine, idref, id, linearity and properties
  # all match.
  def ==(other)
    same_attributes = [:spine, :idref, :id].all? {|meth|
      self.__send__(meth) == other.__send__(meth)
    }
    same_attributes &&
      (linear? == other.linear?) &&
      (properties == other.properties)
  end

  # @return ["left", "right", nil] the suffix of the first property that
  #   starts with PAGE_SPREAD_PREFIX, or nil when there is none
  def page_spread
    property = properties.find {|prop| prop.start_with? PAGE_SPREAD_PREFIX}
    property ? property.sub(/\A#{Regexp.escape(PAGE_SPREAD_PREFIX)}/, '') : nil
  end

  # @param new_value ["left", "right", nil] nil removes every
  #   "page-spread-*" property
  # @raise [RuntimeError] when +new_value+ is neither nil nor listed in
  #   PAGE_SPREAD_PROPERTIES
  def page_spread=(new_value)
    if new_value.nil?
      properties.delete_if {|prop| prop.start_with? PAGE_SPREAD_PREFIX}
      return new_value
    end

    raise "Unsupported page-spread property: #{new_value}" unless PAGE_SPREAD_PROPERTIES.include? new_value

    # Drop only the competing known values, then add the requested one.
    props_to_be_deleted = (PAGE_SPREAD_PROPERTIES - [new_value]).map {|prop| "#{PAGE_SPREAD_PREFIX}#{prop}"}
    properties.delete_if {|prop| props_to_be_deleted.include? prop}
    # Set#<< ignores duplicates, so no membership check is needed.
    properties << "#{PAGE_SPREAD_PREFIX}#{new_value}"
    new_value
  end
end
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,68 @@
1
+ module EPUB
2
+ module Publication
3
+ class Package
4
+ include Inspector
5
+
6
+ CONTENT_MODELS = [:metadata, :manifest, :spine, :guide, :bindings]
7
+ RESERVED_VOCABULARY_PREFIXES = {
8
+ '' => 'http://idpf.org/epub/vocab/package/#',
9
+ 'dcterms' => 'http://purl.org/dc/terms/',
10
+ 'marc' => 'http://id.loc.gov/vocabulary/',
11
+ 'media' => 'http://www.idpf.org/epub/vocab/overlays/#',
12
+ 'onix' => 'http://www.editeur.org/ONIX/book/codelists/current.html#',
13
+ 'xsd' => 'http://www.w3.org/2001/XMLSchema#'
14
+ }
15
+
16
+
17
+ class << self
18
+ def define_content_model(model_name)
19
+ define_method "#{model_name}=" do |model|
20
+ current_model = __send__(model_name)
21
+ current_model.package = nil if current_model
22
+ model.package = self
23
+ instance_variable_set "@#{model_name}", model
24
+ end
25
+ end
26
+ end
27
+
28
+ attr_accessor :book,
29
+ :version, :prefix, :xml_lang, :dir, :id
30
+ attr_reader *CONTENT_MODELS
31
+ alias lang xml_lang
32
+ alias lang= xml_lang=
33
+
34
+ CONTENT_MODELS.each do |model|
35
+ define_content_model model
36
+ end
37
+
38
+ def initialize
39
+ @prefix = {}
40
+ end
41
+
42
+ def unique_identifier
43
+ @metadata.unique_identifier
44
+ end
45
+
46
+ def inspect
47
+ "#<%{class}:%{object_id} %{attributes} %{models}>" % {
48
+ :class => self.class,
49
+ :object_id => inspect_object_id,
50
+ :attributes => inspect_instance_variables(exclude: CONTENT_MODELS.map {|model| :"@#{model}"}),
51
+ :models => inspect_models
52
+ }
53
+ end
54
+
55
+ def inspect_models
56
+ CONTENT_MODELS.map {|name|
57
+ model = __send__(name)
58
+ representation = model.nil? ? model.inspect : model.inspect_simply
59
+ "@#{name}=#{representation}"
60
+ }.join(' ')
61
+ end
62
+ end
63
+ end
64
+ end
65
+
66
+ EPUB::Publication::Package::CONTENT_MODELS.each do |f|
67
+ require_relative "package/#{f}"
68
+ end
@@ -0,0 +1,2 @@
1
+ require 'epub/publication/package'
2
+ require 'epub/publication/fixed_layout'
data/lib/epub.rb ADDED
@@ -0,0 +1,14 @@
1
+ require 'epub/inspector'
2
+ require 'epub/ocf'
3
+ require 'epub/publication'
4
+ require 'epub/content_document'
5
+ require 'epub/book/features'
6
+
7
+ module EPUB
8
+ class << self
9
+ def included(base)
10
+ warn 'Including EPUB module is deprecated. Include EPUB::Book::Features instead.'
11
+ base.__send__ :include, EPUB::Book::Features
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,19 @@
1
+ epubinfo(1) -- show metadata of an EPUB book
2
+ ============================================
3
+
4
+ SYNOPSIS
5
+ --------
6
+
7
+ `epubinfo` <filename><br>
8
+ `epubinfo` `-f`|`--format=<format>` <filename>
9
+
10
+ DESCRIPTION
11
+ -----------
12
+
13
+ **epubinfo** shows metadata of an EPUB book.
14
+
15
+ OPTIONS
16
+ -------
17
+
18
+ * `-f`, `--format`:
19
+ Output format; currently "line"(for human reading), "json" and "yaml" are acceptable
@@ -0,0 +1,10 @@
1
+
2
+
3
+ default namespace = "http://www.w3.org/1999/xhtml"
4
+ namespace epub = "http://www.idpf.org/2007/ops"
5
+
6
+ include "epub-xhtml-30.rnc" {
7
+ html5.nav.content = html5.headings.class?, html5.ol
8
+ html5.oli.content = html5.a.phrasing | ((html5.a.phrasing | html5.span), html5.ol)
9
+ }
10
+
@@ -0,0 +1,72 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <schema xmlns="http://purl.oclc.org/dsdl/schematron">
3
+
4
+ <ns uri="http://www.w3.org/1999/xhtml" prefix="html"/>
5
+ <ns uri="http://www.idpf.org/2007/ops" prefix="epub"/>
6
+
7
+ <pattern id="nav-ocurrence">
8
+ <rule context="html:body">
9
+ <assert test="count(.//html:nav[@epub:type='toc']) = 1">Exactly one 'toc' nav element
10
+ must be present</assert>
11
+ <assert test="count(.//html:nav[@epub:type='page-list']) &lt; 2">Multiple occurrences of
12
+ the 'page-list' nav element</assert>
13
+ <assert test="count(.//html:nav[@epub:type='landmarks']) &lt; 2">Multiple occurrences of
14
+ the 'landmarks' nav element</assert>
15
+ </rule>
16
+ </pattern>
17
+
<!-- A span inside a nav may only label a flat sublist, so it must not contain a nested list.
     The name test carries the html: prefix because an unprefixed name in XPath 1.0 selects
     elements in no namespace and therefore never matches an XHTML ol. -->
<pattern id="span-no-sublist">
  <rule context="html:body//html:nav//html:span">
    <assert test="count(.//html:ol) = 0"> The span element must only be used as heading for flat
      sublists (not hierarchical navigation structures) </assert>
  </rule>
</pattern>
24
+
25
+ <pattern id="landmarks">
26
+ <rule context="html:nav[@epub:type='landmarks']//html:ol//html:a">
27
+ <assert test="@epub:type">Missing epub:type attribute on anchor inside 'landmarks' nav
28
+ element</assert>
29
+ </rule>
30
+ </pattern>
31
+
32
+ <pattern id="link-labels">
33
+ <rule context="html:nav//html:ol//html:a">
34
+ <assert test="string-length(normalize-space(string(.))) > 0">Anchors within nav elements
35
+ must contain text</assert>
36
+ </rule>
37
+ </pattern>
38
+
39
+ <pattern id="span-labels">
40
+ <rule context="html:nav//html:ol//html:span">
41
+ <assert test="string-length(normalize-space(string(.))) > 0">Spans within nav elements
42
+ must contain text</assert>
43
+ </rule>
44
+ </pattern>
45
+
46
+ <pattern id="req-heading">
47
+ <rule
48
+ context="html:nav[not(@epub:type = 'toc') and not (@epub:type = 'page-list') and not (@epub:type = 'landmarks')]">
49
+ <let name="fc" value="local-name(./*[1])"/>
50
+ <assert test="(starts-with($fc,'h') and string-length($fc) = 2) or ($fc = 'hgroup')">nav
51
+ elements other than 'toc', 'page-list' and 'landmarks' must contain a heading as the
52
+ first child</assert>
53
+ </rule>
54
+ </pattern>
55
+
56
+ <pattern id="heading-content">
57
+ <rule context="html:h1|html:h2|html:h3|html:h4|html:h5|html:h6|html:hgroup">
58
+ <assert test="string-length(normalize-space(string(.))) > 0">Heading elements must
59
+ contain text</assert>
60
+ </rule>
61
+ </pattern>
62
+
63
+
64
+ <!-- warnings mode <pattern id="page-list-flat">
65
+ <rule context="html:body//html:nav[@epub:type='page-list']">
66
+ <assert test="count(.//html:ol) = 1">The page-list navigation structure should be a
67
+ list, not a nested hierarchy</assert>
68
+ </rule>
69
+ </pattern>
70
+ -->
71
+
72
+ </schema>