epub-parser 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,8 @@ module EPUB
7
7
  'epub' => 'http://www.idpf.org/2007/ops',
8
8
  'm' => 'http://www.w3.org/1998/Math/MathML',
9
9
  'svg' => 'http://www.w3.org/2000/svg',
10
- 'smil' => 'http://www.w3.org/ns/SMIL'
10
+ 'smil' => 'http://www.w3.org/ns/SMIL',
11
+ 'metadata' => 'http://www.idpf.org/2013/metadata'
11
12
  }
12
13
 
13
14
  module MediaType
@@ -25,7 +25,6 @@ module EPUB
25
25
  end
26
26
 
27
27
  module PublicationModel
28
- TEMPLATE = "#<%{class}:%{object_id} @package=%{package} %{attributes}>"
29
28
  class << self
30
29
  def included(mod)
31
30
  mod.__send__ :include, Inspector
@@ -33,13 +32,19 @@ module EPUB
33
32
  end
34
33
 
35
34
  def inspect
36
- TEMPLATE % {
35
+ template % {
37
36
  :class => self.class,
38
- :package => package.inspect_simply,
37
+ :package => (package && package.inspect_simply),
39
38
  :object_id => inspect_object_id,
40
39
  :attributes => inspect_instance_variables(exclude: [:@package])
41
40
  }
42
41
  end
42
+
43
# Builds the format string that #inspect fills in.
#
# The " @package=%{package}" segment is only present when +package+ is
# truthy. The string is assembled by interpolation instead of appending to
# a string literal with <<, so it keeps working when string literals are
# frozen (e.g. under `# frozen_string_literal: true`).
#
# @return [String] a format string with %{class}, %{object_id},
#   optionally %{package}, and %{attributes} references
def template
  package_part = package ? ' @package=%{package}' : ''
  "#<%{class}:%{object_id}#{package_part} %{attributes}>"
end
43
48
  end
44
49
  end
45
50
  end
@@ -0,0 +1,178 @@
1
+ require 'set'
2
+
3
+ module EPUB
4
+ class Metadata
5
+ include Inspector::PublicationModel
6
+
7
# Plural accessor names of the Dublin Core elements held by this metadata
# object. Frozen so the shared constant cannot be mutated by accident.
DC_ELEMS = (
  [:identifiers, :titles, :languages] +
  [:contributors, :coverages, :creators, :dates, :descriptions, :formats, :publishers,
   :relations, :rights, :sources, :subjects, :types]
).freeze
# Every Dublin Core element gets a "dc_"-prefixed reader/writer pair ...
attr_accessor :package, :unique_identifier, :metas, :links,
              *(DC_ELEMS.collect {|elem| "dc_#{elem}"})
# ... and the same pair is also reachable under the unprefixed name.
DC_ELEMS.each do |elem|
  alias_method elem, "dc_#{elem}"
  alias_method "#{elem}=", "dc_#{elem}="
end
16
+
17
# Starts every Dublin Core collection, plus +metas+ and +links+, as a fresh
# empty Array by calling the corresponding writer.
def initialize
  [*DC_ELEMS, :metas, :links].each do |name|
    __send__("#{name}=", [])
  end
end
22
+
23
# Joins the string forms of +unique_identifier+ and +modified+ with "@".
#
# @return [String]
def release_identifier
  identifier = unique_identifier
  timestamp = modified
  "#{identifier}@#{timestamp}"
end
alias package_identifier release_identifier
27
+
28
# Picks the text to present as the title, trying in order:
#
# 1. the extended title, when non-empty;
# 2. all titles that have a display_seq, sorted and joined by newlines;
# 3. the main title, when non-empty;
# 4. all titles, sorted and joined by newlines.
#
# +extended_title+ and +main_title+ are each evaluated once and reused,
# instead of being recomputed for the emptiness check and the return value.
#
# @return [String]
def title
  extended = extended_title
  return extended unless extended.empty?

  sequenced = titles.select {|candidate| candidate.display_seq}.sort.join("\n")
  return sequenced unless sequenced.empty?

  main = main_title
  return main unless main.empty?

  titles.sort.join("\n")
end
35
+
36
# Generates main_title, short_title, collection_title, edition_title and
# extended_title. Each one selects the titles whose title_type, as a string,
# equals the type name, sorts them and joins them with a single space.
%w[main short collection edition extended].each do |type|
  define_method("#{type}_title") do
    matching = titles.select {|candidate| candidate.title_type.to_s == type}
    matching.sort.join(' ')
  end
end
41
+
42
# Sorted titles whose title_type, as a string, is "subtitle", joined with a
# single space.
#
# @return [String]
def subtitle
  subtitles = titles.select {|candidate| candidate.title_type.to_s == 'subtitle'}
  subtitles.sort.join(' ')
end
45
+
46
# All descriptions joined with a single space.
#
# @return [String]
def description
  parts = descriptions
  parts.join(' ')
end
49
+
50
# The first entry of +dates+ (nil when there is none).
def date
  all_dates = dates
  all_dates.first
end
53
+
54
# The first entry of +languages+ (nil when there is none).
def language
  all_languages = languages
  all_languages.first
end
57
+
58
# Finds the first meta whose property is "dcterms:modified" and whose own
# set of refiners is empty.
#
# NOTE(review): the check is on the meta's refiners (things refining it), not
# on whether the meta itself refines something — confirm this is the
# intended way to pick the publication-level modification date.
#
# @return [Object, nil] the matching meta, or nil when none matches
def modified
  metas.find do |candidate|
    next false unless candidate.property == 'dcterms:modified'

    candidate.refiners.empty?
  end
end
64
+
65
# Maps every name in DC_ELEMS to the value returned by the reader of the
# same name.
#
# @return [Hash{Symbol => Object}]
def to_h
  DC_ELEMS.each_with_object({}) do |elem, hsh|
    hsh[elem] = __send__(elem)
  end
end
71
+
72
# The metas that answer true to primary_expression?.
#
# @return [Array]
def primary_metas
  metas.select(&:primary_expression?)
end
75
+
76
# Mixin for objects that other metadata items can refine.
#
# It keeps a Set of refiners and, for every name in PROPERTIES, defines a
# reader (dashes turned into underscores, e.g. +title_type+) that returns the
# first refiner whose +property+ equals that name, plus a writer of the same
# name.
module Refinee
  PROPERTIES = %w[alternate-script display-seq file-as group-position identifier-type meta-auth role title-type].freeze

  attr_writer :refiners

  # @return [Set] the refiners registered so far (created on first use)
  def refiners
    @refiners ||= Set.new
  end

  PROPERTIES.each do |voc|
    met = voc.tr('-', '_')
    attr_writer met
    define_method met do
      # Refiners that expose no #property (Link includes Refinee and can be
      # registered through refines= but has no such reader) are skipped
      # instead of raising NoMethodError.
      refiners.find {|refiner| refiner.respond_to?(:property) && refiner.property == voc}
    end
  end
end
93
+
94
# Generic Dublin Core element: text content plus id, lang and dir
# attributes. It can be refined (see Refinee).
class DCMES
  include Refinee

  attr_accessor :content
  attr_accessor :id, :lang, :dir

  # @return [String] the content converted with to_s
  def to_s
    content.to_s
  end
end
103
+
104
# dc:identifier element.
class Identifier < DCMES
  # @note This is ad-hoc
  # @todo Define and include OPF module for opf:scheme attribute
  # @todo Define general way to handle with identifier-type refiners
  attr_accessor :scheme

  # Tells whether this identifier looks like an ISBN. It does when any of
  # the following holds:
  #
  # * +scheme+ is "ISBN";
  # * the content, lowercased, starts with "urn:isbn";
  # * a refiner has property "identifier-type", scheme "onix:codelist5" and
  #   content "02" or "15".
  #
  # Refiners without a #property reader (e.g. Link) are ignored rather than
  # raising NoMethodError.
  #
  # @note This is ad-hoc
  # @return [Boolean]
  def isbn?
    scheme == 'ISBN' ||
      content.to_s.downcase.start_with?('urn:isbn') ||
      refiners.any? {|refiner|
        refiner.respond_to?(:property) &&
          refiner.property == 'identifier-type' &&
          refiner.scheme == 'onix:codelist5' &&
          %w[02 15].include?(refiner.content)
      }
  end
end
123
+
124
# dc:title element, ordered by its display-seq refiner.
class Title < DCMES
  include Comparable

  # Orders titles by display_seq, read as an integer via to_s.to_i.
  # A title without display_seq sorts before one that has it; two titles
  # that both lack display_seq compare as equal (0), so that a <=> b and
  # b <=> a never both claim "greater".
  #
  # NOTE(review): because Comparable#== is derived from <=>, two titles
  # without display_seq now compare == — the same as two titles with the
  # same display_seq already did.
  #
  # @param other [#display_seq]
  # @return [Integer] -1, 0 or 1
  def <=>(other)
    own_seq = display_seq
    other_seq = other.display_seq
    return 0 if own_seq.nil? && other_seq.nil?
    return 1 if other_seq.nil?
    return -1 if own_seq.nil?

    own_seq.to_s.to_i <=> other_seq.to_s.to_i
  end
end
133
+
134
# A meta element: property, id, scheme and text content. A Meta may refine
# another item (see #refines=) and may itself be refined (see Refinee).
class Meta
  include Refinee

  attr_accessor :property, :id, :scheme, :content
  attr_reader :refines

  # Registers this meta in the refinee's refiners and remembers the refinee.
  #
  # @param refinee [#refiners]
  def refines=(refinee)
    refinee.refiners << self
    @refines = refinee
  end

  # @return [Boolean] true when a refinee has been set
  def refines?
    ! refines.nil?
  end
  alias subexpression? refines?

  # @return [Boolean] true when this meta refines nothing
  def primary_expression?
    ! subexpression?
  end

  # @return [String] "#<Class:0x... @ivar=value ...>"; the leading "#<"
  #   matches the Inspector template used elsewhere (it used to be "<#")
  def inspect
    ivs = instance_variables.map {|iv|
      [iv, instance_variable_get(iv).inspect].join('=')
    }.join(' ')
    '#<%s:%#0x %s>' % [self.class, __id__, ivs]
  end

  # @return [String] the content converted with to_s
  def to_s
    content.to_s
  end
end
165
+
166
# A link element: href, rel, id and media_type. A Link may refine another
# item (see #refines=) and may itself be refined (see Refinee).
class Link
  include Refinee

  attr_accessor :href, :rel, :id, :media_type
  attr_reader :refines

  # Registers this link in the refinee's refiners, then remembers the
  # refinee.
  #
  # @param refinee [#refiners]
  def refines=(refinee)
    @refines = refinee.tap {|target| target.refiners << self}
  end
end
177
+ end
178
+ end
@@ -15,7 +15,8 @@ module EPUB
15
15
  end
16
16
 
17
17
  class Rootfile
18
- attr_accessor :full_path, :media_type
18
+ attr_accessor :full_path, :media_type,
19
+ :package
19
20
 
20
21
  # @param full_path [Addressable::URI|nil]
21
22
  # @param media_type [String]
@@ -1,6 +1,7 @@
1
1
  module EPUB
2
2
  class OCF
3
- class Metadata
3
+ class UnknownFormatMetadata
4
+ attr_accessor :content
4
5
  end
5
6
  end
6
7
  end
@@ -1,3 +1,4 @@
1
+ require 'monitor'
1
2
  require 'epub/ocf/physical_container/archive_zip'
2
3
  require 'epub/ocf/physical_container/unpacked_directory'
3
4
  require 'epub/ocf/physical_container/unpacked_uri'
@@ -11,6 +12,14 @@ module EPUB
11
12
  @adapter = ArchiveZip
12
13
 
13
14
  class << self
15
# Resolves an adapter designation to an adapter class.
#
# An argument that is exactly a Class is returned untouched. Anything else
# is looked up as a constant on the receiver; when :Zipruby is requested and
# that constant is not defined yet, its file is required first.
#
# @param adapter [Class, Symbol, String]
# @return [Class]
def find_adapter(adapter)
  if adapter.instance_of?(Class)
    adapter
  else
    zipruby_missing = adapter == :Zipruby && !const_defined?(adapter)
    require 'epub/ocf/physical_container/zipruby' if zipruby_missing
    const_get(adapter)
  end
end
22
+
14
23
  def adapter
15
24
  raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
16
25
  @adapter
@@ -18,8 +27,7 @@ module EPUB
18
27
 
19
28
  def adapter=(adapter)
20
29
  raise NoMethodError, "undefined method `#{__method__}' for #{self}" unless self == PhysicalContainer
21
- @adapter = adapter.instance_of?(Class) ? adapter : const_get(adapter)
22
- adapter
30
+ @adapter = find_adapter(adapter)
23
31
  end
24
32
 
25
33
  def open(container_path)
@@ -43,6 +51,7 @@ module EPUB
43
51
 
44
52
  def initialize(container_path)
45
53
  @container_path = container_path
54
+ @monitor = Monitor.new
46
55
  end
47
56
  end
48
57
  end
@@ -12,11 +12,13 @@ module EPUB
12
12
 
13
13
  def open
14
14
  Archive::Zip.open @container_path do |archive|
15
- @archive = archive
16
- begin
17
- yield self
18
- ensure
19
- @archive = nil
15
+ @monitor.synchronize do
16
+ @archive = archive
17
+ begin
18
+ yield self
19
+ ensure
20
+ @archive = nil
21
+ end
20
22
  end
21
23
  end
22
24
  end
@@ -8,6 +8,10 @@ module EPUB
8
8
 
9
9
  def read(path_name)
10
10
  ::File.read(::File.join(@container_path, path_name))
11
+ rescue ::Errno::ENOENT => error
12
+ no_entry = NoEntry.new(error.message)
13
+ no_entry.set_backtrace error.backtrace
14
+ raise no_entry
11
15
  end
12
16
  end
13
17
  end
@@ -19,6 +19,10 @@ module EPUB
19
19
 
20
20
  def read(path_name)
21
21
  (@container_path + path_name).read
22
+ rescue ::OpenURI::HTTPError => error
23
+ no_entry = NoEntry.new(error.message)
24
+ no_entry.set_backtrace error.backtrace
25
+ raise no_entry
22
26
  end
23
27
  end
24
28
  end
@@ -6,11 +6,17 @@ module EPUB
6
6
  class Zipruby < self
7
7
  def open
8
8
  Zip::Archive.open @container_path do |archive|
9
- begin
10
- @archive = archive
11
- yield self
12
- ensure
13
- @archive = nil
9
+ @monitor.synchronize do
10
+ begin
11
+ @archive = archive
12
+ yield self
13
+ rescue ::Zip::Error => error
14
+ no_entry = NoEntry.new(error.message)
15
+ no_entry.set_backtrace error.backtrace
16
+ raise no_entry
17
+ ensure
18
+ @archive = nil
19
+ end
14
20
  end
15
21
  end
16
22
  end
@@ -21,6 +27,12 @@ module EPUB
21
27
  else
22
28
  open {|container| container.read(path_name)}
23
29
  end
30
+ rescue ::Zip::Error => error
31
+ no_entry = NoEntry.new(error.message)
32
+ no_entry.set_backtrace error.backtrace
33
+ raise no_entry
34
+ ensure
35
+ @archive = nil
24
36
  end
25
37
  end
26
38
  end
data/lib/epub/parser.rb CHANGED
@@ -46,22 +46,29 @@ module EPUB
46
46
  options[:container_adapter] == :UnpackedURI or
47
47
  EPUB::OCF::PhysicalContainer.adapter == EPUB::OCF::PhysicalContainer::UnpackedURI)
48
48
 
49
- raise "File #{filepath} not readable" if
50
- !path_is_uri and !File.readable_real?(filepath)
49
+ raise "File #{filepath} not found" if
50
+ !path_is_uri and !File.exist?(filepath)
51
51
 
52
52
  @filepath = path_is_uri ? filepath : File.realpath(filepath)
53
53
  @book = create_book(options)
54
+ if File.directory? @filepath
55
+ @book.container_adapter = :UnpackedDirectory
56
+ end
54
57
  @book.epub_file = @filepath
55
58
  if options[:container_adapter]
56
- adapter = options[:container_adapter]
57
- @book.container_adapter = adapter
59
+ @book.container_adapter = options[:container_adapter]
58
60
  end
59
61
  end
60
62
 
61
63
  def parse
62
64
  @book.container_adapter.open @filepath do |container|
63
65
  @book.ocf = OCF.parse(container)
64
- @book.package = Publication.parse(container, @book.rootfile_path)
66
+ @book.ocf.container.rootfiles.each {|rootfile|
67
+ package = Publication.parse(container, rootfile.full_path.to_s)
68
+ rootfile.package = package
69
+ @book.packages << package
70
+ package.book = @book
71
+ }
65
72
  end
66
73
 
67
74
  @book
@@ -0,0 +1,67 @@
1
+ module EPUB
2
+ class Parser
3
+ module Metadata
4
# Builds a Metadata object from the metadata element.
#
# Dublin Core children, meta elements and link elements are extracted into
# models; every model that carries an id is recorded in a local id map, and
# afterwards each refiner collected for an id is attached to the model
# registered under that id (entries lacking either side are skipped).
#
# A link without a rel attribute gets an empty Set as +rel+ instead of
# failing on nil.
#
# @param elem [#xpath] the metadata element
# @param unique_identifier_id [String] id of the dc:identifier to expose as
#   +unique_identifier+
# @param default_namespace [String] prefix used in the XPath for meta/link
# @return [EPUB::Publication::Package::Metadata]
def parse_metadata(elem, unique_identifier_id, default_namespace)
  metadata = EPUB::Publication::Package::Metadata.new
  id_map = {}

  metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
    identifier.scheme = extract_attribute(e, 'scheme', 'opf')
    metadata.unique_identifier = identifier if identifier.id == unique_identifier_id
  }
  metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
  metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
  %w[contributor coverage creator date description format publisher relation source subject type].each do |dcmes|
    metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
  end
  metadata.rights = extract_model(elem, id_map, './dc:rights')
  metadata.metas = extract_refinee(elem, id_map, "./#{default_namespace}:meta", :Meta, %w[property id scheme])
  metadata.links = extract_refinee(elem, id_map, "./#{default_namespace}:link", :Link, %w[id media-type]) {|link, e|
    link.href = extract_attribute(e, 'href')
    # to_s turns a missing rel attribute (nil) into "", which splits to [].
    link.rel = Set.new(extract_attribute(e, 'rel').to_s.split(nil))
  }

  id_map.values.each do |hsh|
    next unless hsh[:refiners]
    next unless hsh[:metadata]
    hsh[:refiners].each {|meta| meta.refines = hsh[:metadata]}
  end

  metadata
end
32
+
33
# Builds one model per node matched by +xpath+ under +elem+.
#
# For every node a new instance of the Metadata class named by +klass+ is
# created, each listed attribute is copied through the matching writer
# (dashes in the attribute name become underscores), the node content is
# stored unless +klass+ is :Link, and the optional block is called with the
# model and the node.
#
# Afterwards each model that has an id is recorded in +id_map+ under
# :metadata. An entry that already exists for that id is kept and extended,
# so refiners registered for the id earlier (including by the block during
# this very call) are not lost.
#
# @param elem [#xpath]
# @param id_map [Hash] id => {metadata: model, refiners: [...]}; mutated
# @param xpath [String]
# @param klass [Symbol] constant name under EPUB::Publication::Package::Metadata
# @param attributes [Array<String>] attribute names to copy
# @yield [model, e] the freshly built model and its source node
# @return [Array] the models, in document order
def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
  models = elem.xpath(xpath, EPUB::NAMESPACES).collect do |e|
    model = EPUB::Publication::Package::Metadata.const_get(klass).new
    attributes.each do |attr|
      model.__send__ "#{attr.gsub(/-/, '_')}=", extract_attribute(e, attr)
    end
    model.content = e.content unless klass == :Link

    yield model, e if block_given?

    model
  end

  models.each do |model|
    next unless model.respond_to?(:id) && model.id

    # Merge into the existing entry instead of replacing it wholesale.
    (id_map[model.id] ||= {})[:metadata] = model
  end

  models
end
52
+
53
# Same as extract_model, and additionally records refinement targets.
#
# After the optional block has been called for a model, the node's refines
# attribute is read; when it starts with "#", the model is appended to the
# :refiners list kept in +id_map+ under the id that follows the "#".
#
# @param elem [#xpath]
# @param id_map [Hash] id => {metadata: model, refiners: [...]}; mutated
# @param xpath [String]
# @param klass [Symbol]
# @param attributes [Array<String>]
# @yield [model, e] the freshly built model and its source node
# @return [Array] the models returned by extract_model
def extract_refinee(elem, id_map, xpath, klass, attributes)
  extract_model(elem, id_map, xpath, klass, attributes) do |model, e|
    yield model, e if block_given?

    target = extract_attribute(e, 'refines')
    next unless target && target[0] == '#'

    entry = (id_map[target[1..-1]] ||= {})
    (entry[:refiners] ||= []) << model
  end
end
65
+ end
66
+ end
67
+ end