epubmeta 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1011bfe3193d925bd9c24d7581533a9517be04f3daae0a045b94a4fd88d87887
4
+ data.tar.gz: 8beea6a90e8502b7a7adbedd6fc52e2a02a22723569c311417c56584d4d41a9c
5
+ SHA512:
6
+ metadata.gz: a47ea2db7f32ec6315a7c8fe5aeeecc56d19a3feb666c6b947214fc46031e1bf9face4c98f7631324be9c537de482dce08afcb58550743caed41a09e4578a837
7
+ data.tar.gz: 1414699d02f38d19332d81c6682efff214f963cf6dd61a50aba0a27320f49f68a1e0489214fe2238e10c33ba7af8ca69e542303deb3d282aab6d8d9aebc7766d
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2018 Andrew Roberts
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,40 @@
1
+ # epubmeta
2
+ Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
3
+
4
+ *This lib is a fork of `epubinfo`, which had not been updated since 2014.*
5
+
6
+ ## Usage
7
+
8
+ ```ruby
9
+ require 'epubmeta'
10
+ EPUBMeta.get('path/to/epub/file.epub')
11
+ ```
12
+
13
+ This returns an `EPUBMeta::Models::Book` instance. Please refer to the [API documentation](http://rubydoc.info/gems/epubmeta/frames) for details on working with it.
14
+
15
+ ## Changelog
16
+
17
+ **0.5.0** *August 10, 2018*
18
+
19
+ * Rename module to `EPUBMeta`
20
+ * Fix cover metadata parsing bug (credit: @Gizmokid2005)
21
+
22
+ **< 0.5.0**
23
+
24
+ * Changelog for previous versions (pre-fork) can be found in the [epubinfo changelog](https://github.com/chdorner/epubinfo#changelog).
25
+
26
+ ## Contributing to epubmeta
27
+
28
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
29
+ * Check out the issue tracker to make sure no one has already requested it and/or contributed it.
30
+ * Fork the project.
31
+ * Start a feature/bugfix branch.
32
+ * Commit and push until you are happy with your contribution.
33
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
34
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or a change there is otherwise necessary, that is fine, but please isolate it in its own commit so I can cherry-pick around it.
35
+
36
+ ## Copyright
37
+
38
+ Copyright (c) 2018 Andrew Roberts.
39
+ Copyright (c) 2012--2014 Christof Dorner.
40
+ See LICENSE.txt for further details.
@@ -0,0 +1,20 @@
1
+ require 'zip'
2
+ require 'nokogiri'
3
+ require 'cgi'
4
+
5
+ require 'epubmeta/parser'
6
+ require 'epubmeta/models/book'
7
+ require 'epubmeta/models/cover'
8
+ require 'epubmeta/models/person'
9
+ require 'epubmeta/models/date'
10
+ require 'epubmeta/models/identifier'
11
+ require 'epubmeta/utils'
12
+
13
module EPUBMeta
  # Builds the book model for the EPUB located at +path+.
  #
  # The path is handed unchanged to EPUBMeta::Parser.parse and the resulting
  # parser is wrapped in a Book.
  #
  # @param path location of the epub file, as accepted by EPUBMeta::Parser.parse
  # @return [EPUBMeta::Models::Book] a model representation of the epub file
  def self.get(path)
    EPUBMeta::Models::Book.new(EPUBMeta::Parser.parse(path))
  end
end
@@ -0,0 +1,120 @@
1
module EPUBMeta
  module Models
    # Metadata of one EPUB publication, read from the package document that
    # the parser exposes as +metadata_document+.
    class Book
      # Titles, array of String instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.1 EPUB2 reference})
      # @return [Array]
      attr_writer :titles
      def titles; @titles || []; end

      # Creators, array of Person instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.2 EPUB2 reference})
      # @return [Array]
      attr_writer :creators
      def creators; @creators || []; end

      # Subjects, array of String instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.3 EPUB2 reference})
      # @return [Array]
      attr_writer :subjects
      def subjects; @subjects || []; end

      # Description ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.4 EPUB2 reference})
      # @return [String]
      attr_accessor :description

      # Publisher ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.5 EPUB2 reference})
      # @return [String]
      attr_accessor :publisher

      # Contributors, array of Person instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.6 EPUB2 reference})
      # @return [Array]
      attr_writer :contributors
      def contributors; @contributors || []; end

      # Dates, array of Date instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.7 EPUB2 reference})
      # @return [Array]
      attr_writer :dates
      def dates; @dates || []; end

      # Identifiers, array of Identifier instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.10 EPUB2 reference})
      # @return [Array]
      attr_writer :identifiers
      def identifiers; @identifiers || []; end

      # Source ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.11 EPUB2 reference})
      # @return [String]
      attr_accessor :source

      # Languages, array of String instances ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.12 EPUB2 reference})
      # @return [Array]
      attr_writer :languages
      def languages; @languages || []; end

      # Rights ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.15 EPUB2 reference})
      # @return [String]
      attr_accessor :rights

      # DRM protected
      # @return [Boolean]
      attr_writer :drm_protected
      def drm_protected; @drm_protected || false; end
      alias :drm_protected? :drm_protected

      # Cover
      # @return [Cover]
      attr_accessor :cover

      # EPUB Version ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section1.4.1.2})
      # @return [String]
      attr_accessor :version

      # Should never be called directly, go through EPUBMeta.get
      #
      # Leaves every attribute at its default when the parser has no metadata
      # document.
      #
      # @param parser object answering +metadata_document+ and +drm_protected?+
      def initialize(parser)
        document = parser.metadata_document
        return if document.nil?

        # The selectors below use unprefixed element names, so namespaces are
        # stripped from the (shared) document first.
        document.remove_namespaces!
        metadata = document.css('metadata')
        package = document.at_css('package')

        # A document without a package element yields a nil version instead of
        # a NoMethodError.
        self.version = package && package['version']
        self.titles = metadata.xpath('.//title').map(&:content)
        self.creators = people(metadata, 'creator')
        self.subjects = metadata.xpath('.//subject').map(&:content)
        self.description = first_content(metadata, 'description')
        self.publisher = first_content(metadata, 'publisher')
        self.contributors = people(metadata, 'contributor')
        self.dates = metadata.xpath('.//date').map { |d| EPUBMeta::Models::Date.new(d) } +
                     modification_dates(metadata)
        self.identifiers = metadata.xpath('.//identifier').map { |i| EPUBMeta::Models::Identifier.new(i) }
        self.source = first_content(metadata, 'source')
        self.languages = metadata.xpath('.//language').map(&:content)
        self.rights = first_content(metadata, 'rights')
        self.drm_protected = parser.drm_protected?
        self.cover = EPUBMeta::Models::Cover.new(parser)
      end

      # Returns Hash representation of the book
      #
      # Collection entries are read through their readers, so a book built
      # without a metadata document serializes to empty arrays instead of
      # raising NoMethodError on nil.
      #
      # @return [Hash]
      def to_hash
        {
          :titles => titles,
          :creators => creators.map(&:to_hash),
          :subjects => subjects,
          :description => @description,
          :publisher => @publisher,
          :contributors => contributors.map(&:to_hash),
          :dates => dates.map(&:to_hash),
          :identifiers => identifiers.map(&:to_hash),
          :source => @source,
          :languages => languages,
          :rights => @rights,
          :drm_protected => drm_protected,
          :cover => @cover,
        }
      end

      private

      # Content of the first element named +name+ below +metadata+, or nil
      # when there is no such element.
      def first_content(metadata, name)
        node = metadata.at_xpath(".//#{name}")
        node && node.content
      end

      # Person models for every element named +name+ below +metadata+.
      def people(metadata, name)
        metadata.xpath(".//#{name}").map { |node| EPUBMeta::Models::Person.new(node) }
      end

      # Date models for the dcterms:modified meta entries, tagged with the
      # 'modification' event.
      def modification_dates(metadata)
        metadata.xpath(".//meta[@property='dcterms:modified']").map do |node|
          date = EPUBMeta::Models::Date.new(node)
          date.event = 'modification'
          date
        end
      end
    end
  end
end
120
+
@@ -0,0 +1,101 @@
1
require 'tempfile'

module EPUBMeta
  module Models
    # Cover image referenced from the manifest of the EPUB package document.
    class Cover
      # Builds a cover for +parser+.
      #
      # @return [Cover, nil] nil when +parser+ is not an EPUBMeta::Parser or
      #   when no cover entry can be found in the archive
      def self.new(parser)
        return nil unless EPUBMeta::Parser === parser

        cover = super(parser)
        cover.exists? ? cover : nil
      end

      # @param parser [EPUBMeta::Parser] source of the metadata document and
      #   of the zip archive
      def initialize(parser)
        @parser = parser
        @path = epub_cover_file_path
        @content_type = epub_cover_content_type
      end

      # Original name of cover file
      # @return [String]
      def original_file_name
        File.basename(@path) if @path
      end

      # Content type of cover file
      # @return [String]
      attr_accessor :content_type

      # Cover exists?
      # @return [Boolean]
      # @!visibility private
      def exists?
        # Coerced so callers get true/false rather than the zip entry itself.
        !!(@path && @parser.zip_file.find_entry(zip_file_path))
      end

      # Cover file
      #
      # A Tempfile is used to give access to the cover bytes. It is flushed
      # and rewound before being handed out, so it can be read immediately.
      #
      # If a block is passed, the tempfile is yielded to it and then closed
      # and unlinked, even when the block raises; the block's value is
      # returned:
      #   book.cover.tempfile { |f| puts f.size }
      # Otherwise the caller is responsible for closing and unlinking it:
      #   file = book.cover.tempfile
      #   file.size
      #   file.close!
      #
      # @return [Tempfile, Object] the tempfile, or the block's value
      def tempfile(&block)
        file = Tempfile.new('epubmeta')
        begin
          file.binmode
          file.write(@parser.zip_file.read(zip_file_path))
          file.flush
          file.rewind
        rescue StandardError
          # Do not leave a stray tempfile behind when the entry cannot be read.
          file.close!
          raise
        end

        # user is responsible for closing file
        return file unless block_given?

        begin
          yield file
        ensure
          file.close!
        end
      end

      private

      def epub_cover_file_path
        epub_cover_item.attr('href') if epub_cover_item
      end

      def epub_cover_content_type
        epub_cover_item.attr('media-type') if epub_cover_item
      end

      # Manifest item describing the cover, looked up by the id declared in
      # <meta name="cover"> (default 'cover-image'), then by properties /
      # property, then by a fixed fallback id.
      def epub_cover_item
        @epub_cover_item ||= begin
          cover_id = declared_cover_id || 'cover-image'
          manifest = @parser.metadata_document.css('manifest')

          first_item(manifest, "item[id = \"#{cover_id}\"]") ||
            first_item(manifest, "item[properties = \"#{cover_id}\"]") ||
            first_item(manifest, "item[property = \"#{cover_id}\"]") ||
            first_item(manifest, "item[id = img-bookcover-jpeg]")
        end
      end

      # Value of the content attribute of <meta name="cover">, or nil when it
      # cannot be read.
      def declared_cover_id
        metadata = @parser.metadata_document.css('metadata')
        metadata.css('meta[name=cover]').attr('content').value
      rescue StandardError
        nil
      end

      # First manifest item matching +selector+; nil when nothing matches or
      # the selector cannot be evaluated (the id is interpolated verbatim).
      def first_item(manifest, selector)
        manifest.css(selector).first
      rescue StandardError
        nil
      end

      # Path of the cover inside the archive: the href resolved against the
      # directory of the package document, then unescaped.
      def zip_file_path
        dir = File.dirname(@parser.metadata_path)
        path = dir == '.' ? @path : File.join(dir, @path)
        CGI::unescape(path)
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'time'
2
+
3
module EPUBMeta
  module Models
    # A date entry of the publication metadata.
    class Date
      # Date ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.7 EPUB2 reference})
      # @return Date
      attr_accessor :date
      # Date as a string ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.7 EPUB2 reference})
      # @return String
      attr_accessor :date_str
      # Event ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.7 EPUB2 reference})
      # @return String
      attr_accessor :event

      # Should never be called directly, go through EPUBMeta.get
      #
      # @param node element answering +content+ and +attribute+
      def initialize(node)
        text = node.content
        self.date = parse_date(text)
        self.date_str = text
        event_attribute = node.attribute('event')
        self.event = event_attribute && event_attribute.content
      end

      # Returns Hash representation of a date
      #
      # +:time+ carries the same parsed value as +:date+; the key is kept for
      # callers written against the earlier hash layout.
      #
      # @return [Hash]
      def to_hash
        {
          :date => @date,
          :date_str => @date_str,
          :time => @date,
          :event => @event
        }
      end

      private

      # Parsed date for +text+, or nil when it cannot be parsed; the raw text
      # stays available through +date_str+.
      def parse_date(text)
        Utils.parse_iso_8601_date(text)
      rescue StandardError
        nil
      end
    end
  end
end
34
+
@@ -0,0 +1,28 @@
1
module EPUBMeta
  module Models
    # An identifier entry of the publication metadata.
    class Identifier
      # Identifier ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.10 EPUB2 reference})
      # @return [String]
      attr_accessor :identifier
      # Scheme ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.10 EPUB2 reference})
      # @return [String]
      attr_accessor :scheme

      # Should never be called directly, go through EPUBMeta.get
      #
      # Reads the identifier from the node content and the scheme from the
      # node's 'scheme' attribute (nil when that cannot be read).
      def initialize(node)
        self.identifier = node.content
        self.scheme = begin
          node.attribute('scheme').content
        rescue StandardError
          nil
        end
      end

      # Returns Hash representation of an identifier
      # @return [Hash]
      def to_hash
        hash = {}
        hash[:identifier] = @identifier
        hash[:scheme] = @scheme
        hash
      end
    end
  end
end
28
+
@@ -0,0 +1,33 @@
1
module EPUBMeta
  module Models
    # A person entry of the publication metadata.
    class Person
      # Name ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.6 EPUB2 reference})
      # @return [String]
      attr_accessor :name
      # File as ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.6 EPUB2 reference})
      # @return [String]
      attr_accessor :file_as
      # Role ({http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.6 EPUB2 reference})
      # @return [String]
      attr_accessor :role

      # Should never be called directly, go through EPUBMeta.get
      #
      # Takes the name from the node content and file_as / role from the
      # 'file-as' and 'role' attributes (nil when an attribute cannot be read).
      def initialize(node)
        self.name = node.content
        self.file_as = attribute_text(node, 'file-as')
        self.role = attribute_text(node, 'role')
      end

      # Returns Hash representation of a person
      # @return [Hash]
      def to_hash
        hash = {}
        hash[:name] = @name
        hash[:file_as] = @file_as
        hash[:role] = @role
        hash
      end

      private

      # Content of the attribute called +attribute_name+, or nil if reading it
      # raises.
      def attribute_text(node, attribute_name)
        node.attribute(attribute_name).content
      rescue StandardError
        nil
      end
    end
  end
end
33
+
@@ -0,0 +1,48 @@
1
module EPUBMeta
  # Lazily opens an EPUB archive and exposes its package document.
  class Parser
    attr_accessor :path
    attr_writer :metadata_document

    # Creates a parser for the file at +path+; nothing is read until one of
    # the accessors below is called.
    #
    # @return [EPUBMeta::Parser]
    def self.parse(path)
      epubmeta = EPUBMeta::Parser.new
      epubmeta.path = path
      epubmeta
    end

    # Package document located at +metadata_path+, parsed on first access.
    def metadata_document
      @metadata_document ||= load_metadata_file
    end

    # Whether the archive contains META-INF/rights.xml.
    #
    # @return [Boolean]
    def drm_protected?
      # defined? rather than ||= so that a false answer is cached as well.
      return @drm_protected if defined?(@drm_protected)

      @drm_protected = !!zip_file.find_entry('META-INF/rights.xml')
    end

    # The opened archive, memoized.
    #
    # @raise [NotAnEPUBFileError] when the file cannot be opened as a zip
    def zip_file
      @zip_file ||= Zip::File.open(@path)
    rescue Zip::Error => e
      raise NotAnEPUBFileError, e.message
    end

    # Path of the package document inside the archive, taken from the
    # full-path attribute of the first rootfile in META-INF/container.xml.
    #
    # @return [String]
    # @raise [NotAnEPUBFileError] when no rootfile with a full-path is declared
    def metadata_path
      @metadata_path ||= begin
        root_document.remove_namespaces!
        rootfile = root_document.at_css('container rootfiles rootfile:first-child')
        full_path = rootfile && rootfile['full-path']
        if full_path.nil?
          raise NotAnEPUBFileError, 'META-INF/container.xml does not declare a rootfile full-path'
        end
        full_path
      end
    end

    private

    # Parsed META-INF/container.xml; any failure to read it is reported as
    # NotAnEPUBFileError.
    def root_document
      @root_document ||= Nokogiri::XML(zip_file.read('META-INF/container.xml'))
    rescue NotAnEPUBFileError
      # Already reported by zip_file; do not wrap it a second time.
      raise
    rescue StandardError => e
      raise NotAnEPUBFileError, e.message
    end

    def load_metadata_file
      Nokogiri::XML(zip_file.read(metadata_path))
    end
  end
end
@@ -0,0 +1,17 @@
1
require 'date'

module EPUBMeta
  module Utils
    # Parses the calendar-date part of an ISO 8601 value.
    #
    # Accepts a year ("2018"), a year and month ("2018-08"), or a full date
    # ("2018-08-10"), optionally followed by a time of day introduced by "T"
    # or whitespace. Missing month/day fall back to strptime's defaults.
    #
    # @param date_str [String]
    # @return [Date, nil] nil when the date part holds more than two dashes
    # @raise [ArgumentError] when the date part does not match its format
    def self.parse_iso_8601_date(date_str)
      # Only the date part decides the format: a negative UTC offset such as
      # "T10:00:00-05:00" must not be counted as an extra date separator.
      date_part = date_str.strip.split(/[T\s]/, 2).first.to_s

      case date_part.count('-')
      when 0
        Date.strptime(date_part, '%Y')
      when 1
        Date.strptime(date_part, '%Y-%m')
      when 2
        Date.strptime(date_part, '%Y-%m-%d')
      end
    end
  end

  # Raised when a file cannot be read as an EPUB archive.
  class NotAnEPUBFileError < StandardError; end
end
17
+
metadata ADDED
@@ -0,0 +1,139 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: epubmeta
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Roberts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-08-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rubyzip
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.4.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.4.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.14.1
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.14.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.8.7
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.8.7
69
+ - !ruby/object:Gem::Dependency
70
+ name: jeweler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 2.0.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: redcarpet
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Supports EPUB2 and EPUB3 formats.
98
+ email: andy.roberts.uk@gmail.com
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files:
102
+ - LICENSE.txt
103
+ - README.md
104
+ files:
105
+ - LICENSE.txt
106
+ - README.md
107
+ - lib/epubmeta.rb
108
+ - lib/epubmeta/models/book.rb
109
+ - lib/epubmeta/models/cover.rb
110
+ - lib/epubmeta/models/date.rb
111
+ - lib/epubmeta/models/identifier.rb
112
+ - lib/epubmeta/models/person.rb
113
+ - lib/epubmeta/parser.rb
114
+ - lib/epubmeta/utils.rb
115
+ homepage: https://github.com/andyroberts/epubmeta
116
+ licenses:
117
+ - MIT
118
+ metadata: {}
119
+ post_install_message:
120
+ rdoc_options: []
121
+ require_paths:
122
+ - lib
123
+ required_ruby_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ required_rubygems_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ requirements: []
134
+ rubyforge_project:
135
+ rubygems_version: 2.7.5
136
+ signing_key:
137
+ specification_version: 4
138
+ summary: Extracts metadata information from EPUB files
139
+ test_files: []