mobi 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +2 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +21 -9
- data/README.rdoc +41 -0
- data/Rakefile +3 -17
- data/VERSION +1 -1
- data/lib/mobi.rb +7 -3
- data/lib/mobi/header/exth_header.rb +46 -0
- data/lib/mobi/header/mobi_header.rb +153 -0
- data/lib/mobi/header/palm_doc_header.rb +73 -0
- data/lib/mobi/metadata.rb +45 -68
- data/lib/mobi/metadata_streams.rb +46 -0
- data/lib/mobi/stream_slicer.rb +14 -37
- data/mobi.gemspec +34 -26
- data/spec/fixtures/sherlock.mobi +0 -0
- data/spec/lib/mobi/header/exth_header_spec.rb +36 -0
- data/spec/lib/mobi/header/mobi_header_spec.rb +79 -0
- data/spec/lib/mobi/header/palm_doc_header_spec.rb +45 -0
- data/spec/lib/mobi/metadata_spec.rb +62 -0
- data/spec/lib/mobi/stream_slicer_spec.rb +64 -0
- data/spec/lib/mobi_spec.rb +10 -0
- data/spec/spec_helper.rb +4 -0
- metadata +90 -67
- data/test/helper.rb +0 -18
- data/test/test_mobi.rb +0 -7
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
|
|
6
6
|
# Add dependencies to develop your gem here.
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development do
|
9
|
-
gem
|
10
|
-
gem
|
11
|
-
gem
|
12
|
-
gem
|
9
|
+
gem 'rspec', '~> 2.11.0'
|
10
|
+
gem 'rr', '~> 1.0.4'
|
11
|
+
gem 'bundler', '~> 1.1.0'
|
12
|
+
gem 'jeweler', '~> 1.8.0'
|
13
13
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
diff-lcs (1.1.3)
|
4
5
|
git (1.2.5)
|
5
|
-
jeweler (1.
|
6
|
-
bundler (~> 1.0
|
6
|
+
jeweler (1.8.3)
|
7
|
+
bundler (~> 1.0)
|
7
8
|
git (>= 1.2.5)
|
8
9
|
rake
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
rdoc
|
11
|
+
json (1.6.6)
|
12
|
+
rake (0.9.2.2)
|
13
|
+
rdoc (3.12)
|
14
|
+
json (~> 1.4)
|
15
|
+
rr (1.0.4)
|
16
|
+
rspec (2.11.0)
|
17
|
+
rspec-core (~> 2.11.0)
|
18
|
+
rspec-expectations (~> 2.11.0)
|
19
|
+
rspec-mocks (~> 2.11.0)
|
20
|
+
rspec-core (2.11.1)
|
21
|
+
rspec-expectations (2.11.1)
|
22
|
+
diff-lcs (~> 1.1.3)
|
23
|
+
rspec-mocks (2.11.1)
|
12
24
|
|
13
25
|
PLATFORMS
|
14
26
|
ruby
|
15
27
|
|
16
28
|
DEPENDENCIES
|
17
|
-
bundler (~> 1.
|
18
|
-
jeweler (~> 1.
|
19
|
-
|
20
|
-
|
29
|
+
bundler (~> 1.1.0)
|
30
|
+
jeweler (~> 1.8.0)
|
31
|
+
rr (~> 1.0.4)
|
32
|
+
rspec (~> 2.11.0)
|
data/README.rdoc
CHANGED
@@ -10,6 +10,47 @@ The gem does NOT handle any errors in a mobi document, or some edge cases.
|
|
10
10
|
|
11
11
|
Tests will be added soon.
|
12
12
|
|
13
|
+
== Installation
|
14
|
+
|
15
|
+
gem install mobi
|
16
|
+
|
17
|
+
== Usage
|
18
|
+
|
19
|
+
Creating a Mobi::Metadata object
|
20
|
+
|
21
|
+
Mobi::Metadata.new(File.open('/path/to/file.mobi'))
|
22
|
+
|
23
|
+
A handy convenience method to do the exact same thing
|
24
|
+
|
25
|
+
Mobi.metadata File.open('/path/to/file.mobi')
|
26
|
+
|
27
|
+
Getting metadata information is as simple as:
|
28
|
+
|
29
|
+
metadata = Mobi.metadata File.open('/path/to/file.mobi')
|
30
|
+
author = metadata.author
|
31
|
+
|
32
|
+
Supported metadata options are:
|
33
|
+
|
34
|
+
* author
|
35
|
+
* publisher
|
36
|
+
* imprint
|
37
|
+
* description
|
38
|
+
* isbn
|
39
|
+
* subject
|
40
|
+
* published_at
|
41
|
+
* review
|
42
|
+
* contributor
|
43
|
+
* rights
|
44
|
+
* subject_code
|
45
|
+
* type
|
46
|
+
* source
|
47
|
+
* asin
|
48
|
+
* version
|
49
|
+
|
50
|
+
== Thanks
|
51
|
+
|
52
|
+
* Calibre open source project. I ripped off the idea of a Stream Slicer and got a better understanding of mobi files from their code base. Check them out at http://calibre-ebook.com
|
53
|
+
|
13
54
|
== Contributing to mobi
|
14
55
|
|
15
56
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
data/Rakefile
CHANGED
@@ -15,8 +15,8 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.name = "mobi"
|
16
16
|
gem.homepage = "http://github.com/jkongie/mobi"
|
17
17
|
gem.license = "MIT"
|
18
|
-
gem.summary = %Q{A Rubygem that inspects MOBI metadata}
|
19
|
-
gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata
|
18
|
+
gem.summary = %Q{A Rubygem that inspects MOBI metadata.}
|
19
|
+
gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata.}
|
20
20
|
gem.email = "jkongie@gmail.com"
|
21
21
|
gem.authors = ["jkongie"]
|
22
22
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
@@ -26,23 +26,9 @@ Jeweler::Tasks.new do |gem|
|
|
26
26
|
end
|
27
27
|
Jeweler::RubygemsDotOrgTasks.new
|
28
28
|
|
29
|
-
require 'rake/testtask'
|
30
|
-
Rake::TestTask.new(:test) do |test|
|
31
|
-
test.libs << 'lib' << 'test'
|
32
|
-
test.pattern = 'test/**/test_*.rb'
|
33
|
-
test.verbose = true
|
34
|
-
end
|
35
|
-
|
36
|
-
require 'rcov/rcovtask'
|
37
|
-
Rcov::RcovTask.new do |test|
|
38
|
-
test.libs << 'test'
|
39
|
-
test.pattern = 'test/**/test_*.rb'
|
40
|
-
test.verbose = true
|
41
|
-
end
|
42
|
-
|
43
29
|
task :default => :test
|
44
30
|
|
45
|
-
require '
|
31
|
+
require 'rdoc/task'
|
46
32
|
Rake::RDocTask.new do |rdoc|
|
47
33
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
34
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/mobi.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'mobi/stream_slicer'
|
2
|
+
require 'mobi/metadata'
|
3
|
+
require 'mobi/metadata_streams'
|
4
|
+
require 'mobi/header/palm_doc_header'
|
5
|
+
require 'mobi/header/mobi_header'
|
6
|
+
require 'mobi/header/exth_header'
|
3
7
|
|
4
8
|
module Mobi
|
5
9
|
|
6
10
|
def self.metadata(file)
|
7
11
|
Mobi::Metadata.new(file)
|
8
12
|
end
|
9
|
-
|
13
|
+
|
10
14
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Mobi
|
2
|
+
module Header
|
3
|
+
class ExthHeader
|
4
|
+
RECORD_TYPES = { 100 => :author, 101 => :publisher, 102 => :imprint, 103 => :description, 104 => :isbn, 105 => :subject,
|
5
|
+
106 => :published_at, 107 => :review, 108 => :contributor, 109 => :rights, 110 => :subject_code,
|
6
|
+
111 => :type, 112 => :source, 113 => :asin, 114 => :version }
|
7
|
+
|
8
|
+
attr_reader *RECORD_TYPES.values
|
9
|
+
|
10
|
+
def initialize(data)
|
11
|
+
@data = data
|
12
|
+
@record_count, = @data[8, 4].unpack('N*')
|
13
|
+
|
14
|
+
define_data_methods
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def define_data_methods
|
20
|
+
start = 12
|
21
|
+
@record_count.times do
|
22
|
+
record = ExthRecord.new(@data, start)
|
23
|
+
|
24
|
+
if RECORD_TYPES.key?(record.code)
|
25
|
+
instance_variable_set "@#{record.name}", record.value
|
26
|
+
end
|
27
|
+
|
28
|
+
start += record.length
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class ExthRecord
|
33
|
+
|
34
|
+
attr_reader :code, :name, :length, :value
|
35
|
+
|
36
|
+
def initialize(data, start)
|
37
|
+
@code, = data[start, 4].unpack('N*')[0].to_i
|
38
|
+
@name = ExthHeader::RECORD_TYPES[@code]
|
39
|
+
@length, = data[start + 4, 4].unpack('N*')
|
40
|
+
@value = data[start + 8, length - 8]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# Public: Parses the Mobi Header which follows the 16 bytes of the PalmDOC
|
2
|
+
# header.
|
3
|
+
module Mobi
|
4
|
+
module Header
|
5
|
+
class MobiHeader
|
6
|
+
|
7
|
+
# Initialize the MobiHeader.
|
8
|
+
#
|
9
|
+
# data - A StreamSlicer which starts at record 0 of the PalmDOC.
|
10
|
+
#
|
11
|
+
# Returns self.
|
12
|
+
def initialize(data)
|
13
|
+
@data = data
|
14
|
+
end
|
15
|
+
|
16
|
+
# A MOBI identifier.
|
17
|
+
#
|
18
|
+
# Returns a String.
|
19
|
+
def identifier
|
20
|
+
@identifier ||= @data[16, 4]
|
21
|
+
end
|
22
|
+
|
23
|
+
# The length of the MOBI header.
|
24
|
+
#
|
25
|
+
# Returns a Fixnum.
|
26
|
+
def header_length
|
27
|
+
@header_length ||= @data[20, 4].unpack('N*')[0]
|
28
|
+
end
|
29
|
+
|
30
|
+
# The kind of Mobipocket file as returned from byte code.
|
31
|
+
#
|
32
|
+
# Returns a Fixnum.
|
33
|
+
def raw_mobi_type
|
34
|
+
@raw_mobi_type ||= @data[24, 4].unpack('N*')[0]
|
35
|
+
end
|
36
|
+
|
37
|
+
# The kind of Mobipocket file.
|
38
|
+
#
|
39
|
+
# Returns a String.
|
40
|
+
def mobi_type
|
41
|
+
{ 2 => 'MOBIpocket Book',
|
42
|
+
3 => 'PalmDoc Book',
|
43
|
+
4 => 'Audio',
|
44
|
+
232 => 'MOBIpocket',
|
45
|
+
248 => 'KF8',
|
46
|
+
257 => 'News',
|
47
|
+
258 => 'News Feed',
|
48
|
+
259 => 'News_Magazine',
|
49
|
+
513 => 'PICS',
|
50
|
+
514 => 'WORD',
|
51
|
+
515 => 'XLS',
|
52
|
+
516 => 'PPT',
|
53
|
+
517 => 'TEXT',
|
54
|
+
518 => 'HTML'
|
55
|
+
}.fetch(raw_mobi_type)
|
56
|
+
end
|
57
|
+
|
58
|
+
# The text encoding as returned from byte code.
|
59
|
+
#
|
60
|
+
# Returns a Fixnum.
|
61
|
+
def raw_text_encoding
|
62
|
+
@text_encoding ||= @data[28, 4].unpack('N*')[0]
|
63
|
+
end
|
64
|
+
|
65
|
+
# The text encoding.
|
66
|
+
#
|
67
|
+
# Returns a String.
|
68
|
+
def text_encoding
|
69
|
+
{ 1252 => 'CP1252 (WinLatin1)',
|
70
|
+
65001 => 'UTF-8'
|
71
|
+
}.fetch(raw_text_encoding)
|
72
|
+
end
|
73
|
+
|
74
|
+
# The unique ID.
|
75
|
+
#
|
76
|
+
# Returns an Integer.
|
77
|
+
def unique_id
|
78
|
+
@unique_id ||= @data[32, 4].unpack('N*')[0]
|
79
|
+
end
|
80
|
+
|
81
|
+
# The version of the MOBIpocket format used in this file.
|
82
|
+
#
|
83
|
+
# Returns an Integer.
|
84
|
+
def file_version
|
85
|
+
@file_version ||= @data[36, 4].unpack('N*')[0]
|
86
|
+
end
|
87
|
+
|
88
|
+
# The first record number (starting with 0) that is not the book's text.
|
89
|
+
#
|
90
|
+
# Returns an Integer.
|
91
|
+
def first_non_book_index
|
92
|
+
@first_non_book_index ||= @data[80, 4].unpack('N*')[0]
|
93
|
+
end
|
94
|
+
|
95
|
+
# Offset in record 0 (not from start of file) of the full name of the book.
|
96
|
+
#
|
97
|
+
# Returns an Integer.
|
98
|
+
def full_name_offset
|
99
|
+
@full_name_offset ||= @data[84, 4].unpack('N*')[0]
|
100
|
+
end
|
101
|
+
|
102
|
+
# Length in bytes of the full name of the book.
|
103
|
+
#
|
104
|
+
# Returns an Integer.
|
105
|
+
def full_name_length
|
106
|
+
@full_name_length ||= @data[88, 4].unpack('N*')[0]
|
107
|
+
end
|
108
|
+
|
109
|
+
# The raw book locale code. I believe this refers to a LCID code.
|
110
|
+
#
|
111
|
+
# The low byte is the main language: 09 = English.
|
112
|
+
# The next byte is dialect: 08 = British, 04 = US.
|
113
|
+
# Thus US English is 1033, UK English is 2057.
|
114
|
+
#
|
115
|
+
# Returns an Integer.
|
116
|
+
def raw_locale_code
|
117
|
+
@raw_locale_code ||= @data[92, 4].unpack('N*')[0]
|
118
|
+
end
|
119
|
+
|
120
|
+
# The minimum MOBIpocket version support needed to read this file.
|
121
|
+
#
|
122
|
+
# Returns an Integer.
|
123
|
+
def minimum_supported_mobipocket_version
|
124
|
+
@minimum_supported_mobipocket_version ||= @data[104, 4].unpack('N*')[0]
|
125
|
+
end
|
126
|
+
|
127
|
+
# The first record number (starting with 0) that contains an image. Image
|
128
|
+
# records should be sequential.
|
129
|
+
#
|
130
|
+
# Returns an Integer.
|
131
|
+
def first_image_index_record_number
|
132
|
+
@first_image_index_record_number ||= @data[108, 4].unpack('N*')[0]
|
133
|
+
end
|
134
|
+
|
135
|
+
# The EXTH flag.
|
136
|
+
#
|
137
|
+
# If bit 6 is set, then there is an EXTH record.
|
138
|
+
#
|
139
|
+
# Returns a Fixnum, 1 or 0.
|
140
|
+
def exth_flag
|
141
|
+
@exth_flag ||= @data[128, 4].unpack('@3B8').first[1].to_i
|
142
|
+
end
|
143
|
+
|
144
|
+
# Does the book have an EXTH header?
|
145
|
+
#
|
146
|
+
# Returns true if the book has an EXTH header.
|
147
|
+
def exth_header?
|
148
|
+
exth_flag == 1
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Public: Parses the PalmDOC header.
|
2
|
+
module Mobi
|
3
|
+
module Header
|
4
|
+
class PalmDocHeader
|
5
|
+
|
6
|
+
# Initializes the PalmDOC header.
|
7
|
+
#
|
8
|
+
# data - A StreamSlicer which starts at the PalmDOC header.
|
9
|
+
#
|
10
|
+
# Returns self.
|
11
|
+
def initialize(data)
|
12
|
+
@data = data
|
13
|
+
end
|
14
|
+
|
15
|
+
# The compression type as returned from byte code.
|
16
|
+
#
|
17
|
+
# Returns a Fixnum.
|
18
|
+
def raw_compression_type
|
19
|
+
@compression_type ||= @data[0, 2].unpack('n*')[0]
|
20
|
+
end
|
21
|
+
|
22
|
+
# The compression type.
|
23
|
+
#
|
24
|
+
# Returns a String.
|
25
|
+
def compression_type
|
26
|
+
{ 1 => 'None',
|
27
|
+
2 => 'PalmDOC',
|
28
|
+
17480 => 'HUFF/CDIC'
|
29
|
+
}.fetch(raw_compression_type)
|
30
|
+
end
|
31
|
+
|
32
|
+
# The uncompressed length of the entire text of the book.
|
33
|
+
#
|
34
|
+
# Returns a Fixnum.
|
35
|
+
def text_length
|
36
|
+
@text_length ||= @data[4, 4].unpack('N*')[0]
|
37
|
+
end
|
38
|
+
|
39
|
+
# Number of PDB records used for the text of the book.
|
40
|
+
#
|
41
|
+
# Returns a Fixnum.
|
42
|
+
def record_count
|
43
|
+
@record_count ||= @data[8, 2].unpack('n*')[0]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Maximum size of each record containing text. Note that this always
|
47
|
+
# returns 4096.
|
48
|
+
#
|
49
|
+
# Returns a Fixnum.
|
50
|
+
def record_size
|
51
|
+
@record_size ||= @data[10, 2].unpack('n*')[0]
|
52
|
+
end
|
53
|
+
|
54
|
+
# The encryption type as returned from byte code.
|
55
|
+
#
|
56
|
+
# Returns a Fixnum
|
57
|
+
def raw_encryption_type
|
58
|
+
@encryption_type ||= @data[12, 2].unpack('n*')[0]
|
59
|
+
end
|
60
|
+
|
61
|
+
# The encryption type.
|
62
|
+
#
|
63
|
+
# Returns a String.
|
64
|
+
def encryption_type
|
65
|
+
{ 0 => 'None',
|
66
|
+
1 => 'Old MOBIpocket',
|
67
|
+
2 => 'MOBIpocket'
|
68
|
+
}.fetch(raw_encryption_type)
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|