mobi 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +2 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +21 -9
- data/README.rdoc +41 -0
- data/Rakefile +3 -17
- data/VERSION +1 -1
- data/lib/mobi.rb +7 -3
- data/lib/mobi/header/exth_header.rb +46 -0
- data/lib/mobi/header/mobi_header.rb +153 -0
- data/lib/mobi/header/palm_doc_header.rb +73 -0
- data/lib/mobi/metadata.rb +45 -68
- data/lib/mobi/metadata_streams.rb +46 -0
- data/lib/mobi/stream_slicer.rb +14 -37
- data/mobi.gemspec +34 -26
- data/spec/fixtures/sherlock.mobi +0 -0
- data/spec/lib/mobi/header/exth_header_spec.rb +36 -0
- data/spec/lib/mobi/header/mobi_header_spec.rb +79 -0
- data/spec/lib/mobi/header/palm_doc_header_spec.rb +45 -0
- data/spec/lib/mobi/metadata_spec.rb +62 -0
- data/spec/lib/mobi/stream_slicer_spec.rb +64 -0
- data/spec/lib/mobi_spec.rb +10 -0
- data/spec/spec_helper.rb +4 -0
- metadata +90 -67
- data/test/helper.rb +0 -18
- data/test/test_mobi.rb +0 -7
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
|
|
6
6
|
# Add dependencies to develop your gem here.
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development do
|
9
|
-
gem
|
10
|
-
gem
|
11
|
-
gem
|
12
|
-
gem
|
9
|
+
gem 'rspec', '~> 2.11.0'
|
10
|
+
gem 'rr', '~> 1.0.4'
|
11
|
+
gem 'bundler', '~> 1.1.0'
|
12
|
+
gem 'jeweler', '~> 1.8.0'
|
13
13
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,20 +1,32 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
diff-lcs (1.1.3)
|
4
5
|
git (1.2.5)
|
5
|
-
jeweler (1.
|
6
|
-
bundler (~> 1.0
|
6
|
+
jeweler (1.8.3)
|
7
|
+
bundler (~> 1.0)
|
7
8
|
git (>= 1.2.5)
|
8
9
|
rake
|
9
|
-
|
10
|
-
|
11
|
-
|
10
|
+
rdoc
|
11
|
+
json (1.6.6)
|
12
|
+
rake (0.9.2.2)
|
13
|
+
rdoc (3.12)
|
14
|
+
json (~> 1.4)
|
15
|
+
rr (1.0.4)
|
16
|
+
rspec (2.11.0)
|
17
|
+
rspec-core (~> 2.11.0)
|
18
|
+
rspec-expectations (~> 2.11.0)
|
19
|
+
rspec-mocks (~> 2.11.0)
|
20
|
+
rspec-core (2.11.1)
|
21
|
+
rspec-expectations (2.11.1)
|
22
|
+
diff-lcs (~> 1.1.3)
|
23
|
+
rspec-mocks (2.11.1)
|
12
24
|
|
13
25
|
PLATFORMS
|
14
26
|
ruby
|
15
27
|
|
16
28
|
DEPENDENCIES
|
17
|
-
bundler (~> 1.
|
18
|
-
jeweler (~> 1.
|
19
|
-
|
20
|
-
|
29
|
+
bundler (~> 1.1.0)
|
30
|
+
jeweler (~> 1.8.0)
|
31
|
+
rr (~> 1.0.4)
|
32
|
+
rspec (~> 2.11.0)
|
data/README.rdoc
CHANGED
@@ -10,6 +10,47 @@ The gem does NOT handle any errors in a mobi document, or some edge cases.
|
|
10
10
|
|
11
11
|
Tests will be added soon.
|
12
12
|
|
13
|
+
== Installation
|
14
|
+
|
15
|
+
gem install mobi
|
16
|
+
|
17
|
+
== Usage
|
18
|
+
|
19
|
+
Creating a Mobi::Metadata object
|
20
|
+
|
21
|
+
Mobi::Metadata.new(File.open('/path/to/file.mobi'))
|
22
|
+
|
23
|
+
A handy convenience method to do the exact same thing
|
24
|
+
|
25
|
+
Mobi.metadata File.open('/path/to/file.mobi')
|
26
|
+
|
27
|
+
Getting metadata information is as simple as:
|
28
|
+
|
29
|
+
metadata = Mobi.metadata File.open('/path/to/file.mobi')
|
30
|
+
author = metadata.author
|
31
|
+
|
32
|
+
Supported metadata options are:
|
33
|
+
|
34
|
+
* author
|
35
|
+
* publisher
|
36
|
+
* imprint
|
37
|
+
* description
|
38
|
+
* isbn
|
39
|
+
* subject
|
40
|
+
* published_at
|
41
|
+
* review
|
42
|
+
* contributor
|
43
|
+
* rights
|
44
|
+
* subject_code
|
45
|
+
* type
|
46
|
+
* source
|
47
|
+
* asin
|
48
|
+
* version
|
49
|
+
|
50
|
+
== Thanks
|
51
|
+
|
52
|
+
* Calibre open source project. I ripped off the idea of a Stream Slicer and got a better understanding of mobi files from their code base. Check them out at http://calibre-ebook.com
|
53
|
+
|
13
54
|
== Contributing to mobi
|
14
55
|
|
15
56
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
data/Rakefile
CHANGED
@@ -15,8 +15,8 @@ Jeweler::Tasks.new do |gem|
|
|
15
15
|
gem.name = "mobi"
|
16
16
|
gem.homepage = "http://github.com/jkongie/mobi"
|
17
17
|
gem.license = "MIT"
|
18
|
-
gem.summary = %Q{A Rubygem that inspects MOBI metadata}
|
19
|
-
gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata
|
18
|
+
gem.summary = %Q{A Rubygem that inspects MOBI metadata.}
|
19
|
+
gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata.}
|
20
20
|
gem.email = "jkongie@gmail.com"
|
21
21
|
gem.authors = ["jkongie"]
|
22
22
|
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
@@ -26,23 +26,9 @@ Jeweler::Tasks.new do |gem|
|
|
26
26
|
end
|
27
27
|
Jeweler::RubygemsDotOrgTasks.new
|
28
28
|
|
29
|
-
require 'rake/testtask'
|
30
|
-
Rake::TestTask.new(:test) do |test|
|
31
|
-
test.libs << 'lib' << 'test'
|
32
|
-
test.pattern = 'test/**/test_*.rb'
|
33
|
-
test.verbose = true
|
34
|
-
end
|
35
|
-
|
36
|
-
require 'rcov/rcovtask'
|
37
|
-
Rcov::RcovTask.new do |test|
|
38
|
-
test.libs << 'test'
|
39
|
-
test.pattern = 'test/**/test_*.rb'
|
40
|
-
test.verbose = true
|
41
|
-
end
|
42
|
-
|
43
29
|
task :default => :test
|
44
30
|
|
45
|
-
require '
|
31
|
+
require 'rdoc/task'
|
46
32
|
Rake::RDocTask.new do |rdoc|
|
47
33
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
34
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/mobi.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'mobi/stream_slicer'
|
2
|
+
require 'mobi/metadata'
|
3
|
+
require 'mobi/metadata_streams'
|
4
|
+
require 'mobi/header/palm_doc_header'
|
5
|
+
require 'mobi/header/mobi_header'
|
6
|
+
require 'mobi/header/exth_header'
|
3
7
|
|
4
8
|
module Mobi
|
5
9
|
|
6
10
|
def self.metadata(file)
|
7
11
|
Mobi::Metadata.new(file)
|
8
12
|
end
|
9
|
-
|
13
|
+
|
10
14
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Mobi
|
2
|
+
module Header
|
3
|
+
class ExthHeader
|
4
|
+
RECORD_TYPES = { 100 => :author, 101 => :publisher, 102 => :imprint, 103 => :description, 104 => :isbn, 105 => :subject,
|
5
|
+
106 => :published_at, 107 => :review, 108 => :contributor, 109 => :rights, 110 => :subject_code,
|
6
|
+
111 => :type, 112 => :source, 113 => :asin, 114 => :version }
|
7
|
+
|
8
|
+
attr_reader *RECORD_TYPES.values
|
9
|
+
|
10
|
+
def initialize(data)
|
11
|
+
@data = data
|
12
|
+
@record_count, = @data[8, 4].unpack('N*')
|
13
|
+
|
14
|
+
define_data_methods
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def define_data_methods
|
20
|
+
start = 12
|
21
|
+
@record_count.times do
|
22
|
+
record = ExthRecord.new(@data, start)
|
23
|
+
|
24
|
+
if RECORD_TYPES.key?(record.code)
|
25
|
+
instance_variable_set "@#{record.name}", record.value
|
26
|
+
end
|
27
|
+
|
28
|
+
start += record.length
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class ExthRecord
|
33
|
+
|
34
|
+
attr_reader :code, :name, :length, :value
|
35
|
+
|
36
|
+
def initialize(data, start)
|
37
|
+
@code, = data[start, 4].unpack('N*')[0].to_i
|
38
|
+
@name = ExthHeader::RECORD_TYPES[@code]
|
39
|
+
@length, = data[start + 4, 4].unpack('N*')
|
40
|
+
@value = data[start + 8, length - 8]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# Public: Parses the Mobi Header which follows the 16 bytes of the PalmDOC
|
2
|
+
# header.
|
3
|
+
module Mobi
|
4
|
+
module Header
|
5
|
+
class MobiHeader
|
6
|
+
|
7
|
+
# Initialize the MobiHeader.
|
8
|
+
#
|
9
|
+
# data - A StreamSlicer which starts at record 0 of the PalmDOC.
|
10
|
+
#
|
11
|
+
# Returns self.
|
12
|
+
def initialize(data)
|
13
|
+
@data = data
|
14
|
+
end
|
15
|
+
|
16
|
+
# A MOBI identifier.
|
17
|
+
#
|
18
|
+
# Returns a String.
|
19
|
+
def identifier
|
20
|
+
@identifier ||= @data[16, 4]
|
21
|
+
end
|
22
|
+
|
23
|
+
# The length of the MOBI header.
|
24
|
+
#
|
25
|
+
# Returns a Fixnum.
|
26
|
+
def header_length
|
27
|
+
@header_length ||= @data[20, 4].unpack('N*')[0]
|
28
|
+
end
|
29
|
+
|
30
|
+
# The kind of Mobipocket file as returned from byte code.
|
31
|
+
#
|
32
|
+
# Returns a Fixnum.
|
33
|
+
def raw_mobi_type
|
34
|
+
@raw_mobi_type ||= @data[24, 4].unpack('N*')[0]
|
35
|
+
end
|
36
|
+
|
37
|
+
# The kind of Mobipocket file.
|
38
|
+
#
|
39
|
+
# Returns a String.
|
40
|
+
def mobi_type
|
41
|
+
{ 2 => 'MOBIpocket Book',
|
42
|
+
3 => 'PalmDoc Book',
|
43
|
+
4 => 'Audio',
|
44
|
+
232 => 'MOBIpocket',
|
45
|
+
248 => 'KF8',
|
46
|
+
257 => 'News',
|
47
|
+
258 => 'News Feed',
|
48
|
+
259 => 'News_Magazine',
|
49
|
+
513 => 'PICS',
|
50
|
+
514 => 'WORD',
|
51
|
+
515 => 'XLS',
|
52
|
+
516 => 'PPT',
|
53
|
+
517 => 'TEXT',
|
54
|
+
518 => 'HTML'
|
55
|
+
}.fetch(raw_mobi_type)
|
56
|
+
end
|
57
|
+
|
58
|
+
# The text encoding as returned from byte code.
|
59
|
+
#
|
60
|
+
# Returns a Fixnum.
|
61
|
+
def raw_text_encoding
|
62
|
+
@text_encoding ||= @data[28, 4].unpack('N*')[0]
|
63
|
+
end
|
64
|
+
|
65
|
+
# The text encoding.
|
66
|
+
#
|
67
|
+
# Returns a String.
|
68
|
+
def text_encoding
|
69
|
+
{ 1252 => 'CP1252 (WinLatin1)',
|
70
|
+
65001 => 'UTF-8'
|
71
|
+
}.fetch(raw_text_encoding)
|
72
|
+
end
|
73
|
+
|
74
|
+
# The unique ID.
|
75
|
+
#
|
76
|
+
# Returns an Integer.
|
77
|
+
def unique_id
|
78
|
+
@unique_id ||= @data[32, 4].unpack('N*')[0]
|
79
|
+
end
|
80
|
+
|
81
|
+
# The version of the MOBIpocket format used in this file.
|
82
|
+
#
|
83
|
+
# Returns an Integer.
|
84
|
+
def file_version
|
85
|
+
@file_version ||= @data[36, 4].unpack('N*')[0]
|
86
|
+
end
|
87
|
+
|
88
|
+
# The first record number (starting with 0) that is not the book's text.
|
89
|
+
#
|
90
|
+
# Returns an Integer.
|
91
|
+
def first_non_book_index
|
92
|
+
@first_non_book_index ||= @data[80, 4].unpack('N*')[0]
|
93
|
+
end
|
94
|
+
|
95
|
+
# Offset in record 0 (not from start of file) of the full name of the book.
|
96
|
+
#
|
97
|
+
# Returns an Integer.
|
98
|
+
def full_name_offset
|
99
|
+
@full_name_offset ||= @data[84, 4].unpack('N*')[0]
|
100
|
+
end
|
101
|
+
|
102
|
+
# Length in bytes of the full name of the book.
|
103
|
+
#
|
104
|
+
# Returns an Integer.
|
105
|
+
def full_name_length
|
106
|
+
@full_name_length ||= @data[88, 4].unpack('N*')[0]
|
107
|
+
end
|
108
|
+
|
109
|
+
# The raw book locale code. I believe this refers to an LCID code.
|
110
|
+
#
|
111
|
+
# The low byte is the main language: 09 = English.
|
112
|
+
# The next byte is dialect: 08 = British, 04 = US.
|
113
|
+
# Thus US English is 1033, UK English is 2057.
|
114
|
+
#
|
115
|
+
# Returns an Integer.
|
116
|
+
def raw_locale_code
|
117
|
+
@raw_locale_code ||= @data[92, 4].unpack('N*')[0]
|
118
|
+
end
|
119
|
+
|
120
|
+
# The minimum MOBIpocket version support needed to read this file.
|
121
|
+
#
|
122
|
+
# Returns an Integer.
|
123
|
+
def minimum_supported_mobipocket_version
|
124
|
+
@minimum_supported_mobipocket_version ||= @data[104, 4].unpack('N*')[0]
|
125
|
+
end
|
126
|
+
|
127
|
+
# The first record number (starting with 0) that contains an image. Image
|
128
|
+
# records should be sequential.
|
129
|
+
#
|
130
|
+
# Returns an Integer.
|
131
|
+
def first_image_index_record_number
|
132
|
+
@first_image_index_record_number ||= @data[108, 4].unpack('N*')[0]
|
133
|
+
end
|
134
|
+
|
135
|
+
# The EXTH flag.
|
136
|
+
#
|
137
|
+
# If bit 6 is set, then there is an EXTH record.
|
138
|
+
#
|
139
|
+
# Returns a Fixnum, 1 or 0.
|
140
|
+
def exth_flag
|
141
|
+
@exth_flag ||= @data[128, 4].unpack('@3B8').first[1].to_i
|
142
|
+
end
|
143
|
+
|
144
|
+
# Does the book have an EXTH header?
|
145
|
+
#
|
146
|
+
# Returns true if the book has an EXTH header.
|
147
|
+
def exth_header?
|
148
|
+
exth_flag == 1
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
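# Public: Parses the PalmDOC header: compression type, text length, record
# count, record size and encryption type.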
|
2
|
+
module Mobi
|
3
|
+
module Header
|
4
|
+
class PalmDocHeader
|
5
|
+
|
6
|
+
# Initializes the PalmDOC header.
|
7
|
+
#
|
8
|
+
# data - A StreamSlicer which starts at the PalmDOC header.
|
9
|
+
#
|
10
|
+
# Returns self.
|
11
|
+
def initialize(data)
|
12
|
+
@data = data
|
13
|
+
end
|
14
|
+
|
15
|
+
# The compression type as returned from byte code.
|
16
|
+
#
|
17
|
+
# Returns a Fixnum.
|
18
|
+
def raw_compression_type
|
19
|
+
@compression_type ||= @data[0, 2].unpack('n*')[0]
|
20
|
+
end
|
21
|
+
|
22
|
+
# The compression type.
|
23
|
+
#
|
24
|
+
# Returns a String.
|
25
|
+
def compression_type
|
26
|
+
{ 1 => 'None',
|
27
|
+
2 => 'PalmDOC',
|
28
|
+
17480 => 'HUFF/CDIC'
|
29
|
+
}.fetch(raw_compression_type)
|
30
|
+
end
|
31
|
+
|
32
|
+
# The uncompressed length of the entire text of the book.
|
33
|
+
#
|
34
|
+
# Returns a Fixnum.
|
35
|
+
def text_length
|
36
|
+
@text_length ||= @data[4, 4].unpack('N*')[0]
|
37
|
+
end
|
38
|
+
|
39
|
+
# Number of PDB records used for the text of the book.
|
40
|
+
#
|
41
|
+
# Returns a Fixnum.
|
42
|
+
def record_count
|
43
|
+
@record_count ||= @data[8, 2].unpack('n*')[0]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Maximum size of each record containing text. Note that this always
|
47
|
+
# returns 4096.
|
48
|
+
#
|
49
|
+
# Returns a Fixnum.
|
50
|
+
def record_size
|
51
|
+
@record_size ||= @data[10, 2].unpack('n*')[0]
|
52
|
+
end
|
53
|
+
|
54
|
+
# The encryption type as returned from byte code.
|
55
|
+
#
|
56
|
+
# Returns a Fixnum.
|
57
|
+
def raw_encryption_type
|
58
|
+
@encryption_type ||= @data[12, 2].unpack('n*')[0]
|
59
|
+
end
|
60
|
+
|
61
|
+
# The encryption type.
|
62
|
+
#
|
63
|
+
# Returns a String.
|
64
|
+
def encryption_type
|
65
|
+
{ 0 => 'None',
|
66
|
+
1 => 'Old MOBIpocket',
|
67
|
+
2 => 'MOBIpocket'
|
68
|
+
}.fetch(raw_encryption_type)
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|