zim-ruby 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+ require 'xz'
4
+
5
+ require 'zim/file'
6
+ require 'zim/structs'
7
+ require 'zim/cluster'
8
+ require 'zim/exceptions'
9
+ require 'zim/url'
10
+
11
+ # Zim module
12
+ #
13
+ # Example:
14
+ # zim = Zim::ZimFile.new('test.zim')
15
+ # p zim.urls.first.blob
16
+ module Zim
17
+ end
18
+
@@ -0,0 +1,61 @@
1
+ module Zim
2
+
3
# One entry of the cluster list. A cluster holds several blobs, stored either
# uncompressed (compression 0 or 1) or LZMA/XZ-compressed (compression 4).
class ClusterEntry

  # compression type byte read at the start of the cluster
  attr_reader :compression

  # Read the compression byte from +f+ at its current position and remember
  # where the cluster payload starts. The blob offset table is loaded lazily
  # on the first blob access.
  def initialize(f)
    @f = f
    @compression = f.read_int8
    @pos = f.tell
    @offsets = nil
  end

  # Return the content of blob number +blob+ of this cluster.
  #
  # Raises UnknownCompression for unsupported compression types and
  # InvalidBlobNumber when +blob+ is out of range.
  def read_blob(blob)
    return read_blob_uncompressed(nil, blob) if @compression == 0 || @compression == 1
    return read_blob_lzma(blob) if @compression == 4

    raise UnknownCompression
  end

  # +io+ is the stream to read from (nil means the zim file itself).
  # +seek+ tells whether offsets are relative to the cluster position in the
  # file (true) or to the start of +io+ (false, used for decompressed data).
  def read_blob_uncompressed(io, blob, seek = true) # :nodoc:
    load_offsets(io, seek) if @offsets.nil?

    blob_count = @offsets.count - 1
    raise InvalidBlobNumber if blob < 0 || blob >= blob_count

    first_byte = @offsets[blob]
    length = @offsets[blob + 1] - first_byte
    first_byte += @pos if seek

    @f.read_str(length, first_byte, io)
  end

  # Decompress the cluster payload and read the blob from the result.
  def read_blob_lzma(blob) # :nodoc:
    @f.seek(@pos)
    decompressed = XZ.decompress_stream(@f.file)
    read_blob_uncompressed(StringIO.new(decompressed), blob, false)
  end

  private

  # Fill @offsets with the offset table found at the start of the payload.
  # The first offset doubles as the table size in bytes, so dividing it by
  # four yields the number of 32-bit entries in the table.
  def load_offsets(io, seek)
    @offsets = []
    @f.seek(@pos, io) if seek

    head = @f.read_int32(nil, io)
    @offsets << head

    remaining = (head >> 2) - 1
    remaining.times { @offsets << @f.read_int32(nil, io) }
  end
end
61
+ end
@@ -0,0 +1,15 @@
1
+ module Zim
2
+
3
# Raised when the file header does not carry the expected ZIM magic number.
#
# Derives from StandardError (not Exception) so that a plain +rescue+ in
# calling code catches it; +rescue Exception+ keeps working as before.
class InvalidZimMagic < StandardError
end
6
+
7
# Raised when a cluster uses a compression type that is not supported.
#
# Derives from StandardError (not Exception) so that a plain +rescue+ in
# calling code catches it; +rescue Exception+ keeps working as before.
class UnknownCompression < StandardError
end
10
+
11
# Raised when a requested blob number is out of range for its cluster.
#
# Derives from StandardError (not Exception) so that a plain +rescue+ in
# calling code catches it; +rescue Exception+ keeps working as before.
class InvalidBlobNumber < StandardError
end
14
+ end
15
+
@@ -0,0 +1,122 @@
1
+ module Zim
2
+
3
+ # ZIM magic number
4
+ ZIM_MAGIC = 72173914
5
+
6
# Main class of the zim-ruby library.
#
# Opens a ZIM file, loads its header, MIME type list, URL directory, title
# list and cluster directory, and offers little-endian read helpers that the
# other structures use to parse themselves.
class ZimFile
  # header informations (used internally)
  attr_reader :header

  # list of included mime types (used internally)
  attr_reader :mime_types

  # Directory of urls (used internally)
  attr_reader :urls

  # mapping of titles to urls (used internally)
  attr_reader :titles

  # cluster list (used internally)
  attr_reader :clusters

  # IO associated with this ZimFile
  attr_reader :file

  # Load data from the file named +filename+.
  #
  # Raises InvalidZimMagic when the header magic does not match ZIM_MAGIC.
  # The file handle is closed again if loading fails for any reason.
  def initialize(filename)
    loaded = false
    # binary mode: no newline translation, no encoding conversion
    @file = File.new(filename, 'rb')

    @header = FileHeader.new(self)
    raise InvalidZimMagic if @header.magic != ZIM_MAGIC

    seek(@header.mime_list_pos)
    @mime_types = StringList.new(self)

    seek(@header.url_pos)
    @urls = Directory.new(self, @header.article_count, Url)

    seek(@header.title_pos)
    @titles = TitleList.new(self, @header.article_count)

    seek(@header.cluster_pos)
    @clusters = Directory.new(self, @header.cluster_count, ClusterEntry)

    loaded = true
  ensure
    # a flag (rather than rescue) is used so the handle is released no
    # matter which exception class aborted the construction
    @file.close if @file && !loaded && !@file.closed?
  end

  # Close the underlying file. Safe to call more than once.
  def close
    @file.close unless @file.nil? || @file.closed?
  end

  # access a url by full url (including namespace)
  # e.g.: zim['/A/Table of Contents']
  #
  # Returns nil when no entry matches.
  def [](idx)
    @urls.detect { |entry| entry.to_s == idx }
  end

  def seek(pos, io = nil) # :nodoc:
    io ||= @file
    io.seek(pos)
  end

  def tell(io = nil) # :nodoc:
    io ||= @file
    io.tell
  end

  # Read an unsigned 8 bit integer, optionally seeking to +pos+ first.
  def read_int8(pos = nil, io = nil) # :nodoc:
    read_bytes(1, pos, io).unpack('C').first
  end

  # Read an unsigned little-endian 16 bit integer.
  def read_int16(pos = nil, io = nil) # :nodoc:
    read_bytes(2, pos, io).unpack('v').first
  end

  # Read an unsigned little-endian 32 bit integer.
  def read_int32(pos = nil, io = nil) # :nodoc:
    read_bytes(4, pos, io).unpack('V').first
  end

  # Read an unsigned little-endian 64 bit integer.
  def read_int64(pos = nil, io = nil) # :nodoc:
    # two 32 bit halves keep this independent of 64 bit unpack directives
    low, high = read_bytes(8, pos, io).unpack('VV')
    (high << 32) | low
  end

  # Read +size+ bytes; returns '' for a size of zero.
  def read_str(size, pos = nil, io = nil) # :nodoc:
    io ||= @file
    seek(pos, io) unless pos.nil?

    return '' if size == 0

    io.read(size)
  end

  # Read a NUL-terminated string (terminator not included). Returns '' when
  # the stream is already at its end.
  def read_cstr(pos = nil, io = nil) # :nodoc:
    io ||= @file
    seek(pos, io) unless pos.nil?

    chunk = io.gets("\0")
    chunk.nil? ? '' : chunk.chomp("\0")
  end

  private

  # Read exactly +size+ bytes from +io+ (default: the zim file), seeking to
  # +pos+ first when given. Raises EOFError on a short read so truncated
  # files fail with a clear error instead of a nil dereference.
  def read_bytes(size, pos, io)
    io ||= @file
    seek(pos, io) unless pos.nil?

    data = io.read(size)
    raise EOFError, 'unexpected end of zim data' if data.nil? || data.bytesize < size

    data
  end
end
121
+ end
122
+
@@ -0,0 +1,84 @@
1
+ module Zim
2
+
3
# Header data of a zim file, read field by field in on-disk order.
class FileHeader

  # magic number, zim version and uuid
  attr_reader :magic, :version, :uuid

  attr_reader :article_count, :cluster_count

  attr_reader :url_pos, :title_pos, :cluster_pos, :mime_list_pos # :nodoc:

  # main page index and layout page index
  attr_reader :main_page, :layout_page

  attr_reader :checksum_pos # :nodoc:

  # Read the file header from +f+, starting at its current position.
  # +f+ must respond to read_int32, read_int64 and read_str.
  def initialize(f)
    # [instance variable, reader method, reader arguments...] - the order of
    # the rows is the order of the fields in the file and must not change
    layout = [
      [:@magic,         :read_int32],
      [:@version,       :read_int32],
      [:@uuid,          :read_str, 16],
      [:@article_count, :read_int32],
      [:@cluster_count, :read_int32],
      [:@url_pos,       :read_int64],
      [:@title_pos,     :read_int64],
      [:@cluster_pos,   :read_int64],
      [:@mime_list_pos, :read_int64],
      [:@main_page,     :read_int32],
      [:@layout_page,   :read_int32],
      [:@checksum_pos,  :read_int64]
    ]

    layout.each do |ivar, reader, *args|
      instance_variable_set(ivar, f.send(reader, *args))
    end
  end
end
44
+
45
# Array of strings read from consecutive NUL-terminated strings; the list
# ends at the first empty string, which is consumed but not stored.
class StringList < Array
  # Read strings from +f+ (via read_cstr) until an empty one is returned.
  def initialize(f)
    super()

    until (entry = f.read_cstr).length == 0
      push(entry)
    end
  end
end
56
+
57
# Array of entries located through a table of 64 bit file positions.
class Directory < Array
  # Read +count+ positions from +f+ first, then seek to each position in
  # turn and append +clazz.new(f)+ for it.
  def initialize(f, count, clazz)
    super()

    positions = count.times.map { f.read_int64 }

    positions.each do |position|
      f.seek(position)
      push(clazz.new(f))
    end
  end
end
73
+
74
# Array of 32 bit integers read from the title list.
class TitleList < Array
  # Read +count+ 32 bit values from +f+, keeping them in file order.
  def initialize(f, count)
    super()

    1.upto(count) { push(f.read_int32) }
  end
end
84
+ end
@@ -0,0 +1,67 @@
1
+ module Zim
2
+
3
# Url entry class: one directory entry, either an article (with a MIME type,
# cluster number and blob number) or a redirect to another entry.
class Url

  attr_reader :mime_type
  attr_reader :namespace
  attr_reader :revision
  attr_reader :redirect_index
  attr_reader :cluster_number
  attr_reader :blob_number
  attr_reader :url
  attr_reader :title
  attr_reader :params

  # Read a url entry from +f+ at its current position.
  #
  # A MIME type index of 0xffff marks a redirect: such an entry carries a
  # redirect index instead of a cluster/blob number pair. All other fields
  # are laid out identically for both kinds of entry.
  def initialize(f)
    @f = f
    mime_index = f.read_int16
    is_redirect = (mime_index == 0xffff)

    @mime_type = is_redirect ? nil : f.mime_types[mime_index]
    param_len = f.read_int8
    @namespace = f.read_int8.chr
    @revision = f.read_int32

    if is_redirect
      @redirect_index = f.read_int32
      @cluster_number = nil
      @blob_number = nil
    else
      @redirect_index = nil
      @cluster_number = f.read_int32
      @blob_number = f.read_int32
    end

    @url = f.read_cstr
    @title = f.read_cstr
    @params = f.read_str(param_len)
  end

  # checks if this url is a redirect
  def redirect?
    # decided by the redirect index rather than the MIME type, so an article
    # whose MIME index is missing from the MIME list is not mistaken for one
    !@redirect_index.nil?
  end

  # Receive the blob of this url. Redirects are followed (also through
  # several hops) and the blob of the final target is returned.
  #
  # Returns nil when the redirect target does not exist or the redirects
  # form a cycle.
  def blob
    target = self
    hops = 0

    while target && target.redirect?
      hops += 1
      # more hops than entries can only mean a redirect cycle
      return nil if hops > @f.urls.length

      target = @f.urls[target.redirect_index]
    end

    return nil if target.nil?

    @f.clusters[target.cluster_number].read_blob(target.blob_number)
  end

  # Full url including the namespace, e.g. "/A/Table of Contents".
  def to_s
    "/#{@namespace}/#{@url}"
  end
end
66
+ end
67
+
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for zim-ruby.
Gem::Specification.new do |s|
  s.name        = 'zim-ruby'
  s.version     = '1.0.1'
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Christoph Plank']
  s.email       = ['chrisistuff@gmail.com']
  s.homepage    = 'http://rubygems.org/gems/zim-ruby'
  s.summary     = %q{Library to read openzim (wikipedia) files}
  s.description = %q{This library can be used to read openzim files like the ones exported by wikipedia}

  # NOTE(review): the former `s.has_rdoc = false` line was dropped. The
  # setting is obsolete in RubyGems and the setter is reportedly missing
  # from current releases, where it would abort loading this file -
  # confirm against the RubyGems version you target.

  #s.add_dependency('ruby-xz', '>= 0.0.2')
  # added xz.rb directly to make it ruby 1.8 compatible

  # package every file tracked by git
  s.files         = `git ls-files`.split("\n")
  s.require_paths = ['lib']
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: zim-ruby
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 1
10
+ version: 1.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Christoph Plank
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-11-18 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: This library can be used to read openzim files like the ones exported by wikipedia
22
+ email:
23
+ - chrisistuff@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - .gitignore
32
+ - LICENSE
33
+ - README.markdown
34
+ - examples/unpack.rb
35
+ - lib/xz.rb
36
+ - lib/zim.rb
37
+ - lib/zim/cluster.rb
38
+ - lib/zim/exceptions.rb
39
+ - lib/zim/file.rb
40
+ - lib/zim/structs.rb
41
+ - lib/zim/url.rb
42
+ - zim-ruby.gemspec
43
+ homepage: http://rubygems.org/gems/zim-ruby
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options: []
48
+
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.11
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Library to read openzim (wikipedia) files
76
+ test_files: []
77
+