georgi-git_store 0.1.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,417 @@
1
+ #
2
+ # converted from the gitrb project
3
+ #
4
+ # authors:
5
+ # Matthias Lederhofer <matled@gmx.net>
6
+ # Simon 'corecode' Schubert <corecode@fs.ei.tum.de>
7
+ # Scott Chacon <schacon@gmail.com>
8
+ #
9
+ # provides native ruby access to git objects and pack files
10
+ #
11
+
12
+ require 'zlib'
13
+
14
+ class GitStore
15
# Signature constant for pack files (not referenced by the code in this file).
PACK_SIGNATURE = "PACK"

# Leading four bytes ("\377tOc") that mark a pack index carrying a version
# header; PackStorage#with_idx compares the first bytes of the index file
# against this value.
#
# Built from raw bytes so the string is binary: on Ruby 1.9+ a "\377tOc"
# literal is tagged with the source encoding, and String#== between that and
# the binary string returned by IO#read(4) is always false.
PACK_IDX_SIGNATURE = [0xff, 0x74, 0x4f, 0x63].pack('C*')
17
+
18
# Array-like random access over an already opened IO object.
#
# Reads are addressed by offset; the last read position is remembered so
# that sequential reads do not issue a seek for every access.
class Mmap
  # file    - an object responding to #seek and #read.
  # version - index version; for version 2 every offset is shifted by 8
  #           bytes, for any other value offsets are used unchanged.
  def initialize(file, version = 1)
    @file = file
    @offset = nil
    @global_offset = version == 2 ? 8 : 0
  end

  # Drops the reference to the underlying file (the file is not closed here).
  def unmap
    @file = nil
  end

  # Reads data at the given position.
  #
  #   map[offset]        # => Integer value of the single byte (nil at EOF)
  #   map[offset, len]   # => String of up to len bytes
  #   map[first..last]   # => String covering the range
  #
  # Raises RuntimeError for any other kind of index.
  def [](*idx)
    idx = idx[0] if idx.length == 1
    case idx
    when Range
      offset = idx.first
      # Parenthesised: without it the ternary swallowed the whole sum.
      len = idx.last - idx.first + (idx.exclude_end? ? 0 : 1)
    when Integer
      offset = idx
      len = nil
    when Array
      offset, len = idx
    else
      raise RuntimeError, "invalid index param: #{idx.class}"
    end

    @file.seek(offset + @global_offset) if @offset != offset
    # Remember where the file pointer is after this read so that the next
    # sequential access can skip the seek.
    @offset = offset + (len || 1)

    if len
      @file.read(len)
    else
      byte = @file.read(1)
      # getbyte gives the integer value on every Ruby version; String#[]
      # returns a one-character String on Ruby 1.9+.
      byte && byte.getbyte(0)
    end
  end
end
58
+
59
# Error type for pack or index contents that cannot be interpreted.
class PackFormatError < StandardError; end
61
+
62
# Read access to one pack file and its index (index layout version 1 or 2).
#
# SHA1 arguments are the raw 20-byte form, not hex. Strings handed to the
# private delta helpers are expected to be binary (they come from zlib).
#
# NOTE(review): version 2 index offsets are read as plain 32-bit values; no
# code here follows the large-offset table, so very large packs are not
# handled - confirm whether that matters for callers.
class PackStorage
  OBJ_OFS_DELTA = 6
  OBJ_REF_DELTA = 7

  FanOutCount   = 256
  SHA1Size      = 20
  IdxOffsetSize = 4
  OffsetSize    = 4
  CrcSize       = 4
  OffsetStart   = FanOutCount * IdxOffsetSize
  SHA1Start     = OffsetStart + OffsetSize
  EntrySize     = OffsetSize + SHA1Size
  EntrySizeV2   = SHA1Size + CrcSize + OffsetSize

  # Path of the pack file.
  attr_reader :name

  # file - path to the pack file; a path ending in ".idx" is mapped to the
  #        matching ".pack" path. The index is read immediately.
  def initialize(file)
    file = file[0...-3] + 'pack' if file =~ /\.idx$/
    @name = file
    @cache = {}
    init_pack
  end

  # Opens the index, detects its version (sets @version) and yields an Mmap
  # over it. The file is closed even when the block raises. Returns nil.
  #
  # index_file - optional explicit index path; defaults to the pack path
  #              with its last four characters replaced by "idx".
  #
  # Raises PackFormatError for a truncated header or an unknown version.
  def with_idx(index_file = nil)
    index_file ||= @name[0...-4] + 'idx'

    File.open(index_file, 'rb') do |idxfile|
      header = idxfile.read(8)
      if header.nil? || header.size < 8
        raise PackFormatError, "pack #@name has a truncated index"
      end
      sig = header[0, 4]
      ver = header[4, 4].unpack("N")[0]

      # Compare byte values: String#== is false for equal bytes when the
      # two strings carry incompatible encodings.
      if sig.unpack('C*') == PACK_IDX_SIGNATURE.unpack('C*')
        if ver != 2
          raise PackFormatError, "pack #@name has unknown pack file version #{ver}"
        end
        @version = 2
      else
        @version = 1
      end

      idx = Mmap.new(idxfile, @version)
      begin
        yield idx
      ensure
        idx.unmap
      end
    end
    nil
  end

  # Opens the pack file in binary mode, yields it and closes it afterwards,
  # also when the block raises. Returns nil.
  def with_packfile
    File.open(@name, 'rb') { |packfile| yield packfile }
    nil
  end

  # Rebuilds the cache with every commit and tree found in the pack, stored
  # as [type, data] under the raw SHA1.
  #
  # NOTE(review): #[] stores RawObject instances in the same cache, so after
  # this call #[] can return [type, data] arrays - confirm callers expect it.
  def cache_objects
    @cache = {}
    with_packfile do |packfile|
      each_entry do |sha, offset|
        data, type = unpack_object(packfile, offset, {:caching => true})
        @cache[sha] = [type, data] if data
      end
    end
  end

  # Nothing is kept open between calls, so there is nothing to release.
  def close
  end

  # Returns the hex SHA1 of every object listed in the index.
  def get_shas
    shas = []
    each_sha1 { |sha| shas << sha.unpack("H*")[0] }
    shas
  end

  # Looks up an object by raw SHA1. Returns the cached value if present,
  # otherwise parses the object, caches and returns it; nil when the index
  # has no such entry.
  def [](sha1)
    cached = @cache[sha1]
    return cached if cached

    offset = find_object(sha1)
    return nil unless offset
    @cache[sha1] = parse_object(offset)
  end

  # Reads the fan-out table into @offsets (with a leading 0) and sets @size
  # to its last value. Raises PackFormatError when the table decreases.
  def init_pack
    with_idx do |idx|
      @offsets = [0]
      FanOutCount.times do |i|
        pos = read_uint32(idx, i * IdxOffsetSize)
        if pos < @offsets[i]
          raise PackFormatError, "pack #@name has discontinuous index #{i}"
        end
        @offsets << pos
      end
      @size = @offsets[-1]
    end
  end

  # Yields (raw sha1, pack offset) for every index entry.
  def each_entry
    with_idx do |idx|
      if @version == 2
        read_data_v2(idx).each { |sha1, _crc, offset| yield sha1, offset }
      else
        pos = OffsetStart
        @size.times do
          offset = read_uint32(idx, pos)
          sha1 = idx[pos + OffsetSize, SHA1Size]
          pos += EntrySize
          yield sha1, offset
        end
      end
    end
  end

  # Reads the three consecutive version 2 tables (sha1s, crcs, offsets) and
  # returns an array of [sha1, crc, offset] triples.
  def read_data_v2(idx)
    data = []
    pos = OffsetStart
    @size.times do |i|
      data[i] = [idx[pos, SHA1Size], 0, 0]
      pos += SHA1Size
    end
    @size.times do |i|
      data[i][1] = idx[pos, CrcSize]
      pos += CrcSize
    end
    @size.times do |i|
      data[i][2] = read_uint32(idx, pos)
      pos += OffsetSize
    end
    data
  end
  private :read_data_v2

  # Yields the raw sha1 of every index entry.
  def each_sha1
    with_idx do |idx|
      if @version == 2
        read_data_v2(idx).each { |sha1, _crc, _offset| yield sha1 }
      else
        pos = SHA1Start
        @size.times do
          sha1 = idx[pos, SHA1Size]
          pos += EntrySize
          yield sha1
        end
      end
    end
  end

  # Binary search for a raw sha1 inside the fan-out bucket of its first
  # byte. idx must answer idx[pos, len] with a byte string.
  #
  # Returns the pack offset of the object, or nil when it is not listed.
  def find_object_in_index(idx, sha1)
    # getbyte: String#[] with an Integer returns a String on Ruby 1.9+.
    slot = sha1.getbyte(0)
    return nil unless slot

    # Compare as byte arrays so differing encoding tags cannot make two
    # byte-identical strings compare as unequal.
    target = sha1.unpack('C*')
    v2 = @version == 2
    first, last = @offsets[slot, 2]

    while first < last
      mid = (first + last) / 2
      sha_pos = v2 ? OffsetStart + (mid * SHA1Size) : SHA1Start + mid * EntrySize
      candidate = idx[sha_pos, SHA1Size]
      if candidate.nil?
        raise PackFormatError, "pack #@name has a truncated index"
      end

      cmp = candidate.unpack('C*') <=> target
      if cmp < 0
        first = mid + 1
      elsif cmp > 0
        last = mid
      else
        offset_pos = if v2
          OffsetStart + (@size * (SHA1Size + CrcSize)) + (mid * OffsetSize)
        else
          OffsetStart + mid * EntrySize
        end
        return read_uint32(idx, offset_pos)
      end
    end
    nil
  end

  # Opens the index and returns the pack offset for a raw sha1, or nil.
  def find_object(sha1)
    obj = nil
    with_idx do |idx|
      obj = find_object_in_index(idx, sha1)
    end
    obj
  end
  private :find_object

  # Unpacks the object at offset and wraps it in a RawObject.
  def parse_object(offset)
    obj = nil
    with_packfile do |packfile|
      data, type = unpack_object(packfile, offset)
      obj = RawObject.new(OBJ_TYPES[type], data)
    end
    obj
  end
  protected :parse_object

  # Decodes the object header at offset and returns [data, type].
  #
  # With options[:caching] set, anything that does not resolve to a commit
  # or tree yields [false, false]. Deltas are resolved first and filtered on
  # the type of their base; filtering on the raw header type would drop
  # every deltified commit and tree.
  #
  # Raises PackFormatError for an unknown type or a truncated header.
  def unpack_object(packfile, offset, options = {})
    obj_offset = offset
    packfile.seek(offset)

    c = read_byte(packfile)
    size = c & 0xf
    type = (c >> 4) & 7
    shift = 4
    offset += 1
    # The size continues in 7-bit groups while the high bit is set.
    while c & 0x80 != 0
      c = read_byte(packfile)
      size |= ((c & 0x7f) << shift)
      shift += 7
      offset += 1
    end

    case type
    when OBJ_OFS_DELTA, OBJ_REF_DELTA
      unpack_deltified(packfile, type, offset, obj_offset, size, options)
    when OBJ_COMMIT, OBJ_TREE
      [unpack_compressed(offset, size), type]
    when OBJ_BLOB, OBJ_TAG
      return [false, false] if options[:caching]
      [unpack_compressed(offset, size), type]
    else
      raise PackFormatError, "invalid type #{type}"
    end
  end
  private :unpack_object

  # Resolves a delta object: locates its base (relative offset for
  # OBJ_OFS_DELTA, sha1 lookup otherwise), unpacks it and applies the delta.
  # Returns [data, type of the base], or [false, false] when caching and the
  # base is neither commit nor tree.
  #
  # Raises PackFormatError when the base cannot be located.
  def unpack_deltified(packfile, type, offset, obj_offset, size, options = {})
    packfile.seek(offset)
    data = packfile.read(SHA1Size)
    raise PackFormatError, 'error reading pack data' if data.nil?

    if type == OBJ_OFS_DELTA
      i = 0
      c = byte_at(data, i, 'error reading pack data')
      base_offset = c & 0x7f
      while c & 0x80 != 0
        c = byte_at(data, i += 1, 'error reading pack data')
        base_offset += 1
        base_offset <<= 7
        base_offset |= c & 0x7f
      end
      base_offset = obj_offset - base_offset
      # A base at or after this object would recurse without end.
      if base_offset < 0 || base_offset >= obj_offset
        raise PackFormatError, 'invalid delta base offset'
      end
      offset += i + 1
    else
      base_offset = find_object(data)
      if base_offset.nil?
        raise PackFormatError, 'delta base object not found in pack'
      end
      offset += SHA1Size
    end

    base, type = unpack_object(packfile, base_offset)

    return [false, false] if !(type == OBJ_COMMIT || type == OBJ_TREE) && options[:caching]

    delta = unpack_compressed(offset, size)
    [patch_delta(base, delta), type]
  end
  private :unpack_deltified

  # Inflates the zlib stream starting at offset and returns exactly
  # destsize bytes.
  #
  # Raises PackFormatError when the file ends early or the inflated size
  # differs from destsize.
  def unpack_compressed(offset, destsize)
    outdata = String.new
    with_packfile do |packfile|
      packfile.seek(offset)
      zstr = Zlib::Inflate.new
      begin
        while outdata.size < destsize && !zstr.finished?
          indata = packfile.read(4096)
          # IO#read(n) returns nil, not "", at end of file.
          if indata.nil? || indata.empty?
            raise PackFormatError, 'error reading pack data'
          end
          outdata << zstr.inflate(indata)
        end
      ensure
        zstr.close
      end
    end
    if outdata.size != destsize
      raise PackFormatError, 'error reading pack data'
    end
    outdata
  end
  private :unpack_compressed

  # Applies delta to base and returns the result.
  #
  # The delta starts with the expected base size and result size, followed
  # by copy instructions (high bit set; flag bits select which offset/size
  # bytes follow) and literal inserts (1..127 bytes of data).
  #
  # Raises PackFormatError when sizes do not match or the delta is malformed.
  def patch_delta(base, delta)
    src_size, pos = patch_delta_header_size(delta, 0)
    if src_size != base.size
      raise PackFormatError, 'invalid delta data'
    end

    dest_size, pos = patch_delta_header_size(delta, pos)
    dest = String.new
    while pos < delta.size
      c = byte_at(delta, pos, 'invalid delta data')
      pos += 1
      if c & 0x80 != 0
        cp_off = cp_size = 0
        # Bits 0-3 flag which of the four offset bytes are present.
        4.times do |n|
          next if c & (0x01 << n) == 0
          cp_off |= byte_at(delta, pos, 'invalid delta data') << (8 * n)
          pos += 1
        end
        # Bits 4-6 flag which of the three size bytes are present.
        3.times do |n|
          next if c & (0x10 << n) == 0
          cp_size |= byte_at(delta, pos, 'invalid delta data') << (8 * n)
          pos += 1
        end
        cp_size = 0x10000 if cp_size == 0
        chunk = base[cp_off, cp_size]
        if chunk.nil? || chunk.size != cp_size
          raise PackFormatError, 'invalid delta data'
        end
        dest << chunk
      elsif c != 0
        literal = delta[pos, c]
        if literal.nil? || literal.size != c
          raise PackFormatError, 'invalid delta data'
        end
        dest << literal
        pos += c
      else
        raise PackFormatError, 'invalid delta data'
      end
    end

    if dest.size != dest_size
      raise PackFormatError, 'invalid delta data'
    end
    dest
  end
  private :patch_delta

  # Decodes a little-endian base-128 number starting at pos.
  # Returns [value, position after the number].
  def patch_delta_header_size(delta, pos)
    size = 0
    shift = 0
    loop do
      c = byte_at(delta, pos, 'invalid delta header')
      pos += 1
      size |= (c & 0x7f) << shift
      shift += 7
      break if c & 0x80 == 0
    end
    [size, pos]
  end
  private :patch_delta_header_size

  # Reads a big-endian 32-bit number at pos from an index accessor.
  def read_uint32(idx, pos)
    raw = idx[pos, OffsetSize]
    if raw.nil? || raw.size < OffsetSize
      raise PackFormatError, "pack #@name has a truncated index"
    end
    raw.unpack('N')[0]
  end
  private :read_uint32

  # Reads one byte from io as an Integer; PackFormatError at end of file.
  def read_byte(io)
    byte = io.getbyte
    raise PackFormatError, 'error reading pack data' if byte.nil?
    byte
  end
  private :read_byte

  # Integer value of the byte at pos in str; PackFormatError with the given
  # message when pos is past the end.
  def byte_at(str, pos, message)
    byte = str.getbyte(pos)
    raise PackFormatError, message if byte.nil?
    byte
  end
  private :byte_at
end
417
+ end
@@ -0,0 +1,207 @@
1
+ require 'strscan'
2
+
3
+ class GitStore
4
+
5
# In-memory directory node. Children live in #table, keyed by name, and are
# either Tree or Blob instances.
class Tree
  # Maps the type string reported by the store to the child class.
  TYPE_CLASS = {
    'tree' => Tree,
    'blob' => Blob
  }

  include Enumerable

  attr_reader :store
  attr_accessor :id, :mode, :path, :data, :table

  # Initialize an empty tree with default mode '040000'.
  def initialize(store)
    @store = store
    @mode ||= '040000'
    @path = ''
    @table = {}
  end

  # Set all attributes at once.
  def set(id, mode = '040000', path = nil, data = nil)
    @id, @mode, @path, @data = id, mode, path, data
  end

  # Does this tree have an id (i.e. has it been written or loaded)?
  def created?
    !@id.nil?
  end

  # Has this tree, or any of its children, been modified?
  def modified?
    @modified || (table && table.values.any? { |child| child.modified? })
  end

  # Path of a child element with specified name.
  def child_path(name)
    path.empty? ? name : "#{path}/#{name}"
  end

  # Find or create a subtree with specified name.
  def tree(name)
    get(name) || put(name, Tree.new(store))
  end

  # Load this tree from a real directory instead of a repository.
  #
  # Names starting with '.' or ending in '~' are skipped. An existing child
  # is reused only when it still has the right kind (directory vs. file).
  #
  # NOTE(review): each child is handed this tree's own #data via #set, as
  # the code always did - confirm that is intended.
  def load_from_disk
    dir = File.join(store.path, self.path)
    loaded = {}

    (Dir.entries(dir) - ['.', '..']).each do |name|
      next if name[-1, 1] == '~' || name[0, 1] == '.'

      stat = File.stat("#{dir}/#{name}")
      klass = stat.directory? ? Tree : Blob

      child = table[name]
      child = klass.new(store) unless child.is_a?(klass)
      child.set(nil, '%o' % stat.mode, child_path(name), data)
      child.load_from_disk

      loaded[name] = child
    end

    @table = loaded
  end

  # Read the contents of a raw git object.
  #
  # Return an array of [mode, name, id] entries, id in hex.
  def read_contents(data)
    scanner = StringScanner.new(data)
    contents = []

    while scanner.scan(/(.*?) (.*?)\0(.{20})/m)
      contents << [scanner[1], scanner[2], scanner[3].unpack("H*").first]
    end

    contents
  end

  # Load this tree (recursively) from a git repository, using #data as the
  # raw tree contents.
  def load_from_store
    loaded = {}

    read_contents(data).each do |mode, name, id|
      content, type = store.get_object(id)

      child = table[name] || TYPE_CLASS[type].new(store)
      child.set(id, mode, child_path(name), content)
      child.load_from_store if Tree === child

      loaded[name] = child
    end

    @table = loaded
  end

  # Write this tree back to the git repository.
  #
  # Entries are written in git's canonical order (byte-wise by name, with
  # subtrees compared as if their name ended in '/').
  #
  # NOTE(review): modes are written exactly as stored, including the
  # zero-padded default '040000' - confirm that is acceptable to consumers.
  #
  # Returns the object id of the tree.
  def write_to_store
    return id unless modified?

    ordered = table.sort_by do |name, entry|
      entry.is_a?(Tree) ? "#{name}/" : name
    end

    contents = ordered.map do |name, entry|
      entry.write_to_store
      # pack yields a binary string: "<mode> <name>", a NUL byte, then the
      # 20 raw id bytes. Formatting with % raised an encoding error when a
      # non-ASCII name met the binary id.
      ["#{entry.mode} #{name}", entry.id].pack('a*xH*')
    end

    @modified = false
    @id = store.put_object(contents.join, 'tree')
  end

  # Read entry with specified name: the object of a blob, or the subtree.
  def get(name)
    entry = table[name.to_s]

    case entry
    when Blob then entry.object
    when Tree then entry
    end
  end

  # Write entry with specified name. Returns the value.
  def put(name, value)
    @modified = true
    name = name.to_s

    if value.is_a?(Tree)
      value.path = child_path(name)
      table[name] = value
    else
      blob = table[name]
      blob = Blob.new(store) unless blob.is_a?(Blob)
      blob.path = child_path(name)
      blob.object = value
      table[name] = blob
    end

    value
  end

  # Remove entry with specified name.
  def remove(name)
    @modified = true
    table.delete(name.to_s)
  end

  # Does this key exist in the table? The name is converted with to_s, like
  # in #get, #put and #remove.
  def has_key?(name)
    table.has_key?(name.to_s)
  end

  # Read a value on specified path.
  #
  # Use an argument list or a string with slashes. Returns nil when any
  # step is missing or leads through something that is not a tree.
  def [](*args)
    args = args.first.to_s.split('/') if args.size == 1
    args.inject(self) do |node, key|
      return nil unless node.respond_to?(:get)
      node.get(key) or return nil
    end
  end

  # Write a value on specified path, creating subtrees as needed.
  #
  # Use an argument list or a string with slashes.
  def []=(*args)
    value = args.pop
    args = args.first.to_s.split('/') if args.size == 1
    parent = args[0..-2].to_a.inject(self) { |node, name| node.tree(name) }
    parent.put(args.last, value)
  end

  # Delete a value on specified path.
  #
  # Use an argument list or a string with slashes. Returns nil when the
  # parent path does not exist.
  def delete(*args)
    args = args.first.to_s.split('/') if args.size == 1
    parent = args[0..-2].to_a.inject(self) do |node, key|
      return unless node.respond_to?(:get)
      node.get(key) or return
    end
    return unless parent.respond_to?(:remove)
    parent.remove(args.last)
  end

  # Iterate over all blob objects found in this subtree, in name order.
  def each(&block)
    table.sort.each do |_name, entry|
      case entry
      when Blob then yield entry.object
      when Tree then entry.each(&block)
      end
    end
  end

  # Convert this tree into a (nested) hash of name => object.
  def to_hash
    table.inject({}) do |hash, (name, entry)|
      hash[name] = entry.is_a?(Tree) ? entry.to_hash : entry.object
      hash
    end
  end

  def inspect
    "#<GitStore::Tree #{id} #{mode} #{to_hash.inspect}>"
  end
end
206
+
207
+ end