git_store 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/git_store.rb ADDED
@@ -0,0 +1,392 @@
1
+ require 'rubygems'
2
+ require 'zlib'
3
+ require 'digest/sha1'
4
+ require 'yaml'
5
+ require 'fileutils'
6
+
7
+ require 'git_store/blob'
8
+ require 'git_store/diff'
9
+ require 'git_store/tree'
10
+ require 'git_store/tag'
11
+ require 'git_store/user'
12
+ require 'git_store/pack'
13
+ require 'git_store/commit'
14
+ require 'git_store/handlers'
15
+
16
+ # GitStore implements a versioned data store based on the revision
17
+ # management system git. You can store object hierarchies as nested
18
+ # hashes, which will be mapped on the directory structure of a git
19
+ # repository.
20
+ #
21
+ # GitStore supports transactions, so that updates to the store either
22
+ # fail or succeed completely.
23
+ #
24
+ # GitStore manages concurrent access by a file locking scheme. So only
25
+ # one process can start a transaction at one time. This is implemented
26
+ # by locking the `refs/heads/<branch>.lock` file, which is also respected
27
+ # by the git binary.
28
+ #
29
+ # A regular commit should be atomic by the nature of git, as the only
30
+ # critical part is writing the 40 bytes SHA1 hash of the commit object
31
+ # to the file `refs/heads/<branch>`, which is done atomically by the
32
+ # operating system.
33
+ #
34
+ # So reading a repository should be always consistent in a git
35
+ # repository. The head of a branch points to a commit object, which in
36
+ # turn points to a tree object, which itself is a snapshot of the
37
+ # GitStore at commit time. All involved objects are keyed by their
38
+ # SHA1 value, so there is no chance for another process to write to
39
+ # the same files.
40
+ #
41
+ class GitStore
42
+ include Enumerable
43
+
44
# Maps the type name of a git object to the class representing it.
TYPE_CLASS = {
  'tree'   => Tree,
  'blob'   => Blob,
  'commit' => Commit,
  'tag'    => Tag
}

# Reverse lookup: maps an object class back to its git type name.
CLASS_TYPE = TYPE_CLASS.invert
57
+
58
+ attr_reader :path, :index, :root, :branch, :lock_file, :head, :packs, :handler, :bare, :objects
59
+
60
# Initialize a store.
#
# path   - directory of the repository: the working tree, or the git
#          directory itself when bare is true.
# branch - name of the branch to operate on (default: 'master').
# bare   - true if path has no `.git` subdirectory.
#
# Raises ArgumentError if the expected git directory does not exist.
def initialize(path, branch = 'master', bare = false)
  git_dir = bare ? "#{path}" : "#{path}/.git"

  # File.exist? replaces File.exists?, which was deprecated and later
  # removed from Ruby (3.2).
  unless File.exist?(git_dir)
    raise ArgumentError, "first argument must be a valid Git repository: `#{path}'"
  end

  @bare    = bare
  @path    = path.chomp('/')
  @branch  = branch
  @root    = Tree.new(self)
  @packs   = {}
  @objects = {}

  # Handlers are looked up by file extension; unknown extensions fall
  # back to the default handler.
  @handler = { 'yml' => YAMLHandler.new }
  @handler.default = DefaultHandler.new

  load_packs("#{git_path}/objects/pack")

  load
end
84
+
85
# Location of the ref file holding the head commit id of the current
# branch.
def head_path
  [git_path, 'refs/heads', branch].join('/')
end
89
+
90
# Builds the path of the loose object file for the given id: the first
# two characters of the id name the directory, the rest names the file.
def object_path(id)
  directory = id[0, 2]
  file_name = id[2, 38]

  "#{git_path}/objects/#{directory}/#{file_name}"
end
94
+
95
# Path of the git data directory: the store path itself for a bare
# repository, its `.git` subdirectory otherwise.
def git_path
  return "#{path}" if bare

  "#{path}/.git"
end
103
+
104
# Read the id of the head commit.
#
# Returns the stripped content of the head file, i.e. the object id of
# the last commit, or nil if the head file does not exist.
def read_head_id
  ref_file = head_path

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.read(ref_file).strip if File.exist?(ref_file)
end
110
+
111
# Picks the handler registered for the text after the last dot of the
# given path.
def handler_for(path)
  extension = path.split('.').last
  handler[extension]
end
115
+
116
# Look up the object stored under the given path in the root tree.
def [](path)
  root.[](path)
end
120
+
121
# Store the given data under the given path in the root tree.
def []=(path, data)
  target = root
  target[path] = data
end
125
+
126
# Hands the given block to the root tree's iteration over the
# key-value pairs of this store.
def each(&block)
  entries = root
  entries.each(&block)
end
130
+
131
# All paths known to the root tree of this store.
def paths
  tree_root = root
  tree_root.paths
end
135
+
136
# All values known to the root tree of this store.
def values
  tree_root = root
  tree_root.values
end
140
+
141
# Asks the root tree to remove the entry at the given path.
def delete(path)
  tree_root = root
  tree_root.delete(path)
end
145
+
146
# Delegates to the root tree to find or create the tree object at the
# given path.
def tree(path)
  tree_root = root
  tree_root.tree(path)
end
150
+
151
# The hash representation of the root tree.
def to_hash
  tree_root = root
  tree_root.to_hash
end
155
+
156
+ # Inspect the store.
157
+ def inspect
158
+ "#<GitStore #{path} #{branch}>"
159
+ end
160
+
161
# True if no head is loaded yet, or if the id of the loaded head
# differs from the id currently stored in the head file.
def changed?
  current = head
  return true if current.nil?

  current.id != read_head_id
end
165
+
166
# Load the head commit of the branch and use its tree as root. Nothing
# is replaced when the branch has no head id yet.
#
# from_disk - if true, blob data is afterwards overwritten with the
#             content of the files found below the store path.
def load(from_disk = false)
  head_id = read_head_id

  if head_id
    @head = get(head_id)
    @root = @head.tree
  end

  load_from_disk if from_disk
end
175
+
176
# Replaces the data of every blob in the root tree with the content of
# the regular file at the same relative path below the store path, if
# such a file exists.
def load_from_disk
  base = path

  root.each_blob do |relative_path, blob|
    candidate = "#{base}/#{relative_path}"
    blob.data = File.read(candidate) if File.file?(candidate)
  end
end
184
+
185
# Reload the store, but only when the head on disk differs from the
# loaded one.
def refresh!
  return unless changed?

  load
end
189
+
190
# Returns the lock file registered for the current thread, or nil when
# this thread has no transaction running.
def in_transaction?
  current_thread = Thread.current
  current_thread['git_store_lock']
end
194
+
195
# All changes made inside a transaction are atomic. If some
# exception occurs the transaction will be rolled back and the
# exception is raised again.
#
# message - commit message used for the commit (default: empty).
#
# Returns the value of the given block.
#
# Example:
#   store.transaction { store['a'] = 'b' }
#
def transaction(message = "")
  start_transaction
  result = yield
  commit(message)

  result
rescue
  rollback
  raise
ensure
  # rollback already finishes the transaction on the failure path.
  # Finishing a second time would unlink the lock file again, which
  # could remove a lock another process has created in the meantime.
  finish_transaction if in_transaction?
end
213
+
214
# Start a transaction.
#
# Takes an exclusive lock on the lock file of the current head and
# reloads this store if it has changed in the repository.
def start_transaction
  # File.open rather than Kernel#open: the latter treats a path that
  # starts with "|" as a command to run.
  file = File.open("#{head_path}.lock", "w")

  begin
    file.flock(File::LOCK_EX)
  rescue StandardError
    # Do not leak the handle when the lock cannot be taken.
    file.close
    raise
  end

  Thread.current['git_store_lock'] = file

  load if changed?
end
226
+
227
# Restore the state of the store.
#
# Cached objects are dropped, the head is loaded again and the
# transaction is finished. The transaction is also finished when
# reloading fails, so that the lock is not left behind.
def rollback
  begin
    objects.clear
    load
  rescue
    finish_transaction
    raise
  end

  finish_transaction
end
235
+
236
# Finish the transaction.
#
# Closes the lock file registered for the current thread, forgets it
# and removes the lock file. Failures of either step are ignored.
def finish_transaction
  begin
    Thread.current['git_store_lock'].close
  rescue StandardError
    nil
  end
  Thread.current['git_store_lock'] = nil

  begin
    File.unlink("#{head_path}.lock")
  rescue StandardError
    nil
  end
end
245
+
246
# Write a commit object to disk and set the head of the current branch.
#
# message   - commit message (default: empty).
# author    - user recorded as author (default: User.from_config).
# committer - user recorded as committer (default: the author).
#
# Returns the commit object.
def commit(message = '', author = User.from_config, committer = author)
  root.write

  new_commit = Commit.new(self)
  new_commit.tree = root
  new_commit.parent << head.id if head
  new_commit.author = author
  new_commit.committer = committer
  new_commit.message = message
  new_commit.write

  # File.open rather than Kernel#open: the latter treats a path that
  # starts with "|" as a command to run.
  File.open(head_path, "wb") do |file|
    file.write(new_commit.id)
  end

  @head = new_commit
end
266
+
267
# Walks the history along the first parent of each commit.
#
# limit - maximum number of commits to return (default: 10).
# start - commit to begin with (default: the current head).
#
# Returns an array of commits, starting with the given one.
def commits(limit = 10, start = head)
  history = []
  cursor = start

  loop do
    break unless cursor && history.size < limit

    history << cursor
    cursor = get(cursor.parent.first)
  end

  history
end
279
+
280
# Fetch an object by its id, reading and caching it on first access.
#
# Returns a tree, blob, commit or tag object, or nil for a nil id.
# Raises NotImplementedError if the stored type has no matching class.
def get(id)
  return nil if id.nil?

  objects.fetch(id) do
    type, content = get_object(id)

    klass = TYPE_CLASS[type]
    raise NotImplementedError, "type not supported: #{type}" unless klass

    objects[id] = klass.new(self, id, content)
  end
end
294
+
295
# Write the dump of a git object to the repository and remember the
# object under its id.
#
# Returns the object id.
# Raises NotImplementedError if the class of the object is unknown.
def put(object)
  type = CLASS_TYPE[object.class]
  raise NotImplementedError, "class not supported: #{object.class}" unless type

  put_object(type, object.dump).tap do |new_id|
    objects[new_id] = object
  end
end
307
+
308
# Hex-encoded SHA1 digest (40 characters) of the given string.
def sha(str)
  digest = Digest::SHA1.hexdigest(str)
  digest[0, 40]
end
312
+
313
# Calculate the id for a given type and raw data string.
#
# The hashed header carries the size of the content in bytes, so the
# content is measured with bytesize; length would count characters and
# give a wrong id for multibyte content.
def id_for(type, content)
  sha "#{type} #{content.bytesize}\0#{content}"
end
317
+
318
# Read the raw object with the given id from the repository.
#
# A loose object file is used if one exists for the id; otherwise the
# object is looked up in the loaded packs.
#
# Returns a pair of type and content of the object.
# Raises RuntimeError if the loose object file fails the header check
# or its content does not have the size recorded in its header.
def get_object(id)
  path = object_path(id)

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if File.exist?(path)
    # File.open rather than Kernel#open, which gives a leading "|" in
    # the path a special meaning.
    buf = File.open(path, "rb") { |f| f.read }

    raise "not a loose object: #{id}" if not legacy_loose_object?(buf)

    header, content = Zlib::Inflate.inflate(buf).split(/\0/, 2)
    type, size = header.split(/ /, 2)

    # The header records the size in bytes, not in characters.
    raise "bad object: #{id}" if content.bytesize != size.to_i
  else
    # The pack lookup yields the content first and the type second.
    content, type = get_object_from_pack(id)
  end

  return type, content
end
339
+
340
# Write a raw object to the repository.
#
# The object is stored zlib-compressed at the path derived from its
# id. Nothing is written if a file for that id already exists.
#
# Returns the object id.
def put_object(type, content)
  # The header carries the size in bytes; String#length would count
  # characters and produce a wrong header for multibyte content.
  data = "#{type} #{content.bytesize}\0#{content}"
  id = sha(data)
  path = object_path(id)

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(path)
    FileUtils.mkpath(File.dirname(path))
    # File.open rather than Kernel#open, which gives a leading "|" in
    # the path a special meaning.
    File.open(path, 'wb') do |f|
      f.write Zlib::Deflate.deflate(data)
    end
  end

  id
end
357
+
358
+ protected
359
+
360
# Checks the first two bytes of a buffer the way a zlib stream header
# is checked: the first byte must be 0x78 ("x") and the two bytes, read
# as one 16-bit number, must be a multiple of 31.
#
# Returns false for buffers shorter than two bytes.
if 'String'[0].respond_to?(:ord)
  # String#[] returns a one-character string on this Ruby.
  def legacy_loose_object?(buf)
    first, second = buf[0], buf[1]
    return false if first.nil? || second.nil?

    first == ?x && (((first.ord << 8) + second.ord) % 31 == 0)
  end
else
  # String#[] returns the byte as an integer on this Ruby.
  def legacy_loose_object?(buf)
    first, second = buf[0], buf[1]
    return false if first.nil? || second.nil?

    first == 0x78 && ((first << 8) + second) % 31 == 0
  end
end
370
+
371
# Looks up the pack and offset registered for the id and lets the pack
# parse the object at that offset. Returns nil for unknown ids.
def get_object_from_pack(id)
  pack, offset = @packs[id]
  return nil unless pack

  pack.parse_object(offset)
end
376
+
377
# Registers every entry of every `.pack` file found in the given
# directory: the hex form of the entry id is mapped to the pack and the
# offset of the entry. Does nothing if the directory does not exist.
def load_packs(path)
  return unless File.directory?(path)

  Dir.open(path) do |dir|
    pack_files = dir.grep(/\.pack$/i)
    pack_files.each do |file_name|
      storage = PackStorage.new(File.join(path, file_name))
      storage.each_entry do |raw_id, offset|
        hex_id = raw_id.unpack("H*").first
        @packs[hex_id] = [storage, offset]
      end
    end
  end
end
391
+
392
+ end
@@ -0,0 +1,33 @@
1
+ require "#{File.dirname(__FILE__)}/../lib/git_store"
2
+ require "#{File.dirname(__FILE__)}/helper"
3
+ require 'pp'
4
+
5
describe GitStore do

  REPO = '/tmp/git_store_test.git'

  attr_reader :store

  # Every example runs against a freshly initialized bare repository.
  before(:each) do
    FileUtils.rm_rf(REPO)
    Dir.mkdir(REPO)
    Dir.chdir(REPO)

    `git init --bare`
    @store = GitStore.new(REPO, 'master', true)
  end

  it 'should fail to initialize without a valid git repository' do
    opening_missing_repo = lambda do
      GitStore.new('/foo', 'master', true)
    end

    opening_missing_repo.should raise_error(ArgumentError)
  end

  it 'should save and load entries' do
    store['a'] = 'Hello'
    store.commit
    store.load

    store['a'].should == 'Hello'
  end
end
data/test/benchmark.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'git_store'
2
+ require 'grit'
3
+ require 'benchmark'
4
+ require 'fileutils'
5
+
6
REPO = '/tmp/git-store'

# Start from an empty, freshly initialized repository.
FileUtils.rm_rf(REPO)
FileUtils.mkpath(REPO)
Dir.chdir(REPO)

`git init`

store = GitStore.new(REPO)

Benchmark.bm(20) do |bench|
  bench.report('store 1000 objects') do
    store.transaction do
      'aaa'.upto('jjj') { |key| store[key] = rand.to_s }
    end
  end

  bench.report('commit one object') do
    store.transaction { store['aa'] = rand.to_s }
  end

  bench.report('load 1000 objects') do
    GitStore.new('.').values { |value| value }
  end

  bench.report('load 1000 with grit') do
    Grit::Repo.new('.').tree.contents.each { |entry| entry.data }
  end
end
30
+
@@ -0,0 +1,81 @@
1
+ require "#{File.dirname(__FILE__)}/../lib/git_store"
2
+ require 'pp'
3
+
4
+ describe GitStore::Commit do # Specs for dumping commits, reading them with the git binary and diffing them
5
+
6
+ REPO = '/tmp/git_store_test'
7
+
8
+ attr_reader :store
9
+
10
+ before(:each) do # Recreate the repository directory and open a store on it for every example
11
+ FileUtils.rm_rf REPO
12
+ Dir.mkdir REPO
13
+ Dir.chdir REPO
14
+ `git init`
15
+ @store = GitStore.new(REPO)
16
+ end
17
+
18
+ it "should dump in right format" do # Compares Commit#dump with a hand-written commit text
19
+ user = GitStore::User.new("hanni", "hanni@email.de", Time.now)
20
+
21
+ commit = GitStore::Commit.new(nil)
22
+ commit.tree = @store.root
23
+ commit.author = user
24
+ commit.committer = user
25
+ commit.message = "This is a message"
26
+
27
+ commit.dump.should == "tree #{@store.root.id}
28
+ author #{user.dump}
29
+ committer #{user.dump}
30
+
31
+ This is a message"
32
+ end
33
+
34
+ it "should be readable by git binary" do # Commits through the store, then checks the output of `git log` line by line
35
+ time = Time.local(2009, 4, 20)
36
+ author = GitStore::User.new("hans", "hans@email.de", time)
37
+
38
+ store['a'] = "Yay"
39
+ commit = store.commit("Commit Message", author, author)
40
+
41
+ IO.popen("git log") do |io| # runs in REPO, the working directory set in the before block
42
+ io.gets.should == "commit #{commit.id}\n"
43
+ io.gets.should == "Author: hans <hans@email.de>\n"
44
+ io.gets.should == "Date: Mon Apr 20 00:00:00 2009 #{Time.now.strftime('%z')}\n"
45
+ io.gets.should == "\n"
46
+ io.gets.should == " Commit Message\n"
47
+ end
48
+ end
49
+
50
+ it "should diff 2 commits" do # Commit a: x and y; commit b: x deleted, y changed, z added
51
+ store['x'] = 'a'
52
+ store['y'] = "
53
+ First Line.
54
+ Second Line.
55
+ Last Line.
56
+ "
57
+ a = store.commit
58
+
59
+ store.delete('x')
60
+ store['y'] = "
61
+ First Line.
62
+ Last Line.
63
+ Another Line.
64
+ "
65
+ store['z'] = 'c'
66
+
67
+ b = store.commit
68
+
69
+ diff = b.diff(a) # the assertions below expect one entry per path, in the order x, y, z
70
+
71
+ diff[0].a_path.should == 'x'
72
+ diff[0].deleted_file.should be_true
73
+
74
+ diff[1].a_path.should == 'y'
75
+ diff[1].diff.should == "--- a/y\n+++ b/y\n@@ -1,4 +1,4 @@\n \n First Line.\n-Second Line.\n Last Line.\n+Another Line."
76
+
77
+ diff[2].a_path.should == 'z'
78
+ diff[2].new_file.should be_true
79
+ end
80
+
81
+ end