git_store 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/git_store.rb ADDED
@@ -0,0 +1,392 @@
1
+ require 'rubygems'
2
+ require 'zlib'
3
+ require 'digest/sha1'
4
+ require 'yaml'
5
+ require 'fileutils'
6
+
7
+ require 'git_store/blob'
8
+ require 'git_store/diff'
9
+ require 'git_store/tree'
10
+ require 'git_store/tag'
11
+ require 'git_store/user'
12
+ require 'git_store/pack'
13
+ require 'git_store/commit'
14
+ require 'git_store/handlers'
15
+
16
+ # GitStore implements a versioned data store based on the revision
17
+ # management system git. You can store object hierarchies as nested
18
+ # hashes, which will be mapped on the directory structure of a git
19
+ # repository.
20
+ #
21
+ # GitStore supports transactions, so that updates to the store either
22
+ # fail or succeed completely.
23
+ #
24
+ # GitStore manages concurrent access by a file locking scheme. So only
25
+ # one process can start a transaction at one time. This is implemented
26
+ # by locking the `refs/heads/<branch>.lock` file, which is also respected
27
+ # by the git binary.
28
+ #
29
+ # A regular commit should be atomic by the nature of git, as the only
30
+ # critical part is writing the 40 bytes SHA1 hash of the commit object
31
+ # to the file `refs/heads/<branch>`, which is done atomically by the
32
+ # operating system.
33
+ #
34
+ # So reading a repository should be always consistent in a git
35
+ # repository. The head of a branch points to a commit object, which in
36
+ # turn points to a tree object, which itself is a snapshot of the
37
+ # GitStore at commit time. All involved objects are keyed by their
38
+ # SHA1 value, so there is no chance for another process to write to
39
+ # the same files.
40
+ #
41
+ class GitStore
42
+ include Enumerable
43
+
44
# Maps the type name stored in a git object header to the class that wraps it.
TYPE_CLASS = {
  'tree'   => Tree,
  'blob'   => Blob,
  'commit' => Commit,
  'tag'    => Tag
}

# Reverse lookup (class => type name), derived from TYPE_CLASS so both tables
# always contain the same four pairs.
CLASS_TYPE = TYPE_CLASS.invert
57
+
58
+ attr_reader :path, :index, :root, :branch, :lock_file, :head, :packs, :handler, :bare, :objects
59
+
60
+ # Initialize a store.
61
# Opens the repository at +path+ and loads the current head of +branch+.
#
# @param path [String] working-tree directory, or the git directory itself
#   when +bare+ is true
# @param branch [String] branch name used to build refs/heads/<branch>
# @param bare [Boolean] whether +path+ points at a bare repository
# @raise [ArgumentError] if the expected git directory does not exist
def initialize(path, branch = 'master', bare = false)
  # File.exist? is used because File.exists? was deprecated and is no longer
  # available in current Ruby versions.
  git_dir = bare ? "#{path}" : "#{path}/.git"
  unless File.exist?(git_dir)
    raise ArgumentError, "first argument must be a valid Git repository: `#{path}'"
  end

  @bare    = bare
  @path    = path.chomp('/')
  @branch  = branch
  @root    = Tree.new(self)
  @packs   = {}
  @objects = {}

  # Handlers are looked up by file extension; anything unknown falls back to
  # the default handler.
  @handler = {
    'yml' => YAMLHandler.new
  }
  @handler.default = DefaultHandler.new

  load_packs("#{git_path}/objects/pack")

  load
end
84
+
85
+ # Returns the path to the current head file.
86
# Builds the location of the ref file for the current branch inside the git
# directory.
#
# @return [String] "<git_path>/refs/heads/<branch>"
def head_path
  [git_path, 'refs', 'heads', branch].join('/')
end
89
+
90
+ # Returns the path to the object file for given id.
91
# Loose objects live in a directory named after the first two characters of
# the id, in a file named after characters 2..39.
#
# @param id [String] hexadecimal object id
# @return [String] path of the loose object file for +id+
def object_path(id)
  directory = id[0...2]
  file_name = id[2..39]
  "#{git_path}/objects/#{directory}/#{file_name}"
end
94
+
95
+ # Returns the path to the git data directory.
96
# For a bare repository the store path is the git directory itself;
# otherwise it is the ".git" directory below the store path.
#
# @return [String]
def git_path
  bare ? "#{path}" : "#{path}/.git"
end
103
+
104
+ # Read the id of the head commit.
105
+ #
106
+ # Returns the object id of the last commit.
107
# Reads the commit id stored in the branch's ref file.
#
# @return [String, nil] the stripped file content, or nil when the ref file
#   does not exist (e.g. no commit has been made on the branch yet)
def read_head_id
  # File.exist? instead of File.exists?, which current Ruby no longer provides.
  File.read(head_path).strip if File.exist?(head_path)
end
110
+
111
+ # Return a handler for a given path.
112
# Picks a handler by the text after the last "." in +path+. Lookups go
# through the +handler+ hash, so its default applies to unknown extensions.
#
# @param path [String]
# @return [Object] the handler registered for the extension
def handler_for(path)
  extension = path.split('.').last
  handler[extension]
end
115
+
116
+ # Read an object for the specified path.
117
# Delegates the lookup to the root tree.
#
# @param path [String] path of the entry inside the store
# @return [Object] whatever +root[path]+ returns
def [](path)
  root[path]
end
120
+
121
+ # Write an object to the specified path.
122
# Delegates the assignment to the root tree.
#
# @param path [String] path of the entry inside the store
# @param data [Object] value to store under +path+
def []=(path, data)
  root[path] = data
end
125
+
126
+ # Iterate over all key-values pairs found in this store.
127
# Forwards the given block to +root.each+; this is the method Enumerable
# builds on.
#
# @return [Object] the result of +root.each+
def each(&block)
  root.each(&block)
end
130
+
131
+ # Returns all paths found in this store.
132
# Delegates to the root tree.
#
# @return [Object] the result of +root.paths+
def paths
  root.paths
end
135
+
136
+ # Returns all values found in this store.
137
# Delegates to the root tree.
#
# @return [Object] the result of +root.values+
def values
  root.values
end
140
+
141
+ # Remove given path from store.
142
# Delegates the removal to the root tree.
#
# @param path [String] path of the entry to remove
# @return [Object] the result of +root.delete(path)+
def delete(path)
  root.delete(path)
end
145
+
146
+ # Find or create a tree object with given path.
147
# Delegates to the root tree.
#
# @param path [String] path of the subtree
# @return [Object] the result of +root.tree(path)+
def tree(path)
  root.tree(path)
end
150
+
151
+ # Returns the store as a hash tree.
152
# Delegates to the root tree.
#
# @return [Object] the result of +root.to_hash+
def to_hash
  root.to_hash
end
155
+
156
+ # Inspect the store.
157
+ def inspect
158
+ "#<GitStore #{path} #{branch}>"
159
+ end
160
+
161
+ # Has our store been changed on disk?
162
# Compares the loaded head commit with the id currently stored in the ref
# file.
#
# @return [Boolean] true when no head is loaded, or when the loaded head's id
#   differs from what +read_head_id+ reports
def changed?
  loaded = head
  return true if loaded.nil?

  loaded.id != read_head_id
end
165
+
166
+ # Load the current head version from repository.
167
# Points @head and @root at the commit currently named by the ref file.
# When the ref file yields no id, @head and @root are left untouched.
#
# @param from_disk [Boolean] when true, additionally calls +load_from_disk+
# @return [Object, nil] the result of +load_from_disk+, or nil when
#   +from_disk+ is false
def load(from_disk = false)
  head_id = read_head_id

  if head_id
    @head = get(head_id)
    @root = @head.tree
  end

  load_from_disk if from_disk
end
175
+
176
# Walks every blob of the root tree and, where a regular file with the same
# relative path exists below the store path, replaces the blob's data with
# that file's content.
def load_from_disk
  root.each_blob do |relative_path, blob|
    working_file = "#{self.path}/#{relative_path}"
    blob.data = File.read(working_file) if File.file?(working_file)
  end
end
184
+
185
+ # Reload the store, if it has been changed on disk.
186
# Calls +load+ only when +changed?+ reports a difference.
#
# @return [Object, nil] the result of +load+, or nil when nothing changed
def refresh!
  return unless changed?

  load
end
189
+
190
+ # Is there any transaction going on?
191
# Reads the current thread's 'git_store_lock' slot, which
# +start_transaction+ fills and +finish_transaction+ clears.
#
# @return [Object, nil] the stored lock object (truthy) or nil
def in_transaction?
  current_thread = Thread.current
  current_thread['git_store_lock']
end
194
+
195
+ # All changes made inside a transaction are atomic. If some
196
+ # exception occurs the transaction will be rolled back.
197
+ #
198
+ # Example:
199
+ # store.transaction { store['a'] = 'b' }
200
+ #
201
# Runs the block inside a transaction: takes the lock, yields, commits with
# +message+ and releases the lock. If the block (or the commit) raises a
# StandardError the store is rolled back and the error is re-raised.
#
# @param message [String] commit message
# @yield the changes to apply to the store
# @return [Object] the block's return value
def transaction(message = "")
  start_transaction
  result = yield
  commit message

  result
rescue
  rollback
  raise
ensure
  # rollback already finishes the transaction. Finishing a second time would
  # unlink the lock file again, which by then may belong to another process,
  # so only clean up while this thread still holds the lock.
  finish_transaction if in_transaction?
end
213
+
214
+ # Start a transaction.
215
+ #
216
+ # Tries to get a lock on the lock file, then reloads this store if it
217
+ # has changed in the repository.
218
# Opens "<head_path>.lock" for writing, takes an exclusive flock on it
# (blocking until it is available), remembers the file in the current
# thread's 'git_store_lock' slot and reloads the store if it changed.
#
# @return [Object, nil] the result of +load+, or nil when nothing changed
def start_transaction
  # File.open rather than Kernel#open: the path is computed, and Kernel#open
  # treats some strings specially.
  file = File.open("#{head_path}.lock", "w")

  begin
    file.flock(File::LOCK_EX)
  rescue StandardError
    # Do not leak the descriptor when the lock cannot be taken.
    file.close
    raise
  end

  Thread.current['git_store_lock'] = file

  load if changed?
end
226
+
227
+ # Restore the state of the store.
228
+ #
229
+ # Any changes made to the store are discarded.
230
# Drops every cached object, reloads the head from the repository and then
# releases the transaction lock, in that order.
def rollback
  objects.clear
  load
  finish_transaction
end
235
+
236
+ # Finish the transaction.
237
+ #
238
+ # Release the lock file.
239
# Closes the lock file held by the current thread (if any), clears the
# thread's 'git_store_lock' slot and removes "<head_path>.lock".
#
# Cleanup is best effort: I/O and system-call errors from closing or
# unlinking are ignored, so the method is safe to call when no transaction
# is active. Other errors are no longer silently swallowed.
#
# @return [Integer, nil] the result of File.unlink, or nil when the lock
#   file could not be removed
def finish_transaction
  lock = Thread.current['git_store_lock']

  begin
    lock.close if lock
  rescue IOError, SystemCallError
    # Already closed or otherwise unusable; nothing left to release.
  end
  Thread.current['git_store_lock'] = nil

  begin
    File.unlink("#{head_path}.lock")
  rescue SystemCallError
    # Lock file is already gone (or cannot be removed); ignore.
    nil
  end
end
245
+
246
+ # Write a commit object to disk and set the head of the current branch.
247
+ #
248
+ # Returns the commit object
249
# Writes the root tree, creates and writes a commit object for it (with the
# current head as parent, if there is one), stores the new commit id in the
# branch's ref file and makes the commit the new head.
#
# @param message [String] commit message
# @param author [Object] author; defaults to +User.from_config+
# @param committer [Object] committer; defaults to +author+
# @return [Commit] the newly written commit
def commit(message = '', author = User.from_config, committer = author)
  root.write

  new_commit = Commit.new(self)
  new_commit.tree = root
  new_commit.parent << head.id if head
  new_commit.author = author
  new_commit.committer = committer
  new_commit.message = message
  new_commit.write

  open(head_path, "wb") { |ref| ref.write(new_commit.id) }

  @head = new_commit
end
266
+
267
+ # Returns a list of commits starting from head commit.
268
# Follows first-parent links from +start+, collecting at most +limit+
# commits.
#
# @param limit [Integer] maximum number of commits to return
# @param start [Object, nil] commit to start from; defaults to the head
# @return [Array] commits, newest first
def commits(limit = 10, start = head)
  history = []
  cursor = start

  while cursor && history.size < limit
    history.push(cursor)
    cursor = get(cursor.parent.first)
  end

  history
end
279
+
280
+ # Get an object by its id.
281
+ #
282
+ # Returns a tree, blob, commit or tag object.
283
# Looks the object up in the +objects+ cache first; on a miss the raw object
# is read via +get_object+, wrapped in the class registered for its type and
# cached.
#
# @param id [String, nil] object id
# @return [Object, nil] the wrapped object, or nil when +id+ is nil
# @raise [NotImplementedError] if the object's type has no entry in TYPE_CLASS
def get(id)
  return nil if id.nil?

  cache = objects
  return cache[id] if cache.key?(id)

  type, content = get_object(id)

  klass = TYPE_CLASS[type]
  raise NotImplementedError, "type not supported: #{type}" unless klass

  cache[id] = klass.new(self, id, content)
end
294
+
295
+ # Save a git object to the store.
296
+ #
297
+ # Returns the object id.
298
# Serialises +object+ with its +dump+ method, writes it via +put_object+
# under the type registered for its class and caches it by id.
#
# @param object [Object] instance of a class listed in CLASS_TYPE
# @return [String] the object id
# @raise [NotImplementedError] if the object's class has no entry in CLASS_TYPE
def put(object)
  type = CLASS_TYPE[object.class]
  raise NotImplementedError, "class not supported: #{object.class}" unless type

  id = put_object(type, object.dump)
  objects[id] = object
  id
end
307
+
308
+ # Returns the hash value of an object string.
309
# @param str [String] data to hash
# @return [String] the SHA1 digest of +str+ as 40 hexadecimal characters
def sha(str)
  digest = Digest::SHA1.new
  digest.update(str)
  digest.hexdigest
end
312
+
313
+ # Calculate the id for a given type and raw data string.
314
# Computes the git object id: the SHA1 of "<type> <size>\0<content>".
#
# The size is measured in bytes (String#bytesize). String#length counts
# characters, which gives a wrong header, and therefore a wrong id, for
# content containing multibyte characters.
#
# @param type [String] git object type, e.g. 'blob'
# @param content [String] raw object content
# @return [String] the object id as returned by +sha+
def id_for(type, content)
  sha "#{type} #{content.bytesize}\0#{content}"
end
317
+
318
+ # Read the raw object with the given id from the repository.
319
+ #
320
+ # Returns a pair of type and content of the object
321
# Reads a raw object. A loose object file is preferred when one exists at
# +object_path(id)+; otherwise the object is looked up in the loaded packs.
#
# @param id [String] object id
# @return [Array] +[type, content]+; both are nil when the object is neither
#   loose nor found via +get_object_from_pack+
# @raise [RuntimeError] if the loose file does not look like a zlib stream,
#   or if its header size does not match the content
def get_object(id)
  path = object_path(id)

  # File.exist? instead of File.exists?, which current Ruby no longer provides.
  if File.exist?(path)
    buf = File.open(path, "rb") { |f| f.read }

    raise "not a loose object: #{id}" if not legacy_loose_object?(buf)

    header, content = Zlib::Inflate.inflate(buf).split(/\0/, 2)
    type, size = header.split(/ /, 2)

    # content is nil when the inflated data has no NUL separator; report that
    # as a bad object instead of failing with NoMethodError.
    raise "bad object: #{id}" if content.nil? || content.bytesize != size.to_i
  else
    content, type = get_object_from_pack(id)
  end

  return type, content
end
339
+
340
+ # Write a raw object to the repository.
341
+ #
342
+ # Returns the object id.
343
# Writes "<type> <size>\0<content>" zlib-deflated to the loose object file
# for its id, unless that file already exists.
#
# The size in the header is the byte size of +content+. String#length would
# count characters and produce a wrong header and id for multibyte content.
#
# @param type [String] git object type, e.g. 'blob'
# @param content [String] raw object content
# @return [String] the object id
def put_object(type, content)
  data = "#{type} #{content.bytesize}\0#{content}"
  id = sha(data)
  path = object_path(id)

  # File.exist? instead of File.exists?, which current Ruby no longer provides.
  unless File.exist?(path)
    FileUtils.mkpath(File.dirname(path))
    File.open(path, 'wb') do |f|
      f.write Zlib::Deflate.deflate(data)
    end
  end

  id
end
357
+
358
+ protected
359
+
360
# Checks whether +buf+ starts like a zlib stream as written for loose
# objects: the first byte is 0x78 and the first two bytes, read as a
# big-endian 16-bit number, are divisible by 31.
#
# Works on bytes via String#getbyte, so a single definition is enough
# regardless of how String#[] behaves, and buffers shorter than two bytes
# yield false instead of raising.
#
# @param buf [String, nil] raw file content
# @return [Boolean]
def legacy_loose_object?(buf)
  return false if buf.nil? || buf.bytesize < 2

  first  = buf.getbyte(0)
  second = buf.getbyte(1)

  first == 0x78 && ((first << 8) + second) % 31 == 0
end
370
+
371
# Looks +id+ up in the pack index built by +load_packs+ and, when found,
# asks the owning pack to parse the object at the recorded offset.
#
# @param id [String] object id
# @return [Object, nil] the result of +pack.parse_object+, or nil when no
#   pack contains +id+
def get_object_from_pack(id)
  pack, offset = @packs[id]
  return unless pack

  pack.parse_object(offset)
end
376
+
377
# Indexes every "*.pack" file (case-insensitive) in +path+: each entry
# reported by the pack is stored in @packs under its hex-encoded id as
# +[pack, offset]+. Does nothing when +path+ is not a directory.
#
# @param path [String] directory containing pack files
def load_packs(path)
  return unless File.directory?(path)

  Dir.open(path) do |dir|
    dir.grep(/\.pack$/i).each do |file_name|
      storage = PackStorage.new(File.join(path, file_name))
      storage.each_entry do |raw_id, offset|
        # Ids are reported as raw bytes; @packs is keyed by their hex form.
        @packs[raw_id.unpack("H*").first] = [storage, offset]
      end
    end
  end
end
391
+
392
+ end
@@ -0,0 +1,33 @@
1
+ require "#{File.dirname(__FILE__)}/../lib/git_store"
2
+ require "#{File.dirname(__FILE__)}/helper"
3
+ require 'pp'
4
+
5
# Specs for GitStore, run against a throw-away bare repository.
describe GitStore do

  REPO = '/tmp/git_store_test.git'

  attr_reader :store

  # Every example starts from a freshly initialised, empty bare repository.
  # Note that this changes the process working directory to REPO.
  before(:each) do
    FileUtils.rm_rf(REPO)
    Dir.mkdir(REPO)
    Dir.chdir(REPO)

    `git init --bare`
    @store = GitStore.new(REPO, 'master', true)
  end

  it 'should fail to initialize without a valid git repository' do
    lambda {
      GitStore.new('/foo', 'master', true)
    }.should raise_error(ArgumentError)
  end

  it 'should save and load entries' do
    store['a'] = 'Hello'
    store.commit
    store.load

    store['a'].should == 'Hello'
  end
end
data/test/benchmark.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'git_store'
2
+ require 'grit'
3
+ require 'benchmark'
4
+ require 'fileutils'
5
+
6
REPO = '/tmp/git-store'

# Start from an empty, freshly initialised (non-bare) repository.
# Note that this changes the process working directory to REPO.
FileUtils.rm_rf(REPO)
FileUtils.mkpath(REPO)
Dir.chdir(REPO)

`git init`

store = GitStore.new(REPO)

# Compares writing and reading through GitStore with reading through Grit.
Benchmark.bm(20) do |x|
  x.report('store 1000 objects') do
    store.transaction do
      'aaa'.upto('jjj') { |key| store[key] = rand.to_s }
    end
  end

  x.report('commit one object') do
    store.transaction { store['aa'] = rand.to_s }
  end

  x.report('load 1000 objects') do
    GitStore.new('.').values { |v| v }
  end

  x.report('load 1000 with grit') do
    Grit::Repo.new('.').tree.contents.each { |e| e.data }
  end
end
30
+
@@ -0,0 +1,81 @@
1
+ require "#{File.dirname(__FILE__)}/../lib/git_store"
2
+ require 'pp'
3
+
4
# Specs for GitStore::Commit, run against a throw-away non-bare repository.
describe GitStore::Commit do

  REPO = '/tmp/git_store_test'

  attr_reader :store

  # Every example starts from a freshly initialised, empty repository.
  # Note that this changes the process working directory to REPO, which the
  # `git log` call below relies on.
  before(:each) do
    FileUtils.rm_rf REPO
    Dir.mkdir REPO
    Dir.chdir REPO
    `git init`
    @store = GitStore.new(REPO)
  end

  it "should dump in right format" do
    user = GitStore::User.new("hanni", "hanni@email.de", Time.now)

    commit = GitStore::Commit.new(nil)
    commit.tree = @store.root
    commit.author = user
    commit.committer = user
    commit.message = "This is a message"

    # Header lines, a blank line, then the message; written with explicit
    # newlines so the expectation does not depend on source indentation.
    commit.dump.should == "tree #{@store.root.id}\n" \
                          "author #{user.dump}\n" \
                          "committer #{user.dump}\n" \
                          "\n" \
                          "This is a message"
  end

  it "should be readable by git binary" do
    time = Time.local(2009, 4, 20)
    author = GitStore::User.new("hans", "hans@email.de", time)

    store['a'] = "Yay"
    commit = store.commit("Commit Message", author, author)

    IO.popen("git log") do |io|
      io.gets.should == "commit #{commit.id}\n"
      io.gets.should == "Author: hans <hans@email.de>\n"
      # Use the UTC offset in effect at the commit time, not the current one:
      # the two differ whenever daylight saving time has changed in between.
      # `git log` pads "Date:" with three spaces and indents the message by four.
      io.gets.should == "Date:   Mon Apr 20 00:00:00 2009 #{time.strftime('%z')}\n"
      io.gets.should == "\n"
      io.gets.should == "    Commit Message\n"
    end
  end

  it "should diff 2 commits" do
    store['x'] = 'a'
    store['y'] = "\nFirst Line.\nSecond Line.\nLast Line.\n"
    a = store.commit

    store.delete('x')
    store['y'] = "\nFirst Line.\nLast Line.\nAnother Line.\n"
    store['z'] = 'c'

    b = store.commit

    diff = b.diff(a)

    diff[0].a_path.should == 'x'
    diff[0].deleted_file.should be_true

    diff[1].a_path.should == 'y'
    diff[1].diff.should == "--- a/y\n+++ b/y\n@@ -1,4 +1,4 @@\n \n First Line.\n-Second Line.\n Last Line.\n+Another Line."

    diff[2].a_path.should == 'z'
    diff[2].new_file.should be_true
  end

end
+ end