georgi-git_store 0.1.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/git_store.rb CHANGED
@@ -1,240 +1,320 @@
1
- require 'grit'
2
-
3
- # This fix ensures sorted yaml maps.
4
- class Hash
5
- def to_yaml( opts = {} )
6
- YAML::quick_emit( object_id, opts ) do |out|
7
- out.map( taguri, to_yaml_style ) do |map|
8
- sort_by { |k, v| k.to_s }.each do |k, v|
9
- map.add( k, v )
10
- end
11
- end
12
- end
13
- end
14
- end
15
-
1
+ require 'rubygems'
2
+ require 'zlib'
3
+ require 'digest/sha1'
4
+ require 'yaml'
5
+
6
+ require 'git_store/blob'
7
+ require 'git_store/tree'
8
+ require 'git_store/handlers'
9
+ require 'git_store/pack'
10
+
11
+ # GitStore implements a versioned data store based on the revision
12
+ # management system git. You can store object hierarchies as nested
13
+ # hashes, which will be mapped on the directory structure of a git
14
+ # repository.
15
+ #
16
+ # GitStore supports transactions, so that updates to the store either
17
+ # fail or succeed completely.
18
+ #
19
+ # GitStore manages concurrent access by a file locking scheme. So only
20
+ # one process can start a transaction at one time. This is implemented
21
+ # by locking the `refs/heads/<branch>.lock` file, which is also respected
22
+ # by the git binary.
23
+ #
24
+ # A regular commit should be atomic by the nature of git, as the only
25
+ # critical part is writing the 40 bytes SHA1 hash of the commit object
26
+ # to the file `refs/heads/<branch>`, which is done atomically by the
27
+ # operating system.
28
+ #
29
+ # So reading a repository should always be consistent in a git
30
+ # repository. The head of a branch points to a commit object, which in
31
+ # turn points to a tree object, which itself is a snapshot of the
32
+ # GitStore at commit time. All involved objects are keyed by their
33
+ # SHA1 value, so there is no chance for another process to write to
34
+ # the same files.
35
+ #
16
36
  class GitStore
37
+ include Enumerable
17
38
 
18
- class DefaultHandler
19
- def read(name, data)
20
- data
21
- end
39
+ attr_reader :path, :index, :root, :branch, :lock_file, :head
22
40
 
23
- def write(data)
24
- data
25
- end
41
+ # Initialize a store.
42
+ def initialize(path, branch = 'master')
43
+ @path = path.chomp('/')
44
+ @branch = branch
45
+ @root = Tree.new(self)
46
+
47
+ load_packs("#{path}/.git/objects/pack")
48
+ load
26
49
  end
27
-
28
- class YAMLHandler
29
- def read(name, data)
30
- YAML.load(data)
31
- end
32
50
 
33
- def write(data)
34
- data.to_yaml
35
- end
51
+ # The path to the current head file.
52
+ def head_path
53
+ "#{path}/.git/refs/heads/#{branch}"
36
54
  end
37
55
 
38
- class RubyHandler
39
- def read(name, data)
40
- Object.module_eval(data)
41
- end
56
+ # The path to the object file for given id.
57
+ def object_path(id)
58
+ "#{path}/.git/objects/#{ id[0...2] }/#{ id[2..39] }"
42
59
  end
43
60
 
44
- class ERBHandler
45
- def read(name, data)
46
- ERB.new(data)
47
- end
61
+ # Read the id of the head commit.
62
+ #
63
+ # Returns the object id of the last commit.
64
+ def read_head
65
+ File.read(head_path).strip if File.exists?(head_path)
48
66
  end
49
67
 
50
- Handler = {
51
- 'yml' => YAMLHandler.new,
52
- 'rhtml' => ERBHandler.new,
53
- 'rxml' => ERBHandler.new,
54
- 'rb' => RubyHandler.new
55
- }
68
+ # Read an object for the specified path.
69
+ #
70
+ # Use multiple arguments or a string with slashes.
71
+ def [](*args)
72
+ root[*args]
73
+ end
56
74
 
57
- Handler.default = DefaultHandler.new
75
+ # Write an object to the specified path.
76
+ #
77
+ # Use multiple arguments or a string with slashes.
78
+ def []=(*args)
79
+ value = args.pop
80
+ root[*args] = value
81
+ end
58
82
 
59
- class Blob
83
+ # Delete the specified path.
84
+ #
85
+ # Use multiple arguments or a string with slashes.
86
+ def delete(*args)
87
+ root.delete(*args)
88
+ end
60
89
 
61
- attr_reader :id
62
- attr_accessor :name
90
+ # Returns the store as a hash tree.
91
+ def to_hash
92
+ root.to_hash
93
+ end
63
94
 
64
- def initialize(*args)
65
- if args.first.is_a?(Grit::Blob)
66
- @blob = args.first
67
- @name = @blob.name
68
- else
69
- @name = args[0]
70
- self.data = args[1]
71
- end
72
- end
95
+ # Inspect the store.
96
+ def inspect
97
+ "#<GitStore #{path} #{branch} #{root.to_hash.inspect}>"
98
+ end
73
99
 
74
- def extname
75
- File.extname(name)[1..-1]
76
- end
100
+ # Iterate over all values found in this store.
101
+ def each(&block)
102
+ root.each(&block)
103
+ end
77
104
 
78
- def load(data)
79
- @data = handler.read(name, data)
80
- end
105
+ # Has our store been changed on disk?
106
+ def changed?
107
+ head != read_head
108
+ end
81
109
 
82
- def handler
83
- Handler[extname]
84
- end
110
+ def refresh!
111
+ load if changed?
112
+ end
85
113
 
86
- def data
87
- @data or (@blob and load(@blob.data))
114
+ # Load the current head version from repository.
115
+ def load
116
+ if @head = read_head
117
+ commit = get_object(head)[0]
118
+ root.id = commit.split(/[ \n]/, 3)[1].strip
119
+ root.data = get_object(root.id)[0]
120
+ root.load_from_store
88
121
  end
122
+ end
89
123
 
90
- def data=(data)
91
- @data = data
92
- end
124
+ # Reload the store, if it has been changed on disk.
125
+ def refresh!
126
+ load if changed?
127
+ end
93
128
 
94
- def to_s
95
- if handler.respond_to?(:write)
96
- handler.write(data)
97
- else
98
- @blob.data
99
- end
100
- end
129
+ # Do we have a current transaction?
130
+ def in_transaction?
131
+ Thread.current['git_store_lock']
132
+ end
133
+
134
+ # All changes made inside a transaction are atomic. If some
135
+ # exception occurs the transaction will be rolled back.
136
+ #
137
+ # Example:
138
+ # store.transaction { store['a'] = 'b' }
139
+ #
140
+ def transaction(message = "")
141
+ start_transaction
142
+ result = yield
143
+ commit message
101
144
 
145
+ result
146
+ rescue
147
+ rollback
148
+ raise
149
+ ensure
150
+ finish_transaction
102
151
  end
103
152
 
104
- class Tree
105
- include Enumerable
153
+ # Start a transaction.
154
+ #
155
+ # Tries to get a lock on the lock file, then reloads this store if
156
+ # it has changed in the repository.
157
+ def start_transaction
158
+ file = open("#{head_path}.lock", "w")
159
+ file.flock(File::LOCK_EX)
160
+
161
+ Thread.current['git_store_lock'] = file
162
+
163
+ load if changed?
164
+ end
106
165
 
107
- attr_reader :data
108
- attr_accessor :name
166
+ # Restore the state of the store.
167
+ #
168
+ # Any changes made to the store are discarded.
169
+ def rollback
170
+ root.load_from_store
171
+ finish_transaction
172
+ end
173
+
174
+ # Finish the transaction.
175
+ #
176
+ # Release the lock file.
177
+ def finish_transaction
178
+ Thread.current['git_store_lock'].close rescue nil
179
+ Thread.current['git_store_lock'] = nil
109
180
 
110
- def initialize(name = nil)
111
- @data = {}
112
- @name = name
113
- end
181
+ File.unlink("#{head_path}.lock") rescue nil
182
+ end
114
183
 
115
- def load(tree)
116
- @name = tree.name
117
- @data = tree.contents.inject({}) do |hash, file|
118
- if file.is_a?(Grit::Tree)
119
- hash[file.name] = (@data[file.name] || Tree.new).load(file)
120
- else
121
- hash[file.name] = Blob.new(file)
122
- end
123
- hash
124
- end
125
- self
184
+ # Write the commit object to disk and set the head of the current branch.
185
+ #
186
+ # Returns the id of the commit object
187
+ def commit(message = '', author = 'ruby', committer = 'ruby')
188
+ time = "#{ Time.now.to_i } #{ Time.now.to_s.split[4] }"
189
+ tree = root.write_to_store
190
+
191
+ contents = [ "tree #{tree}", (head and "parent #{head}"),
192
+ "author #{author} #{time}",
193
+ "committer #{committer} #{time}", '', message
194
+ ].compact.join("\n")
195
+
196
+ id = put_object(contents, 'commit')
197
+
198
+ open(head_path, "wb") do |file|
199
+ file.write(id)
126
200
  end
127
201
 
128
- def inspect
129
- "#<GitStore::Tree #{@data.inspect}>"
202
+ @head = id
203
+ end
204
+
205
+ # Read the raw object with the given id from the repository.
206
+ #
207
+ # Returns a pair of content and type of the object
208
+ def get_object(id)
209
+ path = object_path(id)
210
+
211
+ if File.exists?(path)
212
+ buf = open(path, "rb") { |f| f.read }
213
+ else
214
+ get_object_from_pack(id)
130
215
  end
216
+
217
+ raise if not legacy_loose_object?(buf)
218
+
219
+ header, content = Zlib::Inflate.inflate(buf).split(/\0/, 2)
220
+ type, size = header.split(/ /, 2)
221
+
222
+ raise if size.to_i != content.size
223
+
224
+ return content, type
225
+ end
131
226
 
132
- def fetch(name)
133
- name = name.to_s
134
- entry = @data[name]
135
- case entry
136
- when Blob then entry.data
137
- when Tree then entry
138
- end
227
+ def get_object_from_pack(id)
228
+ packs.each do |pack|
229
+ data = pack[id] and return data
139
230
  end
231
+ end
140
232
 
141
- def store(name, value)
142
- name = name.to_s
143
- if value.is_a?(Tree)
144
- value.name = name
145
- @data[name] = value
146
- else
147
- @data[name] = Blob.new(name, value)
233
+ # Returns the hash value of an object string.
234
+ def sha(str)
235
+ Digest::SHA1.hexdigest(str)[0, 40]
236
+ end
237
+
238
+ # Write a raw object to the repository.
239
+ #
240
+ # Returns the object id.
241
+ def put_object(content, type)
242
+ size = content.length.to_s
243
+ header = "#{type} #{size}\0"
244
+ data = header + content
245
+
246
+ id = sha(data)
247
+ path = object_path(id)
248
+
249
+ unless File.exists?(path)
250
+ FileUtils.mkpath(File.dirname(path))
251
+ open(path, 'wb') do |f|
252
+ f.write Zlib::Deflate.deflate(data)
148
253
  end
149
254
  end
255
+
256
+ id
257
+ end
150
258
 
151
- def has_key?(name)
152
- @data.has_key?(name)
153
- end
259
+ def legacy_loose_object?(buf)
260
+ word = (buf[0] << 8) + buf[1]
261
+ buf[0] == 0x78 && word % 31 == 0
262
+ end
154
263
 
155
- def [](*args)
156
- args = args.first.to_s.split('/') if args.size == 1
157
- args.inject(self) { |tree, key| tree.fetch(key) or return nil }
264
+ def load_packs(path)
265
+ if File.directory?(path)
266
+ Dir.open(path) do |dir|
267
+ entries = dir.select { |entry| entry =~ /\.pack$/i }
268
+ @packs = entries.map { |entry| PackStorage.new(File.join(path, entry)) }
269
+ end
270
+ end
271
+ end
272
+
273
+ # FileStore reads a working copy out of a directory. Changes made to
274
+ # the store will not be written to a repository. This is useful, if
275
+ # you want to read a filesystem without having a git repository.
276
+ class FileStore < GitStore
277
+
278
+ def initialize(path)
279
+ @mtime = {}
280
+ super
158
281
  end
159
282
 
160
- def []=(*args)
161
- value = args.pop
162
- args = args.first.to_s.split('/') if args.size == 1
163
- tree = args[0..-2].to_a.inject(self) do |tree, key|
164
- tree.has_key?(key) ? tree.fetch(key) : tree.store(key, Tree.new(key))
283
+ def load
284
+ root.load_from_disk
285
+
286
+ each_blob_in(root) do |blob|
287
+ @mtime[blob.path] = File.mtime("#{path}/#{blob.path}")
165
288
  end
166
- tree.store(args.last, value)
167
289
  end
168
290
 
169
- def delete(name)
170
- @data.delete(name)
171
- end
172
-
173
- def each(&block)
174
- @data.values.each do |entry|
291
+ def each_blob_in(tree, &blob)
292
+ tree.table.each do |name, entry|
175
293
  case entry
176
- when Blob then yield entry.data
177
- when Tree then entry.each(&block)
294
+ when Blob; yield entry
295
+ when Tree; each_blob_in(entry, &blob)
178
296
  end
179
297
  end
180
- end
181
-
182
- def each_with_path(path = [], &block)
183
- @data.each do |name, entry|
184
- child_path = path + [name]
185
- case entry
186
- when Blob then yield entry, child_path.join('/')
187
- when Tree then entry.each_with_path(child_path, &block)
188
- end
298
+ end
299
+
300
+ def refresh!
301
+ each_blob_in(root) do |blob|
302
+ path = "#{self.path}/#{blob.path}"
303
+ if File.exist?(path)
304
+ mtime = File.mtime(path)
305
+ if @mtime[blob.path] != mtime
306
+ @mtime[blob.path] = mtime
307
+ blob.load_from_disk
308
+ end
309
+ else
310
+ delete blob.path
311
+ end
189
312
  end
190
313
  end
191
-
192
- def to_hash
193
- @data.inject({}) do |hash, (name, entry)|
194
- hash[name] = entry.is_a?(Tree) ? entry.to_hash : entry.to_s
195
- hash
196
- end
314
+
315
+ def commit(message="")
197
316
  end
198
317
 
199
318
  end
200
319
 
201
- attr_reader :repo, :index, :tree
202
-
203
- def initialize(path, &block)
204
- @repo = Grit::Repo.new(path)
205
- @index = Grit::Index.new(@repo)
206
- @tree = Tree.new
207
- end
208
-
209
- def commit(message="")
210
- index.tree = tree.to_hash
211
- head = repo.heads.first
212
- index.commit(message, head ? head.commit.id : nil)
213
- end
214
-
215
- def [](*args)
216
- tree[*args]
217
- end
218
-
219
- def []=(*args)
220
- value = args.pop
221
- tree[*args] = value
222
- end
223
-
224
- def delete(path)
225
- tree.delete(path)
226
- end
227
-
228
- def load
229
- tree.load(repo.tree)
230
- end
231
-
232
- def each(&block)
233
- tree.each(&block)
234
- end
235
-
236
- def each_with_path(&block)
237
- tree.each_with_path(&block)
238
- end
239
-
240
320
  end
data/test/benchmark.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'git_store'
2
+ require 'grit'
3
+ require 'benchmark'
4
+ require 'fileutils'
5
+
6
+ REPO = File.expand_path(File.dirname(__FILE__) + '/repo')
7
+
8
+ FileUtils.rm_rf REPO
9
+ FileUtils.mkpath REPO
10
+ Dir.chdir REPO
11
+
12
+ `git init`
13
+
14
+ store = GitStore.new(REPO)
15
+
16
+ Benchmark.bm 20 do |x|
17
+ x.report 'store 1000 objects' do
18
+ store.transaction { 'aaa'.upto('jjj') { |key| store[key] = rand.to_s } }
19
+ end
20
+ x.report 'commit one object' do
21
+ store.transaction { store['aa'] = rand.to_s }
22
+ end
23
+ x.report 'load 1000 objects' do
24
+ GitStore.new('.')
25
+ end
26
+ x.report 'load 1000 with grit' do
27
+ Grit::Repo.new('.').tree.contents.each { |e| e.data }
28
+ end
29
+ end
30
+