georgi-git_store 0.1.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.md +137 -40
- data/git_store.gemspec +7 -2
- data/lib/git_store/blob.rb +64 -0
- data/lib/git_store/handlers.rb +57 -0
- data/lib/git_store/pack.rb +417 -0
- data/lib/git_store/tree.rb +207 -0
- data/lib/git_store.rb +267 -187
- data/test/benchmark.rb +30 -0
- data/test/git_store_spec.rb +212 -0
- metadata +7 -2
- data/spec/git_store_spec.rb +0 -117
data/lib/git_store.rb
CHANGED
@@ -1,240 +1,320 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
1
|
+
require 'rubygems'
|
2
|
+
require 'zlib'
|
3
|
+
require 'digest/sha1'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
require 'git_store/blob'
|
7
|
+
require 'git_store/tree'
|
8
|
+
require 'git_store/handlers'
|
9
|
+
require 'git_store/pack'
|
10
|
+
|
11
|
+
# GitStore implements a versioned data store based on the revision
|
12
|
+
# management system git. You can store object hierarchies as nested
|
13
|
+
# hashes, which will be mapped on the directory structure of a git
|
14
|
+
# repository.
|
15
|
+
#
|
16
|
+
# GitStore supports transactions, so that updates to the store either
|
17
|
+
# fail or succeed completely.
|
18
|
+
#
|
19
|
+
# GitStore manages concurrent access by a file locking scheme. So only
|
20
|
+
# one process can start a transaction at one time. This is implemented
|
21
|
+
# by locking the `refs/heads/<branch>.lock` file, which is also respected
|
22
|
+
# by the git binary.
|
23
|
+
#
|
24
|
+
# A regular commit should be atomic by the nature of git, as the only
|
25
|
+
# critical part is writing the 40 bytes SHA1 hash of the commit object
|
26
|
+
# to the file `refs/heads/<branch>`, which is done atomically by the
|
27
|
+
# operating system.
|
28
|
+
#
|
29
|
+
# So reading a repository should always be consistent in a git
|
30
|
+
# repository. The head of a branch points to a commit object, which in
|
31
|
+
# turn points to a tree object, which itself is a snapshot of the
|
32
|
+
# GitStore at commit time. All involved objects are keyed by their
|
33
|
+
# SHA1 value, so there is no chance for another process to write to
|
34
|
+
# the same files.
|
35
|
+
#
|
16
36
|
class GitStore
|
37
|
+
include Enumerable
|
17
38
|
|
18
|
-
|
19
|
-
def read(name, data)
|
20
|
-
data
|
21
|
-
end
|
39
|
+
attr_reader :path, :index, :root, :branch, :lock_file, :head
|
22
40
|
|
23
|
-
|
24
|
-
|
25
|
-
|
41
|
+
# Initialize a store.
|
42
|
+
def initialize(path, branch = 'master')
|
43
|
+
@path = path.chomp('/')
|
44
|
+
@branch = branch
|
45
|
+
@root = Tree.new(self)
|
46
|
+
|
47
|
+
load_packs("#{path}/.git/objects/pack")
|
48
|
+
load
|
26
49
|
end
|
27
|
-
|
28
|
-
class YAMLHandler
|
29
|
-
def read(name, data)
|
30
|
-
YAML.load(data)
|
31
|
-
end
|
32
50
|
|
33
|
-
|
34
|
-
|
35
|
-
|
51
|
+
# The path to the current head file.
|
52
|
+
def head_path
|
53
|
+
"#{path}/.git/refs/heads/#{branch}"
|
36
54
|
end
|
37
55
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
56
|
+
# The path to the object file for given id.
|
57
|
+
def object_path(id)
|
58
|
+
"#{path}/.git/objects/#{ id[0...2] }/#{ id[2..39] }"
|
42
59
|
end
|
43
60
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
61
|
+
# Read the id of the head commit.
|
62
|
+
#
|
63
|
+
# Returns the object id of the last commit.
|
64
|
+
def read_head
|
65
|
+
File.read(head_path).strip if File.exists?(head_path)
|
48
66
|
end
|
49
67
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
68
|
+
# Read an object for the specified path.
|
69
|
+
#
|
70
|
+
# Use multiple arguments or a string with slashes.
|
71
|
+
def [](*args)
|
72
|
+
root[*args]
|
73
|
+
end
|
56
74
|
|
57
|
-
|
75
|
+
# Write an object to the specified path.
|
76
|
+
#
|
77
|
+
# Use multiple arguments or a string with slashes.
|
78
|
+
def []=(*args)
|
79
|
+
value = args.pop
|
80
|
+
root[*args] = value
|
81
|
+
end
|
58
82
|
|
59
|
-
|
83
|
+
# Delete the specified path.
|
84
|
+
#
|
85
|
+
# Use multiple arguments or a string with slashes.
|
86
|
+
def delete(*args)
|
87
|
+
root.delete(*args)
|
88
|
+
end
|
60
89
|
|
61
|
-
|
62
|
-
|
90
|
+
# Returns the store as a hash tree.
|
91
|
+
def to_hash
|
92
|
+
root.to_hash
|
93
|
+
end
|
63
94
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
else
|
69
|
-
@name = args[0]
|
70
|
-
self.data = args[1]
|
71
|
-
end
|
72
|
-
end
|
95
|
+
# Inspect the store.
|
96
|
+
def inspect
|
97
|
+
"#<GitStore #{path} #{branch} #{root.to_hash.inspect}>"
|
98
|
+
end
|
73
99
|
|
74
|
-
|
75
|
-
|
76
|
-
|
100
|
+
# Iterate over all values found in this store.
|
101
|
+
def each(&block)
|
102
|
+
root.each(&block)
|
103
|
+
end
|
77
104
|
|
78
|
-
|
79
|
-
|
80
|
-
|
105
|
+
# Has our store been changed on disk?
|
106
|
+
def changed?
|
107
|
+
head != read_head
|
108
|
+
end
|
81
109
|
|
82
|
-
|
83
|
-
|
84
|
-
|
110
|
+
def refresh!
|
111
|
+
load if changed?
|
112
|
+
end
|
85
113
|
|
86
|
-
|
87
|
-
|
114
|
+
# Load the current head version from repository.
|
115
|
+
def load
|
116
|
+
if @head = read_head
|
117
|
+
commit = get_object(head)[0]
|
118
|
+
root.id = commit.split(/[ \n]/, 3)[1].strip
|
119
|
+
root.data = get_object(root.id)[0]
|
120
|
+
root.load_from_store
|
88
121
|
end
|
122
|
+
end
|
89
123
|
|
90
|
-
|
91
|
-
|
92
|
-
|
124
|
+
# Reload the store, if it has been changed on disk.
|
125
|
+
def refresh!
|
126
|
+
load if changed?
|
127
|
+
end
|
93
128
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
129
|
+
# Do we have a current transaction?
|
130
|
+
def in_transaction?
|
131
|
+
Thread.current['git_store_lock']
|
132
|
+
end
|
133
|
+
|
134
|
+
# All changes made inside a transaction are atomic. If some
|
135
|
+
# exception occurs the transaction will be rolled back.
|
136
|
+
#
|
137
|
+
# Example:
|
138
|
+
# store.transaction { store['a'] = 'b' }
|
139
|
+
#
|
140
|
+
def transaction(message = "")
|
141
|
+
start_transaction
|
142
|
+
result = yield
|
143
|
+
commit message
|
101
144
|
|
145
|
+
result
|
146
|
+
rescue
|
147
|
+
rollback
|
148
|
+
raise
|
149
|
+
ensure
|
150
|
+
finish_transaction
|
102
151
|
end
|
103
152
|
|
104
|
-
|
105
|
-
|
153
|
+
# Start a transaction.
|
154
|
+
#
|
155
|
+
# Tries to get a lock on the lock file and reloads this store if it
|
156
|
+
# has changed in the repository.
|
157
|
+
def start_transaction
|
158
|
+
file = open("#{head_path}.lock", "w")
|
159
|
+
file.flock(File::LOCK_EX)
|
160
|
+
|
161
|
+
Thread.current['git_store_lock'] = file
|
162
|
+
|
163
|
+
load if changed?
|
164
|
+
end
|
106
165
|
|
107
|
-
|
108
|
-
|
166
|
+
# Restore the state of the store.
|
167
|
+
#
|
168
|
+
# Any changes made to the store are discarded.
|
169
|
+
def rollback
|
170
|
+
root.load_from_store
|
171
|
+
finish_transaction
|
172
|
+
end
|
173
|
+
|
174
|
+
# Finish the transaction.
|
175
|
+
#
|
176
|
+
# Release the lock file.
|
177
|
+
def finish_transaction
|
178
|
+
Thread.current['git_store_lock'].close rescue nil
|
179
|
+
Thread.current['git_store_lock'] = nil
|
109
180
|
|
110
|
-
|
111
|
-
|
112
|
-
@name = name
|
113
|
-
end
|
181
|
+
File.unlink("#{head_path}.lock") rescue nil
|
182
|
+
end
|
114
183
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
184
|
+
# Write the commit object to disk and set the head of the current branch.
|
185
|
+
#
|
186
|
+
# Returns the id of the commit object
|
187
|
+
def commit(message = '', author = 'ruby', committer = 'ruby')
|
188
|
+
time = "#{ Time.now.to_i } #{ Time.now.to_s.split[4] }"
|
189
|
+
tree = root.write_to_store
|
190
|
+
|
191
|
+
contents = [ "tree #{tree}", (head and "parent #{head}"),
|
192
|
+
"author #{author} #{time}",
|
193
|
+
"committer #{committer} #{time}", '', message
|
194
|
+
].compact.join("\n")
|
195
|
+
|
196
|
+
id = put_object(contents, 'commit')
|
197
|
+
|
198
|
+
open(head_path, "wb") do |file|
|
199
|
+
file.write(id)
|
126
200
|
end
|
127
201
|
|
128
|
-
|
129
|
-
|
202
|
+
@head = id
|
203
|
+
end
|
204
|
+
|
205
|
+
# Read the raw object with the given id from the repository.
|
206
|
+
#
|
207
|
+
# Returns a pair of content and type of the object
|
208
|
+
def get_object(id)
|
209
|
+
path = object_path(id)
|
210
|
+
|
211
|
+
if File.exists?(path)
|
212
|
+
buf = open(path, "rb") { |f| f.read }
|
213
|
+
else
|
214
|
+
get_object_from_pack(id)
|
130
215
|
end
|
216
|
+
|
217
|
+
raise if not legacy_loose_object?(buf)
|
218
|
+
|
219
|
+
header, content = Zlib::Inflate.inflate(buf).split(/\0/, 2)
|
220
|
+
type, size = header.split(/ /, 2)
|
221
|
+
|
222
|
+
raise if size.to_i != content.size
|
223
|
+
|
224
|
+
return content, type
|
225
|
+
end
|
131
226
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
case entry
|
136
|
-
when Blob then entry.data
|
137
|
-
when Tree then entry
|
138
|
-
end
|
227
|
+
def get_object_from_pack(id)
|
228
|
+
packs.each do |pack|
|
229
|
+
data = pack[id] and return data
|
139
230
|
end
|
231
|
+
end
|
140
232
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
233
|
+
# Returns the hash value of an object string.
|
234
|
+
def sha(str)
|
235
|
+
Digest::SHA1.hexdigest(str)[0, 40]
|
236
|
+
end
|
237
|
+
|
238
|
+
# Write a raw object to the repository.
|
239
|
+
#
|
240
|
+
# Returns the object id.
|
241
|
+
def put_object(content, type)
|
242
|
+
size = content.length.to_s
|
243
|
+
header = "#{type} #{size}\0"
|
244
|
+
data = header + content
|
245
|
+
|
246
|
+
id = sha(data)
|
247
|
+
path = object_path(id)
|
248
|
+
|
249
|
+
unless File.exists?(path)
|
250
|
+
FileUtils.mkpath(File.dirname(path))
|
251
|
+
open(path, 'wb') do |f|
|
252
|
+
f.write Zlib::Deflate.deflate(data)
|
148
253
|
end
|
149
254
|
end
|
255
|
+
|
256
|
+
id
|
257
|
+
end
|
150
258
|
|
151
|
-
|
152
|
-
|
153
|
-
|
259
|
+
def legacy_loose_object?(buf)
|
260
|
+
word = (buf[0] << 8) + buf[1]
|
261
|
+
buf[0] == 0x78 && word % 31 == 0
|
262
|
+
end
|
154
263
|
|
155
|
-
|
156
|
-
|
157
|
-
|
264
|
+
def load_packs(path)
|
265
|
+
if File.directory?(path)
|
266
|
+
Dir.open(path) do |dir|
|
267
|
+
entries = dir.select { |entry| entry =~ /\.pack$/i }
|
268
|
+
@packs = entries.map { |entry| PackStorage.new(File.join(path, entry)) }
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# FileStore reads a working copy out of a directory. Changes made to
|
274
|
+
# the store will not be written to a repository. This is useful, if
|
275
|
+
# you want to read a filesystem without having a git repository.
|
276
|
+
class FileStore < GitStore
|
277
|
+
|
278
|
+
def initialize(path)
|
279
|
+
@mtime = {}
|
280
|
+
super
|
158
281
|
end
|
159
282
|
|
160
|
-
def
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
283
|
+
def load
|
284
|
+
root.load_from_disk
|
285
|
+
|
286
|
+
each_blob_in(root) do |blob|
|
287
|
+
@mtime[blob.path] = File.mtime("#{path}/#{blob.path}")
|
165
288
|
end
|
166
|
-
tree.store(args.last, value)
|
167
289
|
end
|
168
290
|
|
169
|
-
def
|
170
|
-
|
171
|
-
end
|
172
|
-
|
173
|
-
def each(&block)
|
174
|
-
@data.values.each do |entry|
|
291
|
+
def each_blob_in(tree, &blob)
|
292
|
+
tree.table.each do |name, entry|
|
175
293
|
case entry
|
176
|
-
when Blob
|
177
|
-
when Tree
|
294
|
+
when Blob; yield entry
|
295
|
+
when Tree; each_blob_in(entry, &blob)
|
178
296
|
end
|
179
297
|
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
298
|
+
end
|
299
|
+
|
300
|
+
def refresh!
|
301
|
+
each_blob_in(root) do |blob|
|
302
|
+
path = "#{self.path}/#{blob.path}"
|
303
|
+
if File.exist?(path)
|
304
|
+
mtime = File.mtime(path)
|
305
|
+
if @mtime[blob.path] != mtime
|
306
|
+
@mtime[blob.path] = mtime
|
307
|
+
blob.load_from_disk
|
308
|
+
end
|
309
|
+
else
|
310
|
+
delete blob.path
|
311
|
+
end
|
189
312
|
end
|
190
313
|
end
|
191
|
-
|
192
|
-
def
|
193
|
-
@data.inject({}) do |hash, (name, entry)|
|
194
|
-
hash[name] = entry.is_a?(Tree) ? entry.to_hash : entry.to_s
|
195
|
-
hash
|
196
|
-
end
|
314
|
+
|
315
|
+
def commit(message="")
|
197
316
|
end
|
198
317
|
|
199
318
|
end
|
200
319
|
|
201
|
-
attr_reader :repo, :index, :tree
|
202
|
-
|
203
|
-
def initialize(path, &block)
|
204
|
-
@repo = Grit::Repo.new(path)
|
205
|
-
@index = Grit::Index.new(@repo)
|
206
|
-
@tree = Tree.new
|
207
|
-
end
|
208
|
-
|
209
|
-
def commit(message="")
|
210
|
-
index.tree = tree.to_hash
|
211
|
-
head = repo.heads.first
|
212
|
-
index.commit(message, head ? head.commit.id : nil)
|
213
|
-
end
|
214
|
-
|
215
|
-
def [](*args)
|
216
|
-
tree[*args]
|
217
|
-
end
|
218
|
-
|
219
|
-
def []=(*args)
|
220
|
-
value = args.pop
|
221
|
-
tree[*args] = value
|
222
|
-
end
|
223
|
-
|
224
|
-
def delete(path)
|
225
|
-
tree.delete(path)
|
226
|
-
end
|
227
|
-
|
228
|
-
def load
|
229
|
-
tree.load(repo.tree)
|
230
|
-
end
|
231
|
-
|
232
|
-
def each(&block)
|
233
|
-
tree.each(&block)
|
234
|
-
end
|
235
|
-
|
236
|
-
def each_with_path(&block)
|
237
|
-
tree.each_with_path(&block)
|
238
|
-
end
|
239
|
-
|
240
320
|
end
|
data/test/benchmark.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'git_store'
|
2
|
+
require 'grit'
|
3
|
+
require 'benchmark'
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
REPO = File.expand_path(File.dirname(__FILE__) + '/repo')
|
7
|
+
|
8
|
+
FileUtils.rm_rf REPO
|
9
|
+
FileUtils.mkpath REPO
|
10
|
+
Dir.chdir REPO
|
11
|
+
|
12
|
+
`git init`
|
13
|
+
|
14
|
+
store = GitStore.new(REPO)
|
15
|
+
|
16
|
+
Benchmark.bm 20 do |x|
|
17
|
+
x.report 'store 1000 objects' do
|
18
|
+
store.transaction { 'aaa'.upto('jjj') { |key| store[key] = rand.to_s } }
|
19
|
+
end
|
20
|
+
x.report 'commit one object' do
|
21
|
+
store.transaction { store['aa'] = rand.to_s }
|
22
|
+
end
|
23
|
+
x.report 'load 1000 objects' do
|
24
|
+
GitStore.new('.')
|
25
|
+
end
|
26
|
+
x.report 'load 1000 with grit' do
|
27
|
+
Grit::Repo.new('.').tree.contents.each { |e| e.data }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|