georgi-git_store 0.1.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.md +137 -40
- data/git_store.gemspec +7 -2
- data/lib/git_store/blob.rb +64 -0
- data/lib/git_store/handlers.rb +57 -0
- data/lib/git_store/pack.rb +417 -0
- data/lib/git_store/tree.rb +207 -0
- data/lib/git_store.rb +267 -187
- data/test/benchmark.rb +30 -0
- data/test/git_store_spec.rb +212 -0
- metadata +7 -2
- data/spec/git_store_spec.rb +0 -117
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,21 +1,25 @@
|
|
1
1
|
Git Store - using Git as versioned data store in Ruby
|
2
2
|
=====================================================
|
3
3
|
|
4
|
-
GitStore
|
5
|
-
|
6
|
-
|
7
|
-
out the repository into a
|
8
|
-
modified and finally committed.
|
9
|
-
folder structure and can be checked out and examined, but the
|
10
|
-
application may access the data in a convenient hash-like way. This
|
11
|
-
library is based on [Grit][2], the main technology behind [GitHub][3].
|
4
|
+
GitStore implements a versioned data store based on the revision
|
5
|
+
management system [Git][1]. You can store object hierarchies as nested
|
6
|
+
hashes, which will be mapped on the directory structure of a git
|
7
|
+
repository. Basically GitStore checks out the repository into an
|
8
|
+
in-memory representation, which can be modified and finally committed.
|
12
9
|
|
10
|
+
GitStore supports transactions, so that updates to the store either
|
11
|
+
fail or succeed completely.
|
13
12
|
|
14
|
-
|
13
|
+
GitStore manages concurrent access by a file locking scheme. So only
|
14
|
+
one process can start a transaction at one time. This is implemented
|
15
|
+
by locking the `refs/heads/<branch>.lock` file, which is also
|
16
|
+
respected by the git binary.
|
17
|
+
|
18
|
+
### Installation
|
15
19
|
|
16
20
|
GitStore can be installed easily as a gem if you have RubyGems 1.2.0:
|
17
21
|
|
18
|
-
$ gem sources -a http://gems.github.com
|
22
|
+
$ gem sources -a http://gems.github.com
|
19
23
|
$ sudo gem install georgi-git_store
|
20
24
|
|
21
25
|
If you don't have RubyGems 1.2.0, you may download the package on the
|
@@ -25,7 +29,7 @@ If you don't have RubyGems 1.2.0, you may download the package on the
|
|
25
29
|
$ sudo gem install git_store
|
26
30
|
|
27
31
|
|
28
|
-
|
32
|
+
### Usage Example
|
29
33
|
|
30
34
|
The first thing you should do is initialize a new git repository.
|
31
35
|
|
@@ -37,29 +41,90 @@ Now you can instantiate a GitStore instance and store some data. The
|
|
37
41
|
data will be serialized depending on the file extension. So for YAML
|
38
42
|
storage you can use the 'yml' extension:
|
39
43
|
|
40
|
-
|
41
|
-
class User < Struct.new(:name); end
|
44
|
+
@@ruby
|
42
45
|
|
43
|
-
store = GitStore.new('
|
46
|
+
store = GitStore.new('/path/to/repo')
|
44
47
|
|
45
48
|
store['users/matthias.yml'] = User.new('Matthias')
|
46
|
-
store['pages/home.yml'] =
|
49
|
+
store['pages/home.yml'] = Page.new('matthias', 'Home')
|
47
50
|
|
48
51
|
store.commit 'Added user and page'
|
49
52
|
|
50
|
-
Note that directories will be created automatically.
|
53
|
+
# Note that directories will be created automatically.
|
54
|
+
# Another way to access a path is:
|
55
|
+
|
56
|
+
store['config', 'wiki.yml'] = { 'name' => 'My Personal Wiki' }
|
57
|
+
|
58
|
+
# Finally you can access the git store as a Hash of Hashes, but in
|
59
|
+
# this case you have to create the Tree objects manually:
|
60
|
+
|
61
|
+
puts store['config']['wiki.yml']['name']
|
62
|
+
|
63
|
+
|
64
|
+
### Transactions
|
65
|
+
|
66
|
+
If you access the repository from different processes, you should
|
67
|
+
write to your store using transactions. If something goes wrong inside
|
68
|
+
a transaction, all changes will be rolled back to the original state.
|
69
|
+
|
70
|
+
@@ruby
|
71
|
+
|
72
|
+
store = GitStore.new('/path/to/repo')
|
73
|
+
|
74
|
+
store.transaction do
|
75
|
+
# If an exception happens here, the transaction will be aborted.
|
76
|
+
store['pages/home.yml'] = Page.new('matthias', 'Home')
|
77
|
+
end
|
78
|
+
|
79
|
+
# transaction without a block
|
80
|
+
|
81
|
+
store.start_transaction
|
82
|
+
|
83
|
+
store['pages/home.yml'] = Page.new('matthias', 'Home')
|
84
|
+
|
85
|
+
store.rollback # This will restore the original state
|
86
|
+
|
87
|
+
|
88
|
+
### Performance
|
89
|
+
|
90
|
+
Maintaining 1000 objects in one folder seems to yield quite usable
|
91
|
+
results. If I run the following benchmark:
|
51
92
|
|
52
|
-
|
93
|
+
@@ruby
|
94
|
+
|
95
|
+
Benchmark.bm 20 do |x|
|
96
|
+
x.report 'store 1000 objects' do
|
97
|
+
store.transaction { 'aaa'.upto('jjj') { |key| store[key] = rand.to_s } }
|
98
|
+
end
|
99
|
+
x.report 'commit one object' do
|
100
|
+
store.transaction { store['aa'] = rand.to_s }
|
101
|
+
end
|
102
|
+
x.report 'load 1000 objects' do
|
103
|
+
GitStore.new('.')
|
104
|
+
end
|
105
|
+
x.report 'load 1000 with grit' do
|
106
|
+
Grit::Repo.new('.').tree.contents.each { |e| e.data }
|
107
|
+
end
|
108
|
+
end
|
53
109
|
|
54
|
-
store[config', 'wiki.yml'] = { 'name' => 'My Personal Wiki' }
|
55
110
|
|
56
|
-
|
57
|
-
case you have to create the Tree objects manually:
|
111
|
+
I get the following results:
|
58
112
|
|
59
|
-
|
60
|
-
store
|
113
|
+
user system total real
|
114
|
+
store 1000 objects 4.150000 0.880000 5.030000 ( 5.035804)
|
115
|
+
commit one object 0.070000 0.020000 0.090000 ( 0.082252)
|
116
|
+
load 1000 objects 0.630000 0.120000 0.750000 ( 0.750765)
|
117
|
+
load 1000 with grit 1.960000 0.260000 2.220000 ( 2.228583)
|
61
118
|
|
62
|
-
|
119
|
+
|
120
|
+
In a real world scenario, you should partition your data. For example,
|
121
|
+
my blog engine, [Shinmun][7], stores posts in folders by month.
|
122
|
+
|
123
|
+
One nice thing about the results is that GitStore loads large
|
124
|
+
directories three times faster than [Grit][2].
|
125
|
+
|
126
|
+
|
127
|
+
### Where is my data?
|
63
128
|
|
64
129
|
When you call the `commit` method, your data is written back straight
|
65
130
|
into the git repository. No intermediate file representation. So if
|
@@ -69,33 +134,58 @@ you want to look into your data, you can use some git browser like
|
|
69
134
|
$ git checkout
|
70
135
|
|
71
136
|
|
72
|
-
|
137
|
+
### Development Mode
|
138
|
+
|
139
|
+
There is also some kind of development mode, which is convenient to
|
140
|
+
use. Imagine you are tweaking the design of your blog, which is
|
141
|
+
storing its pages in a GitStore. You don't want to commit each change
|
142
|
+
to see some change in your browser. FileStore helps you here:
|
143
|
+
|
144
|
+
@@ruby
|
145
|
+
|
146
|
+
store = GitStore::FileStore.new('.')
|
147
|
+
|
148
|
+
# Access the file 'posts/2009/1/git-store.md'
|
149
|
+
|
150
|
+
p store['posts', 2009, 1, 'git-store.md']
|
151
|
+
|
152
|
+
|
153
|
+
FileStore forbids you to write to the disk, as this makes no sense. If
|
154
|
+
you want to store something programmatically, you have to use the real
|
155
|
+
GitStore.
|
156
|
+
|
157
|
+
|
158
|
+
### Iteration
|
73
159
|
|
74
160
|
Iterating over the data objects is quite easy. Furthermore you can
|
75
161
|
iterate over trees and subtrees, so you can partition your data in a
|
76
162
|
meaningful way. For example you may separate the config files and the
|
77
163
|
pages of a wiki:
|
78
164
|
|
79
|
-
|
80
|
-
|
81
|
-
store['pages/
|
165
|
+
@@ruby
|
166
|
+
|
167
|
+
store['pages/home.yml'] = Page.new('matthias', 'Home')
|
168
|
+
store['pages/about.yml'] = Page.new('matthias', 'About')
|
169
|
+
store['pages/links.yml'] = WikiPage.new('matthias', 'Links')
|
82
170
|
store['config/wiki.yml'] = { 'name' => 'My Personal Wiki' }
|
83
171
|
|
84
|
-
store.each { |obj| ... } # yields all pages and the config
|
172
|
+
store.each { |obj| ... } # yields all pages and the config file
|
85
173
|
store['pages'].each { |page| ... } # yields only the pages
|
86
174
|
|
87
175
|
|
88
|
-
|
176
|
+
### Serialization
|
89
177
|
|
90
178
|
Serialization is dependent on the filename extension. You can add more
|
91
179
|
handlers if you like, the interface is like this:
|
92
180
|
|
181
|
+
@@ruby
|
182
|
+
|
93
183
|
class YAMLHandler
|
94
|
-
def read(
|
184
|
+
def read(path, data)
|
95
185
|
YAML.load(data)
|
96
186
|
end
|
97
187
|
|
98
|
-
def write(data)
|
188
|
+
def write(path, data)
|
99
189
|
data.to_yaml
|
100
190
|
end
|
101
191
|
end
|
@@ -105,28 +195,35 @@ handlers if you like, the interface is like this:
|
|
105
195
|
|
106
196
|
Shinmun uses its own handler for files with `md` extension:
|
107
197
|
|
198
|
+
@@ruby
|
199
|
+
|
108
200
|
class PostHandler
|
109
|
-
def read(
|
110
|
-
Post.new(:
|
201
|
+
def read(path, data)
|
202
|
+
Post.new(:path => path, :src => data)
|
111
203
|
end
|
112
204
|
|
113
|
-
def write(post)
|
205
|
+
def write(path, post)
|
114
206
|
post.dump
|
115
207
|
end
|
116
208
|
end
|
117
209
|
|
118
|
-
GitStore::Handler[md'] = PostHandler.new
|
210
|
+
GitStore::Handler['md'] = PostHandler.new
|
211
|
+
|
212
|
+
|
213
|
+
### GitStore on GitHub
|
214
|
+
|
215
|
+
Download or fork the project on its [GitHub page][5].
|
216
|
+
|
217
|
+
|
218
|
+
### Related Work
|
119
219
|
|
220
|
+
John Wiegley has already done [something similar for Python][4].
|
120
221
|
|
121
|
-
## Related Work
|
122
222
|
|
123
|
-
John Wiegley already has done [something similar for Python][4]. His
|
124
|
-
implementation has its own git interface, GitStore uses the wonderful
|
125
|
-
[Grit][2] library.
|
126
223
|
|
127
224
|
[1]: http://git.or.cz/
|
128
225
|
[2]: http://github.com/mojombo/grit
|
129
|
-
[3]: http://github.com/
|
130
226
|
[4]: http://www.newartisans.com/blog_files/git.versioned.data.store.php
|
131
227
|
[5]: http://github.com/georgi/git_store
|
132
228
|
[6]: http://www.kernel.org/pub/software/scm/git/docs/git-gui.html
|
229
|
+
[7]: http://www.matthias-georgi.de/shinmun
|
data/git_store.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'git_store'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.2'
|
4
4
|
s.date = '2008-12-17'
|
5
5
|
s.summary = 'a simple data store based on git'
|
6
6
|
s.author = 'Matthias Georgi'
|
@@ -16,7 +16,12 @@ LICENSE
|
|
16
16
|
README.md
|
17
17
|
git_store.gemspec
|
18
18
|
lib/git_store.rb
|
19
|
-
|
19
|
+
lib/git_store/blob.rb
|
20
|
+
lib/git_store/tree.rb
|
21
|
+
lib/git_store/handlers.rb
|
22
|
+
lib/git_store/pack.rb
|
23
|
+
test/git_store_spec.rb
|
24
|
+
test/benchmark.rb
|
20
25
|
}
|
21
26
|
end
|
22
27
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
class GitStore

  # A single file of the store.
  #
  # A Blob keeps the raw string data of a git blob next to the
  # deserialized data object. The object is produced lazily from the
  # raw data by the handler registered for the file extension.
  class Blob

    # store:: the owning store (must respond to +put_object+ and +path+)
    # id::    the git object id, or nil if the blob was never written
    # mode::  the file mode string used for the tree entry
    # path::  the path of the file
    # data::  the raw (serialized) string data
    attr_accessor :store, :id, :mode, :path, :data

    # Initialize a Blob with default mode of '100644'.
    def initialize(store)
      @store = store
      @mode = '100644'
    end

    # Set all attributes at once. Every omitted argument is reset to
    # nil, including the mode assigned by the constructor.
    def set(id, mode = nil, path = nil, data = nil, object = nil)
      @id, @mode, @path, @data, @object = id, mode, path, data, object
    end

    # Returns the extension of the filename without the leading dot,
    # or nil if the path has no extension.
    def extname
      File.extname(path)[1..-1]
    end

    # Returns the handler for serializing the blob data.
    def handler
      Handler[extname]
    end

    # Returns true if the blob has never been written (no id yet) or
    # if a new data object was assigned since the last write, false
    # otherwise. Always a strict boolean, never nil.
    def modified?
      (id.nil? || @modified) ? true : false
    end

    # Returns the data object, deserializing the raw data on first
    # access and caching the result.
    def object
      @object ||= handler.read(path, data)
    end

    # Set the data object and refresh the raw data from it. Handlers
    # without a +write+ method store the value unchanged.
    def object=(value)
      @modified = true
      @object = value
      @data = handler.respond_to?(:write) ? handler.write(path, value) : value
    end

    # Replace the raw data with the content of the file on disk and
    # drop the cached data object, so it is deserialized again on the
    # next access. Returns the data read.
    def load_from_disk
      @object = nil
      # File.open instead of Kernel#open: the latter would run a
      # subprocess if the resulting name started with a pipe character.
      @data = File.open("#{store.path}/#{path}", 'rb') { |f| f.read }
    end

    # Write the data to the git object store and return the object id.
    # Unmodified blobs are not written again.
    def write_to_store
      return @id unless modified?

      @id = store.put_object(data, 'blob')
      # Only mark the blob as clean after the write succeeded, so a
      # failed write can be retried and the change is not lost.
      @modified = false
      @id
    end

  end

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
|
2
|
+
# This fix ensures sorted yaml maps.
|
3
|
+
class Hash
|
4
|
+
def to_yaml( opts = {} )
|
5
|
+
YAML::quick_emit( object_id, opts ) do |out|
|
6
|
+
out.map( taguri, to_yaml_style ) do |map|
|
7
|
+
sort_by { |k, v| k.to_s }.each do |k, v|
|
8
|
+
map.add( k, v )
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class GitStore

  # Fallback handler: blobs are treated as plain strings.
  class DefaultHandler
    # Hands the raw blob data back unchanged.
    def read(path, data)
      data
    end

    # Stores the string form (+to_s+) of the given object.
    def write(path, data)
      data.to_s
    end
  end

  # Handler for YAML documents.
  class YAMLHandler
    # Parses the blob data as YAML.
    # NOTE(review): which objects YAML.load may instantiate depends on
    # the YAML library in use - confirm the repository content is trusted.
    def read(path, data)
      YAML.load(data)
    end

    # Serializes the object with +to_yaml+.
    def write(path, data)
      data.to_yaml
    end
  end

  # Read-only handler for Ruby source files (it defines no +write+).
  class RubyHandler
    # Evaluates the blob data as Ruby code in the context of Object and
    # returns the value of the evaluation.
    # NOTE(review): this executes whatever is stored in the file - only
    # use it with trusted repositories.
    def read(path, data)
      Object.class_eval(data)
    end
  end

  # Read-only handler for ERB templates (it defines no +write+).
  class ERBHandler
    # Wraps the blob data in an ERB template object.
    def read(path, data)
      ERB.new(data)
    end
  end

  # Registry mapping a file extension (without the dot) to its handler.
  # Unknown extensions, including nil, resolve to a DefaultHandler.
  Handler = Hash.new(DefaultHandler.new).update(
    'yml'   => YAMLHandler.new,
    'rhtml' => ERBHandler.new,
    'rxml'  => ERBHandler.new,
    'rb'    => RubyHandler.new
  )
end
|