treet 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.json
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in treet.gemspec
4
+ gemspec
5
+
6
+ gem 'thor'
data/Guardfile ADDED
@@ -0,0 +1,9 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec', :version => 2 do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/treet/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch(%r{^(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
8
+ watch('spec/spec_helper.rb') { "spec" }
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Jason May
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ # Treet
2
+
3
+ Comparisons and transformation between trees of files and JSON blobs
4
+
5
+ The "JSON blobs" that are supported are not unlimited in structure, but must define:
6
+
7
+ * hashes, where the values are either {hashes where the values are all scalars} or {arrays of hashes where the values are all scalars}
8
+ * or arrays of hashes as described above.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'treet'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install treet
23
+
24
+ ## Usage - Command Line
25
+
26
+ treet expand [path] [jsonfile]
27
+ treet explode [jsonfile] [rootdir]
28
+ treet import [rootdir] [xrefkey]
29
+
30
+ ## Usage - API
31
+
32
+ require 'treet'
33
+
34
+ hash = Treet::Hash.new(jsonfile)
35
+ repo = Treet::Repo.new(directory)
36
+ farm = Treet::Farm.new(:root => rootdir, :xref => 'label')
37
+
38
+ hash = repo.to_hash
39
+ repo = hash.to_repo(root)
40
+ hash = farm.export
41
+
42
+ Treet.init(jsonfile, root) # when jsonfile contains an array which is exploded to multiple files
43
+
44
+ ## Concepts
45
+
46
+ A *repo* is a directory that contains other files & directories. Any text files in this tree structure must contain JSON-formatted data.
47
+
48
+ A *farm* is a directory containing one or more repos. When a farm is exported to JSON, each record is augmented with an xref value that contains the root filename of that repo.
49
+
50
+ For example:
51
+
52
+ farm = Treet::Farm.new(:root => rootdir, :xref => 'keycode')
53
+ puts farm.export
54
+
55
+ should produce something like:
56
+
57
+ {
58
+ "subdir1": {
59
+ "field": "value"
60
+ },
61
+ "subdir2": {
62
+ "field": "value",
63
+ "field2": "value2"
64
+ },
65
+ "xref": {
66
+ "keycode": "repo-dir-name"
67
+ }
68
+ }
69
+
70
+ ## Structures
71
+
72
+ All the nodes at the top level are mapped to subdirectories.
73
+
74
+ At the second level, array elements are converted to individual subdirectories. Subdirectories are
75
+ named with unique digest values computed from the data contents. This means that duplicate entries
76
+ are not allowed.
77
+
78
+ ## Contributing
79
+
80
+ 1. Fork it
81
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
82
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
83
+ 4. Push to the branch (`git push origin my-new-feature`)
84
+ 5. Create new Pull Request
85
+
86
+ ## TODO
87
+
88
+ * Enforce limitation on structure depth (top-level elements can contain flat hashes or arrays, nothing else)
89
+ * refac: move diff stuff from hash.rb to Treet::Diff class, to encapsulate the structure of a diff (array of arrays); create methods for hunting for special stuff in a diff
90
+ * Check all exceptions for explicit classes
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task :default => :spec
data/bin/treet ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'treet'
4
+ require "thor"
5
+
6
+ class TreetCommand < Thor
7
+ desc "export ROOTDIR", "convert a collection of repositories to a single JSON blob"
8
+ method_option :xref, :desc => "optional fieldname to be added under `xref` with basename of repository"
9
+ def export(path)
10
+ # if !File.directory?(path)
11
+ # raise "treet export: could not find #{path}"
12
+ # end
13
+
14
+ farm = Treet::Farm.new(:root => path, :xref => options[:xref])
15
+ jj farm.export
16
+ end
17
+
18
+
19
+ desc "create JSONFILE", "build a single repository from a JSON file"
20
+ method_option :root, :required => true
21
+ def create(jsonfile)
22
+ hash = Treet::Hash.new(jsonfile)
23
+ hash.to_repo(options[:root])
24
+ $stderr.puts "Wrote repository to #{options[:root]}"
25
+ end
26
+
27
+
28
+ desc "show DIRECTORY", "convert a single repository to a JSON blob"
29
+ def show(path)
30
+ repo = Treet::Repo.new(path)
31
+ jj repo.to_hash
32
+ end
33
+
34
+
35
+ desc "explode JSONFILE", "Build a collection of repositories from a JSON file"
36
+ method_option :root, :required => true, :desc => "where to create repositories (will be created if does not exist)"
37
+ def explode(jsonfile)
38
+ if !File.directory?(options[:root])
39
+ Dir.mkdir(options[:root])
40
+ end
41
+
42
+ farm = Treet::Farm.plant(:json => jsonfile, :root => options[:root])
43
+ filecount = Dir.glob("#{farm.root}/*").count
44
+ $stderr.puts "Wrote #{filecount} files to #{farm.root}"
45
+ end
46
+
47
+
48
+ desc "patch JSONFILE", "Apply patches from file to specified repository tree"
49
+ method_option :root, :required => true, :desc => "where to find repositories to patch"
50
+ method_option :xref, :required => true, :desc => "fieldname under `xref` to use for repository identification"
51
# Applies the patches stored in the JSON file `patchfile` to the farm found
# under the `root` option, using the `xref` option as the xref field name.
# Prints the number of patched records to stderr.
def patch(patchfile)
  # File.read closes the file as soon as it has been read; the previous
  # File.open without a block left the handle open until garbage collection.
  patches = JSON.load(File.read(patchfile))
  farm = Treet::Farm.new(:root => options[:root], :xref => options[:xref])
  results = farm.patch(patches)
  $stderr.puts "Patched #{results.count} records."
end
57
+
58
+ desc "version", "show Treet version"
59
+ def version
60
+ puts "Treet #{Treet::VERSION}"
61
+ end
62
+ end
63
+
64
+ TreetCommand.start
data/lib/treet/farm.rb ADDED
@@ -0,0 +1,70 @@
1
+ # encoding: UTF-8
2
+
3
+ require "uuidtools"
4
+
5
+ class Treet::Farm
6
+ attr_reader :root, :xrefkey
7
+
8
+ def initialize(opts)
9
+ raise Errno::ENOENT unless File.directory?(opts[:root])
10
+
11
+ @root = opts[:root]
12
+ @xrefkey = opts[:xref]
13
+ end
14
+
15
+ def repos
16
+ @repos_cache ||= Dir.glob("#{root}/*").each_with_object({}) do |subdir,h|
17
+ # in a Farm we are looking for repositories under the root
18
+ if File.directory?(subdir)
19
+ xref = File.basename(subdir)
20
+ h[xref] = Treet::Repo.new(subdir, :xrefkey => xrefkey, :xref => xref)
21
+ end
22
+ end
23
+ end
24
+
25
+ def reset
26
+ @repos_cache = nil
27
+ end
28
+
29
+ # export as an array, not as a hash
30
+ # the xref for each repo will be included under `xref.{xrefkey}`
31
+ def export
32
+ repos.map {|xref,repo| repo.to_hash}
33
+ end
34
+
35
+ # "plant" a new farm: given an array of hashes (in JSON), create a directory
36
+ # of Treet repositories, one per hash. Generate directory names for each repo.
37
# opts[:json] - path of a JSON file holding an array of hashes
# opts[:root] - existing directory in which the repositories are created
# opts[:xref] - optional xref field name handed on to the returned farm
#
# Returns a Treet::Farm rooted at opts[:root].
def self.plant(opts)
  jsonfile = opts[:json]
  rootdir = opts[:root]

  # File.read closes the file immediately; File.open without a block leaked the handle
  array_of_hashes = JSON.load(File.read(jsonfile))
  Dir.chdir(rootdir) do
    array_of_hashes.each do |h|
      # each record gets its own randomly named repository directory
      uuid = UUIDTools::UUID.random_create.to_s
      Treet::Hash.new(h).to_repo(uuid)
    end
  end

  Treet::Farm.new(:root => rootdir, :xref => opts[:xref])
end
52
+
53
+ # apply patches to a farm of repos
54
# patches - a hash mapping a repository xref (its directory basename under
#           the farm root) to the list of diffs to apply to that repository.
#
# Returns an array with the result of each repository's #patch call.
# Raises KeyError when a patch names a repository that is not in this farm
# (previously this surfaced as a NoMethodError on nil).
def patch(patches)
  patches.map do |xref, diffs|
    repo = repos.fetch(xref) do
      raise KeyError, "no repository named #{xref.inspect} under #{root}"
    end
    repo.patch(diffs)
  end
end
59
+
60
+ def [](xref)
61
+ repos[xref]
62
+ end
63
+
64
+ # add a new repo, with data from an input hash
65
+ def add(hash)
66
+ uuid = UUIDTools::UUID.random_create.to_s
67
+ thash = Treet::Hash.new(hash)
68
+ thash.to_repo("#{root}/#{uuid}")
69
+ end
70
+ end
data/lib/treet/hash.rb ADDED
@@ -0,0 +1,219 @@
1
+ # encoding: UTF-8
2
+
3
+ require "json"
4
+ require "digest/sha1"
5
+
6
+ class Treet::Hash
7
+ attr_reader :data
8
+
9
+ # when loading an Array (at the top level), members are always sorted
10
+ # so that array comparisons will be order-independent
11
+ def initialize(source)
12
+ d = case source
13
+ when Hash
14
+ source
15
+ when String
16
+ # treat as filename
17
+ JSON.load(File.read(source))
18
+ else
19
+ raise "Invalid source data type #{source.class} for Treet::Hash"
20
+ end
21
+
22
+ @data = normalize(d)
23
+ end
24
+
25
+ def to_repo(root)
26
+ construct(data, root)
27
+ Treet::Repo.new(root)
28
+ end
29
+
30
+ def to_hash
31
+ data.to_hash
32
+ end
33
+
34
+ def compare(target)
35
+ # HashDiff.diff(data, target.to_hash)
36
+ Treet::Hash.diff(data.to_hash, target.to_hash)
37
+ end
38
+
39
+ # apply diffs (created via the `#compare` function) to create a new object
40
+ def patch(diffs)
41
+ newhash = Treet::Hash.patch(self.to_hash, diffs)
42
+ Treet::Hash.new(newhash)
43
+ end
44
+
45
+ def self.digestify(hash)
46
+ Digest::SHA1.hexdigest(hash.to_a.sort.flatten.join)
47
+ end
48
+
49
+ private
50
+
51
# Writes `data` to disk as a repository tree rooted at `filename`.
#
# For each top-level key:
#   * a Hash value becomes a file holding the pretty-printed JSON
#   * an Array value becomes a directory; String elements become empty files
#     named after the string, any other element becomes a file named by its
#     digest and holding its JSON
#   * a String value becomes a file holding the raw string
# Any other value type raises a RuntimeError.
def construct(data, filename)
  # Create the root of the repository tree; '.' means "use the current
  # directory". Only an already-existing directory is tolerated - other
  # mkdir failures are no longer silently swallowed.
  Dir.mkdir(filename) unless filename == '.' || File.directory?(filename)

  Dir.chdir(filename) do
    data.each do |k, v|
      case v
      when Hash
        File.open(k.to_s, "w") { |f| f << JSON.pretty_generate(v) }

      when Array
        Dir.mkdir(k.to_s)
        v.each do |v2|
          case v2
          when String
            # create empty file with this name (File.write closes the handle)
            File.write("#{k}/#{v2}", "")

          else
            # store object contents as JSON into a generated filename
            subfile = "#{k}/#{Treet::Hash.digestify(v2)}"
            File.open(subfile, "w") { |f| f << JSON.pretty_generate(v2) }
          end
        end

      when String
        File.open(k.to_s, "w") { |f| f << v }

      else
        raise "Unsupported object type #{v.class} for '#{k}'"
      end
    end
  end
end
87
+
88
# Returns a copy of `hash` in which every value that is an array made up
# solely of hashes is put into a canonical order, so that two inputs that
# differ only in the order of such array members normalize to the same data.
# All other values are carried over unchanged.
#
# The previous test `v.map(&:class).uniq == Hash` compared an Array with a
# Class and was therefore never true, so the sort never ran.
def normalize(hash)
  hash.each_with_object({}) do |(k, v), h|
    h[k] =
      if v.is_a?(Array) && v.all? { |e| e.is_a?(Hash) }
        # Sort on the key-ordered, stringified contents of each member;
        # comparing strings only keeps the sort from failing on mixed value types.
        v.sort_by { |e| e.sort_by { |key, _| key.to_s }.flatten.map(&:to_s) }
      else
        v
      end
  end
end
106
+
107
+ # Diffs need to be idempotent when applied via patch.
108
+ # Therefore we can't specify individual index positions for an array, because items can move.
109
+ # Instead, we must include the entire contents of the sub-hash, and during the patch process
110
+ # compare that against each element in the array.
111
+ # This means that an array cannot have exact duplicate entries.
112
# Computes the list of changes that turn hash1 into hash2.
#
# Each entry is an array:
#   ['+', key, after-value]                 something was added
#   ['-', key, before-value]                something was removed
#   ['~', key, after-value, before-value]   a value was altered
# where key is "name.field" for a member of a sub-hash, "name[]" for an
# array element, or the plain name for a top-level scalar.
def self.diff(hash1, hash2)
  diffs = []

  (hash1.keys | hash2.keys).each do |k|
    v1 = hash1[k]
    v2 = hash2[k]

    # If a hash or array is missing on one side, compare against an empty
    # container of the same type. Scalars are left as nil so that additions
    # and removals are reported as '+' / '-'; building a dummy with
    # `class.new` fails for classes such as Integer.
    v1 = v2.class.new if v1.nil? && (v2.is_a?(Hash) || v2.is_a?(Array))
    v2 = v1.class.new if v2.nil? && (v1.is_a?(Hash) || v1.is_a?(Array))

    case v1
    when Hash
      (v2.keys - v1.keys).each do |k2|
        # new sub-elements: (+, key, after-value)
        diffs << ['+', "#{k}.#{k2}", v2[k2]]
      end
      (v1.keys - v2.keys).each do |k2|
        # deleted sub-elements: (-, key, before-value)
        diffs << ['-', "#{k}.#{k2}", v1[k2]]
      end
      (v1.keys & v2.keys).each do |k2|
        if v1[k2] != v2[k2]
          # altered sub-elements: (~, key, after-value, before-value-for-reference)
          diffs << ['~', "#{k}.#{k2}", v2[k2], v1[k2]]
        end
      end

    when Array
      v1.each do |e1|
        # element has been removed
        diffs << ['-', "#{k}[]", e1] unless v2.include?(e1)
      end

      (v2 - v1).each do |e2|
        # new array element
        diffs << ['+', "#{k}[]", e2]
      end

    else # scalar values
      if v1 != v2
        if v1.nil?
          diffs << ['+', k, v2]
        elsif v2.nil?
          diffs << ['-', k, v1]
        else
          diffs << ['~', k, v2, v1]
        end
      end
    end
  end

  diffs
end
167
+
168
# Applies `diffs` (in the format produced by `diff`) to `hash` and returns
# the patched result as a new hash; `hash` itself is left untouched.
#
# Applying the same diffs a second time yields the same result: array
# additions are skipped when the element is already present, and removals
# of things that are already gone are ignored. Entries whose value ends up
# nil or empty are dropped from the result.
def self.patch(hash, diffs)
  # Copy one level deep: the edits below modify sub-hashes and arrays in
  # place, and a plain `dup` would share those with the caller's data.
  result = hash.each_with_object({}) do |(k, v), copy|
    copy[k] = v.is_a?(Hash) || v.is_a?(Array) ? v.dup : v
  end

  diffs.each do |flag, key, v1, _before|
    keyname = key
    subkey = nil
    is_array = false
    if key =~ /\[/
      # "name[]" addresses an element of the array stored under "name"
      keyname = key.match(/^(.*)(\[\])$/).captures.first
      is_array = true
    elsif key =~ /\./
      # "name.field" addresses a member of the hash stored under "name"
      keyname, subkey = key.match(/^(.*)\.(.*)$/).captures
    end

    case flag
    when '~'
      # change a value in place
      if subkey
        (result[keyname] ||= {})[subkey] = v1
      else
        result[keyname] = v1
      end

    when '+'
      # add something
      if subkey
        (result[keyname] ||= {})[subkey] = v1
      elsif is_array
        list = (result[keyname] ||= [])
        list << v1 unless list.include?(v1)
      else
        result[keyname] = v1
      end

    when '-'
      # remove something
      if subkey
        result[keyname].delete(subkey) if result[keyname]
      elsif is_array
        result[keyname].delete_if { |v| v == v1 } if result[keyname]
      else
        result.delete(keyname)
      end
    end
  end

  # drop emptied containers; scalars without #empty? (numbers, booleans) are kept
  result.delete_if { |_k, v| v.nil? || (v.respond_to?(:empty?) && v.empty?) }

  result
end
219
+ end
data/lib/treet/repo.rb ADDED
@@ -0,0 +1,141 @@
1
+ # encoding: UTF-8
2
+
3
+ # require 'hashdiff'
4
+
5
+ class Treet::Repo
6
+ attr_reader :root, :hash, :opts
7
+
8
+ def initialize(path, opts = {})
9
+ # TODO: validate that path exists and is a directory (symlinks should work)
10
+
11
+ @root = path
12
+ raise "Missing or invalid source path #{path}" unless File.directory?(path)
13
+ @opts = opts
14
+ end
15
+
16
+ def to_hash
17
+ @hash ||= expand(root)
18
+ end
19
+
20
+ def compare(target)
21
+ Treet::Hash.diff(to_hash, target.to_hash)
22
+ # HashDiff.diff(to_hash, hash)
23
+ end
24
+
25
+ # patch keys can look like
26
+ # name.first
27
+ # emails[]
28
+ # (address[1] syntax has been eliminated, we recognize array elements by matching the entire content)
29
# Maps a patch key to the [dirname, filename, fieldname] triple used to
# locate the data on disk:
#   "emails[]"   => ["emails", "", nil]       array: directory of element files
#   "name.first" => [".", "name", "first"]    field inside a top-level file
#   "note"       => [nil, "note"]             plain top-level key
#
# Raises ArgumentError for a bracketed key that is not of the form "name[]"
# (the indexed "name[1]" syntax is not supported).
def self.filefor(keyname)
  if keyname =~ /\[/
    match = keyname.match(/^(.*)(\[\])$/)
    raise ArgumentError, "unsupported array key syntax '#{keyname}'" unless match
    [match[1], '', nil]
  elsif keyname =~ /\./
    # subelement
    filename, field = keyname.split('.')
    ['.', filename, field]
  else
    [nil, keyname]
  end
end
41
+
42
+ # Patching a repo is not the same as patching a hash. Make the changes
43
+ # directly to the data files.
44
# Applies `diffs` directly to the data files under the repository root.
#
# Each diff is [flag, key, value, (previous value)] with flag '~' (change),
# '+' (add) or '-' (remove); keys are "file.field" for a field of a
# top-level JSON file, or "dir[]" for an element of an array directory.
# Re-applying the same diffs leaves the files unchanged.
#
# Returns the freshly expanded hash for the repository.
# Raises ArgumentError for keys that address a top-level scalar, which have
# no file/field or array location to patch.
def patch(diffs)
  @hash = nil # invalidate any cached image

  Dir.chdir(root) do
    diffs.each do |flag, key, value, _before|
      dirname, filename, fieldname = Treet::Repo.filefor(key)
      if dirname.nil?
        # without a directory the path below would point at the filesystem root
        raise ArgumentError, "cannot patch top-level key '#{key}' in a repository"
      end
      filepath = "#{dirname}/#{filename}"

      case flag
      when '~'
        # change a value in place: load the current data & overwrite with the new value
        # idempotent: this will overwrite the file with the same contents
        data = File.exist?(filepath) ? JSON.load(File.read(filepath)) : {}
        data[fieldname] = value
        File.write(filepath, JSON.pretty_generate(data))

      when '+'
        # add something
        Dir.mkdir(dirname) unless Dir.exist?(dirname)
        if fieldname
          # writing a value into a hash
          # idempotent: this will overwrite the file with the same contents
          data = File.exist?(filepath) ? JSON.load(File.read(filepath)) : {}
          data[fieldname] = value
          File.write(filepath, JSON.pretty_generate(data))
        else
          # writing an entire hash into an array entry, in a file named by its digest
          # idempotent: this will overwrite the file with the same contents
          subfile = "#{dirname}/#{Treet::Hash.digestify(value)}"
          File.write(subfile, JSON.pretty_generate(value))
        end

      when '-'
        # remove something
        if fieldname
          # an already-missing file means the field is already gone
          next unless File.exist?(filepath)

          data = JSON.load(File.read(filepath))
          data.delete(fieldname)
          if data.empty?
            File.delete(filepath)
          else
            File.write(filepath, JSON.pretty_generate(data))
          end
        else
          # this is an array, we look for a match on the entire contents via digest
          subfile = "#{dirname}/#{Treet::Hash.digestify(value)}"
          File.delete(subfile) if File.exist?(subfile) # need the existence check for idempotence
          # TODO: if dirname is now empty, should it be removed? is that worthwhile?
        end
      end
    end
  end

  to_hash # ?? return the patched data? or no return value? true/false for success?
end
108
+
109
+ private
110
+
111
# Converts the file or directory at `path` into its in-memory value:
#   * an empty file yields its own basename (a key or a string array element)
#   * a non-empty file yields its parsed JSON, or the raw contents when they
#     cannot be loaded as JSON
#   * a directory yields an array with the expanded value of each
#     non-hidden entry
def expand_json(path)
  if File.file?(path)
    if File.zero?(path)
      # empty files are just keys or string elements in an array
      File.basename(path)
    else
      # if file contents is JSON, then parse it
      # otherwise treat it as a raw string value
      contents = File.read(path)
      begin
        JSON.load(contents)
      rescue StandardError
        contents
      end
    end
  else
    # a subdirectory holding one file per array element
    files = Dir.entries(path).select { |f| f !~ /^\./ }
    # Numeric names keep their numeric order; names that are not numbers
    # (e.g. digests) all map to 0, so the name itself breaks the tie and the
    # result no longer depends on the order the filesystem lists entries in.
    files.sort_by { |f| [f.to_i, f] }.map { |f| expand_json("#{path}/#{f}") }
  end
end
130
+
131
+ def expand(path)
132
+ files = Dir.entries(path).select {|f| f !~ /^\./}
133
+ hash = files.each_with_object({}) {|f,h| h[f] = expand_json("#{path}/#{f}")}
134
+
135
+ if opts[:xrefkey]
136
+ hash['xref'] ||= {}
137
+ hash['xref'][opts[:xrefkey]] = opts[:xref]
138
+ end
139
+ hash
140
+ end
141
+ end
@@ -0,0 +1,3 @@
1
+ module Treet
2
+ VERSION = "0.8.2"
3
+ end
data/lib/treet.rb ADDED
@@ -0,0 +1,13 @@
1
+ unless Kernel.respond_to?(:require_relative)
2
+ module Kernel
3
+ def require_relative(path)
4
+ require File.join(File.dirname(caller[0]), path.to_str)
5
+ end
6
+ end
7
+ end
8
+
9
+ require_relative "treet/version"
10
+
11
+ require_relative "treet/repo"
12
+ require_relative "treet/hash"
13
+ require_relative "treet/farm"
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": {
3
+ "first": "Bob",
4
+ "last": "Smith",
5
+ "full": "Bob Smith"
6
+ },
7
+ "emails": [
8
+ {
9
+ "label": "home",
10
+ "email": "bob@home.com"
11
+ },
12
+ {
13
+ "label": "work",
14
+ "email": "bob@work.com"
15
+ },
16
+ {
17
+ "label": "other",
18
+ "email": "bob@vacation.com"
19
+ }
20
+ ],
21
+ "other": {
22
+ "notes": "some commentary"
23
+ }
24
+ }