json-inference 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +19 -0
- data/LICENSE +20 -0
- data/README.md +13 -0
- data/Rakefile +1 -0
- data/json-inference.gemspec +23 -0
- data/lib/json-inference.rb +149 -0
- data/lib/json-inference/version.rb +5 -0
- data/test/json_inference_test.rb +161 -0
- metadata +97 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
json-inference (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
rake (10.1.1)
|
10
|
+
shoulda-context (1.1.6)
|
11
|
+
|
12
|
+
PLATFORMS
|
13
|
+
ruby
|
14
|
+
|
15
|
+
DEPENDENCIES
|
16
|
+
bundler (~> 1.3)
|
17
|
+
json-inference!
|
18
|
+
rake
|
19
|
+
shoulda-context (~> 1.1.6)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Francis Hwang
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# JsonInference
|
2
|
+
|
3
|
+
Given a bunch of JSON documents that are assumed to be similar, collects
|
4
|
+
info about common structure. This can be useful for getting a top-level
|
5
|
+
overview of a document datastore.
|
6
|
+
|
7
|
+
## Example
|
8
|
+
|
9
|
+
report = JsonInference.new_report
|
10
|
+
huge_json['docs'].each do |doc|
|
11
|
+
report << doc
|
12
|
+
end
|
13
|
+
puts report.to_s
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for json-inference.
#
# Puts the gem's own lib/ directory on the load path so the version file can
# be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'json-inference/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = "json-inference"
  gem.version     = Json::Inference::VERSION
  gem.authors     = ["Francis Hwang"]
  gem.email       = ["sera@fhwang.net"]
  gem.description = "Given a bunch of JSON documents that are assumed to be similar, collects info about common structure."
  gem.summary     = "Given a bunch of JSON documents that are assumed to be similar, collects info about common structure."
  gem.homepage    = ""
  gem.license     = "MIT"

  # Packaged files: everything tracked by git, one path per line of output.
  gem.files         = %x(git ls-files).split($/)
  # Executables are the basenames of tracked files under bin/.
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
# Collects structural statistics about a set of JSON-like documents (nested
# Hashes, Arrays and scalars) and renders them as a text report keyed by
# CSS-like selectors such as ":root > .items:nth-child() > .title".
module JsonInference
  # Returns a new, empty Report.
  def self.new_report
    Report.new
  end

  # Shared behaviour for every node of the inferred document tree: counting
  # the classes of the values seen at this position and lazily creating the
  # sub nodes for hash keys and array elements.
  class BaseNode
    # Timestamp strings such as "2013-08-21T20:50:16.921Z". Anchored with
    # \A and \z so the WHOLE string has to match; ^ and $ would also accept a
    # multi-line string that merely contains a timestamp on one of its lines.
    TIMESTAMP_PATTERN = /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\z/

    def initialize
      # label (a Class, or the strings 'Date' / 'Boolean') => occurrences
      @value_classes = Hash.new(0)
      # Sub nodes are created on first access. The reserved key :nth_child
      # holds the node that aggregates all elements of array values.
      # NOTE(review): a document whose hash uses the Symbol key :nth_child
      # would share that node with array elements -- confirm this is acceptable.
      @sub_nodes = Hash.new do |nodes, key|
        nodes[key] = key == :nth_child ? NthChildNode.new(self) : Node.new(key, self)
      end
    end

    # Records one value observed at this node, descending into Hash values
    # and Array elements.
    #
    # Returns the updated occurrence count for the value's class label.
    def <<(value)
      case value
      when Hash
        value.each { |key, sub_value| @sub_nodes[key] << sub_value }
      when Array
        # Touch the node before iterating so that empty arrays still create
        # it and are reported as having "0 children".
        elements_node = @sub_nodes[:nth_child]
        value.each { |sub_value| elements_node << sub_value }
      end
      @value_classes[classify(value)] += 1
    end

    # Yields every sub node, ordered by the string form of its key. Sorting
    # on to_s keeps String keys (as produced by JSON parsers) comparable with
    # the :nth_child Symbol; a plain sort raises ArgumentError on that mix.
    def each_sub_node
      @sub_nodes.keys.sort_by { |key| key.to_s }.each do |key|
        yield @sub_nodes[key]
      end
    end

    # Depth used for indentation: one more than the parent's.
    def indent_level
      @parent.indent_level + 1
    end

    # Leading whitespace for this node's lines in the report.
    def indent
      ' ' * indent_level
    end

    # Total number of values recorded at this node (0 when none).
    def total_count
      @value_classes.values.inject(0) { |sum, count| sum + count }
    end

    private

    # Label under which a value is counted: 'Date' for timestamp-formatted
    # strings, 'Boolean' for true/false, otherwise the value's class. String
    # labels are used for the two synthetic categories so that no constant
    # from outside this file (such as ::Date) has to be loaded.
    def classify(value)
      if value.class == String && value =~ TIMESTAMP_PATTERN
        'Date'
      elsif [true, false].include?(value)
        'Boolean'
      else
        value.class
      end
    end

    # Rounded integer percentage of part in whole; 0 when whole is zero, so
    # rendering never raises FloatDomainError on a NaN.
    def percent(part, whole)
      return 0 if whole.zero?
      (part.to_f / whole * 100).round
    end

    # Renders the selector line, the per-class breakdown and all sub nodes.
    # documents_count is the denominator for this node's own selector line;
    # sub_documents_count is the denominator handed down to the sub nodes.
    def render(documents_count, sub_documents_count)
      str = ""
      str << selector_line(documents_count)
      @value_classes.each do |klass, count|
        str << " #{indent}#{klass}: #{percent(count, total_count)}%\n"
      end
      each_sub_node { |sub_node| str << sub_node.to_s(sub_documents_count) }
      str
    end
  end

  # A value stored under a hash key.
  class Node < BaseNode
    def initialize(name = nil, parent = nil)
      super()
      @name, @parent = name, parent
    end

    # CSS-like selector, e.g. ":root > .embedded > .title".
    def selector
      "#{@parent.selector} > .#{@name}"
    end

    # Report line showing in how many of documents_count parents this key
    # was present.
    def selector_line(documents_count)
      "#{indent}#{selector}: #{total_count}/#{documents_count} (#{percent(total_count, documents_count)}%)\n"
    end

    # Text report for this node and everything below it. Sub nodes are
    # measured against the same documents_count.
    def to_s(documents_count)
      render(documents_count, documents_count)
    end
  end

  # Aggregates all elements of the array values seen at the parent node.
  class NthChildNode < BaseNode
    def initialize(parent)
      super()
      @parent = parent
    end

    # CSS-like selector, e.g. ":root > .items:nth-child()".
    def selector
      "#{@parent.selector}:nth-child()"
    end

    # Report line with the number of array elements seen; documents_count is
    # accepted for interface parity with Node but is not used.
    def selector_line(documents_count)
      "#{indent}#{selector}: #{total_count} child#{'ren' unless total_count == 1}\n"
    end

    # Text report for the array elements. Sub nodes are measured against the
    # number of elements rather than the number of documents.
    def to_s(documents_count)
      render(documents_count, total_count)
    end
  end

  # Top of the tree; stands for the documents themselves.
  class RootNode < BaseNode
    # -1 so that the root's direct sub nodes end up at indent level 0.
    def indent_level
      -1
    end

    def selector
      ':root'
    end

    # Concatenates the reports of all top-level sub nodes, each followed by
    # a blank line.
    def to_s(documents_count)
      str = ""
      each_sub_node do |sub_node|
        str << sub_node.to_s(documents_count)
        str << "\n"
      end
      str
    end
  end

  # Entry point: feed documents in with <<, then call to_s for the report.
  class Report
    def initialize
      # Only the number of documents is needed for the report, so documents
      # are counted rather than retained.
      @documents_count = 0
      @root = RootNode.new
    end

    # Adds one document (typically a Hash or an Array) to the statistics.
    def <<(document)
      @documents_count += 1
      @root << document
    end

    # Full text report for all documents added so far.
    def to_s
      str = "JsonInference report: #{@documents_count} documents\n"
      str << @root.to_s(@documents_count)
      str
    end
  end
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
$: << '.'
|
3
|
+
require 'lib/json-inference'
|
4
|
+
require 'shoulda-context'
|
5
|
+
|
6
|
+
# Behavioural tests for JsonInference reports. Each context feeds a handful
# of documents into a fresh report and matches against the rendered text.
class JsonInferenceTestCase < Test::Unit::TestCase
  # Name under which integer values are reported. Ruby >= 2.4 reports
  # Integer, older versions Fixnum; deriving it keeps the suite
  # version-independent.
  INTEGER_CLASS_NAME = 1.class.name

  context "no depth, only strings" do
    setup do
      report = JsonInference.new_report
      report << {foo: 'one'}
      report << {foo: 'two', bar: 'ONE'}
      report << {foo: 'three', baz: 'won'}
      @string = report.to_s
    end

    should "count selectors as part of the total" do
      assert_match(/:root > \.foo: 3\/3 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      assert_match(/bar.*baz.*foo/m, @string)
    end
  end

  context "no depth, date fields" do
    setup do
      report = JsonInference.new_report
      report << {created_at: '2013-08-21T20:50:16.921Z'}
      report << {created_at: '2013-08-21T20:50:16.555Z'}
      @string = report.to_s
    end

    should "recognize date fields based on format" do
      assert_match(/Date: 100%/, @string)
    end
  end

  context "no depth, boolean fields" do
    setup do
      report = JsonInference.new_report
      report << {featured: true}
      report << {featured: false}
      @string = report.to_s
    end

    should "group boolean fields" do
      assert_match(/Boolean: 100%/, @string)
    end
  end

  context "hash with uniform keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title', position: 1}}
      report << {embedded: {title: 'title two', position: 2}}
      @string = report.to_s
    end

    should "show full selectors" do
      assert_match(/:root > \.embedded > \.title/, @string)
      assert_match(/2\/2 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      # The parent comes first, then its sub nodes in key order.
      assert_match(/embedded.*\.position.*\.title/m, @string)
    end

    should "display count for the overall hash too" do
      assert_match(/:root > \.embedded: 2\/2 \(100%\)/, @string)
    end
  end

  context "hash with inconsistent keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: {}}
      @string = report.to_s
    end

    should "calculate percentages related to occurrences of the field" do
      assert_match(/String: 100%/, @string)
    end
  end

  context "field that is sometimes a hash and sometimes not" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: "what's this doing here"}
      @string = report.to_s
    end

    should "display all top-level classes" do
      assert_match(/Hash: 50%/, @string)
      assert_match(/String: 50%/, @string)
    end

    should "display sub nodes" do
      assert_match(/:root > \.embedded > \.title: 1\/2/, @string)
    end
  end

  context "array" do
    setup do
      report = JsonInference.new_report
      report << {items: [1, 2, 3]}
      report << {items: [4, 5, 6]}
      @string = report.to_s
    end

    should "display a different sort of selector" do
      assert_match(/:root > \.items:nth-child\(\): 6 children$/, @string)
    end

    should "count types of children" do
      assert_match(/#{INTEGER_CLASS_NAME}: 100%/, @string)
    end
  end

  context "array of hashes" do
    setup do
      report = JsonInference.new_report
      report << {items: [{one: 'one', two: 'two'}, {one: 'ONE', two: 'TWO'}]}
      report << {items: [{one: 'won', two: 'too'}, {one: 1, two: 'two'}]}
      @string = report.to_s
    end

    should "count elements in each hash" do
      assert_match(/:root > \.items:nth-child\(\) > \.one: 4\/4 \(100%\)$/, @string)
      assert_match(/:root > \.items:nth-child\(\) > \.two: 4\/4 \(100%\)$/, @string)
    end

    should "count value classes in hashes too" do
      assert_match(/String: 75%/, @string)
      assert_match(/#{INTEGER_CLASS_NAME}: 25%/, @string)
    end
  end

  context "empty array" do
    setup do
      report = JsonInference.new_report
      report << {items: []}
      report << {items: []}
      @string = report.to_s
    end

    should "display that there are zero children" do
      assert_match(/:root > \.items:nth-child\(\): 0 children$/, @string)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: json-inference
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Francis Hwang
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-01-20 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.3'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.3'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: Given a bunch of JSON documents that are assumed to be similar, collects
|
47
|
+
info about common structure.
|
48
|
+
email:
|
49
|
+
- sera@fhwang.net
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- Gemfile.lock
|
57
|
+
- LICENSE
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- json-inference.gemspec
|
61
|
+
- lib/json-inference.rb
|
62
|
+
- lib/json-inference/version.rb
|
63
|
+
- test/json_inference_test.rb
|
64
|
+
homepage: ''
|
65
|
+
licenses:
|
66
|
+
- MIT
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
hash: -2986096997133027698
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
hash: -2986096997133027698
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 1.8.23
|
92
|
+
signing_key:
|
93
|
+
specification_version: 3
|
94
|
+
summary: Given a bunch of JSON documents that are assumed to be similar, collects
|
95
|
+
info about common structure.
|
96
|
+
test_files:
|
97
|
+
- test/json_inference_test.rb
|