json-inference 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in json-inference.gemspec
4
+ gemspec
5
+
6
+ group :test do
7
+ gem 'shoulda-context', '~> 1.1.6'
8
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,19 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ json-inference (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ rake (10.1.1)
10
+ shoulda-context (1.1.6)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ bundler (~> 1.3)
17
+ json-inference!
18
+ rake
19
+ shoulda-context (~> 1.1.6)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Francis Hwang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # JsonInference
2
+
3
+ Given a bunch of JSON documents that are assumed to be similar, collects
4
+ info about common structure. This can be useful for getting a top-level
5
+ overview of a document datastore.
6
+
7
+ ## Example
8
+
9
+ report = JsonInference.new_report
10
+ huge_json['docs'].each do |doc|
11
+ report << doc
12
+ end
13
+ puts report.to_s
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for json-inference. Puts the gem's own lib/ directory on
# the load path so the version constant can be read before the gem is installed.
lib_dir = File.expand_path('lib', File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'json-inference/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = 'json-inference'
  gem.version     = Json::Inference::VERSION
  gem.authors     = ['Francis Hwang']
  gem.email       = ['sera@fhwang.net']
  gem.description = "Given a bunch of JSON documents that are assumed to be similar, collects info about common structure."
  gem.summary     = "Given a bunch of JSON documents that are assumed to be similar, collects info about common structure."
  gem.homepage    = ''
  gem.license     = 'MIT'

  # Package every file tracked by git; executables and tests are derived
  # from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Tooling needed only while developing the gem.
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
@@ -0,0 +1,149 @@
1
require 'date'

# Collects structural statistics over a set of similar JSON documents: which
# selectors occur, how often, and which value classes appear under each one.
module JsonInference
  # Builds an empty report that documents can be appended to with <<.
  #
  # @return [Report]
  def self.new_report
    Report.new
  end

  # Shared bookkeeping for every node of the inferred tree: a tally of the
  # value classes seen at this position and lazily created child nodes.
  class BaseNode
    # Timestamp layout that is tallied as Date, e.g. "2013-08-21T20:50:16.921Z".
    # Anchored with \A/\z so only a string that is entirely a timestamp matches
    # (with ^/$ a multi-line string containing a timestamp line would match too).
    TIMESTAMP_PATTERN = /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\z/

    def initialize
      @value_classes = Hash.new(0)
      # Children are created on first access. The reserved :nth_child key
      # holds the node describing array elements; any other key is a hash field.
      # NOTE(review): a document whose hash literally uses the Symbol key
      # :nth_child would be treated as an array slot — confirm that is acceptable.
      @sub_nodes = Hash.new do |nodes, key|
        nodes[key] = key == :nth_child ? NthChildNode.new(self) : Node.new(key, self)
      end
    end

    # Records one occurrence of +value+ at this node, descending into hashes
    # and arrays so their fields/elements are recorded on child nodes.
    #
    # @param value [Object] a parsed JSON value
    # @return [BaseNode] self
    def <<(value)
      case value
      when Hash
        value.each { |key, sub_value| @sub_nodes[key] << sub_value }
      when Array
        # Touch the entry before iterating so that an empty array still
        # creates the nth-child node (it is then reported with 0 children).
        children = @sub_nodes[:nth_child]
        value.each { |sub_value| children << sub_value }
      end
      @value_classes[classify(value)] += 1
      self
    end

    # Yields each child node, ordered by the string form of its key.
    # Keys are compared via to_s because the :nth_child Symbol can sit next
    # to String keys from parsed JSON, and Symbol/String are not comparable.
    def each_sub_node
      @sub_nodes.keys.sort_by(&:to_s).each { |key| yield @sub_nodes[key] }
    end

    # @return [Integer] nesting depth, one more than the parent's
    def indent_level
      @parent.indent_level + 1
    end

    # @return [String] leading whitespace for this node's report lines
    def indent
      ' ' * indent_level
    end

    # @return [Integer] number of values recorded at this node
    def total_count
      @value_classes.values.inject(0, :+)
    end

    private

    # Label under which +value+ is tallied: Date for timestamp strings,
    # 'Boolean' for true/false, otherwise the value's own class.
    def classify(value)
      if value.class == String && value =~ TIMESTAMP_PATTERN
        Date
      elsif [true, false].include?(value)
        'Boolean'
      else
        value.class
      end
    end

    # Rounded percentage of +part+ in +whole+; 0 when +whole+ is zero so the
    # rounding never sees NaN/Infinity.
    def percentage(part, whole)
      return 0 if whole.zero?
      (part.to_f / whole * 100).round
    end

    # One report line per value class seen here, in first-seen order.
    def value_class_lines
      @value_classes.map do |klass, count|
        " #{indent}#{klass}: #{percentage(count, total_count)}%\n"
      end.join
    end

    # Concatenated reports of all child nodes, each rendered against
    # +documents_count+ as its denominator.
    def sub_node_lines(documents_count)
      lines = []
      each_sub_node { |sub_node| lines << sub_node.to_s(documents_count) }
      lines.join
    end
  end

  # A named hash field.
  class Node < BaseNode
    # @param name [Object] the hash key this node represents
    # @param parent [BaseNode] the enclosing node
    def initialize(name = nil, parent = nil)
      super()
      @name, @parent = name, parent
    end

    # @return [String] selector path from the root down to this field
    def selector
      "#{@parent.selector} > .#{@name}"
    end

    # @param documents_count [Integer] denominator for the occurrence ratio
    # @return [String] header line: selector, occurrences and percentage
    def selector_line(documents_count)
      "#{indent}#{selector}: #{total_count}/#{documents_count} (#{percentage(total_count, documents_count)}%)\n"
    end

    # @param documents_count [Integer] denominator used for this node and its children
    # @return [String] report for this field followed by its children
    def to_s(documents_count)
      selector_line(documents_count) + value_class_lines + sub_node_lines(documents_count)
    end
  end

  # The elements of an array, aggregated across every array seen at the parent.
  class NthChildNode < BaseNode
    # @param parent [BaseNode] the node whose values are arrays
    def initialize(parent)
      super()
      @parent = parent
    end

    # @return [String] the parent's selector with an nth-child() suffix
    def selector
      "#{@parent.selector}:nth-child()"
    end

    # @param documents_count [Integer] unused; kept so all nodes share one signature
    # @return [String] header line with the total number of elements seen
    def selector_line(documents_count)
      "#{indent}#{selector}: #{total_count} child#{'ren' unless total_count == 1}\n"
    end

    # Children are rendered relative to the number of array elements rather
    # than the number of documents.
    #
    # @param documents_count [Integer] only forwarded to selector_line
    # @return [String]
    def to_s(documents_count)
      selector_line(documents_count) + value_class_lines + sub_node_lines(total_count)
    end
  end

  # Top of the tree; it prints no line of its own, only its children.
  class RootNode < BaseNode
    # -1 so that direct children end up at indent level 0.
    def indent_level
      -1
    end

    def selector
      ':root'
    end

    # @param documents_count [Integer] number of documents in the report
    # @return [String] each top-level field's report followed by a blank line
    def to_s(documents_count)
      lines = []
      each_sub_node { |sub_node| lines << sub_node.to_s(documents_count) << "\n" }
      lines.join
    end
  end

  # Accumulates documents and renders the textual summary.
  class Report
    def initialize
      # Only the number of documents is needed for rendering, so the documents
      # themselves are not retained.
      @documents_count = 0
      @root = RootNode.new
    end

    # Adds one document to the report.
    #
    # @param document [Object] a parsed JSON document (typically a Hash)
    # @return [Report] self, so appends can be chained
    def <<(document)
      @documents_count += 1
      @root << document
      self
    end

    # @return [String] header line followed by the per-selector statistics
    def to_s
      "JsonInference report: #{@documents_count} documents\n" + @root.to_s(@documents_count)
    end
  end
end
@@ -0,0 +1,5 @@
1
# Namespace that carries the gem's release number; the gemspec reads
# Json::Inference::VERSION from here.
module Json
  module Inference
    # Current release of the json-inference gem.
    VERSION = '0.0.1'
  end
end
@@ -0,0 +1,161 @@
1
+ require 'test/unit'
2
+ $: << '.'
3
+ require 'lib/json-inference'
4
+ require 'shoulda-context'
5
+
6
# Exercises JsonInference reports against small in-memory documents and checks
# the rendered text. Selector dots are escaped so the patterns match the
# literal selector, and integer values are accepted under either class name
# (Fixnum on Ruby < 2.4, Integer afterwards).
class JsonInferenceTestCase < Test::Unit::TestCase
  context "no depth, only strings" do
    setup do
      report = JsonInference.new_report
      report << {foo: 'one'}
      report << {foo: 'two', bar: 'ONE'}
      report << {foo: 'three', baz: 'won'}
      @string = report.to_s
    end

    should "count selectors as part of the total" do
      assert_match(/:root > \.foo/, @string)
      assert_match(/3\/3 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      assert_match(/bar.*baz.*foo/m, @string)
    end
  end

  context "no depth, date fields" do
    setup do
      report = JsonInference.new_report
      report << {created_at: '2013-08-21T20:50:16.921Z'}
      report << {created_at: '2013-08-21T20:50:16.555Z'}
      @string = report.to_s
    end

    should "recognize date fields based on format" do
      assert_match(/Date: 100%/, @string)
    end
  end

  context "no depth, boolean fields" do
    setup do
      report = JsonInference.new_report
      report << {featured: true}
      report << {featured: false}
      @string = report.to_s
    end

    should "group boolean fields" do
      assert_match(/Boolean: 100%/, @string)
    end
  end

  context "hash with uniform keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title', position: 1}}
      report << {embedded: {title: 'title two', position: 2}}
      @string = report.to_s
    end

    should "show full selectors" do
      assert_match(/:root > \.embedded > \.title/, @string)
      assert_match(/2\/2 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      assert_match(/embedded.*position/m, @string)
    end

    should "display count for the overall hash too" do
      assert_match(/:root > \.embedded: 2\/2 \(100%\)/, @string)
    end
  end

  context "hash with inconsistent keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: {}}
      @string = report.to_s
    end

    should "calculate percentages related to occurrences of the field" do
      assert_match(/String: 100%/, @string)
    end
  end

  context "field that is sometimes a hash and sometimes not" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: "what's this doing here"}
      @string = report.to_s
    end

    should "display all top-level classes" do
      assert_match(/Hash: 50%/, @string)
      assert_match(/String: 50%/, @string)
    end

    should "display sub nodes" do
      assert_match(/:root > \.embedded > \.title: 1\/2/, @string)
    end
  end

  context "array" do
    setup do
      report = JsonInference.new_report
      report << {items: [1, 2, 3]}
      report << {items: [4, 5, 6]}
      @string = report.to_s
    end

    should "display a different sort of selector" do
      assert_match(/:root > \.items:nth-child\(\): 6 children$/, @string)
    end

    should "count types of children" do
      # Integers report as Fixnum before Ruby 2.4 and as Integer from 2.4 on.
      assert_match(/(?:Fixnum|Integer): 100%/, @string)
    end
  end

  context "array of hashes" do
    setup do
      report = JsonInference.new_report
      report << {items: [{one: 'one', two: 'two'}, {one: 'ONE', two: 'TWO'}]}
      report << {items: [{one: 'won', two: 'too'}, {one: 1, two: 'two'}]}
      @string = report.to_s
    end

    should "count elements in each hash" do
      assert_match(/:root > \.items:nth-child\(\) > \.one: 4\/4 \(100%\)$/, @string)
      assert_match(/:root > \.items:nth-child\(\) > \.two: 4\/4 \(100%\)$/, @string)
    end

    should "count value classes in hashes too" do
      assert_match(/String: 75%/, @string)
      # Integers report as Fixnum before Ruby 2.4 and as Integer from 2.4 on.
      assert_match(/(?:Fixnum|Integer): 25%/, @string)
    end
  end

  context "empty array" do
    setup do
      report = JsonInference.new_report
      report << {items: []}
      report << {items: []}
      @string = report.to_s
    end

    should "display that there are zero children" do
      assert_match(/:root > \.items:nth-child\(\): 0 children$/, @string)
    end
  end
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json-inference
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Francis Hwang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Given a bunch of JSON documents that are assumed to be similar, collects
47
+ info about common structure.
48
+ email:
49
+ - sera@fhwang.net
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - Gemfile.lock
57
+ - LICENSE
58
+ - README.md
59
+ - Rakefile
60
+ - json-inference.gemspec
61
+ - lib/json-inference.rb
62
+ - lib/json-inference/version.rb
63
+ - test/json_inference_test.rb
64
+ homepage: ''
65
+ licenses:
66
+ - MIT
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ segments:
78
+ - 0
79
+ hash: -2986096997133027698
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ segments:
87
+ - 0
88
+ hash: -2986096997133027698
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 1.8.23
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: Given a bunch of JSON documents that are assumed to be similar, collects
95
+ info about common structure.
96
+ test_files:
97
+ - test/json_inference_test.rb