json-inference 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,8 @@
source 'https://rubygems.org'

# The gem's own dependencies are declared in json-inference.gemspec.
gemspec

# Only needed when running the test suite.
gem 'shoulda-context', '~> 1.1.6', group: :test
data/Gemfile.lock ADDED
@@ -0,0 +1,19 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ json-inference (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ rake (10.1.1)
10
+ shoulda-context (1.1.6)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ bundler (~> 1.3)
17
+ json-inference!
18
+ rake
19
+ shoulda-context (~> 1.1.6)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Francis Hwang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # JsonInference
2
+
3
+ Given a bunch of JSON documents that are assumed to be similar, collects
4
+ info about common structure. This can be useful for getting a top-level
5
+ overview of a document datastore.
6
+
7
+ ## Example
8
+
9
+ report = JsonInference.new_report
10
+ huge_json['docs'].each do |doc|
11
+ report << doc
12
+ end
13
+ puts report.to_s
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,23 @@
# coding: utf-8
# Gem specification for json-inference. The version number is read from
# lib/json-inference/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'json-inference/version'

Gem::Specification.new do |spec|
  spec.name          = "json-inference"
  spec.version       = Json::Inference::VERSION
  spec.authors       = ["Francis Hwang"]
  spec.email         = ["sera@fhwang.net"]
  spec.summary       = %q{Given a bunch of JSON documents that are assumed to be similar, collects info about common structure.}
  # Kept distinct from the summary (RubyGems warns when the two are identical).
  spec.description   = %q{Given a bunch of JSON documents that are assumed to be similar, collects info about common structure. This can be useful for getting a top-level overview of a document datastore.}
  # TODO: set the project URL; RubyGems warns when no homepage is specified.
  spec.homepage      = ""
  spec.license       = "MIT"

  # NUL-delimited listing so unusual file names cannot be split incorrectly
  # and the result does not depend on the global record separator ($/).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
@@ -0,0 +1,149 @@
# Collects structural statistics about a set of similar JSON documents and
# renders them as a plain-text report keyed by CSS-like selectors
# (":root > .field", ":root > .list:nth-child()").
module JsonInference
  # @return [Report] a new, empty report
  def self.new_report
    Report.new
  end

  # Behaviour shared by every node of the selector tree: tallying the kinds
  # of values seen at this position and lazily creating child nodes.
  class BaseNode
    # Matches strings such as "2013-08-21T20:50:16.921Z". Anchored with
    # \A and \z so that the WHOLE string has to be a timestamp; ^ and $ would
    # also accept a multi-line string that merely contains such a line.
    ISO8601_TIMESTAMP = /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\z/

    # Labels for value kinds that do not correspond to the value's own class.
    # Plain strings are used so the stdlib Date class never has to be loaded.
    DATE_LABEL = 'Date'.freeze
    BOOLEAN_LABEL = 'Boolean'.freeze

    def initialize
      # label (String) => number of values seen with that label
      @value_classes = Hash.new(0)
      # key => child node, created on first access. The reserved key
      # :nth_child holds the single node describing all elements of an Array.
      @sub_nodes = Hash.new do |nodes, key|
        nodes[key] = key == :nth_child ? NthChildNode.new(self) : Node.new(key, self)
      end
    end

    # Records one observed value at this node and recurses into Hash entries
    # and Array elements.
    #
    # @param value [Object] a parsed JSON value
    # @return [self] so that calls can be chained
    def <<(value)
      case value
      when Hash
        value.each { |key, sub_value| @sub_nodes[key] << sub_value }
      when Array
        # Touch the child first so that an empty array still yields an
        # ":nth-child(): 0 children" line in the report.
        @sub_nodes[:nth_child]
        value.each { |sub_value| @sub_nodes[:nth_child] << sub_value }
      end
      @value_classes[value_label(value)] += 1
      self
    end

    # Yields every child node ordered by the string form of its key. Sorting
    # on the string form keeps String keys and the :nth_child Symbol
    # comparable when a field is sometimes a Hash and sometimes an Array.
    #
    # @return [Enumerator] when no block is given
    def each_sub_node
      return enum_for(:each_sub_node) unless block_given?

      ordered_keys = @sub_nodes.keys.sort_by { |key| [key.to_s, key.class.to_s] }
      ordered_keys.each { |key| yield @sub_nodes[key] }
    end

    # @return [Integer] nesting depth, one more than the parent's
    def indent_level
      @parent.indent_level + 1
    end

    # @return [String] leading whitespace for this node's report lines
    def indent
      ' ' * indent_level
    end

    # @return [Integer] number of values recorded at this node
    def total_count
      @value_classes.values.inject(0) { |sum, count| sum + count }
    end

    private

    # Maps a value to the label it is tallied (and later printed) under.
    def value_label(value)
      if value.is_a?(String) && value =~ ISO8601_TIMESTAMP
        DATE_LABEL
      elsif [true, false].include?(value)
        BOOLEAN_LABEL
      else
        value.class.to_s
      end
    end

    # Renders this node's selector line, its per-label percentages and then
    # all child nodes.
    #
    # @param documents_count [Integer] denominator handed to #selector_line
    # @param sub_node_documents_count [Integer] denominator handed to children
    # @return [String]
    def render(documents_count, sub_node_documents_count)
      str = selector_line(documents_count)
      @value_classes.each do |label, count|
        str << " #{indent}#{label}: #{(count / total_count.to_f * 100).round}%\n"
      end
      each_sub_node { |sub_node| str << sub_node.to_s(sub_node_documents_count) }
      str
    end
  end

  # A named field of a Hash.
  class Node < BaseNode
    def initialize(name = nil, parent = nil)
      super()
      @name = name
      @parent = parent
    end

    def selector
      "#{@parent.selector} > .#{@name}"
    end

    # @param documents_count [Integer] how many parent occurrences exist
    # @return [String] "<selector>: seen/total (percent%)" line
    def selector_line(documents_count)
      percentage = (total_count.to_f / documents_count * 100).round
      "#{indent}#{selector}: #{total_count}/#{documents_count} (#{percentage}%)\n"
    end

    def to_s(documents_count)
      render(documents_count, documents_count)
    end
  end

  # The collective element position of an Array.
  class NthChildNode < BaseNode
    def initialize(parent)
      super()
      @parent = parent
    end

    def selector
      "#{@parent.selector}:nth-child()"
    end

    # @param documents_count [Integer] unused; accepted for interface parity
    # @return [String] "<selector>: N child(ren)" line
    def selector_line(documents_count)
      "#{indent}#{selector}: #{total_count} child#{'ren' unless total_count == 1}\n"
    end

    def to_s(documents_count)
      # Fields below an array element are measured against the number of
      # elements seen, not against the number of documents.
      render(documents_count, total_count)
    end
  end

  # The top of the tree; it prints no line of its own.
  class RootNode < BaseNode
    def indent_level
      # Direct children add one to this, which puts them at depth 0.
      -1
    end

    def selector
      ':root'
    end

    def to_s(documents_count)
      each_sub_node.map { |sub_node| "#{sub_node.to_s(documents_count)}\n" }.join
    end
  end

  # Entry point: feed documents in with #<< and print the result with #to_s.
  class Report
    def initialize
      # Only the count is kept; retaining every document would hold the
      # whole data set in memory for no benefit.
      @documents_count = 0
      @root = RootNode.new
    end

    # @param document [Hash, Array, Object] one parsed JSON document
    # @return [self] so that calls can be chained
    def <<(document)
      @documents_count += 1
      @root << document
      self
    end

    def to_s
      "JsonInference report: #{@documents_count} documents\n" << @root.to_s(@documents_count)
    end
  end
end
@@ -0,0 +1,5 @@
# Release metadata for the json-inference gem.
# NOTE(review): this lives under Json::Inference while the library itself is
# defined in the JsonInference module; confirm whether that split is intended.
module Json
  module Inference
    # Version string read by the gemspec; frozen so it cannot be mutated.
    VERSION = "0.0.1".freeze
  end
end
@@ -0,0 +1,161 @@
1
+ require 'test/unit'
2
+ $: << '.'
3
+ require 'lib/json-inference'
4
+ require 'shoulda-context'
5
+
# Exercises JsonInference reports end to end: each context feeds a few
# documents into a fresh report and matches against the rendered text.
class JsonInferenceTestCase < Test::Unit::TestCase
  context "no depth, only strings" do
    setup do
      report = JsonInference.new_report
      report << {foo: 'one'}
      report << {foo: 'two', bar: 'ONE'}
      report << {foo: 'three', baz: 'won'}
      @string = report.to_s
    end

    should "count selectors as part of the total" do
      assert_match(/:root > .foo/, @string)
      assert_match(/3\/3 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      assert_match(/bar.*baz.*foo/m, @string)
    end
  end

  context "no depth, date fields" do
    setup do
      report = JsonInference.new_report
      report << {created_at: '2013-08-21T20:50:16.921Z'}
      report << {created_at: '2013-08-21T20:50:16.555Z'}
      @string = report.to_s
    end

    should "recognize date fields based on format" do
      assert_match(/Date: 100%/, @string)
    end
  end

  context "no depth, boolean fields" do
    setup do
      report = JsonInference.new_report
      report << {featured: true}
      report << {featured: false}
      @string = report.to_s
    end

    should "group boolean fields" do
      assert_match(/Boolean: 100%/, @string)
    end
  end

  context "hash with uniform keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title', position: 1}}
      report << {embedded: {title: 'title two', position: 2}}
      @string = report.to_s
    end

    should "show full selectors" do
      assert_match(/:root > .embedded > .title/, @string)
      assert_match(/2\/2 \(100%\)/, @string)
    end

    should "count classes per selector" do
      assert_match(/String: 100%/, @string)
    end

    should "sort report by selector" do
      assert_match(/embedded.*position/m, @string)
    end

    should "display count for the overall hash too" do
      assert_match(/:root > .embedded: 2\/2 \(100%\)/, @string)
    end
  end

  context "hash with inconsistent keys" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: {}}
      @string = report.to_s
    end

    should "calculate percentages related to occurrences of the field" do
      assert_match(/String: 100%/, @string)
    end
  end

  context "field that is sometimes a hash and sometimes not" do
    setup do
      report = JsonInference.new_report
      report << {embedded: {title: 'title'}}
      report << {embedded: "what's this doing here"}
      @string = report.to_s
    end

    should "display all top-level classes" do
      assert_match(/Hash: 50%/, @string)
      assert_match(/String: 50%/, @string)
    end

    should "display sub nodes" do
      assert_match(/:root > .embedded > .title: 1\/2/, @string)
    end
  end

  context "array" do
    setup do
      report = JsonInference.new_report
      report << {items: [1, 2, 3]}
      report << {items: [4, 5, 6]}
      @string = report.to_s
    end

    should "display a different sort of selector" do
      assert_match(/:root > .items:nth-child\(\): 6 children$/, @string)
    end

    should "count types of children" do
      # Integers report as Fixnum before Ruby 2.4 and as Integer afterwards.
      assert_match(/(?:Fixnum|Integer): 100%/, @string)
    end
  end

  context "array of hashes" do
    setup do
      report = JsonInference.new_report
      report << {items: [{one: 'one', two: 'two'}, {one: 'ONE', two: 'TWO'}]}
      report << {items: [{one: 'won', two: 'too'}, {one: 1, two: 'two'}]}
      @string = report.to_s
    end

    should "count elements in each hash" do
      assert_match(/:root > .items:nth-child\(\) > .one: 4\/4 \(100%\)$/, @string)
      assert_match(/:root > .items:nth-child\(\) > .two: 4\/4 \(100%\)$/, @string)
    end

    should "count value classes in hashes too" do
      assert_match(/String: 75%/, @string)
      # Integers report as Fixnum before Ruby 2.4 and as Integer afterwards.
      assert_match(/(?:Fixnum|Integer): 25%/, @string)
    end
  end

  context "empty array" do
    setup do
      report = JsonInference.new_report
      report << {items: []}
      report << {items: []}
      @string = report.to_s
    end

    should "display that there are zero children" do
      assert_match(/:root > .items:nth-child\(\): 0 children$/, @string)
    end
  end
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json-inference
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Francis Hwang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-20 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Given a bunch of JSON documents that are assumed to be similar, collects
47
+ info about common structure.
48
+ email:
49
+ - sera@fhwang.net
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - Gemfile.lock
57
+ - LICENSE
58
+ - README.md
59
+ - Rakefile
60
+ - json-inference.gemspec
61
+ - lib/json-inference.rb
62
+ - lib/json-inference/version.rb
63
+ - test/json_inference_test.rb
64
+ homepage: ''
65
+ licenses:
66
+ - MIT
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ segments:
78
+ - 0
79
+ hash: -2986096997133027698
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ segments:
87
+ - 0
88
+ hash: -2986096997133027698
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 1.8.23
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: Given a bunch of JSON documents that are assumed to be similar, collects
95
+ info about common structure.
96
+ test_files:
97
+ - test/json_inference_test.rb