json-inference 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/json-inference.rb +53 -116
- data/lib/json-inference/base_node.rb +44 -0
- data/lib/json-inference/node.rb +26 -0
- data/lib/json-inference/nth_child_node.rb +26 -0
- data/lib/json-inference/root_node.rb +20 -0
- data/lib/json-inference/version.rb +1 -1
- data/test/json_inference_test.rb +14 -0
- metadata +8 -4
data/Gemfile.lock
CHANGED
data/lib/json-inference.rb
CHANGED
@@ -3,147 +3,84 @@ module JsonInference
|
|
3
3
|
Report.new
|
4
4
|
end
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
@value_classes = Hash.new 0
|
9
|
-
@sub_nodes = Hash.new { |h,k|
|
10
|
-
if k == :nth_child
|
11
|
-
sub_node = NthChildNode.new(self)
|
12
|
-
else
|
13
|
-
sub_node = Node.new(k, self)
|
14
|
-
end
|
15
|
-
h[k] = sub_node
|
16
|
-
}
|
17
|
-
end
|
18
|
-
|
19
|
-
def <<(value)
|
20
|
-
if value.is_a?(Hash)
|
21
|
-
value.each do |key, sub_value|
|
22
|
-
@sub_nodes[key] << sub_value
|
23
|
-
end
|
24
|
-
elsif value.is_a?(Array)
|
25
|
-
@sub_nodes[:nth_child]
|
26
|
-
value.each do |sub_value|
|
27
|
-
@sub_nodes[:nth_child] << sub_value
|
28
|
-
end
|
29
|
-
end
|
30
|
-
if value.class == String && value =~ /^(\d){4}-(\d){2}-(\d){2}T(\d){2}:(\d){2}:(\d){2}\.(\d){3}Z$/
|
31
|
-
@value_classes[Date] += 1
|
32
|
-
elsif [true, false].include?(value)
|
33
|
-
@value_classes['Boolean'] += 1
|
34
|
-
else
|
35
|
-
@value_classes[value.class] += 1
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def each_sub_node
|
40
|
-
@sub_nodes.keys.sort.each do |key|
|
41
|
-
sub_node = @sub_nodes[key]
|
42
|
-
yield sub_node
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def indent_level
|
47
|
-
@parent.indent_level + 1
|
48
|
-
end
|
49
|
-
|
50
|
-
def indent
|
51
|
-
' ' * indent_level
|
52
|
-
end
|
53
|
-
|
54
|
-
def total_count
|
55
|
-
@value_classes.values.inject { |sum, i| sum + i } || 0
|
56
|
-
end
|
6
|
+
def self.percent_string(numerator, denominator)
|
7
|
+
"#{(numerator / denominator.to_f * 100).round}%"
|
57
8
|
end
|
58
9
|
|
59
|
-
class
|
60
|
-
def initialize
|
61
|
-
|
62
|
-
@
|
63
|
-
end
|
64
|
-
|
65
|
-
def selector
|
66
|
-
"#{@parent.selector} > .#{@name}"
|
10
|
+
class Report
|
11
|
+
def initialize
|
12
|
+
@documents = []
|
13
|
+
@root = RootNode.new
|
67
14
|
end
|
68
15
|
|
69
|
-
def
|
70
|
-
|
16
|
+
def <<(document)
|
17
|
+
@documents << document
|
18
|
+
@root << document
|
71
19
|
end
|
72
20
|
|
73
|
-
def to_s
|
74
|
-
str = ""
|
75
|
-
str <<
|
76
|
-
@value_classes.each do |klass, count|
|
77
|
-
str << " #{indent}#{klass}: #{(count / total_count.to_f * 100).round}%\n"
|
78
|
-
end
|
79
|
-
each_sub_node do |sub_node|
|
80
|
-
str << sub_node.to_s(documents_count)
|
81
|
-
end
|
21
|
+
def to_s
|
22
|
+
str = "JsonInference report: #{@documents.size} documents\n"
|
23
|
+
str << @root.to_s(@documents.size)
|
82
24
|
str
|
83
25
|
end
|
84
26
|
end
|
85
27
|
|
86
|
-
class
|
87
|
-
def initialize
|
88
|
-
|
89
|
-
@parent = parent
|
28
|
+
class NodeValuesCollection
|
29
|
+
def initialize
|
30
|
+
@value_counters = Hash.new { |h,k| h[k] = ValueCounter.new(k) }
|
90
31
|
end
|
91
32
|
|
92
|
-
def
|
93
|
-
|
33
|
+
def <<(value)
|
34
|
+
if value.class == String && value =~ /^(\d){4}-(\d){2}-(\d){2}T(\d){2}:(\d){2}:(\d){2}\.(\d){3}Z$/
|
35
|
+
@value_counters[Date] << value
|
36
|
+
elsif [true, false].include?(value)
|
37
|
+
@value_counters['Boolean'] << value
|
38
|
+
else
|
39
|
+
@value_counters[value.class] << value
|
40
|
+
end
|
94
41
|
end
|
95
42
|
|
96
|
-
def
|
97
|
-
|
43
|
+
def size
|
44
|
+
@value_counters.values.inject(0) { |sum, counter| sum + counter.size } || 0
|
98
45
|
end
|
99
46
|
|
100
|
-
def to_s(
|
47
|
+
def to_s(indent)
|
101
48
|
str = ""
|
102
|
-
|
103
|
-
|
104
|
-
str << " #{indent}#{klass}: #{(count / total_count.to_f * 100).round}%\n"
|
105
|
-
end
|
106
|
-
each_sub_node do |sub_node|
|
107
|
-
str << sub_node.to_s(total_count)
|
49
|
+
@value_counters.values.each do |value_counter|
|
50
|
+
str << " #{indent}#{value_counter.to_s(size)}\n"
|
108
51
|
end
|
109
52
|
str
|
110
53
|
end
|
111
|
-
end
|
112
54
|
|
113
|
-
|
114
|
-
|
115
|
-
-1
|
116
|
-
end
|
55
|
+
class ValueCounter
|
56
|
+
attr_reader :size
|
117
57
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
def to_s(documents_count)
|
123
|
-
str = ""
|
124
|
-
each_sub_node do |sub_node|
|
125
|
-
str << sub_node.to_s(documents_count)
|
126
|
-
str << "\n"
|
58
|
+
def initialize(reported_class)
|
59
|
+
@reported_class = reported_class
|
60
|
+
@size = 0
|
61
|
+
@empties = 0
|
127
62
|
end
|
128
|
-
str
|
129
|
-
end
|
130
|
-
end
|
131
63
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
def <<(document)
|
139
|
-
@documents << document
|
140
|
-
@root << document
|
141
|
-
end
|
64
|
+
def <<(value)
|
65
|
+
@size += 1
|
66
|
+
if [Array, String].include?(@reported_class)
|
67
|
+
@empties += 1 if value.empty?
|
68
|
+
end
|
69
|
+
end
|
142
70
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
71
|
+
def to_s(all_values_count)
|
72
|
+
str = "#{@reported_class}: #{JsonInference.percent_string(size, all_values_count)}"
|
73
|
+
if [Array, String].include?(@reported_class)
|
74
|
+
str << ", #{JsonInference.percent_string(@empties, size)} empty"
|
75
|
+
end
|
76
|
+
str
|
77
|
+
end
|
147
78
|
end
|
148
79
|
end
|
149
80
|
end
|
81
|
+
|
82
|
+
dir = File.dirname(__FILE__) + "/json-inference"
|
83
|
+
require "#{dir}/base_node"
|
84
|
+
require "#{dir}/node"
|
85
|
+
require "#{dir}/nth_child_node"
|
86
|
+
require "#{dir}/root_node"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module JsonInference
|
2
|
+
class BaseNode
|
3
|
+
def initialize
|
4
|
+
@values = NodeValuesCollection.new
|
5
|
+
@sub_nodes = Hash.new { |h,k|
|
6
|
+
if k == :nth_child
|
7
|
+
sub_node = JsonInference::NthChildNode.new(self)
|
8
|
+
else
|
9
|
+
sub_node = JsonInference::Node.new(k, self)
|
10
|
+
end
|
11
|
+
h[k] = sub_node
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
def <<(value)
|
16
|
+
if value.is_a?(Hash)
|
17
|
+
value.each do |key, sub_value|
|
18
|
+
@sub_nodes[key] << sub_value
|
19
|
+
end
|
20
|
+
elsif value.is_a?(Array)
|
21
|
+
@sub_nodes[:nth_child]
|
22
|
+
value.each do |sub_value|
|
23
|
+
@sub_nodes[:nth_child] << sub_value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
@values << value
|
27
|
+
end
|
28
|
+
|
29
|
+
def each_sub_node
|
30
|
+
@sub_nodes.keys.sort.each do |key|
|
31
|
+
sub_node = @sub_nodes[key]
|
32
|
+
yield sub_node
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def indent_level
|
37
|
+
@parent.indent_level + 1
|
38
|
+
end
|
39
|
+
|
40
|
+
def indent
|
41
|
+
' ' * indent_level
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module JsonInference
|
2
|
+
class Node < BaseNode
|
3
|
+
def initialize(name = nil, parent = nil)
|
4
|
+
super()
|
5
|
+
@name, @parent = name, parent
|
6
|
+
end
|
7
|
+
|
8
|
+
def selector
|
9
|
+
"#{@parent.selector} > .#{@name}"
|
10
|
+
end
|
11
|
+
|
12
|
+
def selector_line(documents_count)
|
13
|
+
"#{indent}#{selector}: #{@values.size}/#{documents_count} (#{JsonInference.percent_string(@values.size, documents_count)})\n"
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_s(documents_count)
|
17
|
+
str = ""
|
18
|
+
str << selector_line(documents_count)
|
19
|
+
str << @values.to_s(indent)
|
20
|
+
each_sub_node do |sub_node|
|
21
|
+
str << sub_node.to_s(documents_count)
|
22
|
+
end
|
23
|
+
str
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module JsonInference
|
2
|
+
class NthChildNode < BaseNode
|
3
|
+
def initialize(parent)
|
4
|
+
super()
|
5
|
+
@parent = parent
|
6
|
+
end
|
7
|
+
|
8
|
+
def selector
|
9
|
+
"#{@parent.selector}:nth-child()"
|
10
|
+
end
|
11
|
+
|
12
|
+
def selector_line(documents_count)
|
13
|
+
"#{indent}#{selector}: #{@values.size} child#{'ren' unless @values.size == 1}\n"
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_s(documents_count)
|
17
|
+
str = ""
|
18
|
+
str << selector_line(documents_count)
|
19
|
+
str << @values.to_s(indent)
|
20
|
+
each_sub_node do |sub_node|
|
21
|
+
str << sub_node.to_s(@values.size)
|
22
|
+
end
|
23
|
+
str
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module JsonInference
|
2
|
+
class RootNode < BaseNode
|
3
|
+
def indent_level
|
4
|
+
-1
|
5
|
+
end
|
6
|
+
|
7
|
+
def selector
|
8
|
+
':root'
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_s(documents_count)
|
12
|
+
str = ""
|
13
|
+
each_sub_node do |sub_node|
|
14
|
+
str << sub_node.to_s(documents_count)
|
15
|
+
str << "\n"
|
16
|
+
end
|
17
|
+
str
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/test/json_inference_test.rb
CHANGED
@@ -158,4 +158,18 @@ class JsonInferenceTestCase < Test::Unit::TestCase
|
|
158
158
|
assert_match(/:root > .items:nth-child\(\): 0 children$/, @string)
|
159
159
|
end
|
160
160
|
end
|
161
|
+
|
162
|
+
context "field with empty strings" do
|
163
|
+
setup do
|
164
|
+
report = JsonInference.new_report
|
165
|
+
report << {foo: 'one'}
|
166
|
+
report << {foo: '', bar: 'ONE'}
|
167
|
+
report << {foo: '', baz: 'won'}
|
168
|
+
@string = report.to_s
|
169
|
+
end
|
170
|
+
|
171
|
+
should "note how likely it is to be empty" do
|
172
|
+
assert_match(/String: 100%, 67% empty/, @string)
|
173
|
+
end
|
174
|
+
end
|
161
175
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-inference
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -59,6 +59,10 @@ files:
|
|
59
59
|
- Rakefile
|
60
60
|
- json-inference.gemspec
|
61
61
|
- lib/json-inference.rb
|
62
|
+
- lib/json-inference/base_node.rb
|
63
|
+
- lib/json-inference/node.rb
|
64
|
+
- lib/json-inference/nth_child_node.rb
|
65
|
+
- lib/json-inference/root_node.rb
|
62
66
|
- lib/json-inference/version.rb
|
63
67
|
- test/json_inference_test.rb
|
64
68
|
homepage: ''
|
@@ -76,7 +80,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
80
|
version: '0'
|
77
81
|
segments:
|
78
82
|
- 0
|
79
|
-
hash: -
|
83
|
+
hash: -3344776264180278235
|
80
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
85
|
none: false
|
82
86
|
requirements:
|
@@ -85,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
89
|
version: '0'
|
86
90
|
segments:
|
87
91
|
- 0
|
88
|
-
hash: -
|
92
|
+
hash: -3344776264180278235
|
89
93
|
requirements: []
|
90
94
|
rubyforge_project:
|
91
95
|
rubygems_version: 1.8.23
|