bxtjson 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bxtjson.rb +285 -0
- metadata +91 -0
data/lib/bxtjson.rb
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# input stream style (new-line separated) json objects
|
|
2
|
+
# main method map_onto_skeleton_of_schema
|
|
3
|
+
# JSON -> JSON
|
|
4
|
+
|
|
5
|
+
require 'multi_json'
|
|
6
|
+
require 'json_schema'
|
|
7
|
+
# == dependencies
|
|
8
|
+
# * gem: json_schema
|
|
9
|
+
# * gem: multi_json
|
|
10
|
+
# * JSON standard library or other (e.g. oj) json parsers
|
|
11
|
+
# * Optionally, a model class of your own (e.g. Sequel or ActiveRecord) that responds to +create+.
|
|
12
|
+
# == Constants (examples)
|
|
13
|
+
# cleaner_proc = ->(str) {str.gsub(/\W+/, " ").lstrip
|
|
14
|
+
# .gsub(" ", "_")
|
|
15
|
+
# .gsub(/PPPO_|PPCO_/, "")
|
|
16
|
+
# .downcase
|
|
17
|
+
# }
|
|
18
|
+
# json_filename = "../../../fsms-tmp/PP_PROPOSAL_2015.JSON"
|
|
19
|
+
# schema_filename = "./docs/schema.json"
|
|
20
|
+
# authorizing_pointer = '#/departments/primary_dept'
|
|
21
|
+
|
|
22
|
+
module Bxtjson
  # Build an empty ("skeleton") structure from a json-schema[http://json-schema.org].
  #
  # @param [Hash] schema_data the parsed json-schema document
  # @param [String, nil] entity name of a top-level property to focus on;
  #   when nil the whole schema is used
  # @return [Hash, Array, nil] the skeleton built by _skeleton
  def self.skeleton(schema_data:, entity: nil)
    schema = JsonSchema.parse!(schema_data)
    schema.expand_references!
    entity_schema = entity.nil? ? schema : schema.properties[entity]
    # _skeleton takes its accumulator positionally and never reads it, so
    # nothing needs to be passed here.
    _skeleton(entity_schema)
  end

  # Lazily read a jsonl[https://github.com/stephenplusplus/jsonl] file
  # (one JSON document per line) and clean the keys of every document.
  #
  # @param [String] json_filename path of the jsonl source
  # @param [Proc] clean_proc a function applied to every hash key
  # @return [Enumerator::Lazy] one cleaned document per input line
  def self.text_to_lazy_json(json_filename:, clean_proc:)
    File.foreach(json_filename).lazy.map do |line|
      _key_cleaner(data: MultiJson.load(line), clean_proc: clean_proc)
    end
  end

  # Parse a json-schema file and map the contents of a jsonl file onto the
  # skeleton initialized from that schema.
  #
  # For every key of the skeleton the source record is searched for the same
  # key; if that is missing, the "top/next/final" path of the key inside the
  # skeleton is tried. So the source should be flat for clarity, while the
  # schema can be nested.
  #
  # For example, the skeleton
  #   {key: {nest: "this"} }
  # is filled with "data muscle" if the source has the key
  #   {"key/nest": "data muscle"}
  #
  # TODO: design interface from csv to json that fits these principles.
  #
  # @param [String] json_filename filename for jsonl source
  # @param [String] schema_filename filename for json-schema
  # @param [Proc] clean_proc a function to clean up keys
  # @param [String, #create, nil] model a model (or the name of one) to call
  #   :create on for every record
  # @param [String, nil] schema_entity top-level schema property to use
  # @param [String, nil] authorizing_pointer json-pointer[https://tools.ietf.org/html/rfc6901]
  #   meant to fill in the key "authorized_by"; it is accepted but not read
  #   by this method
  # @param [Symbol] data_attr attribute name handed to model.create
  # @return [Enumerator::Lazy] nothing is read or created until it is forced
  def self.muscle(json_filename:,
                  schema_filename:,
                  clean_proc: ->(str){str},
                  model: nil,
                  schema_entity: nil,
                  authorizing_pointer: nil,
                  data_attr: :data)
    skeleton = Bxtjson.skeleton(schema_data: MultiJson.load(File.read(schema_filename)),
                                entity: schema_entity)
    records = text_to_lazy_json(json_filename: json_filename, clean_proc: clean_proc)
    fill = lambda do |data|
      fillin(source_hash: _map_onto_skeleton_of_schema(data, skeleton: skeleton),
             skeleton: skeleton)
    end

    if model
      # An object that already responds to :create is used as-is; anything
      # else is treated as a constant name, as before.
      target = model.respond_to?(:create) ? model : constantize(model.to_s.capitalize)
      records.map { |data| target.create(data_attr => fill.call(data)) }
    else
      # NOTE(review): every element yielded here is the shared accumulator
      # array, not the individual record. Kept as-is for compatibility;
      # confirm whether callers rely on it before changing.
      out = []
      records.map { |data| out << fill.call(data) }
    end
  end

  # Recursively remove blank values from a hash, in place.
  # A value is blank when it is nil or when it responds to :empty? and is
  # empty. Nested hashes and arrays are compacted first, so a container that
  # becomes empty is removed too.
  #
  # @param [Hash] hash
  # @return [Hash] the same (mutated) hash
  def self.compact_hash!(hash)
    _prune_blank!(hash)
    hash
  end

  # Compact every value of a hash with compact_hash!.
  # Top-level keys are always kept, even when their value ends up empty.
  # Hash (or other delete_if-capable) values and array items are compacted
  # in place; all other values and items are passed through untouched.
  #
  # @param [Hash] hash
  # @return [Hash] a new hash with the same keys
  def self.compact_values!(hash)
    compact = ->(item) { item.respond_to?(:delete_if) ? Bxtjson.compact_hash!(item) : item }
    hash.map do |key, value|
      [key, value.is_a?(Array) ? value.map(&compact) : compact.call(value)]
    end.to_h
  end

  # The helpers below are internal. A bare `private` does not apply to
  # methods defined with `def self.`, so they are (and always were) publicly
  # callable; that visibility is kept for backward compatibility.

  # Compact a value in place and report whether it is blank afterwards.
  # Any -> Boolean
  def self._prune_blank!(value)
    if value.is_a?(Hash)
      value.delete_if { |_, nested| _prune_blank!(nested) }
    elsif value.respond_to?(:delete_if)
      value.delete_if { |nested| _prune_blank!(nested) }
    end
    value.nil? || (value.respond_to?(:empty?) && value.empty?)
  end

  # Creates a skeleton for object and array from a Json Schema.
  # Every other type (boolean, string, number, integer, null) starts as nil.
  # The acc parameter is accepted for compatibility; the result does not
  # depend on it.
  # Schema -> Hash | Array | nil
  def self._skeleton(json_schema, acc={})
    type = json_schema.type
    if ["object"] == type
      json_schema.properties.map { |key, value| [key, _skeleton(value, acc)] }.to_h
    elsif ["array"] == type
      # the key is already in the parent hash; return an array holding one
      # hash that describes the item
      [json_schema.items.properties.map { |key, value| [key, _skeleton(value, acc)] }.to_h]
    end
  end

  # Given a key, return its value from the source hash; if that is missing
  # (nil), try the key made by joining path with "/".
  # A stored false is a real value and is returned as-is.
  # (String, Hash, Array) -> Object
  def self.lookup(key, source_hash, path=[])
    value = source_hash.fetch(key, nil)
    return value unless value.nil?

    source_hash.fetch(path.join("/"), nil)
  end

  # Take the skeleton of an array of objects and a source hash holding the
  # values to insert, and expand the values into one object per row
  # (e.g. key: [1,2,3] -> [{key: 1}, {key: 2}, {key: 3}]).
  #
  # Columns shorter than the longest one are padded by repeating their own
  # first value; this is how some reports treat repeating values
  # (a la sql reporting). Todo: parameterize the pad value as an option.
  # (Array, Hash) -> [{}]
  def self.expand_array_to_objects(array:, source_hash:)
    columns = array.first.map do |key, _|
      # a plain value becomes a one-element column; arrays are flattened
      values = [lookup(key, source_hash)].flatten
      values = [nil] if values.empty? # keep a slot for the key
      values.map { |value| [key, value] }
    end
    return [] if columns.empty?

    sorted = columns.sort_by(&:length)
    max = sorted.last.length
    padded = sorted.map do |column|
      column + Array.new(max - column.length, column.first)
    end

    # transpose columns into rows (like a spreadsheet)
    head, *tail = padded
    head.zip(*tail).map(&:to_h)
  end

  # Given a flat source_hash, fill in every key of a skeleton.
  # The path (stack of keys) is remembered while descending so that nested
  # keys can also be found under "top/next/final" in the source.
  #
  # A value found directly for a key wins (arrays are joined into a string,
  # empty strings/hashes count as missing); otherwise the skeleton below the
  # key decides: hashes recurse, arrays are looked up or expanded, scalars
  # stay nil.
  # (Hash, Hash | Array | nil) -> Hash | Array | Object
  def self.fillin(source_hash:, skeleton:, acc: {}, path: [])
    if skeleton.kind_of?(Hash)
      skeleton.map do |key, value|
        path.push(key) # save hash depth to stack-like []
        found = fillin(source_hash: source_hash,
                       skeleton: nil,
                       acc: lookup(key, source_hash, path),
                       path: path)
        # nil means "nothing usable found"; false is a legitimate value
        filled = if found.nil?
                   fillin(source_hash: source_hash, skeleton: value, path: path)
                 else
                   found
                 end
        path.pop # pop the path at end of recursion
        [key, filled]
      end.to_h
    elsif skeleton.kind_of?(Array) && skeleton.first.empty?
      # an array with no inner objects/hashmaps
      lookup(path.last, source_hash)
    elsif skeleton.kind_of?(Array)
      # an array (eg key: [1,2,3]) but we want objects: [{key: 1}, {key: 2}]
      expand_array_to_objects(array: skeleton, source_hash: source_hash)
    elsif skeleton.nil?
      # the acc value should be a string, so join if possible
      if acc.respond_to?(:join)
        acc.join
      elsif acc.respond_to?(:empty?)
        acc.empty? ? nil : acc
      else
        acc
      end
    end
  end

  # Walk json data and rebuild every hash and array, leaving other values
  # as they are. The skeleton is only passed along, it is not consulted.
  # Of note: if a "key/key" pointer where key == key then only the value of
  # the nested key will be returned. Use a naming convention of "keys/key"
  # or "unique/uniqueNest".
  # Hash -> Hash
  def self._map_onto_skeleton_of_schema(json_data,
                                        acc: {},
                                        skeleton:)
    case json_data
    when Hash
      json_data.map do |key, value|
        [key, _map_onto_skeleton_of_schema(value, acc: acc, skeleton: skeleton)]
      end.to_h
    when Array
      json_data.map { |item| _map_onto_skeleton_of_schema(item, skeleton: skeleton) }
    else
      json_data
    end
  end

  # Apply clean_proc to every key of a hash, including the keys of nested
  # hashes. Anything that is not a hash is returned unchanged.
  # The acc parameter is accepted for compatibility and is not used.
  # Hash -> Hash
  def self._key_cleaner(data:, clean_proc: ->(str){str}, acc: {})
    return data unless data.kind_of?(Hash)

    data.map do |key, value|
      [clean_proc.call(key), _key_cleaner(data: value, clean_proc: clean_proc)]
    end.to_h
  end

  # Resolve a constant from its name, e.g. "Admin::User".
  # Adapted from activesupport/lib/active_support/inflector.rb
  #
  # @param [String] camel_cased_word
  # @return [Object] the constant
  # @raise [NameError] if the name is malformed or the constant is undefined
  def self.constantize(camel_cased_word)
    matched = /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/.match(camel_cased_word)
    unless matched
      raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
    end

    Object.const_get(matched[1])
  end
end
|
|
285
|
+
|
metadata
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: bxtjson
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Jacob Kroeze
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2015-03-26 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: multi_json
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ~>
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '1.10'
|
|
22
|
+
- - ! '>='
|
|
23
|
+
- !ruby/object:Gem::Version
|
|
24
|
+
version: 1.10.0
|
|
25
|
+
type: :runtime
|
|
26
|
+
prerelease: false
|
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
28
|
+
none: false
|
|
29
|
+
requirements:
|
|
30
|
+
- - ~>
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '1.10'
|
|
33
|
+
- - ! '>='
|
|
34
|
+
- !ruby/object:Gem::Version
|
|
35
|
+
version: 1.10.0
|
|
36
|
+
- !ruby/object:Gem::Dependency
|
|
37
|
+
name: json_schema
|
|
38
|
+
requirement: !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
40
|
+
requirements:
|
|
41
|
+
- - ~>
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '0.5'
|
|
44
|
+
- - ! '>='
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 0.5.0
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
none: false
|
|
51
|
+
requirements:
|
|
52
|
+
- - ~>
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0.5'
|
|
55
|
+
- - ! '>='
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
version: 0.5.0
|
|
58
|
+
description: initialize empty hash from schema, map from hash to schema, return a
|
|
59
|
+
lazy enumerable
|
|
60
|
+
email: jlkroeze@gmail.com
|
|
61
|
+
executables: []
|
|
62
|
+
extensions: []
|
|
63
|
+
extra_rdoc_files: []
|
|
64
|
+
files:
|
|
65
|
+
- lib/bxtjson.rb
|
|
66
|
+
homepage: https://github.com/jacob-kroeze/bxtjson
|
|
67
|
+
licenses:
|
|
68
|
+
- MIT
|
|
69
|
+
post_install_message:
|
|
70
|
+
rdoc_options: []
|
|
71
|
+
require_paths:
|
|
72
|
+
- lib
|
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
74
|
+
none: false
|
|
75
|
+
requirements:
|
|
76
|
+
- - ! '>='
|
|
77
|
+
- !ruby/object:Gem::Version
|
|
78
|
+
version: '0'
|
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
|
+
none: false
|
|
81
|
+
requirements:
|
|
82
|
+
- - ! '>='
|
|
83
|
+
- !ruby/object:Gem::Version
|
|
84
|
+
version: '0'
|
|
85
|
+
requirements: []
|
|
86
|
+
rubyforge_project:
|
|
87
|
+
rubygems_version: 1.8.23
|
|
88
|
+
signing_key:
|
|
89
|
+
specification_version: 3
|
|
90
|
+
summary: Map between json schema
|
|
91
|
+
test_files: []
|