bxtjson 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/bxtjson.rb +285 -0
  2. metadata +91 -0
@@ -0,0 +1,285 @@
1
+ # input stream style (new-line separated) json objects
2
+ # main method map_onto_skeleton_of_schema
3
+ # JSON -> JSON
4
+
5
+ require 'multi_json'
6
+ require 'json_schema'
7
+ # == dependencies
8
+ # * gem: json_schema
9
+ # * gem: multi_json
10
+ # * JSON standard library or other (e.g. oj) json parsers
11
+ # * You implement a model in Sequel or ActiveRecord if you want.
12
+ # == Constants (examples)
13
+ # cleaner_proc = ->(str) {str.gsub(/\W+/, " ").lstrip
14
+ # .gsub(" ", "_")
15
+ # .gsub(/PPPO_|PPCO_/, "")
16
+ # .downcase
17
+ # }
18
+ # json_filename = "../../../fsms-tmp/PP_PROPOSAL_2015.JSON"
19
+ # schema_filename = "./docs/schema.json"
20
+ # authorizing_pointer = '#/departments/primary_dept'
21
+
22
module Bxtjson
  # Initialize an empty hashmap given a json-schema[http://json-schema.org]
  #
  # @param [Hash] schema_data the parsed data from a json-schema document
  # @param [String, nil] entity name of a top-level property to focus on;
  #   nil builds the skeleton for the whole schema
  # @return [Hash, Array, nil] the empty "initialized" skeleton
  # @raise [ArgumentError] if entity is given but is not a property of the schema
  def self.skeleton(schema_data:,
                    entity: nil)
    schema = JsonSchema.parse!(schema_data)
    schema.expand_references!
    return _skeleton(schema) if entity.nil?

    entity_schema = schema.properties[entity]
    if entity_schema.nil?
      # Fail with a clear message instead of a NoMethodError on nil below.
      raise ArgumentError, "#{entity.inspect} is not a property of the schema"
    end
    _skeleton(entity_schema)
  end

  # Process a file of jsonl (linefeed separated) objects and clean keys
  # with a proc. Blank lines are skipped.
  #
  # @param [String] json_filename source of json objects in jsonl[https://github.com/stephenplusplus/jsonl] format
  # @param [Proc] clean_proc a function to clean up keys
  # @return [Enumerator::Lazy] lazily yields one key-cleaned object per line
  def self.text_to_lazy_json(json_filename:,
                             clean_proc:)
    File.foreach(json_filename)
        .lazy
        .reject { |line| line.strip.empty? }
        .map do |line|
          _key_cleaner(data: MultiJson.load(line), clean_proc: clean_proc)
        end
  end

  # Parse json-schema file and map contents of json file into
  # initialized schema
  #
  # Mapping of contents will search for the first key in the source
  # that matches the schema (recursively). So, the source should be
  # flat for clarity, and the schema can be nested.
  #
  # If it cannot find a key, it will look for a "top/next/final" path
  # key in the source data.
  #
  # For example, the skeleton
  #   {key: {nest: "this"} }
  # will be filled with "muscle" if the source has a key
  #   {"key/nest": "data muscle"}
  #
  # TODO: design interface from csv to json that fits these
  # principles.
  #
  # @param [String] json_filename filename for jsonl source
  # @param [String] schema_filename filename for json-schema
  # @param [Proc] clean_proc a function to clean up keys
  # @param [String, Symbol, #create, nil] model an object responding to
  #   :create (used as is), or the name of a constant to resolve and call
  #   :create on; nil means no model
  # @param [String, nil] schema_entity schema property to build the skeleton from
  # @param [String] authorizing_pointer json-pointer[https://tools.ietf.org/html/rfc6901];
  #   required keyword, but not read by this method at present
  # @param [Symbol, String] data_attr attribute name the filled record is
  #   passed under to model.create
  # @return [Enumerator::Lazy] lazily yields each filled record, or the
  #   result of model.create for each record when a model is given.
  #   Nothing is read or created until the enumerator is consumed.
  def self.muscle(json_filename:,
                  schema_filename:,
                  clean_proc: ->(str){str},
                  model: nil,
                  schema_entity: nil,
                  authorizing_pointer:,
                  data_attr: :data)
    template = skeleton(schema_data: MultiJson.load(File.read(schema_filename)),
                        entity: schema_entity)

    records = text_to_lazy_json(json_filename: json_filename, clean_proc: clean_proc)
              .map do |data|
                fillin(source_hash: _map_onto_skeleton_of_schema(data, skeleton: template),
                       skeleton: template)
              end
    return records unless model

    # An object that already responds to :create (e.g. a model class) is used
    # directly; String#capitalize would otherwise mangle CamelCase names.
    target = model.respond_to?(:create) ? model : constantize(model.to_s.capitalize)
    records.map { |record| target.create(data_attr => record) }
  end

  # Recursively remove blank values from a hash (in place).
  # A value is blank when it is nil, or when it responds to :empty? and
  # is empty (after its own contents have been pruned). Arrays are pruned
  # element by element.
  #
  # @param [Hash] hash
  # @return [Hash] the same object, pruned
  def self.compact_hash!(hash)
    _prune_blank!(hash)
    hash
  end

  # Prune blank values inside each top-level value while keeping every
  # top-level key. Array values are mapped item by item; hash values are
  # pruned in place; anything else is passed through.
  #
  # @param [Hash] hash
  # @return [Hash] a new hash with the same keys
  def self.compact_values!(hash)
    Hash[hash.map do |key, value|
      compacted =
        if value.is_a?(Array)
          value.map { |item| compact_hash!(item) }
        elsif value.respond_to?(:delete_if)
          compact_hash!(value)
        else
          value
        end
      [key, compacted]
    end]
  end

  # --- Internal helpers -------------------------------------------------
  # These stay callable as Bxtjson.<name>: a bare `private` does not apply
  # to methods defined with `def self.`, so none is used here.

  # Prune blank entries from a Hash or Array in place and report whether
  # the value itself is blank.
  # Object -> Boolean
  def self._prune_blank!(value)
    if value.is_a?(Hash)
      value.delete_if { |_, nested| _prune_blank!(nested) }
    elsif value.is_a?(Array)
      value.delete_if { |item| _prune_blank!(item) }
    end
    value.nil? || (value.respond_to?(:empty?) && value.empty?)
  end

  # Creates a skeleton for object and array from a Json Schema.
  # Any other type (boolean, string, number, integer, null, ...) is given
  # a nil value to start.
  # The second parameter is ignored; it is kept so existing callers that
  # pass it keep working.
  # Schema -> Hash | Array | nil
  def self._skeleton(json_schema, acc={})
    case json_schema.type
    when ["object"]
      Hash[json_schema.properties.map { |key, value| [key, _skeleton(value)] }]
    when ["array"]
      # The key is already in the parent Hash; return an array holding one
      # hash built from the item properties ({} when there are none).
      items = json_schema.items
      item_properties = items.respond_to?(:properties) ? items.properties : nil
      item_properties ||= {}
      [Hash[item_properties.map { |key, value| [key, _skeleton(value)] }]]
    else
      nil
    end
  end

  # Given a key, return the value found under that key; if that lookup
  # yields nil/false and a path is given, try the "a/b/c" joined path key.
  # (String, Hash, Array) -> Object | nil
  def self.lookup(key, source_hash, path=[])
    direct = source_hash.fetch(key, nil)
    return direct if direct
    # Without a path there is nothing to join; do not probe the "" key.
    return nil if path.empty?

    source_hash.fetch(path.join("/"), nil)
  end

  # Take an array whose first element is a hash of keys to fill, and
  # expand the looked-up values into one object per position
  # (e.g. key: [1,2,3] -> [{key: 1}, {key: 2}, {key: 3}]).
  #
  # Columns shorter than the longest one are padded with their own first
  # value. This is how some reports treat repeating values (a la sql
  # reporting). Todo: parameterize the pad value as an option.
  # (Array, Hash) -> [Hash]
  def self.expand_array_to_objects(array:, source_hash: )
    columns = array.first.map do |key, _|
      # A plain value becomes a one-element column; arrays are flattened.
      values = [lookup(key, source_hash)].flatten
      values = [nil] if values.empty?
      values.map { |value| [key, value] }
    end
    return [] if columns.empty?

    height = columns.map(&:length).max
    padded = columns.map do |column|
      column + Array.new(height - column.length, column.first)
    end

    # Transpose keeping a slot for every column (like a spreadsheet).
    head, *tail = padded
    head.zip(*tail).map(&:to_h)
  end

  # Given a source_hash, find the first key from a skeleton hash
  # and insert its value. Depends on a flat source hash; the path through
  # the skeleton is remembered (in `path`) during lookup.
  #
  # For every skeleton key the source is first looked up directly (by key,
  # then by joined path); a looked-up array is joined into a string. Only
  # when that yields nothing is the nested skeleton value recursed into.
  # (Hash, Hash | Array | nil) -> Hash | Array | String | nil
  def self.fillin(source_hash:, skeleton:, acc: {}, path: [])
    if skeleton.kind_of?(Hash)
      Hash[skeleton.map do |key, value|
        path.push(key) # save hash depth to stack-like []
        direct = fillin(source_hash: source_hash,
                        skeleton: nil,
                        acc: lookup(key, source_hash, path),
                        path: path)
        filled = direct || fillin(source_hash: source_hash,
                                  skeleton: value,
                                  path: path)
        path.pop # leave the path as it was for the next sibling key
        [key, filled]
      end]
    elsif skeleton.kind_of?(Array)
      template = skeleton.first
      if template.nil? || template.empty?
        # an array with no inner objects/hashmaps
        lookup(path.last, source_hash)
      else
        # an array (eg key: [1,2,3]) but we want obj: [{key: 1}, {key: 2}]
        expand_array_to_objects(array: skeleton, source_hash: source_hash)
      end
    elsif skeleton.nil?
      # the acc value should be a string, so join if possible
      if acc.respond_to?(:join)
        acc.join
      elsif acc.respond_to?(:empty?)
        acc.empty? ? nil : acc
      else
        acc
      end
    else
      nil
    end
  end

  # Walk the parsed json data and rebuild it as fresh Hashes/Arrays with
  # the same keys and leaf values. `acc` and `skeleton` are accepted but
  # only passed along; they do not influence the result.
  # Of note: if a "key/key" pointer where key == key then only the
  # value of the nested key will be returned. Use a naming convention
  # of "keys/key" or "unique/uniqueNest"
  # Hash -> Hash
  def self._map_onto_skeleton_of_schema(json_data,
                                        acc: {},
                                        skeleton:)
    case json_data
    when Hash
      Hash[json_data.map do |key, value|
        [key, _map_onto_skeleton_of_schema(value, skeleton: skeleton)]
      end]
    when Array
      json_data.map { |item| _map_onto_skeleton_of_schema(item, skeleton: skeleton) }
    else
      json_data
    end
  end

  # Rebuild data with clean_proc applied to every hash key, including keys
  # of hashes nested in hashes or arrays. Non-collection values are
  # returned unchanged. `acc` is unused and kept for compatibility.
  # Hash -> Hash
  def self._key_cleaner(data:, clean_proc: ->(str){str}, acc: {})
    case data
    when Hash
      Hash[data.map do |key, value|
        [clean_proc.call(key), _key_cleaner(data: value, clean_proc: clean_proc)]
      end]
    when Array
      data.map { |item| _key_cleaner(data: item, clean_proc: clean_proc) }
    else
      data
    end
  end

  # Resolve a constant from its (optionally "::"-prefixed, "::"-separated)
  # name. Adapted from activesupport/lib/active_support/inflector.rb.
  #
  # @param [String] camel_cased_word e.g. "Proposal" or "Admin::Proposal"
  # @return [Object] the constant's value
  # @raise [NameError] if the name is malformed or the constant is missing
  def self.constantize(camel_cased_word)
    unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ camel_cased_word
      raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
    end

    # Walk the validated segments with const_get rather than evaluating a
    # string of code.
    Regexp.last_match(1).split("::").inject(Object) do |scope, name|
      scope.const_get(name)
    end
  end
end
285
+
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bxtjson
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jacob Kroeze
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-03-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: multi_json
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.10'
22
+ - - ! '>='
23
+ - !ruby/object:Gem::Version
24
+ version: 1.10.0
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '1.10'
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: 1.10.0
36
+ - !ruby/object:Gem::Dependency
37
+ name: json_schema
38
+ requirement: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: '0.5'
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: 0.5.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: 0.5.0
58
+ description: initialize empty hash from schema, map from hash to schema, return a
59
+ lazy enumerable
60
+ email: jlkroeze@gmail.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - lib/bxtjson.rb
66
+ homepage: https://github.com/jacob-kroeze/bxtjson
67
+ licenses:
68
+ - MIT
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.23
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Map between json schema
91
+ test_files: []