bxtjson 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/bxtjson.rb +285 -0
  2. metadata +91 -0
@@ -0,0 +1,285 @@
1
+ # input stream style (new-line separated) json objects
2
+ # main method map_onto_skeleton_of_schema
3
+ # JSON -> JSON
4
+
5
+ require 'multi_json'
6
+ require 'json_schema'
7
+ # == dependencies
8
+ # * gem: json_schema
9
+ # * gem: multi_json
10
+ # * JSON standard library or other (e.g. oj) json parsers
11
+ # * You implement a model in Sequel or ActiveRecord if you want.
12
+ # == Constants (examples)
13
+ # cleaner_proc = ->(str) {str.gsub(/\W+/, " ").lstrip
14
+ # .gsub(" ", "_")
15
+ # .gsub(/PPPO_|PPCO_/, "")
16
+ # .downcase
17
+ # }
18
+ # json_filename = "../../../fsms-tmp/PP_PROPOSAL_2015.JSON"
19
+ # schema_filename = "./docs/schema.json"
20
+ # authorizing_pointer = '#/departments/primary_dept'
21
+
22
+ module Bxtjson
23
# Build an empty ("skeleton") structure from a json-schema[http://json-schema.org].
#
# The schema data is parsed with JsonSchema.parse!, its references are
# expanded, and the selected schema is handed to _skeleton.
#
# @param [Hash] schema_data the data from json-schema
# @param [String, nil] entity name of a top-level property to focus on;
#   nil (the default) uses the whole schema
# @return [Hash, Array, nil] whatever _skeleton builds for the selected schema
# @raise [ArgumentError] if entity is given but is not a property of the schema
def self.skeleton(schema_data:,
                  entity: nil)
  schema = JsonSchema.parse!(schema_data)
  schema.expand_references!
  return _skeleton(schema, {}) if entity.nil?

  entity_schema = schema.properties[entity]
  if entity_schema.nil?
    # Fail here with a readable message; otherwise the nil reaches
    # _skeleton and surfaces as a NoMethodError on nil.
    raise ArgumentError, "#{entity.inspect} is not a property of the schema"
  end
  _skeleton(entity_schema, {})
end
39
# Lazily read a jsonl (one JSON document per line) file, parsing each line
# and cleaning its keys with clean_proc.
#
# Nothing is read until the returned enumerator is iterated. Lines that
# contain only whitespace are skipped, since they are not JSON documents.
#
# @param [String] json_filename source of json objects in jsonl[https://github.com/stephenplusplus/jsonl] format
# @param [Proc] clean_proc a function to clean up keys
# @return [Enumerator::Lazy] one parsed, key-cleaned object per non-blank line
def self.text_to_lazy_json(json_filename:,
                           clean_proc:)
  File.foreach(json_filename)
      .lazy
      .reject { |line| line.strip.empty? }
      .map do |line|
        _key_cleaner(data: MultiJson.load(line), clean_proc: clean_proc)
      end
end
52
# Parse a json-schema file and map the contents of a jsonl file onto the
# initialized schema ("skeleton").
#
# For every key of the skeleton the source object is searched for that
# key; if it is not found, the "top/next/final" style path to the key is
# tried instead. The source should therefore be flat, while the schema
# may be nested.
#
# For example, the skeleton
#   {key: {nest: "this"} }
# is filled with "data muscle" if the source has the key
#   {"key/nest": "data muscle"}
#
# TODO: design interface from csv to json that fits these
# principles.
#
# @param [String] json_filename filename for jsonl source
# @param [String] schema_filename filename for json-schema
# @param [Proc] clean_proc a function to clean up keys
# @param [String, #create] model a model (anything responding to :create)
#   or the name of one; when given, :create is called once per record
# @param [String, nil] schema_entity top-level schema property to use
# @param [String] authorizing_pointer json-pointer[https://tools.ietf.org/html/rfc6901]
#   meant to fill in the key "authorized_by". Required, but not read by
#   this method.
# @param [Symbol] data_attr attribute name the filled data is passed under to model.create
# @return [Enumerator::Lazy] lazy enumerator; nothing is read, filled or
#   created until it is iterated. With a model each element is the
#   result of model.create.
def self.muscle(json_filename:,
                schema_filename:,
                clean_proc: ->(str){str},
                model: nil,
                schema_entity: nil,
                authorizing_pointer:,
                data_attr: :data)
  skeleton = Bxtjson.skeleton(schema_data: MultiJson.load(File.read(schema_filename)),
                              entity: schema_entity)
  fill = lambda do |data|
    fillin(source_hash: _map_onto_skeleton_of_schema(data, skeleton: skeleton),
           skeleton: skeleton)
  end

  if model
    model = _resolve_model(model)
    text_to_lazy_json(json_filename: json_filename, clean_proc: clean_proc)
      .map { |data| model.create(data_attr => fill.call(data)) }
  else
    # NOTE(review): every element yielded here is the same, growing `out`
    # array rather than the single filled record. That looks accidental,
    # but the shape is kept because callers may rely on it -- confirm.
    out = []
    text_to_lazy_json(json_filename: json_filename, clean_proc: clean_proc)
      .map { |data| out << fill.call(data) }
  end
end

# Turn the `model` argument of muscle into an object that responds to :create.
# Objects that already respond to :create are used as they are. Names are
# first resolved the historical way (String#capitalize, which lowercases
# everything after the first character) and then with their original
# casing, so "proposal" and "UserProfile" both work.
def self._resolve_model(model)
  return model if model.respond_to?(:create)

  name = model.to_s
  begin
    constantize(name.capitalize)
  rescue NameError
    constantize(name.sub(/\A[a-z]/) { |first| first.upcase })
  end
end
103
# Recursively remove blank values from a hash, in place.
#
# A value is blank when it is nil, or when it responds to :empty? and is
# empty (empty strings, arrays and hashes). Nested hashes and arrays are
# pruned first, so containers that become empty are removed as well.
# Values such as false and 0 are kept.
#
# @param [Hash] hash the structure to prune; it is mutated
# @return [Hash] the same object that was passed in
def self.compact_hash!(hash)
  blank = ->(value) { value.nil? || (value.respond_to?(:empty?) && value.empty?) }

  # Hash#delete_if yields (key, value) while Array#delete_if yields a
  # single element, so the two containers need separate blocks; one
  # two-argument block applied to an array sees nil for every element's
  # value and would discard all of them.
  prune = lambda do |node|
    case node
    when Hash
      node.delete_if { |_, value| blank.call(prune.call(value)) }
    when Array
      node.delete_if { |value| blank.call(prune.call(value)) }
    end
    node
  end

  prune.call(hash)
end
115
# Compact every top-level value of a hash with Bxtjson.compact_hash!.
#
# Array values are processed item by item; other values that respond to
# :delete_if are processed directly. Anything else (strings, numbers,
# nil, and scalar items inside arrays) is passed through untouched.
#
# @param [Hash] hash
# @return [Hash] a new hash with the same keys; nested containers are
#   the original objects, compacted in place
def self.compact_values!(hash)
  compact = lambda do |value|
    # Scalars have no delete_if; handing them to compact_hash! would raise.
    value.respond_to?(:delete_if) ? Bxtjson.compact_hash!(value) : value
  end

  pairs = hash.map do |key, value|
    compacted = if value.is_a?(Array)
                  value.map { |item| compact.call(item) }
                else
                  compact.call(value)
                end
    [key, compacted]
  end
  Hash[pairs]
end
129
+ private
130
# Creates a skeleton for object and array schemas from a Json Schema.
#
# * type ["object"] -> Hash of property name => skeleton of that property
# * type ["array"]  -> one-element Array holding a Hash built from the
#   properties of the items schema; [{}] when there is no items schema
#   with properties
# * anything else (boolean, string, number, integer, null, ...) -> nil
#
# @param json_schema a parsed schema responding to :type, :properties and :items
# @param acc unused; kept so existing callers that pass it keep working
# @return [Hash, Array, nil]
def self._skeleton(json_schema, acc={})
  case json_schema.type
  when ["object"]
    Hash[json_schema.properties.map { |key, value| [key, _skeleton(value)] }]
  when ["array"]
    items = json_schema.items
    # An array schema may omit `items`; treat that like items without
    # properties instead of calling .properties on nil.
    properties = items.respond_to?(:properties) ? items.properties : {}
    [Hash[properties.map { |key, value| [key, _skeleton(value)] }]]
  else
    nil
  end
end
152
# Look a key up in source_hash; when there is no value for it, fall back
# to the "/"-joined path (e.g. "top/next/final").
#
# Only nil counts as "no value", so a stored false is returned as is.
#
# @param key the key to look for first
# @param [Hash] source_hash flat source data
# @param [Array] path key path used for the fallback lookup
# @return the value found, or nil when neither key nor path is present
def self.lookup(key, source_hash, path=[])
  direct = source_hash.fetch(key, nil)
  return direct unless direct.nil?

  source_hash.fetch(path.join("/"), nil)
end
158
+
159
# Expand the values found for the keys of an array skeleton into an
# array of objects, one object per position:
#   key: [1, 2, 3]  ->  [{key: 1}, {key: 2}, {key: 3}]
#
# Each key of array.first is looked up in source_hash. A plain value
# becomes a one-element column; arrays are flattened. Columns shorter
# than the longest one are padded by repeating their own first value,
# which is how some reports treat repeating values (a la sql reporting).
# Todo: parameterize the padding value as an option.
#
# @param [Array<Hash>] array skeleton array; only the keys of its first element are used
# @param [Hash] source_hash flat source data
# @return [Array<Hash>] one hash per row; [] when there are no keys
def self.expand_array_to_objects(array:, source_hash: )
  columns = array.first.map do |key, _|
    cells = [lookup(key, source_hash)].flatten.map { |value| [key, value] }
    [key, cells]
  end
  height = columns.map { |_, cells| cells.length }.max.to_i

  padded = columns.map do |key, cells|
    # Pad with this column's own first pair so every row keeps every key.
    pad = cells.first || [key, nil]
    cells + Array.new(height - cells.length) { pad }
  end
  return [] if padded.empty?

  # transpose keeping a slot if empty (like a spreadsheet)
  head, *tail = padded
  head.zip(*tail).map(&:to_h)
end
191
+
192
# Fill a skeleton with values from a flat source hash.
#
# How the skeleton is handled:
# * Hash  - every key is looked up in the source (by key, then by the
#   "/"-joined path remembered in `path`). A truthy result is used after
#   being normalized as described under nil below; otherwise the
#   skeleton value for that key is filled recursively.
# * Array whose first element is empty - the source value for the
#   current key (path.last) is used as is.
# * Array with a non-empty first element - delegated to
#   expand_array_to_objects.
# * nil   - `acc` is the candidate value: anything that responds to
#   :join is joined into a string, an empty value becomes nil, anything
#   else is returned unchanged.
# * anything else - nil.
#
# @param [Hash] source_hash flat source data
# @param [Hash, Array, nil] skeleton the (sub)skeleton to fill
# @param acc candidate value, only read when skeleton is nil
# @param [Array] path stack of the keys leading to the current skeleton;
#   pushed and popped in place while recursing
# @return the filled structure
def self.fillin(source_hash:, skeleton:, acc: {}, path: [])
  if skeleton.kind_of?(Hash)
    pairs = skeleton.map do |key, value|
      path.push(key) # remember the depth while this key is processed
      found = fillin(source_hash: source_hash,
                     skeleton: nil,
                     acc: lookup(key, source_hash, path),
                     path: path)
      found ||= fillin(source_hash: source_hash,
                       skeleton: value,
                       path: path)
      path.pop
      [key, found]
    end
    Hash[pairs]
  elsif skeleton.kind_of?(Array) && skeleton.first.empty?
    # an array with no inner objects/hashmaps
    lookup(path.last, source_hash)
  elsif skeleton.kind_of?(Array)
    # an array (eg key: [1,2,3]) that should become [{key: 1}, {key: 2}, ...]
    expand_array_to_objects(array: skeleton,
                            source_hash: source_hash)
  elsif skeleton.nil?
    # the value should be a string, so join if possible
    if acc.respond_to?(:join)
      acc.join
    elsif acc.respond_to?(:empty?) && acc.empty?
      nil
    else
      acc
    end
  else
    nil
  end
end
237
# Walk json_data recursively and rebuild it: hashes become new hashes
# with the same keys, arrays become new arrays, and every other value is
# returned as is.
#
# Author's note carried over from the original comment (not verifiable
# from this method alone): with a "key/key" pointer where both names are
# equal only the value of the nested key is returned, so use a naming
# convention of "keys/key" or "unique/uniqueNest".
#
# @param json_data [Hash, Array, Object] parsed json
# @param acc unused, accepted for compatibility
# @param skeleton passed along while recursing; not consulted here
# @return a structural copy of json_data
def self._map_onto_skeleton_of_schema(json_data,
                                      acc: {},
                                      skeleton:)
  if json_data.kind_of?(Hash)
    json_data.each_with_object({}) do |(key, value), copy|
      copy[key] = _map_onto_skeleton_of_schema(value,
                                               acc: acc,
                                               skeleton: skeleton)
    end
  elsif json_data.kind_of?(Array)
    json_data.map { |item| _map_onto_skeleton_of_schema(item, skeleton: skeleton) }
  else
    json_data
  end
end
264
# Clean the keys of a hash, and of every hash nested directly inside it,
# with clean_proc. Values that are not hashes (including arrays) are
# returned untouched.
#
# @param data [Hash, Object] parsed json
# @param [Proc] clean_proc called with each key; its result is the new key
# @param acc unused, accepted for compatibility
# @return [Hash, Object] a new hash with cleaned keys, or data itself
#   when it is not a hash
def self._key_cleaner(data:, clean_proc: ->(str){str}, acc: {})
  return data unless data.kind_of?(Hash)

  cleaned = data.map do |key, value|
    # Hand clean_proc down, otherwise nested hashes fall back to the
    # identity default and keep their raw keys.
    [clean_proc.call(key), _key_cleaner(data: value, clean_proc: clean_proc)]
  end
  Hash[cleaned]
end
276
# Resolve a constant from its name, e.g. "Proposal" or "Admin::User".
# Adapted from activesupport/lib/active_support/inflector.rb.
#
# The name is validated against a strict pattern before lookup, and is
# resolved with Object.const_get rather than by evaluating a string.
#
# @param [String] camel_cased_word constant name, optionally prefixed with "::"
# @return [Object] the constant
# @raise [NameError] if the name is not a valid constant name or the
#   constant is not defined
def self.constantize(camel_cased_word)
  unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ camel_cased_word
    raise NameError, "#{camel_cased_word.inspect} is not a valid constant name!"
  end

  Object.const_get(Regexp.last_match(1))
end
284
+ end
285
+
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bxtjson
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jacob Kroeze
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-03-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: multi_json
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.10'
22
+ - - ! '>='
23
+ - !ruby/object:Gem::Version
24
+ version: 1.10.0
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '1.10'
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: 1.10.0
36
+ - !ruby/object:Gem::Dependency
37
+ name: json_schema
38
+ requirement: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: '0.5'
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: 0.5.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: 0.5.0
58
+ description: initialize empty hash from schema, map from hash to schema, return a
59
+ lazy enumerable
60
+ email: jlkroeze@gmail.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - lib/bxtjson.rb
66
+ homepage: https://github.com/jacob-kroeze/bxtjson
67
+ licenses:
68
+ - MIT
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 1.8.23
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: Map between json schema
91
+ test_files: []