json_data_extractor 0.0.11 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +129 -7
- data/lib/json_data_extractor.rb +24 -5
- data/lib/src/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb96ee66beec96f117282087fadea7c8bf91febb79eddf8d588bfb26a4301466
|
|
4
|
+
data.tar.gz: b0a5221e3eb012da640233fac6fb5ff6a41715327de6cb381bd8896e184095bd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 48e16964d90fc098e3d2b1c9ad5136f642546a9fa04121aea8b2ed28847224fe85ee43a576476e61fbcdf501e0ae188488c23cdd1bab6bf9d1af9781d301df60
|
|
7
|
+
data.tar.gz: 8b7fd89bbd4ecd36b24a542112da82baeccc789e8cf86b8ce8cbbbf3444d28c2073809ba105cb96f261d88c847ca74ba31d8cbeacfe95d74d79a22b2af913907
|
data/README.md
CHANGED
|
@@ -120,6 +120,35 @@ The resulting json will be:
|
|
|
120
120
|
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
+
|
|
124
|
+
### Handling Default Values
|
|
125
|
+
|
|
126
|
+
With JsonDataExtractor, you can specify default values in your schema for keys that might be absent in the input JSON. Use the `path` and `default` keys in the schema for this purpose.
|
|
127
|
+
|
|
128
|
+
```ruby
|
|
129
|
+
schema = {
|
|
130
|
+
absent_value: { path: nil },
|
|
131
|
+
default: { path: '$.some_real_path', default: 'foo' },
|
|
132
|
+
default_with_lambda: { path: '$.table', default: -> { 'DEFAULT' } },
|
|
133
|
+
absent_with_default: { path: nil, default: 'bar' }
|
|
134
|
+
}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
- `absent_value`: Will be `nil` in the output as there's no corresponding key in the input JSON and no default is provided.
|
|
138
|
+
- `default`: Will either take the value from `$.some_real_path` in the input JSON or 'foo' if the path does not exist.
|
|
139
|
+
- `default_with_lambda`: Will take the value from `$.table` in the input JSON or 'DEFAULT' (the result of calling the lambda) if the path does not exist.
|
|
140
|
+
- `absent_with_default`: Will be 'bar' in the output as there's no corresponding key in the input JSON but a default is provided.
|
|
141
|
+
|
|
142
|
+
#### Simplified Syntax for Absent Values
|
|
143
|
+
|
|
144
|
+
For keys that you expect to be absent in the input JSON but still want to include in the output with a `nil` value, you can use a simplified syntax by setting the schema value to `nil`.
|
|
145
|
+
|
|
146
|
+
```ruby
|
|
147
|
+
schema = {
|
|
148
|
+
absent_value: nil
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
123
152
|
### Modifiers
|
|
124
153
|
|
|
125
154
|
Modifiers can be supplied on object creation and/or added later by calling `#add_modifier` method.
|
|
@@ -163,9 +192,91 @@ results = extractor.extract(schema)
|
|
|
163
192
|
```
|
|
164
193
|
|
|
165
194
|
Modifiers are called in the order in which they are defined, so keep that in mind when defining your
|
|
166
|
-
schema. By default JDE raises an ArgumentError if a modifier is not applicable, but this behaviour
|
|
195
|
+
schema. By default JDE raises an ArgumentError if a modifier is not applicable, but this behaviour
|
|
167
196
|
can be configured to ignore missing modifiers. See Configuration options for details.
|
|
168
197
|
|
|
198
|
+
### Maps
|
|
199
|
+
|
|
200
|
+
The JsonDataExtractor gem provides a powerful feature called "maps" that allows you to transform
|
|
201
|
+
extracted data using predefined mappings. Maps are useful when you want to convert specific values
|
|
202
|
+
from the source data into different values based on predefined rules. The best use case is when you
|
|
203
|
+
need to traverse a complex tree to get to a value and then just convert it to your own dictionary.
|
|
204
|
+
E.g.:
|
|
205
|
+
|
|
206
|
+
```ruby
|
|
207
|
+
data = {
|
|
208
|
+
cars: [
|
|
209
|
+
{ make: 'A', fuel: 1 },
|
|
210
|
+
{ make: 'B', fuel: 2 },
|
|
211
|
+
{ make: 'C', fuel: 3 },
|
|
212
|
+
{ make: 'D', fuel: nil },
|
|
213
|
+
]
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
FUEL_TYPES = { 1 => 'Petrol', 2 => 'Diesel', nil => 'Unknown' }
|
|
217
|
+
schema = {
|
|
218
|
+
fuel: {
|
|
219
|
+
path: '$.cars[*].fuel',
|
|
220
|
+
map: FUEL_TYPES
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
result = JsonDataExtractor.new(data).extract(schema) # => {"fuel":["Petrol","Diesel",nil,"Unknown"]}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
A map is essentially a dictionary that defines key-value pairs, where the keys represent the source
|
|
227
|
+
values and the corresponding values represent the transformed values. When extracting data, you can
|
|
228
|
+
apply one or multiple maps to modify the extracted values.
|
|
229
|
+
|
|
230
|
+
#### Syntax
|
|
231
|
+
|
|
232
|
+
To define a map, you can use the `map` or `maps` key in the schema. The map value can be a single
|
|
233
|
+
hash or an array of hashes, where each hash represents a separate mapping rule. Here's an example:
|
|
234
|
+
|
|
235
|
+
```ruby
|
|
236
|
+
{
|
|
237
|
+
path: "$.data[*].category",
|
|
238
|
+
map: {
|
|
239
|
+
"fruit" => "Fresh Fruit",
|
|
240
|
+
"vegetable" => "Organic Vegetable",
|
|
241
|
+
"meat" => "Premium Meat"
|
|
242
|
+
},
|
|
243
|
+
}
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Multiple maps can also be provided. In this case, each map is applied to the result of previous
|
|
247
|
+
transformation:
|
|
248
|
+
|
|
249
|
+
```ruby
|
|
250
|
+
{
|
|
251
|
+
path: "$.data[*].category",
|
|
252
|
+
maps: [
|
|
253
|
+
{
|
|
254
|
+
"fruit" => "Fresh Fruit",
|
|
255
|
+
"vegetable" => "Organic Vegetable",
|
|
256
|
+
"meat" => "Premium Meat",
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"Fresh Fruit" => "Frisches Obst",
|
|
260
|
+
"Organic Vegetable" => "Biologisches Gemüse",
|
|
261
|
+
"Premium Meat" => "Hochwertiges Fleisch",
|
|
262
|
+
}
|
|
263
|
+
]
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
_(the example is a little bit silly, but you should get the idea of chaining maps)_
|
|
268
|
+
|
|
269
|
+
You can use keys `:map` and `:maps` interchangeably much like `:modifier`, `:modifiers`.
|
|
270
|
+
|
|
271
|
+
#### Notes
|
|
272
|
+
|
|
273
|
+
- Maps can be used together with modifiers, but this makes less sense as you can always apply complex
|
|
274
|
+
mapping rules in modifiers themselves.
|
|
275
|
+
- If used together with modifiers, maps are applied **after** modifiers.
|
|
276
|
+
- If a map does not have a key corresponding to a transformed value, it will return nil, so be careful.
|
|
277
|
+
- Maps are applied in the order they are defined in the schema. Be cautious of the order if you have
|
|
278
|
+
overlapping or conflicting mapping rules.
|
|
279
|
+
|
|
169
280
|
### Nested schemas
|
|
170
281
|
|
|
171
282
|
JDE supports nested schemas. Just provide your element with a type of `array` and add a `schema` key
|
|
@@ -189,26 +300,37 @@ E.g. this is a valid real-life schema with nested data:
|
|
|
189
300
|
}
|
|
190
301
|
}
|
|
191
302
|
```
|
|
303
|
+
|
|
192
304
|
Nested schemas can also be applied to objects, not just arrays. See specs for more examples.
|
|
193
305
|
|
|
194
306
|
## Configuration Options
|
|
195
|
-
|
|
307
|
+
|
|
308
|
+
The JsonDataExtractor gem provides a configuration option to control the behavior when encountering
|
|
309
|
+
invalid modifiers.
|
|
196
310
|
|
|
197
311
|
### Strict Modifiers
|
|
198
|
-
By default, the gem operates in strict mode, which means that if an invalid modifier is encountered, an `ArgumentError` will be raised. This ensures that only valid modifiers are applied to the extracted data.
|
|
199
312
|
|
|
200
|
-
|
|
313
|
+
By default, the gem operates in strict mode, which means that if an invalid modifier is encountered,
|
|
314
|
+
an `ArgumentError` will be raised. This ensures that only valid modifiers are applied to the
|
|
315
|
+
extracted data.
|
|
316
|
+
|
|
317
|
+
To change this behavior and allow the use of invalid modifiers without raising an error, you can
|
|
318
|
+
configure the gem to operate in non-strict mode.
|
|
201
319
|
|
|
202
320
|
```ruby
|
|
203
321
|
JsonDataExtractor.configure do |config|
|
|
204
322
|
config.strict_modifiers = false
|
|
205
323
|
end
|
|
206
324
|
```
|
|
207
|
-
When `strict_modifiers` is set to `false`, any invalid modifiers will be ignored, and the original value will be returned without applying any modification.
|
|
208
325
|
|
|
209
|
-
|
|
326
|
+
When `strict_modifiers` is set to `false`, any invalid modifiers will be ignored, and the original
|
|
327
|
+
value will be returned without applying any modification.
|
|
328
|
+
|
|
329
|
+
It is important to note that enabling non-strict mode should be done with caution, as it can lead to
|
|
330
|
+
unexpected behavior if there are typos or incorrect modifiers specified in the schema.
|
|
210
331
|
|
|
211
|
-
By default, `strict_modifiers` is set to `true`, providing a safe and strict behavior. However, you
|
|
332
|
+
By default, `strict_modifiers` is set to `true`, providing a safe and strict behavior. However, you
|
|
333
|
+
can customize this configuration option according to your specific needs.
|
|
212
334
|
|
|
213
335
|
## TODO
|
|
214
336
|
|
data/lib/json_data_extractor.rb
CHANGED
|
@@ -20,9 +20,18 @@ class JsonDataExtractor
|
|
|
20
20
|
def extract(schema)
|
|
21
21
|
results = {}
|
|
22
22
|
schema.each do |key, val|
|
|
23
|
+
default_value = nil
|
|
23
24
|
if val.is_a?(Hash)
|
|
24
25
|
val.transform_keys!(&:to_sym)
|
|
25
26
|
path = val[:path]
|
|
27
|
+
default_value = val[:default]
|
|
28
|
+
maps = Array([val[:maps] || val[:map]]).flatten.compact.map do |map|
|
|
29
|
+
if map.is_a?(Hash)
|
|
30
|
+
map
|
|
31
|
+
else
|
|
32
|
+
raise ArgumentError, "Invalid map: #{map.inspect}"
|
|
33
|
+
end
|
|
34
|
+
end
|
|
26
35
|
modifiers = Array(val[:modifiers] || val[:modifier]).map do |mod|
|
|
27
36
|
case mod
|
|
28
37
|
when Symbol, Proc
|
|
@@ -38,14 +47,17 @@ class JsonDataExtractor
|
|
|
38
47
|
else
|
|
39
48
|
path = val
|
|
40
49
|
modifiers = []
|
|
50
|
+
maps = []
|
|
41
51
|
end
|
|
42
52
|
|
|
43
|
-
extracted_data = JsonPath.on(@data, path)
|
|
53
|
+
extracted_data = JsonPath.on(@data, path) if path
|
|
44
54
|
|
|
45
|
-
if extracted_data.empty?
|
|
46
|
-
results[key] = nil
|
|
55
|
+
if extracted_data.nil? || extracted_data.empty?
|
|
56
|
+
results[key] = default_value.is_a?(Proc) ? default_value.call : (default_value || nil)
|
|
47
57
|
else
|
|
48
|
-
|
|
58
|
+
extracted_data.map! { |val| val.nil? ? default_value : val }
|
|
59
|
+
transformed_data = apply_modifiers(extracted_data, modifiers)
|
|
60
|
+
results[key] = apply_maps(transformed_data, maps)
|
|
49
61
|
|
|
50
62
|
if array_type && nested
|
|
51
63
|
results[key] = extract_nested_data(results[key], nested)
|
|
@@ -72,6 +84,14 @@ class JsonDataExtractor
|
|
|
72
84
|
end
|
|
73
85
|
end
|
|
74
86
|
|
|
87
|
+
def apply_maps(data, maps)
|
|
88
|
+
data.map do |value|
|
|
89
|
+
mapped_value = value
|
|
90
|
+
maps.each { |map| mapped_value = map[mapped_value] }
|
|
91
|
+
mapped_value
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
75
95
|
def apply_modifiers(data, modifiers)
|
|
76
96
|
data.map do |value|
|
|
77
97
|
modified_value = value
|
|
@@ -96,7 +116,6 @@ class JsonDataExtractor
|
|
|
96
116
|
end
|
|
97
117
|
end
|
|
98
118
|
|
|
99
|
-
|
|
100
119
|
class << self
|
|
101
120
|
def configuration
|
|
102
121
|
@configuration ||= Configuration.new
|
data/lib/src/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_data_extractor
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.11
|
|
4
|
+
version: 0.0.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Max Buslaev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-09-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|