json_data_extractor 0.0.16 → 0.1.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -2
- data/json_data_extractor.gemspec +2 -1
- data/lib/{src → json_data_extractor}/configuration.rb +5 -2
- data/lib/json_data_extractor/extractor.rb +98 -0
- data/lib/json_data_extractor/schema_element.rb +62 -0
- data/lib/json_data_extractor/version.rb +5 -0
- data/lib/json_data_extractor.rb +15 -122
- metadata +19 -3
- data/lib/src/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '012010608b89947225b392976e3210abca9e88d681fa7378b3539fe13743fd04'
|
4
|
+
data.tar.gz: e1f53a06b1dc6484462933b7251b7b190e739d01679a5d3788c993e5102a9728
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bd3b61458071e431776fbbb35f8d6756c32ecd2167c545a0da8479f0ae9dbfa2fe6c8e678219dbda23a5dd03b9ebe8c59aeebac39f9045a4e3ec199ceff6034
|
7
|
+
data.tar.gz: 3d343943e8b028680c22d11e3fc100a45d3b2d1379635f765663c770756eed3f0f59437e615406fc9a9e56f7952dbb8508debffa002d7d5a5a0c8cb1f735571b
|
data/README.md
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# JsonDataExtractor
|
2
2
|
|
3
|
-
NOTE: This is still a very early beta.
|
4
|
-
|
5
3
|
Transform JSON data structures with the help of a simple schema and JsonPath expressions.
|
6
4
|
Use the JsonDataExtractor gem to extract and modify data from complex JSON structures using a
|
7
5
|
straightforward syntax
|
data/json_data_extractor.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
lib = File.expand_path('../lib', __FILE__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
-
require 'src/version'
|
3
|
+
require 'json_data_extractor/version'
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = 'json_data_extractor'
|
@@ -31,6 +31,7 @@ transformations. The schema is defined as a simple Ruby hash that maps keys to p
|
|
31
31
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
32
32
|
spec.add_development_dependency 'pry'
|
33
33
|
spec.add_development_dependency 'amazing_print'
|
34
|
+
spec.add_development_dependency 'rubocop'
|
34
35
|
|
35
36
|
spec.add_dependency 'jsonpath'
|
36
37
|
end
|
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JsonDataExtractor
|
4
|
+
# handles the settings for JSON data extraction.
|
2
5
|
class Configuration
|
3
6
|
attr_accessor :strict_modifiers
|
4
7
|
|
@@ -6,4 +9,4 @@ class JsonDataExtractor
|
|
6
9
|
@strict_modifiers = true
|
7
10
|
end
|
8
11
|
end
|
9
|
-
end
|
12
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JsonDataExtractor
  # Does the main job of the gem: walks a schema and, for every key, pulls the
  # matching values out of the source document via JsonPath, then applies
  # defaults, modifiers and maps before shaping the final value.
  class Extractor
    attr_reader :data, :modifiers

    # @param json_data [Hash,String] source document; a Hash is serialised with
    #   +to_json+, anything else is stored untouched
    # @param modifiers [Hash] named modifiers (callables); keys are symbolised
    def initialize(json_data, modifiers = {})
      @data = json_data.is_a?(Hash) ? json_data.to_json : json_data
      @modifiers = modifiers.transform_keys(&:to_sym)
      @results = {}
    end

    # Registers (or overrides) a named modifier on this extractor.
    #
    # @param modifier_name [String, Symbol]
    # @return [Proc] the stored block
    def add_modifier(modifier_name, &block)
      modifiers[modifier_name.to_sym] = block
    end

    # Extracts every key described by +schema+ from the source document.
    #
    # @param schema [Hash] schema of the expected data mapping; each value is
    #   either a path or a Hash definition understood by SchemaElement
    # @return [Hash] extracted values keyed exactly like +schema+
    def extract(schema)
      # Start from an empty hash on every call: reusing the hash created in
      # #initialize leaked keys from an earlier schema into later results and
      # mutated hashes that had already been returned to the caller.
      @results = {}

      schema.each do |key, val|
        element = JsonDataExtractor::SchemaElement.new(val.is_a?(Hash) ? val : { path: val })
        @results[key] = extract_element(element)
      end

      @results
    end

    private

    # Produces the final value for a single schema element.
    #
    # @param element [JsonDataExtractor::SchemaElement]
    # @return [Object] the default value when nothing was found, otherwise the
    #   processed data shaped by #resolve_result_structure
    def extract_element(element)
      extracted_data = JsonPath.on(@data, element.path) if element.path

      # we either got nothing or the `path` was initially nil
      return element.fetch_default_value if extracted_data.nil? || extracted_data.empty?

      # check for nils and apply defaults if applicable
      extracted_data.map! { |item| item.nil? ? element.fetch_default_value : item }

      # modifiers run before maps, so maps see the already modified values
      extracted_data = apply_modifiers(extracted_data, element.modifiers) if element.modifiers.any?
      extracted_data = apply_maps(extracted_data, element.maps) if element.maps.any?

      resolve_result_structure(extracted_data, element)
    end

    # Decides whether the caller gets a collection or a single item.
    #
    # @param result [Array] processed values for one schema key
    # @param element [JsonDataExtractor::SchemaElement]
    # @return [Object]
    def resolve_result_structure(result, element)
      if element.nested
        # Process nested data
        result = extract_nested_data(result, element.nested)
        return element.array_type ? result : result.first
      end

      # Handle single-item extraction if not explicitly an array type or having multiple items
      return result.first if result.size == 1 && !element.array_type

      # Default case: simply return the result, assuming it's correctly formed
      result
    end

    # Runs a fresh extractor (sharing this extractor's named modifiers) over
    # every item with the nested schema.
    #
    # @param data [Object] one item or a collection of items
    # @param schema [Hash] nested schema
    # @return [Array<Hash>]
    def extract_nested_data(data, schema)
      Array(data).map do |item|
        self.class.new(item, modifiers).extract(schema)
      end
    end

    # Looks each value up through every map in order; the output of one map is
    # the lookup key for the next.
    #
    # @param data [Array]
    # @param maps [Array<Hash>]
    # @return [Array]
    def apply_maps(data, maps)
      data.map do |value|
        maps.reduce(value) { |mapped_value, map| map[mapped_value] }
      end
    end

    # Pipes each value through every modifier in order.
    #
    # @param data [Array]
    # @param modifiers [Array] modifiers taken from the schema element
    # @return [Array]
    def apply_modifiers(data, modifiers)
      data.map do |value|
        modifiers.reduce(value) do |modified_value, modifier|
          apply_single_modifier(modifier, modified_value)
        end
      end
    end

    # Resolution order: a callable modifier, then a named modifier registered
    # on this extractor, then a public method of the value itself.
    #
    # @param modifier [#call, Symbol]
    # @param value [Object]
    # @return [Object] the modified value, or +value+ unchanged when nothing
    #   applies and strict mode is off
    # @raise [ArgumentError] when nothing applies and strict_modifiers is on
    def apply_single_modifier(modifier, value)
      return modifier.call(value) if modifier.respond_to?(:call)
      return modifiers[modifier].call(value) if modifiers.key?(modifier)
      return value.public_send(modifier) if value.respond_to?(modifier)

      if JsonDataExtractor.configuration.strict_modifiers
        raise ArgumentError, "Modifier: <:#{modifier}> cannot be applied to value <#{value.inspect}>"
      end

      value
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JsonDataExtractor
  # Parses one schema entry (path, default, maps, modifiers, type, nested
  # schema) into a normalised, validated object.
  class SchemaElement
    attr_reader :path, :default_value, :maps, :modifiers, :array_type, :nested

    # @param schema_definition [Hash] keys may be Strings or Symbols; the hash
    #   itself is never modified
    # @raise [ArgumentError] when the definition is not a Hash, is empty, has
    #   neither :path nor :default, or contains an invalid map or modifier
    def initialize(schema_definition)
      definition = validate_schema_definition(schema_definition)

      @path = definition[:path]
      @default_value = definition[:default]
      @maps = fetch_maps(definition[:maps] || definition[:map])
      @modifiers = fetch_modifiers(definition[:modifiers] || definition[:modifier])
      @array_type = definition[:type] == 'array'
      @nested = definition[:schema]
    end

    # @return [Object] the configured default; a callable default is invoked
    #   on every call and its result returned
    def fetch_default_value
      @default_value.respond_to?(:call) ? @default_value.call : @default_value
    end

    private

    # Validates the raw definition and returns a symbol-keyed copy of it.
    # Working on a copy keeps the caller's hash untouched and lets frozen
    # definitions through (an in-place transform_keys! would raise FrozenError).
    #
    # @param schema_definition [Object]
    # @return [Hash] symbol-keyed copy of the definition
    # @raise [ArgumentError]
    def validate_schema_definition(schema_definition)
      raise ArgumentError, 'Schema definition must be a Hash' unless schema_definition.is_a?(Hash)
      raise ArgumentError, 'Schema definition must not be empty' if schema_definition.empty?

      normalized = schema_definition.transform_keys(&:to_sym)
      return normalized if normalized.key?(:path) || normalized.key?(:default)

      raise ArgumentError, 'Either path or default_value must be present in schema definition'
    end

    # Accepts nil, a single Hash or an Array of Hashes.
    #
    # @param map_value [Hash, Array<Hash>, nil]
    # @return [Array<Hash>]
    # @raise [ArgumentError] when any entry is not a Hash
    def fetch_maps(map_value)
      # Wrapping in a literal array first keeps a single Hash intact:
      # Array(hash) alone would turn it into key/value pairs.
      Array([map_value]).flatten.compact.map do |map|
        raise ArgumentError, "Invalid map: #{map.inspect}" unless map.is_a?(Hash)

        map
      end
    end

    # Normalises modifiers: Strings become Symbols, Symbols and callables are
    # kept as they are.
    #
    # @param modifier_value [Object, Array, nil] one modifier or a list
    # @return [Array<Symbol, #call>]
    # @raise [ArgumentError] for anything that is neither a name nor callable
    def fetch_modifiers(modifier_value)
      Array(modifier_value).map do |mod|
        case mod
        when Symbol, Proc then mod
        when String then mod.to_sym
        when Class then validate_modifier_class(mod)
        else
          # Any other callable (e.g. a Method object) is accepted as well,
          # because the extractor dispatches on respond_to?(:call).
          raise ArgumentError, "Invalid modifier: #{mod.inspect}" unless mod.respond_to?(:call)

          mod
        end
      end
    end

    # @param mod [Class]
    # @return [Class] the class itself when it responds to +call+
    # @raise [ArgumentError] otherwise
    def validate_modifier_class(mod)
      raise ArgumentError, "Modifier class must respond to call: #{mod.inspect}" unless mod.respond_to?(:call)

      mod
    end
  end
end
|
data/lib/json_data_extractor.rb
CHANGED
@@ -1,128 +1,21 @@
|
|
1
|
-
|
2
|
-
require 'src/configuration'
|
3
|
-
require 'jsonpath'
|
4
|
-
|
5
|
-
class JsonDataExtractor
|
6
|
-
attr_reader :data, :modifiers
|
7
|
-
|
8
|
-
def initialize(json_data, modifiers = {})
|
9
|
-
@data = json_data.is_a?(Hash) ? json_data.to_json : json_data # hopefully it's a string; maybe we'll add some validation here
|
10
|
-
@modifiers = modifiers.transform_keys(&:to_sym) # todo address this later
|
11
|
-
end
|
12
|
-
|
13
|
-
# @param modifier_name [String, Symbol]
|
14
|
-
def add_modifier(modifier_name, &block)
|
15
|
-
modifier_name = modifier_name.to_sym unless modifier_name.is_a?(Symbol)
|
16
|
-
modifiers[modifier_name] = block
|
17
|
-
end
|
18
|
-
|
19
|
-
# @param schema [Hash] schema of the expected data mapping
|
20
|
-
def extract(schema)
|
21
|
-
results = {}
|
22
|
-
schema.each do |key, val|
|
23
|
-
default_value = nil
|
24
|
-
if val.is_a?(Hash)
|
25
|
-
val.transform_keys!(&:to_sym)
|
26
|
-
path = val[:path]
|
27
|
-
default_value = val[:default]
|
28
|
-
maps = Array([val[:maps] || val[:map]]).flatten.compact.map do |map|
|
29
|
-
if map.is_a?(Hash)
|
30
|
-
map
|
31
|
-
else
|
32
|
-
raise ArgumentError, "Invalid map: #{map.inspect}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
modifiers = Array(val[:modifiers] || val[:modifier]).map do |mod|
|
36
|
-
case mod
|
37
|
-
when Symbol, Proc
|
38
|
-
mod
|
39
|
-
when Class
|
40
|
-
if mod.respond_to?(:call)
|
41
|
-
mod
|
42
|
-
else
|
43
|
-
raise ArgumentError, "Modifier class must respond to call: #{mod.inspect}"
|
44
|
-
end
|
45
|
-
when String
|
46
|
-
mod.to_sym
|
47
|
-
else
|
48
|
-
raise ArgumentError, "Invalid modifier: #{mod.inspect}"
|
49
|
-
end
|
50
|
-
end
|
51
|
-
array_type = 'array' == val[:type]
|
52
|
-
nested = val.dup.delete(:schema)
|
53
|
-
else
|
54
|
-
path = val
|
55
|
-
modifiers = []
|
56
|
-
maps = []
|
57
|
-
end
|
58
|
-
|
59
|
-
extracted_data = JsonPath.on(@data, path) if path
|
60
|
-
|
61
|
-
if extracted_data.nil? || extracted_data.empty?
|
62
|
-
results[key] = default_value.is_a?(Proc) ? default_value.call : (default_value || nil)
|
63
|
-
else
|
64
|
-
extracted_data.map! { |val| val.nil? ? default_value : val }
|
65
|
-
transformed_data = apply_modifiers(extracted_data, modifiers)
|
66
|
-
results[key] = apply_maps(transformed_data, maps)
|
67
|
-
|
68
|
-
if array_type && nested
|
69
|
-
results[key] = extract_nested_data(results[key], nested)
|
70
|
-
elsif !array_type && nested
|
71
|
-
results[key] = extract_nested_data(results[key], nested).first
|
72
|
-
elsif !array_type && 1 < results[key].size
|
73
|
-
# TODO: handle case where results[key] has more than one item
|
74
|
-
# do nothing for now
|
75
|
-
elsif array_type && !nested
|
76
|
-
# do nothing, it is already an array
|
77
|
-
else
|
78
|
-
results[key] = results[key].first
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
results
|
83
|
-
end
|
84
|
-
|
85
|
-
private
|
1
|
+
# frozen_string_literal: true
|
86
2
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
def apply_modifiers(data, modifiers)
|
102
|
-
data.map do |value|
|
103
|
-
modified_value = value
|
104
|
-
modifiers.each do |modifier|
|
105
|
-
modified_value = apply_single_modifier(modifier, modified_value)
|
106
|
-
end
|
107
|
-
modified_value
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def apply_single_modifier(modifier, value)
|
112
|
-
if modifier.respond_to?(:call) # Matches Proc, Lambda, Method, and callable objects
|
113
|
-
modifier.call(value)
|
114
|
-
elsif modifiers.key?(modifier)
|
115
|
-
modifiers[modifier].call(value)
|
116
|
-
elsif value.respond_to?(modifier)
|
117
|
-
value.send(modifier)
|
118
|
-
elsif self.class.configuration.strict_modifiers
|
119
|
-
raise ArgumentError, "Modifier: <:#{modifier}> cannot be applied to value <#{value.inspect}>"
|
120
|
-
else
|
121
|
-
value
|
3
|
+
require 'jsonpath'
|
4
|
+
require_relative 'json_data_extractor/version'
|
5
|
+
require_relative 'json_data_extractor/configuration'
|
6
|
+
require_relative 'json_data_extractor/extractor'
|
7
|
+
require_relative 'json_data_extractor/schema_element'
|
8
|
+
|
9
|
+
# Transform JSON data structures with the help of a simple schema and JsonPath expressions.
|
10
|
+
# Use the JsonDataExtractor gem to extract and modify data from complex JSON structures using a straightforward syntax
|
11
|
+
# and a range of built-in or custom modifiers.
|
12
|
+
module JsonDataExtractor
|
13
|
+
class << self
|
14
|
+
# Backward compatibility
|
15
|
+
def new(*args)
|
16
|
+
Extractor.new(*args)
|
122
17
|
end
|
123
|
-
end
|
124
18
|
|
125
|
-
class << self
|
126
19
|
def configuration
|
127
20
|
@configuration ||= Configuration.new
|
128
21
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_data_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.16
|
4
|
+
version: 0.1.01
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Max Buslaev
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: jsonpath
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,8 +130,10 @@ files:
|
|
116
130
|
- bin/setup
|
117
131
|
- json_data_extractor.gemspec
|
118
132
|
- lib/json_data_extractor.rb
|
119
|
-
- lib/src/configuration.rb
|
120
|
-
- lib/src/version.rb
|
133
|
+
- lib/json_data_extractor/configuration.rb
|
134
|
+
- lib/json_data_extractor/extractor.rb
|
135
|
+
- lib/json_data_extractor/schema_element.rb
|
136
|
+
- lib/json_data_extractor/version.rb
|
121
137
|
homepage: https://github.com/austerlitz/json_data_extractor
|
122
138
|
licenses:
|
123
139
|
- MIT
|
data/lib/src/version.rb
DELETED