json_data_extractor 0.1.02 → 0.1.04
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +47 -25
- data/json_data_extractor.gemspec +9 -8
- data/lib/json_data_extractor/extractor.rb +13 -4
- data/lib/json_data_extractor/version.rb +1 -1
- data/lib/json_data_extractor.rb +6 -0
- metadata +33 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78d2adf9786c1444ad0307cbc8b5a613be04a8b4ace56c5b1973f619fed58177
|
4
|
+
data.tar.gz: 78bdd81ae9c8b6bb68742b64c7d389fa4844fad20b498cd927ca3dd85a03e9a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac6bc721be1214813aecefc887cfddb7cfb6b55730506dec85fd853d35a01cd403fd1726f93bbabe7af8ed1ad25763df42e8b41ca8794e3a602f87aff040165e
|
7
|
+
data.tar.gz: 7c1ba2814904cba8d1041f652d313eb097c2c02e59ccd386abd95cbc64b32414ddfff8e2cf7dfb57c98045b1bb50631490ebde0adaede4b14b4455d1fe9b878e
|
data/README.md
CHANGED
@@ -146,54 +146,76 @@ schema = {
|
|
146
146
|
absent_value: nil
|
147
147
|
}
|
148
148
|
```
|
149
|
-
|
150
149
|
### Modifiers
|
151
150
|
|
152
|
-
Modifiers can be supplied on object creation and/or added later by calling `#add_modifier` method.
|
153
|
-
|
154
|
-
Modifiers
|
155
|
-
|
151
|
+
Modifiers can be supplied on object creation and/or added later by calling the `#add_modifier` method. Modifiers allow you to perform transformations on the extracted data before it is returned. They are useful for cleaning up data, formatting it, or applying any custom logic.
|
152
|
+
|
153
|
+
Modifiers can now be defined in several ways:
|
154
|
+
|
155
|
+
1. **By providing a symbol**: This symbol should correspond to the name of a method (e.g., `:to_i`) that will be called on each extracted value.
|
156
|
+
2. **By providing an anonymous lambda or block**: Use a lambda or block to define the transformation logic inline.
|
157
|
+
3. **By providing any callable object**: Any object that responds to `call` can be used as a modifier, for example a lambda, a proc, or an instance of a class that defines a `call` method. This makes it easy to reuse pre-defined modifier classes.
|
156
158
|
|
157
|
-
|
158
|
-
or lambda that should be called on each extracted value, or by providing an anonymous lambda. Here's
|
159
|
-
an example schema that uses both types of modifiers:
|
159
|
+
Here’s an example schema showcasing the use of modifiers:
|
160
160
|
|
161
161
|
```ruby
|
162
162
|
schema = {
|
163
|
-
name: '$.name',
|
164
|
-
age: { path: '$.age', modifier: :to_i },
|
165
|
-
email: {
|
163
|
+
name: '$.name', # Extract as-is
|
164
|
+
age: { path: '$.age', modifier: :to_i }, # Apply the `to_i` method
|
165
|
+
email: {
|
166
|
+
path: '$.contact.email',
|
167
|
+
modifiers: [
|
168
|
+
:downcase,
|
169
|
+
->(email) { email.gsub(/\s/, '') } # Lambda to remove whitespace
|
170
|
+
]
|
171
|
+
}
|
166
172
|
}
|
167
|
-
|
168
173
|
```
|
169
174
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
+
- **Name**: The value is simply extracted as-is.
|
176
|
+
- **Age**: The extracted value is converted to an integer using the `to_i` method.
|
177
|
+
- **Email**:
|
178
|
+
1. The value is transformed to lowercase using `downcase`.
|
179
|
+
2. Whitespace is removed using an anonymous lambda.
|
180
|
+
|
181
|
+
#### Defining Custom Modifiers
|
175
182
|
|
176
|
-
You can
|
177
|
-
JsonDataExtractor instance:
|
183
|
+
You can define your own custom modifiers using `add_modifier`. A modifier can be defined using a block, a lambda, or any other callable object (such as an instance of a class that implements `call`):
|
178
184
|
|
179
185
|
```ruby
|
186
|
+
# Using a block
|
180
187
|
extractor = JsonDataExtractor.new(json_data)
|
181
188
|
extractor.add_modifier(:remove_newlines) { |value| value.gsub("\n", '') }
|
182
189
|
|
190
|
+
# Using a class with a `call` method
|
191
|
+
class ReverseString
|
192
|
+
def call(value)
|
193
|
+
value.reverse
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
extractor.add_modifier(:reverse_string, ReverseString.new)
|
198
|
+
|
199
|
+
# Lambda example
|
200
|
+
capitalize = ->(value) { value.capitalize }
|
201
|
+
extractor.add_modifier(:capitalize, capitalize)
|
202
|
+
|
203
|
+
# Apply these modifiers in a schema
|
183
204
|
schema = {
|
184
205
|
name: 'name',
|
185
|
-
bio: { path: 'bio', modifiers: [:remove_newlines] }
|
206
|
+
bio: { path: 'bio', modifiers: [:remove_newlines, :reverse_string] },
|
207
|
+
category: { path: 'category', modifier: :capitalize }
|
186
208
|
}
|
187
209
|
|
210
|
+
# Extract data
|
188
211
|
results = extractor.extract(schema)
|
189
|
-
|
190
212
|
```
|
191
213
|
|
192
|
-
|
214
|
+
#### Modifier Order
|
215
|
+
|
216
|
+
Modifiers are applied in the order in which they are listed. Keep this in mind when chaining multiple modifiers for complex transformations. For example, if you want to first format a string and then clean it up (or vice versa), list the modifiers in that order.
|
193
217
|
|
194
|
-
|
195
|
-
schema. By default JDE raises an ArgumentError if a modifier is not applicable, but this behaviour
|
196
|
-
can be configured to ignore missing modifiers. See Configuration options for details
|
218
|
+
You can also configure the behavior of modifiers. By default, JDE raises an `ArgumentError` if a modifier cannot be applied to the extracted value. However, this strict behavior can be configured to ignore such errors. See the **Configuration** section for more details.
|
197
219
|
|
198
220
|
### Maps
|
199
221
|
|
data/json_data_extractor.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
lib = File.expand_path('
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
3
|
require 'json_data_extractor/version'
|
4
4
|
|
@@ -8,30 +8,31 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Max Buslaev']
|
9
9
|
spec.email = ['max@buslaev.net']
|
10
10
|
|
11
|
-
spec.summary =
|
11
|
+
spec.summary = 'Transform JSON data structures with the help of a simple schema and JsonPath expressions.
|
12
12
|
Use the JsonDataExtractor gem to extract and modify data from complex JSON structures using a straightforward syntax
|
13
|
-
and a range of built-in or custom modifiers.
|
14
|
-
spec.description =
|
13
|
+
and a range of built-in or custom modifiers.'
|
14
|
+
spec.description = 'json_data_extractor makes it easy to extract data from complex JSON structures,
|
15
15
|
such as API responses or configuration files, using a schema that defines the path to the data and any necessary
|
16
|
-
transformations. The schema is defined as a simple Ruby hash that maps keys to paths and optional modifiers.
|
16
|
+
transformations. The schema is defined as a simple Ruby hash that maps keys to paths and optional modifiers.'
|
17
17
|
spec.homepage = 'https://github.com/austerlitz/json_data_extractor'
|
18
18
|
spec.license = 'MIT'
|
19
19
|
|
20
20
|
# Specify which files should be added to the gem when it is released.
|
21
21
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
22
|
-
spec.files
|
22
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
23
23
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
24
24
|
end
|
25
25
|
spec.bindir = 'exe'
|
26
26
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
27
27
|
spec.require_paths = ['lib']
|
28
28
|
|
29
|
+
spec.add_development_dependency 'amazing_print'
|
29
30
|
spec.add_development_dependency 'bundler'
|
31
|
+
spec.add_development_dependency 'pry'
|
30
32
|
spec.add_development_dependency 'rake', '~> 10.0'
|
31
33
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
32
|
-
spec.add_development_dependency 'pry'
|
33
|
-
spec.add_development_dependency 'amazing_print'
|
34
34
|
spec.add_development_dependency 'rubocop'
|
35
35
|
|
36
36
|
spec.add_dependency 'jsonpath'
|
37
|
+
spec.add_dependency 'oj'
|
37
38
|
end
|
@@ -8,15 +8,21 @@ module JsonDataExtractor
|
|
8
8
|
# @param json_data [Hash,String]
|
9
9
|
# @param modifiers [Hash]
|
10
10
|
def initialize(json_data, modifiers = {})
|
11
|
-
@data = json_data.is_a?(Hash) ? json_data
|
11
|
+
@data = json_data.is_a?(Hash) ? Oj.dump(json_data, mode: :compat) : json_data
|
12
12
|
@modifiers = modifiers.transform_keys(&:to_sym)
|
13
13
|
@results = {}
|
14
|
+
@path_cache = {}
|
14
15
|
end
|
15
16
|
|
16
17
|
# @param modifier_name [String, Symbol]
|
17
|
-
|
18
|
+
# @param callable [#call, nil] Optional callable object
|
19
|
+
def add_modifier(modifier_name, callable = nil, &block)
|
18
20
|
modifier_name = modifier_name.to_sym unless modifier_name.is_a?(Symbol)
|
19
|
-
modifiers[modifier_name] = block
|
21
|
+
modifiers[modifier_name] = callable || block
|
22
|
+
|
23
|
+
return if modifiers[modifier_name].respond_to?(:call)
|
24
|
+
|
25
|
+
raise ArgumentError, 'Modifier must be a callable object or a block'
|
20
26
|
end
|
21
27
|
|
22
28
|
# @param schema [Hash] schema of the expected data mapping
|
@@ -24,7 +30,10 @@ module JsonDataExtractor
|
|
24
30
|
schema.each do |key, val|
|
25
31
|
element = JsonDataExtractor::SchemaElement.new(val.is_a?(Hash) ? val : { path: val })
|
26
32
|
|
27
|
-
|
33
|
+
path = element.path
|
34
|
+
json_path = path ? (@path_cache[path] ||= JsonPath.new(path)) : nil
|
35
|
+
|
36
|
+
extracted_data = json_path&.on(@data)
|
28
37
|
|
29
38
|
if extracted_data.nil? || extracted_data.empty?
|
30
39
|
# we either got nothing or the `path` was initially nil
|
data/lib/json_data_extractor.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'jsonpath'
|
4
|
+
require 'multi_json'
|
5
|
+
require 'oj'
|
4
6
|
require_relative 'json_data_extractor/version'
|
5
7
|
require_relative 'json_data_extractor/configuration'
|
6
8
|
require_relative 'json_data_extractor/extractor'
|
7
9
|
require_relative 'json_data_extractor/schema_element'
|
8
10
|
|
11
|
+
# Set MultiJson to use Oj for performance
|
12
|
+
MultiJson.use(:oj)
|
13
|
+
Oj.default_options = { mode: :compat }
|
14
|
+
|
9
15
|
# Transform JSON data structures with the help of a simple schema and JsonPath expressions.
|
10
16
|
# Use the JsonDataExtractor gem to extract and modify data from complex JSON structures using a straightforward syntax
|
11
17
|
# and a range of built-in or custom modifiers.
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_data_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.04
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Max Buslaev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-04-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: amazing_print
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -25,49 +39,49 @@ dependencies:
|
|
25
39
|
- !ruby/object:Gem::Version
|
26
40
|
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
42
|
+
name: pry
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
|
-
- - "
|
45
|
+
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
47
|
+
version: '0'
|
34
48
|
type: :development
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
|
-
- - "
|
52
|
+
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
56
|
+
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - "~>"
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
61
|
+
version: '10.0'
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - "~>"
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
68
|
+
version: '10.0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
70
|
+
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
|
-
- - "
|
73
|
+
- - "~>"
|
60
74
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
75
|
+
version: '3.0'
|
62
76
|
type: :development
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
66
|
-
- - "
|
80
|
+
- - "~>"
|
67
81
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
82
|
+
version: '3.0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
84
|
+
name: rubocop
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
72
86
|
requirements:
|
73
87
|
- - ">="
|
@@ -81,13 +95,13 @@ dependencies:
|
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
98
|
+
name: jsonpath
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
101
|
- - ">="
|
88
102
|
- !ruby/object:Gem::Version
|
89
103
|
version: '0'
|
90
|
-
type: :
|
104
|
+
type: :runtime
|
91
105
|
prerelease: false
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
93
107
|
requirements:
|
@@ -95,7 +109,7 @@ dependencies:
|
|
95
109
|
- !ruby/object:Gem::Version
|
96
110
|
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
112
|
+
name: oj
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
100
114
|
requirements:
|
101
115
|
- - ">="
|