decontaminate 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -0
- data/lib/decontaminate/decoder/child_node_proxy.rb +1 -1
- data/lib/decontaminate/decoder/scalar.rb +26 -15
- data/lib/decontaminate/decoder/tuple.rb +24 -0
- data/lib/decontaminate/decontaminator.rb +11 -4
- data/lib/decontaminate/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a19c276660c214e8f2048ee4b7368403c91ccb3
|
4
|
+
data.tar.gz: ef6acef5a324da44451220877c59b9db9ebd3432
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 893c0c7b972eafed7c7afda763602e3f3ef177da07350c39f888fd430aeda37828fa6ee5c5b72ba499279060261c6a9195648298bb69f0c13fc2a63c495dcaf9
|
7
|
+
data.tar.gz: e6ce0f2510bf0ca46eff1e5eff0ac772903b2f42cd7e0c8e57148a0719d04be5a5275aa694d539c8146ba8830b6b62ad01c970a2a6585dd4bb1a631ee942dc0e
|
data/README.md
CHANGED
@@ -68,6 +68,16 @@ Attributes can be specified with XPath syntax by prepending an `@` sign:
|
|
68
68
|
scalar '@id', type: :integer
|
69
69
|
```
|
70
70
|
|
71
|
+
#### Scalar Transformers
|
72
|
+
|
73
|
+
In addition to customizing the parser with the `type:` keyword argument, `scalar` can be given a block that performs a custom transformation of the value. The block is supplied with the value as parsed according to the provided type, and its return value is the result stored in the output.
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
scalar 'RatingPercentage', key: 'rating_ratio', type: :float do |percentage|
|
77
|
+
percentage && percentage / 100.0
|
78
|
+
end
|
79
|
+
```
|
80
|
+
|
71
81
|
### Nested Values
|
72
82
|
|
73
83
|
It's also possible to specify nested or even deeply nested hashes with the `hash` class method:
|
@@ -169,6 +179,22 @@ There are some special things to note in the above example:
|
|
169
179
|
hashes path: 'Article', key: 'articles' do; ...; end
|
170
180
|
```
|
171
181
|
|
182
|
+
### Tuple Data
|
183
|
+
|
184
|
+
Complementing `scalar` and `hash` is `tuple`, which accepts multiple paths and returns a fixed-length array containing an element for each path.
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
tuple ['Height/text()', 'Height/@units'], key: 'height_with_units'
|
188
|
+
```
|
189
|
+
|
190
|
+
The `tuple` method is most useful when supplied with a block, which works like `scalar`'s value transformer, but is supplied with one argument for each path. This allows a single output value to be computed from multiple values in the source document.
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
tuple ['Height/text()', 'Height/@units'], key: 'height_cm' do |height, units|
|
194
|
+
convert_units height.to_f, from: units, to: 'cm'
|
195
|
+
end
|
196
|
+
```
|
197
|
+
|
172
198
|
### Flattening nested data
|
173
199
|
|
174
200
|
Since source data is sometimes more nested than is desired, the `with` method is a helper for scoping decontamination directives to a given XML element without increasing the nesting depth of the resulting object. Like `hash`, it accepts an XPath and a block, but the attributes created from within the block will not be wrapped in a hash.
|
@@ -1,32 +1,30 @@
|
|
1
1
|
module Decontaminate
|
2
2
|
module Decoder
|
3
3
|
class Scalar
|
4
|
-
attr_reader :xpath, :type
|
4
|
+
attr_reader :xpath, :type, :transformer
|
5
5
|
|
6
|
-
def initialize(xpath, type)
|
6
|
+
def initialize(xpath, type, transformer)
|
7
7
|
@xpath = xpath
|
8
8
|
@type = type
|
9
|
+
@transformer = transformer
|
9
10
|
end
|
10
11
|
|
11
12
|
def decode(xml_node)
|
13
|
+
value = value_from_xml_node xml_node
|
14
|
+
value = transformer.call value if transformer
|
15
|
+
value
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def value_from_xml_node(xml_node)
|
12
21
|
child = xml_node && xml_node.at_xpath(xpath)
|
13
22
|
return unless child
|
14
|
-
text = coerce_node_to_text child
|
15
23
|
|
16
|
-
case type
|
17
|
-
when :string
|
18
|
-
text
|
19
|
-
when :integer
|
20
|
-
text.to_i
|
21
|
-
when :float
|
22
|
-
text.to_f
|
23
|
-
when :boolean
|
24
|
-
coerce_string_to_boolean text
|
25
|
-
end
|
24
|
+
text = coerce_node_to_text child
|
25
|
+
coerce_string_to_type text, type
|
26
26
|
end
|
27
27
|
|
28
|
-
private
|
29
|
-
|
30
28
|
def coerce_node_to_text(node)
|
31
29
|
if node.is_a?(Nokogiri::XML::Text) || node.is_a?(Nokogiri::XML::Attr)
|
32
30
|
node.to_s
|
@@ -35,6 +33,19 @@ module Decontaminate
|
|
35
33
|
end
|
36
34
|
end
|
37
35
|
|
36
|
+
def coerce_string_to_type(str, type)
|
37
|
+
case type
|
38
|
+
when :string
|
39
|
+
str
|
40
|
+
when :integer
|
41
|
+
str.to_i
|
42
|
+
when :float
|
43
|
+
str.to_f
|
44
|
+
when :boolean
|
45
|
+
coerce_string_to_boolean str
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
38
49
|
def coerce_string_to_boolean(str)
|
39
50
|
str == 'true' || str == '1'
|
40
51
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Decontaminate
|
2
|
+
module Decoder
|
3
|
+
class Tuple
|
4
|
+
attr_reader :xpaths, :decoder, :transformer
|
5
|
+
|
6
|
+
def initialize(xpaths, decoder, transformer)
|
7
|
+
@xpaths = xpaths
|
8
|
+
@decoder = decoder
|
9
|
+
@transformer = transformer
|
10
|
+
end
|
11
|
+
|
12
|
+
def decode(xml_node)
|
13
|
+
xml_nodes = xpaths.map { |xpath| xml_node && xml_node.at_xpath(xpath) }
|
14
|
+
tuple = xml_nodes.map do |element_node|
|
15
|
+
decoder.decode element_node
|
16
|
+
end
|
17
|
+
|
18
|
+
tuple = transformer.call(*tuple) if transformer
|
19
|
+
|
20
|
+
tuple
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -4,6 +4,7 @@ require_relative 'decoder/array'
|
|
4
4
|
require_relative 'decoder/child_node_proxy'
|
5
5
|
require_relative 'decoder/hash'
|
6
6
|
require_relative 'decoder/scalar'
|
7
|
+
require_relative 'decoder/tuple'
|
7
8
|
|
8
9
|
module Decontaminate
|
9
10
|
# Decontaminate::Decontaminator is the base class for creating XML extraction
|
@@ -21,20 +22,26 @@ module Decontaminate
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
24
|
-
def scalar(xpath, type: :string, key: infer_key(xpath))
|
25
|
-
add_decoder key, Decontaminate::Decoder::Scalar.new(xpath, type)
|
25
|
+
def scalar(xpath, type: :string, key: infer_key(xpath), &block)
|
26
|
+
add_decoder key, Decontaminate::Decoder::Scalar.new(xpath, type, block)
|
26
27
|
end
|
27
28
|
|
28
|
-
def scalars(xpath = nil, path: nil, type: :string, key: nil)
|
29
|
+
def scalars(xpath = nil, path: nil, type: :string, key: nil, &block)
|
29
30
|
resolved_path = path || infer_plural_path(xpath)
|
30
31
|
key ||= infer_key(path || xpath)
|
31
32
|
|
32
|
-
singular = Decontaminate::Decoder::Scalar.new('.', type)
|
33
|
+
singular = Decontaminate::Decoder::Scalar.new('.', type, block)
|
33
34
|
decoder = Decontaminate::Decoder::Array.new(resolved_path, singular)
|
34
35
|
|
35
36
|
add_decoder key, decoder
|
36
37
|
end
|
37
38
|
|
39
|
+
def tuple(paths, key:, type: :string, &block)
|
40
|
+
scalar = Decontaminate::Decoder::Scalar.new('.', type, nil)
|
41
|
+
decoder = Decontaminate::Decoder::Tuple.new(paths, scalar, block)
|
42
|
+
add_decoder key, decoder
|
43
|
+
end
|
44
|
+
|
38
45
|
def hash(xpath = '.', key: infer_key(xpath), &body)
|
39
46
|
decontaminator = Class.new(Decontaminate::Decontaminator, &body)
|
40
47
|
add_decoder key, Decontaminate::Decoder::Hash.new(xpath, decontaminator)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: decontaminate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexis King
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- lib/decontaminate/decoder/child_node_proxy.rb
|
145
145
|
- lib/decontaminate/decoder/hash.rb
|
146
146
|
- lib/decontaminate/decoder/scalar.rb
|
147
|
+
- lib/decontaminate/decoder/tuple.rb
|
147
148
|
- lib/decontaminate/decontaminator.rb
|
148
149
|
- lib/decontaminate/version.rb
|
149
150
|
homepage: https://github.com/lexi-lambda/decontaminate
|