xmlmunger 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +6 -0
- data/Gemfile +4 -0
- data/LICENSE +7 -0
- data/README.md +7 -0
- data/Rakefile +9 -0
- data/lib/xmlmunger.rb +36 -0
- data/lib/xmlmunger/list_heuristics.rb +242 -0
- data/lib/xmlmunger/nested_hash.rb +90 -0
- data/lib/xmlmunger/nori_constants.rb +10 -0
- data/lib/xmlmunger/parser.rb +70 -0
- data/lib/xmlmunger/version.rb +3 -0
- data/test/test_nested_hash.rb +36 -0
- data/test/test_parser.rb +57 -0
- data/test/test_xmlmunger.rb +7 -0
- data/xmlmunger.gemspec +36 -0
- metadata +154 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bd96e736bb5dd412ad3f3557fd21e379bc5247d5
|
4
|
+
data.tar.gz: 15e5b2ea8a778a40fec73b4312d0741f415f2e16
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a8a24f4d0e19944332a4c2c3a61c3942243dc2d37f188ce97058c1fcdc895b502ce89f0f15d60ed30f959b09dcca2e7f7bdb358f9faa4968ac9a01324a66ea6e
|
7
|
+
data.tar.gz: 5744a2c806e068c97b749e45c3ffbcdcd10bcf32a8115c7fef7d9cc9e1d6188e9516f8a4c8365c8a141f284915ae3c9ace7d9ca0f49f3087df6e7f932545aa5d
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
Copyright (c) 2010-2014 Andrew Benton.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
xmlmunger
|
2
|
+
=========
|
3
|
+
|
4
|
+
Convert XML files into flat hashes with automatic naming via nested paths
|
5
|
+
|
6
|
+
[![Build Status](https://travis-ci.org/robertzk/xmlmunger.svg?branch=master)](https://travis-ci.org/robertzk/xmlmunger)
|
7
|
+
[![Code Climate](https://codeclimate.com/github/robertzk/xmlmunger.png)](https://codeclimate.com/github/robertzk/xmlmunger)
|
data/Rakefile
ADDED
data/lib/xmlmunger.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'xmlmunger/version'
|
2
|
+
require 'xmlmunger/nori_constants'
|
3
|
+
require 'xmlmunger/nested_hash'
|
4
|
+
require 'xmlmunger/list_heuristics'
|
5
|
+
require 'xmlmunger/parser'
|
6
|
+
|
7
|
+
module XMLMunger

  # Mix XMLMunger into test-case base classes so its constants can be
  # referenced without the namespace prefix inside test bodies.
  #
  # NOTE(review): these definitions are nested inside XMLMunger, so they
  # create XMLMunger::Test::Unit::TestCase, XMLMunger::MiniTest::Unit::TestCase
  # and XMLMunger::RSpec::Core::ExampleGroup rather than reopening the
  # top-level classes of those libraries -- confirm whether that is intended.

  # Test::Unit
  module Test
    module Unit
      class TestCase; include ::XMLMunger; end
    end
  end

  # MiniTest
  module MiniTest
    class Unit
      class TestCase; include ::XMLMunger; end
    end
  end

  # RSpec
  module RSpec
    module Core
      class ExampleGroup; include ::XMLMunger; end
    end
  end

end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'date'
require 'descriptive_statistics/safe'
|
2
|
+
|
3
|
+
module XMLMunger
  class StateError < StandardError; end

  # Reduces a list of values to a flat "variable hash" of summary
  # features. The list is classified (boolean, singleton, days, numeric,
  # strings, ...) and the matching extract_* method produces the features.
  #
  # Lists of hashes that all share the same keys are summarised per key;
  # the key becomes a prefix of the resulting variable names.
  class ListHeuristics

    # Type predicates tried, in priority order, by #identity. The type name
    # is the predicate without its trailing "?".
    TYPES = [:boolean?, :singleton?, :days?, :numeric?, :strings?, :notype?].freeze

    # @param list [Array] the values to analyse
    # @raise [ArgumentError] if +list+ is not an Array
    def initialize(list)
      raise ArgumentError, "Argument must be an array" unless list.is_a?(Array)
      @list = list
    end

    # The three size predicates are computed directly: `||=` cannot cache a
    # false result and would pin a stale true one.

    # @return [Boolean] true when the list has no elements
    def empty?
      @list.empty?
    end

    # @return [Boolean] true when the list has exactly one element
    def singleton?
      @list.size == 1
    end

    # @return [Boolean] true when the list has more than one element
    def multiple?
      @list.size > 1
    end

    # Most specific class or module shared by every element.
    #
    # @param of [Array, nil] values to inspect; defaults to the wrapped list
    # @return [Module, nil] first common ancestor, or nil for an empty list
    def common_type(of = nil)
      @common_types ||= {}
      of ||= @list
      # reduce(:&) yields nil for an empty list; fall back to [] so that
      # #first returns nil instead of raising NoMethodError.
      @common_types[of] ||= (of.map { |x| x.class.ancestors }.reduce(:&) || []).first
    end

    # Types that #to_variable_hash will not extract. The returned array is
    # the live list, so callers may mutate it.
    #
    # @return [Array<Symbol>]
    def skipped_types
      @skipped_types ||= [:strings]
    end

    # @return [Boolean] true when the list holds several hashes whose keys
    #   are identical (including order) to those of the first hash
    def shared_key_hashes?
      @shared_key_hashes ||=
        if multiple? && common_type == Hash
          keys = @list.first.keys
          @list.drop(1).all? { |hash| hash.keys == keys }
        else
          false
        end
    end

    # Summarise the list.
    #
    # @return [Hash] {} for an empty list, {nil => element} for a
    #   one-element list, otherwise a hash of variable name => value
    def to_variable_hash
      return {} if empty?
      return { nil => @list.first } if singleton?
      typed =
        if shared_key_hashes?
          classify(merge_hashes(@list))
        else
          type, data = identity(@list)
          { nil => { type: type, data: data } }
        end
      apply(typed)
    end

    private

    # Call the extract_<type> function for every entry of +input+ and
    # combine the results into one variable hash. Variable names are
    # "<var>_<key>" with empty parts dropped.
    def apply(input)
      filter_types(input).each_with_object({}) do |(var, with), out|
        extracted = send("extract_#{with[:type]}".to_sym, with[:data])
        extracted.each do |key, val|
          name = [var, key].map(&:to_s).reject { |s| s.empty? }.join('_')
          out[name] = val
        end
      end
    end

    # Drop entries whose type is skipped by the caller (see #skipped_types)
    # or has no extraction function (:notype, :other).
    def filter_types(input)
      ignored = skipped_types + [:notype, :other]
      input.reject { |_var, entry| ignored.include?(entry[:type]) }
    end

    # Merge multiple hashes with the same keys; the resulting values are
    # arrays of the input values, in list order.
    def merge_hashes(hashes)
      container = hashes.first.keys.each_with_object({}) { |k, acc| acc[k] = [] }
      hashes.each { |hash| hash.each { |k, v| container[k] << v } }
      container
    end

    # Discover type information for each key/value pair of the input hash.
    def classify(hash)
      hash.each_with_object({}) do |(var, vals), acc|
        type, data = identity(vals)
        acc[var] = { type: type, data: data }
      end
    end

    # Assign the list of values to the first matching type in TYPES and
    # return [type, transformed_values]; [:other, vals] when none match.
    def identity(vals, memo = {})
      TYPES.each do |predicate|
        next unless compute(predicate, vals, memo)
        type = predicate.to_s.chomp('?').to_sym
        return type, compute(type, vals, memo)
      end
      return :other, vals
    end

    # Memoized computations for #identity. Keys ending in "?" are the type
    # predicates; the remaining keys are the matching value transformations.
    def compute(what, vals, store)
      store[what] ||= case what
        # predicates
        when :singleton?
          compute(:unique, vals, store).count == 1
        when :boolean?
          all_type?(vals, TrueClass, FalseClass)
        when :days?
          all_type?(vals, Date, Time)
        when :numeric?
          compute(:numeric, vals, store).all?
        when :strings?
          common_type(vals) <= String
        when :notype?
          common_type(vals) == Object
        # transformations
        when :singleton
          compute(:unique, vals, store).first
        when :unique
          vals.uniq
        when :numeric
          vals.map { |x| to_numeric(x) }
        when :days
          # days since the Unix epoch
          epoch = Date.new(1970, 1, 1)
          vals.map { |x| (x.to_date - epoch).to_i }
        else
          vals
        end
    end

    # Data Extraction Functions

    def extract_singleton(item)
      { nil => item }
    end

    # has: bit 1 set if any false was seen, bit 2 set if any true was seen.
    # vec: number of trues minus number of falses.
    def extract_boolean(vals)
      has, vec = 0, 0
      vals.each do |bool|
        case bool
        when FalseClass
          has |= 1
          vec -= 1
        when TrueClass
          has |= 2
          vec += 1
        end
      end
      { has: has, vec: vec }
    end

    # Occurrence count per distinct item, keyed by #var_name_for_string.
    def extract_strings(items)
      counts = Hash.new(0)
      items.each { |i| counts[i] += 1 }
      counts.each_with_object({}) do |(item, count), acc|
        acc[var_name_for_string(item)] = count
      end
    end

    def extract_days(days)
      difference_comps(days.sort)
    end

    def extract_numeric(numbers)
      # consecutive runs and all-large values are treated as junk data
      return {} if is_sequence?(numbers, 3) || all_large?(numbers)
      difference_comps(numbers)
    end

    # Utility Functions

    # Convert to Integer when the value is integral, to Float otherwise;
    # nil when the value is not numeric at all.
    def to_numeric(anything)
      float = Float(anything)
      int = begin
        Integer(anything)
      rescue StandardError
        float
      end
      float == int ? int : float
    rescue StandardError
      nil
    end

    # True when +nums+ is a run of consecutive ascending integers
    # (optionally of at least +min_length+ elements).
    #
    # Neighbours are compared pairwise instead of materialising
    # (min..max).to_a, so floats and empty lists no longer raise, and a
    # sparse list of huge integers no longer allocates the whole range.
    def is_sequence?(nums, min_length = nil)
      return false if nums.empty?
      return false unless min_length.nil? || nums.count >= min_length
      return false unless nums.all? { |n| n.is_a?(Integer) }
      nums.each_cons(2).all? { |a, b| b == a + 1 }
    end

    def all_large?(nums)
      nums.all? { |n| n > 1_000_000 }
    end

    def all_type?(objects, *types)
      objects.all? { |obj| types.any? { |c| obj.is_a?(c) } }
    end

    # Length, min and max of +data+, plus min/max/median of the differences
    # between neighbouring elements when there are at least two.
    # Note that :avg_diff holds the median of the differences.
    def difference_comps(data)
      stats = {}
      stats[:length] = data.count
      stats[:min] = data.min
      stats[:max] = data.max
      if stats[:length] > 1
        diffs = data.each_cons(2).map { |a, b| b - a }
        diffs.extend(DescriptiveStatistics)
        stats[:min_diff] = diffs.min
        stats[:max_diff] = diffs.max
        stats[:avg_diff] = diffs.median
      end
      stats
    end

    # "is_" followed by "nil" or the stripped, downcased key with
    # whitespace runs replaced by underscores.
    def var_name_for_string(key)
      suffix = key.nil? ? "nil" : key.to_s.strip.gsub(/\s+/, "_").downcase
      "is_" + suffix
    end

  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module XMLMunger

  # Hash subclass with helpers for walking arbitrarily nested hashes.
  #
  # A value counts as "nested" when it is a Hash of any kind, including
  # NestedHash itself and other Hash subclasses. Everything else is a
  # terminal value. (Comparing class names as strings never matched
  # "XMLMunger::NestedHash", so nested NestedHash values used to be
  # treated as terminal.)
  class NestedHash < Hash
    #######
    # Don't lose class type: any inherited Hash method that returns a
    # plain Hash has its result re-wrapped as a NestedHash.
    #######
    (Hash.instance_methods - Object.instance_methods).each do |m|
      define_method(m) { |*args, &block|
        result = super(*args, &block)
        result.class == Hash ? NestedHash[result] : result
      }
    end

    #######
    # Map terminal hash values in a nested hash
    # > nh = NestedHash[a: { c: 2 }, b: 1]
    #  => { a: { c: 2 }, b: 1 }
    # > nh.transform { |value| value + 1 }
    #  => { a: { c: 3 }, b: 2 }
    #######
    def transform(&block)
      NestedHash[map { |key, value|
        [key, value.is_a?(Hash) ? NestedHash[value].transform(&block) : yield(value)]
      }]
    end

    #######
    # Map terminal hash values in a nested hash *with* route tracking.
    # The block receives a fresh array of the keys leading to the value.
    # > nh = NestedHash[a: { c: 2 }, b: 1]
    #  => { a: { c: 2 }, b: 1 }
    # > nh.transform_with_route { |route, value| "#{route.join(' -> ')} -> #{value}" }
    #  => { a: { c: "a -> c -> 2" }, b: "b -> 1" }
    #######
    def transform_with_route(route = [], &block)
      NestedHash[map { |key, value|
        path = route + [key]
        [key, if value.is_a?(Hash)
                NestedHash[value].transform_with_route(path, &block)
              else
                yield(path, value)
              end]
      }]
    end

    #######
    # Map terminal hash values in a nested hash to an array
    # > nh = NestedHash[a: { c: 2 }, b: 1]
    #  => { a: { c: 2 }, b: 1 }
    # > nh.map_values
    #  => [2, 1]
    # > nh.map_values { |x| x + 1 }
    #  => [3, 2]
    #######
    def map_values(&block)
      flat_map { |_key, value|
        if value.is_a?(Hash)
          NestedHash[value].map_values(&block)
        else
          [block_given? ? yield(value) : value]
        end
      }
    end

    #######
    # Map terminal hash values in a nested hash to an array *with* route
    # tracking. Without a block each entry is the route followed by the value.
    # > nh = NestedHash[a: { c: 2 }, b: 1]
    #  => { a: { c: 2 }, b: 1 }
    # > nh.map_values_with_route { |route, value| route << value**value }
    #  => [[:a, :c, 4], [:b, 1]]
    #######
    def map_values_with_route(route = [], &block)
      flat_map { |key, value|
        # fresh array per entry, so a block may safely mutate its route
        path = route + [key]
        if value.is_a?(Hash)
          NestedHash[value].map_values_with_route(path, &block)
        else
          [block_given? ? yield(path, value) : path + [value]]
        end
      }
    end
  end

end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'nori'
|
2
|
+
|
3
|
+
module XMLMunger

  # Flattens a parsed XML document into a single-level hash whose keys are
  # built from the chain of keys leading to each terminal value.
  class Parser
    attr_accessor :xml

    # xml          - a Hash (used as is) or a String, which is parsed with
    #                Nori using NoriConstants.default_options merged with
    #                nori_options.
    # nori_options - extra options for Nori; only used for String input.
    #
    # Raises ArgumentError when xml is neither a Hash nor a String.
    def initialize(xml, nori_options = {})
      @xml =
        if xml.is_a?(Hash)
          xml
        elsif xml.is_a?(String)
          ::Nori.new(NoriConstants.default_options.merge(nori_options)).parse(xml)
        else
          raise ArgumentError, "Argument xml should be a Hash or String (XML file)."
        end
    end

    # Flatten the document.
    #
    # options - Hash overriding the values of #default_options.
    #
    # Returns a NestedHash of flat key => value.
    # Raises TypeError when options is not a Hash.
    def run(options = {})
      raise TypeError, "options argument must be a hash" unless options.is_a?(Hash)
      options = default_options.merge(options)

      # descend to the starting point named by the :filter key path
      start = options[:filter].inject(xml) { |node, key| node[key] }

      # first pass: collect [route, value] pairs, dropping unwanted entries
      pairs = NestedHash[start].map_values_with_route do |route, value|
        if !options[:attributes] && route.any? { |r| r =~ /@/ }
          nil # attribute, and attributes are switched off
        elsif options[:prohibited_types].any? { |type| value.is_a?(type) }
          nil # prohibited type
        else
          # lists are replaced by the variable hash extracted from them
          [route, value.is_a?(Array) ? ListHeuristics.new(value).to_variable_hash : value]
        end
      end
      pairs = pairs.compact

      # second pass: list data may have produced hashes, so walk again and
      # build the final variable name for every terminal value
      flat = NestedHash[pairs].map_values_with_route do |route, value|
        [make_key(route, options), value]
      end
      NestedHash[flat]
    end

    protected

    # Option values used by #run unless overridden by the caller.
    def default_options
      {
        prefix: '',
        filter: [],
        sep: '_',
        strip_chars: '',   # Whether to strip any characters from route names
        attributes: true,  # Whether or not to parse XML tag attributes
        prohibited_types: [Array]
      }
    end

    private

    # Build a variable name from a (possibly nested) route: remove the
    # :strip_chars characters from every part, drop empty parts, join with
    # :sep and put :prefix in front.
    def make_key(route, options)
      parts = route.flatten.map { |s| s.to_s.tr(options[:strip_chars], '') }
      parts.reject { |s| s.empty? }.join(options[:sep]).prepend(options[:prefix])
    end

  end

end
|
70
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'xmlmunger'
|
3
|
+
|
4
|
+
# Exercises the traversal helpers of XMLMunger::NestedHash.
#
# assert_equal takes the expected value first and the actual value second,
# so that failure messages label the two correctly.
class NestedHashTest < Test::Unit::TestCase

  # Fixture: two branches of different depth.
  def nested_hash
    ::XMLMunger::NestedHash[a: {b: 1}, c: { d: {e: 2} }]
  end

  def test_transform_nested_hash
    nested_hash2 = nested_hash.transform { |value| value + 1 }
    assert_equal 2, nested_hash2[:a][:b]
    assert_equal 3, nested_hash2[:c][:d][:e]
  end

  def test_transform_nested_hash_with_route
    nested_hash2 = nested_hash.transform_with_route { |route, value|
      route.concat [value]
    }
    assert_equal [:a, :b, 1], nested_hash2[:a][:b]
    assert_equal [:c, :d, :e, 2], nested_hash2[:c][:d][:e]
  end

  def test_map_nested_hash
    nested_hash2 = nested_hash.map_values { |value| value }
    assert_equal [1, 2], nested_hash2
  end

  def test_map_nested_hash_with_route
    nested_hash2 = nested_hash.map_values_with_route { |route, value|
      route.concat [value]
    }
    assert_equal [[:a, :b, 1], [:c, :d, :e, 2]], nested_hash2
  end

end
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'xmlmunger'
|
3
|
+
|
4
|
+
# Checks each option accepted by XMLMunger::Parser#run.
class ParserTest < Test::Unit::TestCase

  SIMPLE_XML = '<x><y>1</y><z>2</z></x>'

  # Parse +xml+ with a fresh parser and flatten it using +options+.
  def munge(xml, options = {})
    ::XMLMunger::Parser.new(xml).run(options)
  end

  def test_simple_nested_parse
    assert_equal({"x_y"=>"1", "x_z"=>"2"}, munge(SIMPLE_XML))
  end

  def test_filter_option
    assert_equal({"y"=>"1", "z"=>"2"}, munge(SIMPLE_XML, filter: [:x]))
  end

  def test_prefix_option
    assert_equal({"test_x_y"=>"1", "test_x_z"=>"2"}, munge(SIMPLE_XML, prefix: 'test_'))
  end

  def test_sep_option
    assert_equal({"x-y"=>"1", "x-z"=>"2"}, munge(SIMPLE_XML, sep: '-'))
  end

  def test_prohibited_types_option
    xml = '<x><y><z>1</z><z>1</z></y><z>2</z></x>'
    assert_equal({"x_z"=>"2"}, munge(xml, prohibited_types: [Array]))
  end

  def test_strip_chars_option
    assert_equal({"xy_zw" => "2"}, munge('<x_y><z_w>2</z_w></x_y>', strip_chars: '_'))
  end

  # One parser instance is reused for all four runs.
  def test_attributes_option
    parser = ::XMLMunger::Parser.new('<x a="1" />')
    assert_equal({"x_@a" => "1"}, parser.run(attributes: true))
    assert_equal({}, parser.run(attributes: false))
    assert_equal({"x_a" => "1"}, parser.run(attributes: true, strip_chars: '@'))
    assert_equal({}, parser.run(attributes: false, strip_chars: '@'))
  end

end
|
57
|
+
|
data/xmlmunger.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Date.today is used below, so load the date library explicitly.
require 'date'
# Resolve the version file relative to this gemspec, not the current
# working directory, so the spec can be loaded from anywhere.
require File.expand_path('../lib/xmlmunger/version', __FILE__)

Gem::Specification.new do |s|
  s.name        = 'xmlmunger'
  s.version     = XMLMunger::VERSION
  s.date        = Date.today.to_s
  s.summary     = 'Convert XML files into flat hashes with automatic naming via nested paths'
  s.description = %(XML files typically come in nested structures. For data extraction purposes,
    we frequently wish to have a flat hash instead. The naming then becomes tricky, because
    there can be collision in the terminal nodes. However, if we use the chain of parent tags
    joined with an underscore, this provides a unique name for every data point in the XML file.
    The goal of this package is to make it very simple to convert XML files into flat hashes.
  ).strip.gsub(/\s+/, " ")
  s.authors     = ["Robert Krzyzanowski", "David Feldman"]
  s.email       = 'rkrzyzanowski@gmail.com'
  s.license     = 'MIT'
  # homepage used to be assigned twice; only this final value took effect
  s.homepage    = 'https://github.com/robertzk/xmlmunger'

  s.platform      = Gem::Platform::RUBY
  s.require_paths = %w[lib]
  # file lists come from git, so the spec must be built inside the repository
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

  s.add_dependency 'nokogiri', '>= 1.6.1'
  s.add_dependency 'nori', '>= 2.3.0'
  s.add_dependency 'descriptive_statistics', '>= 1.1.5'

  s.add_development_dependency 'rake', '>= 0.9.0'
  s.add_development_dependency 'test-unit', '>= 1.2.3'
  s.add_development_dependency 'codeclimate-test-reporter'

  s.extra_rdoc_files = ['README.md', 'LICENSE']
end
|
36
|
+
|
metadata
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xmlmunger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Robert Krzyzanowski
|
8
|
+
- David Feldman
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-07-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: 1.6.1
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - '>='
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: 1.6.1
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: nori
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 2.3.0
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: 2.3.0
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: descriptive_statistics
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - '>='
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 1.1.5
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 1.1.5
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 0.9.0
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.9.0
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: test-unit
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 1.2.3
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 1.2.3
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: codeclimate-test-reporter
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
description: XML files typically come in nested structures. For data extraction purposes,
|
99
|
+
we frequently wish to have a flat hash instead. The naming then becomes tricky,
|
100
|
+
because there can be collision in the terminal nodes. However, if we use the chain
|
101
|
+
of parent tags joined with an underscore, this provides a unique name for every
|
102
|
+
data point in the XML file. The goal of this package is to make it very simple to
|
103
|
+
convert XML files into flat hashes.
|
104
|
+
email: rkrzyzanowski@gmail.com
|
105
|
+
executables: []
|
106
|
+
extensions: []
|
107
|
+
extra_rdoc_files:
|
108
|
+
- README.md
|
109
|
+
- LICENSE
|
110
|
+
files:
|
111
|
+
- .gitignore
|
112
|
+
- .travis.yml
|
113
|
+
- Gemfile
|
114
|
+
- LICENSE
|
115
|
+
- README.md
|
116
|
+
- Rakefile
|
117
|
+
- lib/xmlmunger.rb
|
118
|
+
- lib/xmlmunger/list_heuristics.rb
|
119
|
+
- lib/xmlmunger/nested_hash.rb
|
120
|
+
- lib/xmlmunger/nori_constants.rb
|
121
|
+
- lib/xmlmunger/parser.rb
|
122
|
+
- lib/xmlmunger/version.rb
|
123
|
+
- test/test_nested_hash.rb
|
124
|
+
- test/test_parser.rb
|
125
|
+
- test/test_xmlmunger.rb
|
126
|
+
- xmlmunger.gemspec
|
127
|
+
homepage: https://github.com/robertzk/xmlmunger
|
128
|
+
licenses:
|
129
|
+
- MIT
|
130
|
+
metadata: {}
|
131
|
+
post_install_message:
|
132
|
+
rdoc_options: []
|
133
|
+
require_paths:
|
134
|
+
- lib
|
135
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - '>='
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - '>='
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
requirements: []
|
146
|
+
rubyforge_project:
|
147
|
+
rubygems_version: 2.1.11
|
148
|
+
signing_key:
|
149
|
+
specification_version: 4
|
150
|
+
summary: Convert XML files into flat hashes with automatic naming via nested paths
|
151
|
+
test_files:
|
152
|
+
- test/test_nested_hash.rb
|
153
|
+
- test/test_parser.rb
|
154
|
+
- test/test_xmlmunger.rb
|