hash_engine 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +52 -0
- data/Guardfile +24 -0
- data/README.txt +320 -0
- data/Rakefile +3 -0
- data/hash_engine.gemspec +21 -0
- data/lib/hash_engine.rb +20 -0
- data/lib/hash_engine/actions.rb +82 -0
- data/lib/hash_engine/add_error.rb +13 -0
- data/lib/hash_engine/conditionals.rb +32 -0
- data/lib/hash_engine/csv_parse.rb +49 -0
- data/lib/hash_engine/extract.rb +139 -0
- data/lib/hash_engine/fetchers.rb +27 -0
- data/lib/hash_engine/format.rb +62 -0
- data/lib/hash_engine/transform.rb +196 -0
- data/spec/hash_engine/actions_spec.rb +146 -0
- data/spec/hash_engine/conditional_spec.rb +28 -0
- data/spec/hash_engine/csv_parse_spec.rb +42 -0
- data/spec/hash_engine/csv_transform_spec.rb +90 -0
- data/spec/hash_engine/ds_spec.rb +178 -0
- data/spec/hash_engine/extract_spec.rb +144 -0
- data/spec/hash_engine/fetchers_spec.rb +30 -0
- data/spec/hash_engine/format_spec.rb +55 -0
- data/spec/hash_engine/transform_spec.rb +365 -0
- data/spec/hash_engine_spec.rb +0 -0
- data/spec/spec_helper.rb +12 -0
- metadata +82 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 597555b81a0c5cb47e8023594f85354b42ef2eae
|
4
|
+
data.tar.gz: fb910967b4d540b4d13b350fb9c48bc973107af9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2c3a075b6a8d6639fe94c5e0bb1df8501806a269e270c4a6cf7bba3c8559088685ad4daaba324c7f4dcaab8b6b54049908bbf29f2a35c85925a172c701e00587
|
7
|
+
data.tar.gz: fbe1af8ef29f54bb201db7724837ef7e6e201b285de818cf07aecb1dfb35067b8cbcaf2ecbd22e614ef902d3cacfa29cd5e946e9a3d3a6a8699ea88b861d49d3
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
coderay (1.0.9)
|
5
|
+
diff-lcs (1.2.4)
|
6
|
+
ffi (1.9.0)
|
7
|
+
formatador (0.2.4)
|
8
|
+
guard (1.8.3)
|
9
|
+
formatador (>= 0.2.4)
|
10
|
+
listen (~> 1.3)
|
11
|
+
lumberjack (>= 1.0.2)
|
12
|
+
pry (>= 0.9.10)
|
13
|
+
thor (>= 0.14.6)
|
14
|
+
guard-rspec (3.1.0)
|
15
|
+
guard (>= 1.8)
|
16
|
+
rspec (~> 2.13)
|
17
|
+
listen (1.3.1)
|
18
|
+
rb-fsevent (>= 0.9.3)
|
19
|
+
rb-inotify (>= 0.9)
|
20
|
+
rb-kqueue (>= 0.2)
|
21
|
+
lumberjack (1.0.4)
|
22
|
+
method_source (0.8.2)
|
23
|
+
pry (0.9.12.2)
|
24
|
+
coderay (~> 1.0.5)
|
25
|
+
method_source (~> 0.8)
|
26
|
+
slop (~> 3.4)
|
27
|
+
rake (10.1.0)
|
28
|
+
rb-fsevent (0.9.3)
|
29
|
+
rb-inotify (0.9.2)
|
30
|
+
ffi (>= 0.5.0)
|
31
|
+
rb-kqueue (0.2.0)
|
32
|
+
ffi (>= 0.5.0)
|
33
|
+
rspec (2.14.1)
|
34
|
+
rspec-core (~> 2.14.0)
|
35
|
+
rspec-expectations (~> 2.14.0)
|
36
|
+
rspec-mocks (~> 2.14.0)
|
37
|
+
rspec-core (2.14.6)
|
38
|
+
rspec-expectations (2.14.3)
|
39
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
40
|
+
rspec-mocks (2.14.4)
|
41
|
+
slop (3.4.6)
|
42
|
+
thor (0.18.1)
|
43
|
+
|
44
|
+
PLATFORMS
|
45
|
+
ruby
|
46
|
+
|
47
|
+
DEPENDENCIES
|
48
|
+
guard (~> 1.8.3)
|
49
|
+
guard-rspec
|
50
|
+
listen (~> 1.3.1)
|
51
|
+
rake
|
52
|
+
rspec
|
data/Guardfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
guard :rspec, :all_on_start => true do
|
5
|
+
watch(%r{^spec/.+_spec\.rb$})
|
6
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
7
|
+
watch('spec/spec_helper.rb') { "spec" }
|
8
|
+
|
9
|
+
# Rails example
|
10
|
+
watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
11
|
+
watch(%r{^app/(.*)(\.erb|\.haml|\.slim)$}) { |m| "spec/#{m[1]}#{m[2]}_spec.rb" }
|
12
|
+
watch(%r{^app/controllers/(.+)_(controller)\.rb$}) { |m| ["spec/routing/#{m[1]}_routing_spec.rb", "spec/#{m[2]}s/#{m[1]}_#{m[2]}_spec.rb", "spec/acceptance/#{m[1]}_spec.rb"] }
|
13
|
+
watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
|
14
|
+
watch('config/routes.rb') { "spec/routing" }
|
15
|
+
watch('app/controllers/application_controller.rb') { "spec/controllers" }
|
16
|
+
|
17
|
+
# Capybara features specs
|
18
|
+
watch(%r{^app/views/(.+)/.*\.(erb|haml|slim)$}) { |m| "spec/features/#{m[1]}_spec.rb" }
|
19
|
+
|
20
|
+
# Turnip features and steps
|
21
|
+
watch(%r{^spec/acceptance/(.+)\.feature$})
|
22
|
+
watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) { |m| Dir[File.join("**/#{m[1]}.feature")][0] || 'spec/acceptance' }
|
23
|
+
end
|
24
|
+
|
data/README.txt
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
= hash_engine
|
2
|
+
|
3
|
+
HashEngine is designed to operate on inputs, given a hash of instructions, to generate an output hash.
|
4
|
+
HashEngine is compatible with Ruby versions 1.8.7, 1.9, and 2.0, mostly because I have a legacy project still on 1.8.7.
|
5
|
+
|
6
|
+
The primary manipulations HashEngine is capable of are:
|
7
|
+
* Transform => given a hash and instructions generate an output hash
|
8
|
+
* CsvTransform => given a string, instructions, and additional data generate an output hash
|
9
|
+
* Extract => given a hash of Objects and instructions generate an output hash
|
10
|
+
|
11
|
+
HashEngine has several functions built in, which are covered below; however, HashEngine was designed with extensibility and customizability in mind. You can add your own functions and/or remove some of the built-in functions.
|
12
|
+
|
13
|
+
There are two main classes of instructions for HashEngine, fetchers and actions.
|
14
|
+
Fetchers are instructions to 'fetch' data.
|
15
|
+
Actions are instructions which 'act' on previously fetched data.
|
16
|
+
A majority of transformations involve only one fetcher and one action, but multiple instructions can be specified.
|
17
|
+
|
18
|
+
1.8.7 vs 1.9 and 2.0
|
19
|
+
Since hashes in 1.8.7 do not remember insertion order, all instruction chaining needs to be specified as array elements.
|
20
|
+
The exception is a single fetcher followed by a single action, which can be specified as a hash.
|
21
|
+
|
22
|
+
== Fetchers
|
23
|
+
The defined fetchers are:
|
24
|
+
* literal
|
25
|
+
* input
|
26
|
+
* data
|
27
|
+
* subgroup_input
|
28
|
+
* conditional_input
|
29
|
+
|
30
|
+
The data that has been fetched will be accumulated and passed into the following action
|
31
|
+
For all examples below assume the data passed in is:
|
32
|
+
--
|
33
|
+
data_key_1: data_value_1
|
34
|
+
data_key_2: data_value_2
|
35
|
+
data_key_3: data_value_3
|
36
|
+
data_key_4: data_value_4
|
37
|
+
data_key_5: data_value_5
|
38
|
+
|
39
|
+
* * literal -- used to specify literal or constant values
|
40
|
+
YAML examples:
|
41
|
+
output_1:
|
42
|
+
literal: foo
|
43
|
+
output_2:
|
44
|
+
literal: ''
|
45
|
+
* * input -- used to specify the key of the data hash to be used
|
46
|
+
YAML example:
|
47
|
+
output_1:
|
48
|
+
input: data_key_1
|
49
|
+
* * data -- used to specify multiple keys of the data hash to be used
|
50
|
+
YAML example:
|
51
|
+
output_1:
|
52
|
+
data:
|
53
|
+
- data_key_1
|
54
|
+
- data_key_2
|
55
|
+
join: ', '
|
56
|
+
Result would be => data_value_1, data_value_2
|
57
|
+
* * subgroup_input -- used when some data needs to be acted upon before being used as an input; all defined fetchers and actions can be used. Technically you could define a highly nested structure, but it is not recommended.
|
58
|
+
YAML example:
|
59
|
+
output_1:
|
60
|
+
- subgroup:
|
61
|
+
data:
|
62
|
+
- data_key_1
|
63
|
+
- data_key_2
|
64
|
+
join: ', '
|
65
|
+
- input: data_key_3
|
66
|
+
- join: '#'
|
67
|
+
Result would be => data_value_1, data_value_2#data_value_3
|
68
|
+
* * conditional_input -- variant of subgroup_input where conditional logic is applied to the data, the equivalent Ruby code would be:
|
69
|
+
if left_operand operator right_operand
|
70
|
+
true_instructions
|
71
|
+
else
|
72
|
+
false_instructions
|
73
|
+
|
74
|
+
-- The structure of this fetcher is a bit different
|
75
|
+
There must be a fetcher specified for the
|
76
|
+
* * left_operand and right_operand can be any valid fetcher
|
77
|
+
* * operators
|
78
|
+
eq => equal
|
79
|
+
ne => not equal
|
80
|
+
lt => less than
|
81
|
+
gt => greater than
|
82
|
+
lteq => less than or equal to
|
83
|
+
gteq => greater than or equal to
|
84
|
+
exist => exists
|
85
|
+
* * true_instructions and false_instructions can be any valid combination of fetchers and actions
|
86
|
+
YAML example:
|
87
|
+
output_1:
|
88
|
+
- conditional_input:
|
89
|
+
left_operand:
|
90
|
+
input: data_key_1
|
91
|
+
operator: exist
|
92
|
+
true:
|
93
|
+
input: data_key_2
|
94
|
+
false:
|
95
|
+
input: data_key_3
|
96
|
+
- input: data_key_3
|
97
|
+
- join: '#'
|
98
|
+
When data_value_1 exists the result would be => data_value_2#data_value_3
|
99
|
+
When data_value_1 doesn't exist the result would be => data_value_3#data_value_3
|
100
|
+
output_2:
|
101
|
+
conditional_input:
|
102
|
+
left_operand:
|
103
|
+
input: data_key_1
|
104
|
+
operator: eq
|
105
|
+
right_operand:
|
106
|
+
input: data_key_4
|
107
|
+
true_instructions:
|
108
|
+
data:
|
109
|
+
- data_key_2
|
110
|
+
- data_key_3
|
111
|
+
join: '#'
|
112
|
+
false_instructions:
|
113
|
+
data:
|
114
|
+
- data_key_1
|
115
|
+
- data_key_4
|
116
|
+
join: '#'
|
117
|
+
When data_value_1 is the same as data_value_4 the result would be => data_value_2#data_value_3
|
118
|
+
When data_value_1 is not the same as data_value_4 the result would be => data_value_1#data_value_4
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
== Actions
|
124
|
+
The main actions are:
|
125
|
+
* lookup_map
|
126
|
+
* first_value
|
127
|
+
* join
|
128
|
+
* max_length
|
129
|
+
* format
|
130
|
+
|
131
|
+
== Global settings
|
132
|
+
* default_value
|
133
|
+
* allow_nil
|
134
|
+
* suppress_nil
|
135
|
+
* allow_blank
|
136
|
+
* suppress_blank
|
137
|
+
* quiet
|
138
|
+
* optional
|
139
|
+
|
140
|
+
The version of Ruby in use will change how multiple instructions must be specified to maintain the correct order of operations. In Ruby 1.8 an Array must be used; however, since Hashes maintain insertion order as of Ruby 1.9, either an Array or a Hash can be used there.
|
141
|
+
|
142
|
+
Input Data Type:
|
143
|
+
* hash
|
144
|
+
Inputs:
|
145
|
+
* input
|
146
|
+
* literal
|
147
|
+
* data
|
148
|
+
* xml
|
149
|
+
Inputs:
|
150
|
+
* css
|
151
|
+
* xpath
|
152
|
+
* string
|
153
|
+
* object
|
154
|
+
Inputs: method names
|
155
|
+
|
156
|
+
|
157
|
+
== Builtin Actions:
|
158
|
+
* lookup_map => mostly equivalent to a hash lookup, the differences are because Hashes loaded from YAML don't have default procs
|
159
|
+
modifiers: default: <default value> => will return <default_value> if input is not found
|
160
|
+
default_to_key: true => will return the key if input is not found
|
161
|
+
for example given the following instructions:
|
162
|
+
output_field_1:
|
163
|
+
- input: foo
|
164
|
+
- lookup_map:
|
165
|
+
x: 1
|
166
|
+
y: 2
|
167
|
+
output_field_2:
|
168
|
+
- input: bar
|
169
|
+
- lookup_map:
|
170
|
+
x: 1
|
171
|
+
y: 2
|
172
|
+
default: 0
|
173
|
+
output_field_3:
|
174
|
+
- input: baz
|
175
|
+
- lookup_map:
|
176
|
+
x: 1
|
177
|
+
y: 2
|
178
|
+
default_to_key: true
|
179
|
+
and the source data => {'foo' => 'z', 'bar' => 'z', 'baz' => 'z'}
|
180
|
+
the output will be {'output_field_1' => nil, 'output_field_2' => 0, 'output_field_3' => 'z'}
|
181
|
+
|
182
|
+
* first_value => find the first non-nil value
|
183
|
+
YAML example:
|
184
|
+
output_1:
|
185
|
+
data:
|
186
|
+
- data_key_1
|
187
|
+
- data_key_2
|
188
|
+
join: ', '
|
189
|
+
When data_value_1 exists the result would be => data_value_1
|
190
|
+
When data_value_1 doesn't exist the result would be => data_value_2
|
191
|
+
* join => join all the data using the specified string
|
192
|
+
YAML example:
|
193
|
+
output_1:
|
194
|
+
data:
|
195
|
+
- data_key_1
|
196
|
+
- data_key_2
|
197
|
+
join: ', '
|
198
|
+
Result would be => data_value_1, data_value_2
|
199
|
+
* max_length => cut the output down to the size specified
|
200
|
+
YAML example:
|
201
|
+
output_1:
|
202
|
+
input: data_key_1
|
203
|
+
max_length: 8
|
204
|
+
Result would be => data_val
|
205
|
+
|
206
|
+
* strtfmt: <pattern>
|
207
|
+
This will call strfmt using the supplied pattern on the value if the value responds to strfmt
|
208
|
+
* format: <sub-action>
|
209
|
+
These are for formatting or casting values
|
210
|
+
The built in formats are:
|
211
|
+
* string = effectively calls to_s on the value, as well as stripping all leading and trailing spaces, examples (initial, result):
|
212
|
+
* * 'sample' => 'sample'
|
213
|
+
* * ' sample' => 'sample'
|
214
|
+
* * 'sample ' => 'sample'
|
215
|
+
* * ' sample ' => 'sample'
|
216
|
+
* * 55 => '55'
|
217
|
+
* * :sample => 'sample'
|
218
|
+
|
219
|
+
* first = returns first character, if needed calls to_s first, examples (initial, result):
|
220
|
+
* * 'sample' => 's'
|
221
|
+
* * 55 => '5'
|
222
|
+
|
223
|
+
* alphanumeric = calls to_s on value and then keeps only a-zA-Z0-9, examples (initial, result):
|
224
|
+
* * 'sample' => 'sample'
|
225
|
+
* * 's_a+m=p%l-e' => 'sample'
|
226
|
+
* * 55 => '55'
|
227
|
+
|
228
|
+
* no_whitespace = , examples (initial, result):
|
229
|
+
* * 'sample' => 'sample'
|
230
|
+
* * 'sam-ple' => 'sam-ple'
|
231
|
+
* * 's_a+m=p%le' => 'sample'
|
232
|
+
* * 55 => '55'
|
233
|
+
|
234
|
+
* alpha = calls to_s on value and then keeps only a-zA-Z, examples (initial, result):
|
235
|
+
* * '123sample' => 'sample'
|
236
|
+
* * 's_a45m=p%l-e' => 'sample'
|
237
|
+
|
238
|
+
* numeric = keeps only digits 0-9, examples (initial, result):
|
239
|
+
* * '123sample' => '123'
|
240
|
+
* * '682-59-7267' => '682597267'
|
241
|
+
* * 'ext 99' => '99'
|
242
|
+
|
243
|
+
* float = calls to_f, examples (initial, result):
|
244
|
+
* * '2000.99' => 2000.99
|
245
|
+
|
246
|
+
* integer = calls to_i with the following exceptions, true => 1, nil and false => 0, examples (initial, result):
|
247
|
+
* * 'sample' => 0
|
248
|
+
* * '123sample' => 123
|
249
|
+
* * 55 => 55
|
250
|
+
* * true => 1
|
251
|
+
* * false => 0
|
252
|
+
* * nil => 0
|
253
|
+
|
254
|
+
* boolean = converts values to true or false, examples (initial, result):
|
255
|
+
* * 'true' => true
|
256
|
+
* * true => true
|
257
|
+
* * 'TrUe' => true
|
258
|
+
* * 't' => true
|
259
|
+
* * 'T' => true
|
260
|
+
* * 1 => true
|
261
|
+
* * 'yes' => true
|
262
|
+
* * 'Y' => true
|
263
|
+
* * 2 => false
|
264
|
+
* * 'false' => false
|
265
|
+
* * false => false
|
266
|
+
* * 'no' => false
|
267
|
+
|
268
|
+
* upcase = changes all characters to upper case, examples (initial, result):
|
269
|
+
* * 'sample' => 'SAMPLE'
|
270
|
+
* * true => 'TRUE'
|
271
|
+
|
272
|
+
* capitalize = capitalizes the string, examples (initial, result):
|
273
|
+
* * 'sample' => 'Sample'
|
274
|
+
|
275
|
+
* downcase = changes all characters to lower case, examples (initial, result):
|
276
|
+
* * 'TRUE' => 'true'
|
277
|
+
|
278
|
+
* reverse = reverse the order of the characters
|
279
|
+
* * 'sample' => 'elpmas'
|
280
|
+
|
281
|
+
|
282
|
+
== Adding/Removing/Changing Actions
|
283
|
+
To add a new action using a block do:
|
284
|
+
HashEngine.register_action_block('first_value') {|data, action_data, error_array|
|
285
|
+
data.find {|field| (field && !field.empty?) }
|
286
|
+
}
|
287
|
+
|
288
|
+
To change an action, simply register the new action.
|
289
|
+
To delete an action use:
|
290
|
+
HashEngine.remove_format_block('float')
|
291
|
+
|
292
|
+
== Adding/Removing/Changing Formats
|
293
|
+
To add a new format using a block do:
|
294
|
+
HashEngine.register_format_block('float') {|data| data.to_f}
|
295
|
+
|
296
|
+
To add a new format using a hash do:
|
297
|
+
i_hash = Hash.new {|hash, key| key.to_i }
|
298
|
+
i_hash[true] = 1
|
299
|
+
i_hash[false] = 0
|
300
|
+
i_hash[nil] = 0
|
301
|
+
HashEngine.register_format_hash('integer', i_hash)
|
302
|
+
|
303
|
+
To change a format, simply register the new format.
|
304
|
+
To delete a format use:
|
305
|
+
HashEngine.remove_format_block('float')
|
306
|
+
|
307
|
+
|
308
|
+
== Contributing to hash_engine
|
309
|
+
|
310
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
311
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
312
|
+
* Fork the project
|
313
|
+
* Start a feature/bugfix branch
|
314
|
+
* Commit and push until you are happy with your contribution
|
315
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
316
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
317
|
+
|
318
|
+
== Copyright
|
319
|
+
|
320
|
+
Copyright (c) 2011-2012 Michael King (kingmt@gmail.com).
|
data/Rakefile
ADDED
data/hash_engine.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "hash_engine"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "hash_engine"
|
7
|
+
s.version = HashEngine::VERSION
|
8
|
+
s.authors = ["Michael King"]
|
9
|
+
s.email = ["kingmt@gmail.com"]
|
10
|
+
s.homepage = ""
|
11
|
+
s.summary = %q{HashEngine converts input data and intructions into output data.}
|
12
|
+
s.description = %q{HashEngine converts input data, including a hash, csv string, or objects, using provided instructions into an output hash.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "hash_engine"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
end
|