stats_package_syntax_file_generator 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
+ ---
2
+
3
+ project: ipumsi
4
+ caller: test
5
+ data_dir_name: __
6
+ data_file_name: xx9999a.dat
7
+ output_formats: [sas, spss, stata, sts]
8
+ output_dir_name: __
9
+ output_file_stem: '%s'
10
+ output_overwrite: true
11
+ data_structure: hier
12
+ record_types: [H, P]
13
+ record_type_var_name: RECTYPE
data/tests/setup.rb ADDED
@@ -0,0 +1,103 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ require 'test/unit'
7
+
8
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/stats_package_syntax_file_generator.rb'))
9
+
10
# Shared fixtures and helper methods for the test suite. Mix this module into
# a test case to get factories for objects with known values, the parameter
# hashes those factories use, and a few small file-system helpers.
module StatsPackageSyntaxFileGeneratorTestSetup

  # YAML metadata used to initialize new StatsPackageSyntaxFileGenerator::Controller objects.
  # The files are resolved relative to the directory holding this file.

  PATH = File.expand_path(File.dirname(__FILE__))
  YAML_FILES = [
    'input_all_vars.yaml',
    'input_controller.yaml',
  ].map { |f| File.join(PATH, f) }

  # Methods to create new objects with known values.

  # Returns a Controller initialized from YAML_FILES.
  def new_controller
    StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => YAML_FILES)
  end

  # Returns a Variable built from params_variable.
  def new_variable
    StatsPackageSyntaxFileGenerator::Variable.new(params_variable)
  end

  # Returns a Value built from the default params_value.
  def new_value
    StatsPackageSyntaxFileGenerator::Value.new(params_value)
  end

  # Returns a new maker for the given syntax type.
  #
  # syntax_type - String suffix selecting the class: 'sas' picks MakerSAS,
  #               and the default '' picks Maker itself. It is passed to the
  #               constructor unchanged.
  #
  # Raises NameError if no such class exists in StatsPackageSyntaxFileGenerator.
  def new_maker(syntax_type = '')
    # Look the class up by name inside the project namespace instead of
    # eval-ing a string, so a malformed syntax_type can never execute code.
    maker_class = StatsPackageSyntaxFileGenerator.const_get("Maker#{syntax_type.upcase}")
    maker_class.new(new_controller, syntax_type)
  end

  # Parameters used when creating objects with known values.

  # Returns a fresh Hash on every call, so callers may modify it freely.
  def params_variable
    {
      # Parameters needed to create the Variable.
      :name             => 'FOO',
      :label            => 'Test variable',
      :start_column     => 100,
      :width            => 4,
      :is_string_var    => false,
      :is_common_var    => false,
      :record_type      => 'P',
      :implied_decimals => 0,
      :suppress_labels  => false,
      :values           => [],
      # Expected values used by tests.
      :end_column            => 103,
      :column_locations_as_s => '100-103',
    }
  end

  # Returns the parameter Hash for one Value.
  #
  # val - the value itself.
  # lab - text appended to the 'Test value: ' label prefix. Interpolated, so
  #       any object responding to to_s is accepted.
  def params_value(val = 99, lab = 'bar')
    {
      :value => val,
      :label => "Test value: #{lab}",
    }
  end

  # Methods to add a bunch of Values to a Variable.

  # Returns an Array of Value parameter Hashes for a fixed set of values.
  def params_values
    [0, 1, 2, 9, 9999].map { |v| params_value(v, "bar#{v}") }
  end

  # Calls var.add_value once per Hash in params_values.
  def add_new_values_to_var(var)
    params_values.each { |pv| var.add_value(pv) }
  end

  # Helper functions.

  # Returns the params_variable entry stored under key k.
  def params_variable_lookup(k)
    params_variable[k]
  end

  # Maps a list of variables to their names; nil is passed through as nil.
  def vars_to_names(var_list)
    return nil if var_list.nil?
    var_list.map { |v| v.name }
  end

  # Maps a list of names to the variables sfc.get_var_by_name returns for them.
  def names_to_vars(sfc, var_list)
    var_list.map { |nm| sfc.get_var_by_name(nm) }
  end

  # Returns the sorted entries of dir_name, skipping every name that starts
  # with a dot (which covers '.', '..' and hidden files).
  def dir_contents(dir_name)
    Dir.entries(dir_name).sort.reject { |f| f.start_with?('.') }
  end

  # Deletes each of the named files from directory d. Names that do not refer
  # to an existing regular file are silently skipped.
  def remove_file_from_dir(d, files)
    files.each do |f|
      full_path = File.join(d, f)
      File.delete(full_path) if File.file?(full_path)
    end
  end

end
@@ -0,0 +1,378 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ require File.expand_path(File.join(File.dirname(__FILE__), 'setup.rb'))
7
+
8
module StatsPackageSyntaxFileGeneratorTest
  # Unit tests for StatsPackageSyntaxFileGenerator::Controller. Each test
  # starts from a fresh controller built by new_controller (from the setup
  # module), i.e. from the YAML fixture metadata.
  class Controller < Test::Unit::TestCase

    # Variable names the fixture metadata is expected to contain: the common
    # variables (C), then those returned only for record type 'H' and 'P'.
    VARS_C   = %w(RECTYPE DWNUM HHNUM HDFIRSTD FBIG_ND BADDW)
    VARS_H   = %w(CANTON URBAN DWTYPE OWNERSHP RENT)
    VARS_P   = %w(RELATE SEX AGE RESPREV2 SOCSEC EDLEVEL LIT BIGDEC BIGINT BIGSTR)
    VARS_ALL = [VARS_C, VARS_H, VARS_P].flatten

    include StatsPackageSyntaxFileGeneratorTestSetup

    def test_create_controller
      msg = 'Try to create an object.'
      sfc = new_controller
      assert_instance_of StatsPackageSyntaxFileGenerator::Controller, sfc, msg
    end

    def test_add_variable
      sfc = new_controller
      n   = sfc.variables.size
      var = sfc.add_variable(params_variable)

      msg = 'The method should increase the N of variables.'
      assert_equal n + 1, sfc.variables.size, msg

      msg = 'The method should return the added variable.'
      assert_instance_of StatsPackageSyntaxFileGenerator::Variable, var, msg
      assert_equal params_variable_lookup(:name), var.name, msg
      assert_equal params_variable_lookup(:label), var.label, msg
    end

    def test_clear_variables
      msg = 'The method should remove all variables.'
      sfc = new_controller
      assert_equal VARS_ALL.size, sfc.variables.size, msg
      sfc.clear_variables
      assert_equal 0, sfc.variables.size, msg
    end

    def test_get_var_by_name
      sfc = new_controller

      msg = 'The method should return the correct variable.'
      nm  = 'RELATE'
      var = sfc.get_var_by_name(nm)
      assert_equal nm, var.name, msg

      msg = 'The method should return nil if given an invalid name.'
      nm  = 'fubb'
      var = sfc.get_var_by_name(nm)
      assert_nil var, msg

      # Add a second variable that reuses an existing name; the lookup must
      # still return the original one, identified here by its label.
      msg = 'The method should return the FIRST variable that matches.'
      pv  = params_variable
      nm  = 'RELATE'
      pv[:name] = nm
      sfc.add_variable(pv)
      var = sfc.get_var_by_name(nm)
      assert_equal "Relationship to household head", var.label, msg
      assert_not_equal pv[:label], var.label, msg
    end

    def test_get_vars_by_record_type
      sfc = new_controller

      expected_by_type = {
        'H' => VARS_C + VARS_H,
        'P' => VARS_C + VARS_P,
      }

      msg = 'The method should return the correct variables '
      expected_by_type.each do |rec_type, expected|
        vars = sfc.get_vars_by_record_type(rec_type)
        assert_equal expected, vars_to_names(vars), msg + "(#{rec_type} record)"
      end

      msg  = 'The method should return common variables if given an invalid record type.'
      vars = sfc.get_vars_by_record_type('.')
      assert_equal VARS_C, vars_to_names(vars), msg

      # Replace the fixture variables with two copies of the test variable,
      # whose record type is 'P' and which is not a common variable.
      msg = 'The method should return the correct variables (no common vars).'
      pv  = params_variable
      sfc.clear_variables
      sfc.add_variable(pv)
      sfc.add_variable(pv)
      vars = sfc.get_vars_by_record_type('P')
      assert_equal [pv[:name], pv[:name]], vars_to_names(vars), msg

      msg  = 'The method should return [] if given an invalid record type (no common vars).'
      vars = sfc.get_vars_by_record_type('H')
      assert_equal [], vars, msg
    end

    def test_get_vars_with_var_labels
      sfc = new_controller

      without_labels = %w(SOCSEC)
      var_list = VARS_ALL.reject { |v| without_labels.include?(v) }

      msg  = 'The method should return the correct variables.'
      vars = sfc.get_vars_with_var_labels
      assert_equal var_list, vars_to_names(vars), msg
    end

    def test_get_vars_with_values
      sfc = new_controller

      without_values = %w(AGE DWNUM CANTON RESPREV2 BIGDEC BIGINT BIGSTR)
      var_list = VARS_ALL.reject { |v| without_values.include?(v) }

      msg  = 'The method should return the correct variables.'
      vars = sfc.get_vars_with_values
      assert_equal var_list, vars_to_names(vars), msg
    end

    def test_record_type_var
      sfc = new_controller

      msg = 'The method should return the correct variable.'
      assert_equal 'RECTYPE', sfc.record_type_var.name, msg

      msg = 'The method should return nil if there is no record type variable.'
      sfc.record_type_var_name = ''
      assert_nil sfc.record_type_var, msg
    end

    def test_add_value
      sfc = new_controller

      # The values are added through the controller, yet the assertion checks
      # the variable that was added just before them.
      msg = 'The method should increase the N of values.'
      var = sfc.add_variable(params_variable)
      n   = 5
      (1..n).each { |v| sfc.add_value(:value => v) }
      assert_equal n, var.values.size, msg
    end

    def test_new_values
      sfc = new_controller

      msg = 'The method should result in a variable with values.'
      var = sfc.add_variable(
        :name         => 'foo',
        :start_column => 1,
        :width        => 4,
        :values       => sfc.new_values(
          {:value => 1},
          {:value => 2},
          {:value => 3}
        )
      )
      assert_equal 3, var.values.size, msg

      msg = 'The method should also accept an array.'
      val_list = (1..10).map { |i| {:value => i} }
      var = sfc.add_variable(
        :name         => 'bar',
        :start_column => 22,
        :width        => 4,
        :values       => sfc.new_values(val_list)
      )
      assert_equal val_list.size, var.values.size, msg
    end

    def test_is_last_record_type
      sfc = new_controller

      # assert_equal against true/false (rather than a bare assert) keeps the
      # check strict: the method must return real booleans.
      msg = 'Compare against hardcoded result.'
      sfc.record_types = %w(F U B A R)
      assert_equal true,  sfc.is_last_record_type('R'), msg
      assert_equal false, sfc.is_last_record_type('A'), msg
      assert_equal false, sfc.is_last_record_type('x'), msg

      msg = 'Should return false if there are no record types'
      sfc.record_types = []
      assert_equal false, sfc.is_last_record_type(1), msg
    end

    def test_rec_types_except_last
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      sfc.record_types = %w(F U B A R)
      assert_equal %w(F U B A), sfc.rec_types_except_last, msg

      msg = 'Calling the method should not affect @record_types.'
      sfc.record_types = %w(F U B A R)
      assert_equal %w(F U B A R), sfc.record_types, msg

      msg = 'Should return empty array if there are no record types.'
      sfc.record_types = []
      assert_equal [], sfc.rec_types_except_last, msg
    end

    def test_max_var_name_length
      sfc = new_controller

      msg = 'The method should return the correct value.'
      assert_equal 8, sfc.max_var_name_length, msg

      msg = 'The method should return 0 if there are no Variables.'
      sfc.clear_variables
      assert_equal 0, sfc.max_var_name_length, msg
    end

    def test_max_col_loc_width
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      assert_equal 3, sfc.max_col_loc_width, msg

      msg = 'The method should return 0 if there are no Variables.'
      sfc.clear_variables
      assert_equal 0, sfc.max_col_loc_width, msg
    end

    # Generates the syntax files into a scratch 'output' directory next to
    # this file and checks that exactly the expected files appear.
    def test_generate_syntax_files
      output_dir = File.join(
        File.expand_path(File.dirname(__FILE__)),
        'output'
      )
      Dir.mkdir(output_dir) unless File.directory?(output_dir)

      msg = 'Make sure the testing output directory exists.'
      assert File.directory?(output_dir), msg

      # Remove files generated during any previous test.
      stem = 'testing'
      expected_files = %w(do sas sps sts).map { |e| stem + '.' + e }
      remove_file_from_dir(output_dir, expected_files)

      msg = 'Make sure the testing output directory is empty.'
      assert_equal [], dir_contents(output_dir), msg

      msg = 'Make sure the method creates the expected files.'
      sfc = new_controller
      sfc.output_dir_name  = output_dir
      sfc.output_file_stem = '%s'
      sfc.data_file_name   = stem
      sfc.generate_syntax_files
      assert_equal expected_files, dir_contents(output_dir), msg

      msg = 'Remove the files and make sure no files remain.'
      remove_file_from_dir(output_dir, expected_files)
      assert_equal [], dir_contents(output_dir), msg
    ensure
      # Clean up even when an assertion above fails, so a failed run does not
      # leave generated files or the scratch directory behind. Files this test
      # does not own are never touched; the directory is removed only if empty.
      if output_dir && File.directory?(output_dir)
        remove_file_from_dir(output_dir, expected_files) if expected_files
        Dir.delete(output_dir) if (Dir.entries(output_dir) - %w(. ..)).empty?
      end
    end

    def test_syntax
      msg = 'Make sure that syntax generation works same way, regardless of order.'
      sfc = new_controller
      formats = sfc.output_formats
      syntax1 = {}
      syntax2 = {}
      formats.each         { |f| syntax1[f] = sfc.syntax(f) }
      formats.reverse.each { |f| syntax2[f] = sfc.syntax(f) }
      # Comparing [format, flag] pairs makes a failure message name the
      # format whose output differed between the two passes.
      assert_equal formats.map { |f| [f, true] },
                   formats.map { |f| [f, syntax1[f] == syntax2[f]] },
                   msg
    end

    def test_modify_metadata_all_vars_as_string
      sfc = new_controller

      # A Proc to count N of string variables.
      string_vars_n = lambda { sfc.variables.count { |v| v.is_string_var } }

      msg  = 'Every variable should be a string variable: '
      orig = string_vars_n.call
      sfc.all_vars_as_string = true
      sfc.modify_metadata
      assert_not_equal(orig, string_vars_n.call, msg + 'vs orig')
      assert_equal(sfc.variables.size, string_vars_n.call, msg + 'all')
    end

    def test_modify_metadata_select_vars_by_record_type
      sfc = new_controller

      msg = 'Should have no effect if all record types are still present.'
      sfc.select_vars_by_record_type = true
      sfc.modify_metadata
      assert_equal(VARS_ALL.size, sfc.variables.size, msg)

      msg = 'Just person variables.'
      sfc.record_types = ['P']
      sfc.modify_metadata
      assert_equal(VARS_C.size + VARS_P.size, sfc.variables.size, msg)

      msg = 'Just common variables if there are no record types.'
      sfc.record_types = []
      sfc.modify_metadata
      assert_equal(VARS_C.size, sfc.variables.size, msg)

      msg = 'The rectangularize option requires select_vars_by_record_type.'
      sfc = new_controller
      sfc.rectangularize = true
      assert_equal(false, sfc.select_vars_by_record_type, msg)
      sfc.modify_metadata
      assert_equal(true, sfc.select_vars_by_record_type, msg)
    end

    # Each section breaks the metadata of a fresh controller in one way and
    # expects syntax generation to raise a RuntimeError.
    def test_validate_metadata
      msg = 'Invalid metadata: no variables'
      sfc = new_controller
      sfc.clear_variables
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: no output formats'
      sfc = new_controller
      sfc.output_formats = []
      assert_raise(RuntimeError, msg) { sfc.generate_syntax_files }

      msg = 'Invalid metadata: hier without any record types'
      sfc = new_controller
      sfc.record_types = []
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: rectangularize without hier'
      sfc = new_controller
      sfc.rectangularize = true
      sfc.data_structure = 'rect'
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: hier without a record type variable'
      sfc = new_controller
      sfc.record_type_var_name = ''
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: hier with a variable that lacks a record type'
      sfc = new_controller
      sfc.get_var_by_name('SEX').record_type = ''
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: variable with invalid start_column'
      sfc = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      [-1, 'a', 0].each do |bad|
        sex_var.start_column = bad
        assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
      end

      msg = 'Invalid metadata: variable with invalid width'
      sfc = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      [-1, 'a', 0].each do |bad|
        sex_var.width = bad
        assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
      end

      msg = 'Invalid metadata: variable with implied_decimals'
      sfc = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      sex_var.implied_decimals = -1
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
    end

    def test_rec_type_lookup_hash
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      assert_equal({'P' => 0, 'H' => 0}, sfc.rec_type_lookup_hash, msg)

      msg = 'Should return empty hash if there are no record types.'
      sfc.record_types = []
      assert_equal({}, sfc.rec_type_lookup_hash, msg)
    end

  end
end