stats_package_syntax_file_generator 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ ---
2
+
3
+ project: ipumsi
4
+ caller: test
5
+ data_dir_name: __
6
+ data_file_name: xx9999a.dat
7
+ output_formats: [sas, spss, stata, sts]
8
+ output_dir_name: __
9
+ output_file_stem: '%s'
10
+ output_overwrite: true
11
+ data_structure: hier
12
+ record_types: [H, P]
13
+ record_type_var_name: RECTYPE
data/tests/setup.rb ADDED
@@ -0,0 +1,103 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ require 'test/unit'
7
+
8
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/stats_package_syntax_file_generator.rb'))
9
+
10
# Shared fixtures and helper methods for the stats_package_syntax_file_generator
# unit tests. Test cases mix this module in to build objects with known values
# and to inspect or clean up directories.
module StatsPackageSyntaxFileGeneratorTestSetup

  # YAML metadata used to initialize new StatsPackageSyntaxFileGenerator::Controller objects.

  # Absolute path of the directory containing this file.
  PATH = File.expand_path(File.dirname(__FILE__))

  # Absolute paths of the YAML files handed to every new controller.
  YAML_FILES = [
    'input_all_vars.yaml',
    'input_controller.yaml',
  ].map { |f| File.join(PATH, f) }

  # Methods to create new objects with known values.

  # Returns a new Controller initialized from YAML_FILES.
  def new_controller
    StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => YAML_FILES)
  end

  # Returns a new Variable built from params_variable.
  def new_variable
    StatsPackageSyntaxFileGenerator::Variable.new(params_variable)
  end

  # Returns a new Value built from the default params_value.
  def new_value
    StatsPackageSyntaxFileGenerator::Value.new(params_value)
  end

  # Returns a new maker object for the given syntax type.
  #
  # syntax_type - name appended (upper-cased) to "Maker" to pick the class,
  #               e.g. 'sas' selects StatsPackageSyntaxFileGenerator::MakerSAS.
  #               The default '' selects StatsPackageSyntaxFileGenerator::Maker.
  #
  # The class is resolved with const_get rather than eval, so a malformed
  # syntax_type raises NameError instead of being executed as Ruby code.
  def new_maker(syntax_type = '')
    maker_class = StatsPackageSyntaxFileGenerator.const_get("Maker#{syntax_type.upcase}", false)
    maker_class.new(new_controller, syntax_type)
  end

  # Parameters used when creating objects with known values.

  # Returns a fresh Hash of Variable parameters on every call, so callers may
  # modify the result without affecting other tests.
  def params_variable
    {
      # Parameters needed to create the Variable.
      :name             => 'FOO',
      :label            => 'Test variable',
      :start_column     => 100,
      :width            => 4,
      :is_string_var    => false,
      :is_common_var    => false,
      :record_type      => 'P',
      :implied_decimals => 0,
      :suppress_labels  => false,
      :values           => [],
      # Expected values used by tests.
      :end_column            => 103,
      :column_locations_as_s => '100-103',
    }
  end

  # Returns a Hash of Value parameters.
  #
  # val - the value to store under :value.
  # lab - suffix appended to 'Test value: ' to form the :label.
  def params_value(val = 99, lab = 'bar')
    {
      :value => val,
      :label => "Test value: #{lab}",
    }
  end

  # Methods to add a bunch of Values to a Variable.

  # Returns an Array of Value parameter Hashes for a fixed set of values.
  def params_values
    [0, 1, 2, 9, 9999].map { |v| params_value(v, "bar#{v}") }
  end

  # Passes each Hash from params_values to var.add_value.
  def add_new_values_to_var(var)
    params_values.each { |pv| var.add_value(pv) }
  end

  # Helper functions.

  # Returns the entry of params_variable stored under key k.
  def params_variable_lookup(k)
    params_variable[k]
  end

  # Maps a list of variable-like objects to their names; nil is passed through.
  def vars_to_names(var_list)
    return nil if var_list.nil?
    var_list.map { |v| v.name }
  end

  # Maps a list of names to whatever sfc.get_var_by_name returns for each.
  def names_to_vars(sfc, var_list)
    var_list.map { |nm| sfc.get_var_by_name(nm) }
  end

  # Returns the sorted entries of dir_name, skipping names that begin with a
  # dot (which also drops '.' and '..').
  def dir_contents(dir_name)
    Dir.entries(dir_name).sort.reject { |f| f.start_with?('.') }
  end

  # Deletes each of the named files from directory d. Names that do not refer
  # to an existing regular file are silently skipped.
  def remove_file_from_dir(d, files)
    files.each do |f|
      full_path = File.join(d, f)
      File.delete(full_path) if File.file?(full_path)
    end
  end

end
@@ -0,0 +1,378 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ require File.expand_path(File.join(File.dirname(__FILE__), 'setup.rb'))
7
+
8
module StatsPackageSyntaxFileGeneratorTest
  # Unit tests for StatsPackageSyntaxFileGenerator::Controller. Each test
  # builds a fresh controller via new_controller (from the setup module), so
  # the expected names and counts below mirror the YAML fixture metadata.
  class Controller < Test::Unit::TestCase

    # Variable names expected in the fixture metadata: common variables,
    # household ('H') variables and person ('P') variables, in order.
    VARS_C   = %w(RECTYPE DWNUM HHNUM HDFIRSTD FBIG_ND BADDW).freeze
    VARS_H   = %w(CANTON URBAN DWTYPE OWNERSHP RENT).freeze
    VARS_P   = %w(RELATE SEX AGE RESPREV2 SOCSEC EDLEVEL LIT BIGDEC BIGINT BIGSTR).freeze
    VARS_ALL = [VARS_C, VARS_H, VARS_P].flatten.freeze

    include StatsPackageSyntaxFileGeneratorTestSetup

    # A controller can be constructed from the fixture YAML files.
    def test_create_controller
      msg = 'Try to create an object.'
      sfc = new_controller
      assert_instance_of StatsPackageSyntaxFileGenerator::Controller, sfc, msg
    end

    # add_variable appends one variable and returns it.
    def test_add_variable
      sfc = new_controller
      n   = sfc.variables.size
      var = sfc.add_variable(params_variable)

      msg = 'The method should increase the N of variables.'
      assert_equal n + 1, sfc.variables.size, msg

      msg = 'The method should return the added variable.'
      assert_instance_of StatsPackageSyntaxFileGenerator::Variable, var, msg
      assert_equal params_variable_lookup(:name), var.name, msg
      assert_equal params_variable_lookup(:label), var.label, msg
    end

    # clear_variables empties the variable list.
    def test_clear_variables
      msg = 'The method should remove all variables.'
      sfc = new_controller
      assert_equal VARS_ALL.size, sfc.variables.size, msg
      sfc.clear_variables
      assert_equal 0, sfc.variables.size, msg
    end

    # get_var_by_name finds a variable by name, returns nil for unknown
    # names, and returns the first match when names are duplicated.
    def test_get_var_by_name
      sfc = new_controller

      msg = 'The method should return the correct variable.'
      nm  = 'RELATE'
      var = sfc.get_var_by_name(nm)
      assert_equal nm, var.name, msg

      msg = 'The method should return nil if given an invalid name.'
      nm  = 'fubb'
      var = sfc.get_var_by_name(nm)
      assert_nil var, msg

      # Add a second variable named RELATE; the lookup must still return the
      # original one, whose label comes from the fixture metadata.
      msg = 'The method should return the FIRST variable that matches.'
      pv  = params_variable
      nm  = 'RELATE'
      pv[:name] = nm
      sfc.add_variable(pv)
      var = sfc.get_var_by_name(nm)
      assert_equal "Relationship to household head", var.label, msg
      assert_not_equal pv[:label], var.label, msg
    end

    # get_vars_by_record_type returns the common variables followed by the
    # variables of the requested record type.
    def test_get_vars_by_record_type
      sfc = new_controller

      var_lists = {
        'H' => [VARS_C, VARS_H].flatten,
        'P' => [VARS_C, VARS_P].flatten,
      }

      msg = 'The method should return the correct variables '
      var_lists.each do |rec_type, expected|
        vars = sfc.get_vars_by_record_type(rec_type)
        assert_equal expected, vars_to_names(vars), msg + "(#{rec_type} record)"
      end

      msg  = 'The method should return common variables if given an invalid record type.'
      vars = sfc.get_vars_by_record_type('.')
      assert_equal VARS_C, vars_to_names(vars), msg

      # Replace the fixture variables with two copies of the 'P' test variable.
      msg = 'The method should return the correct variables (no common vars).'
      pv  = params_variable
      sfc.clear_variables
      sfc.add_variable(pv)
      sfc.add_variable(pv)
      vars = sfc.get_vars_by_record_type('P')
      assert_equal [pv[:name], pv[:name]], vars_to_names(vars), msg

      msg  = 'The method should return [] if given an invalid record type (no common vars).'
      vars = sfc.get_vars_by_record_type('H')
      assert_equal [], vars, msg
    end

    # get_vars_with_var_labels skips the fixture variable listed below.
    def test_get_vars_with_var_labels
      sfc = new_controller

      without_labels = %w(SOCSEC)
      var_list = VARS_ALL.reject { |v| without_labels.include?(v) }

      msg  = 'The method should return the correct variables.'
      vars = sfc.get_vars_with_var_labels
      assert_equal var_list, vars_to_names(vars), msg
    end

    # get_vars_with_values skips the fixture variables listed below.
    def test_get_vars_with_values
      sfc = new_controller

      without_values = %w(AGE DWNUM CANTON RESPREV2 BIGDEC BIGINT BIGSTR)
      var_list = VARS_ALL.reject { |v| without_values.include?(v) }

      msg  = 'The method should return the correct variables.'
      vars = sfc.get_vars_with_values
      assert_equal var_list, vars_to_names(vars), msg
    end

    # record_type_var returns the variable named by record_type_var_name,
    # or nil when that name is blank.
    def test_record_type_var
      sfc = new_controller

      msg = 'The method should return the correct variable.'
      assert_equal 'RECTYPE', sfc.record_type_var.name, msg

      msg = 'The method should return nil if there is no record type variable.'
      sfc.record_type_var_name = ''
      assert_nil sfc.record_type_var, msg
    end

    # add_value on the controller is expected to attach values to the
    # variable added just before it.
    def test_add_value
      sfc = new_controller

      msg = 'The method should increase the N of values.'
      var = sfc.add_variable(params_variable)
      n   = 5
      (1..n).each { |v| sfc.add_value(:value => v) }
      assert_equal n, var.values.size, msg
    end

    # new_values accepts either a list of hashes or a single array of hashes.
    def test_new_values
      sfc = new_controller

      msg = 'The method should result in a variable with values.'
      var = sfc.add_variable(
        :name         => 'foo',
        :start_column => 1,
        :width        => 4,
        :values       => sfc.new_values(
          {:value => 1},
          {:value => 2},
          {:value => 3}
        )
      )
      assert_equal 3, var.values.size, msg

      msg = 'The method should also accept an array.'
      val_list = (1..10).map { |i| {:value => i} }
      var = sfc.add_variable(
        :name         => 'bar',
        :start_column => 22,
        :width        => 4,
        :values       => sfc.new_values(val_list)
      )
      assert_equal val_list.size, var.values.size, msg
    end

    # is_last_record_type is true only for the final entry of record_types.
    def test_is_last_record_type
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      sfc.record_types = %w(F U B A R)
      assert_equal true,  sfc.is_last_record_type('R'), msg
      assert_equal false, sfc.is_last_record_type('A'), msg
      assert_equal false, sfc.is_last_record_type('x'), msg

      msg = 'Should return false if there are no record types'
      sfc.record_types = []
      assert_equal false, sfc.is_last_record_type(1), msg
    end

    # rec_types_except_last drops the final record type without modifying
    # the controller's record_types.
    def test_rec_types_except_last
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      sfc.record_types = %w(F U B A R)
      assert_equal %w(F U B A), sfc.rec_types_except_last, msg

      msg = 'Calling the method should not affect @record_types.'
      sfc.record_types = %w(F U B A R)
      assert_equal %w(F U B A R), sfc.record_types, msg

      msg = 'Should return empty array if there are no record types.'
      sfc.record_types = []
      assert_equal [], sfc.rec_types_except_last, msg
    end

    # max_var_name_length is 8 for the fixture variables and 0 when empty.
    def test_max_var_name_length
      sfc = new_controller

      msg = 'The method should return the correct value.'
      assert_equal 8, sfc.max_var_name_length, msg

      msg = 'The method should return 0 if there are no Variables.'
      sfc.clear_variables
      assert_equal 0, sfc.max_var_name_length, msg
    end

    # max_col_loc_width is 3 for the fixture variables and 0 when empty.
    def test_max_col_loc_width
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      assert_equal 3, sfc.max_col_loc_width, msg

      msg = 'The method should return 0 if there are no Variables.'
      sfc.clear_variables
      assert_equal 0, sfc.max_col_loc_width, msg
    end

    # generate_syntax_files writes one file per output format into
    # output_dir_name. The generated files and the scratch directory are
    # removed in an ensure block so that a failing assertion does not leave
    # debris that would break the "directory is empty" check on the next run.
    def test_generate_syntax_files
      output_dir = File.join(
        File.expand_path(File.dirname(__FILE__)),
        'output'
      )
      stem           = 'testing'
      expected_files = %w(do sas sps sts).map { |e| stem + '.' + e }

      Dir.mkdir(output_dir) unless File.directory?(output_dir)

      begin
        msg = 'Make sure the testing output directory exists.'
        assert File.directory?(output_dir), msg

        # Remove files generated during any previous test.
        remove_file_from_dir(output_dir, expected_files)

        msg = 'Make sure the testing output directory is empty.'
        assert_equal [], dir_contents(output_dir), msg

        msg = 'Make sure the method creates the expected files.'
        sfc = new_controller
        sfc.output_dir_name  = output_dir
        sfc.output_file_stem = '%s'
        sfc.data_file_name   = stem
        sfc.generate_syntax_files
        assert_equal expected_files, dir_contents(output_dir), msg

        msg = 'Remove the files and make sure no files remain.'
        remove_file_from_dir(output_dir, expected_files)
        assert_equal [], dir_contents(output_dir), msg
      ensure
        # Always clean up. Only delete the directory when it is truly empty,
        # so a cleanup error never masks the original assertion failure.
        remove_file_from_dir(output_dir, expected_files)
        Dir.delete(output_dir) if (Dir.entries(output_dir) - %w(. ..)).empty?
      end
    end

    # syntax(format) yields the same text regardless of the order in which
    # the formats are requested.
    def test_syntax
      msg = 'Make sure that syntax generation works same way, regardless of order.'
      sfc = new_controller
      formats = sfc.output_formats
      syntax1 = {}
      syntax2 = {}
      formats.each         { |f| syntax1[f] = sfc.syntax(f) }
      formats.reverse_each { |f| syntax2[f] = sfc.syntax(f) }
      # Comparing [format, matched?] pairs makes a failure name the format.
      assert_equal formats.map { |f| [f, true] },
                   formats.map { |f| [f, syntax1[f] == syntax2[f]] },
                   msg
    end

    # With all_vars_as_string set, modify_metadata turns every variable into
    # a string variable.
    def test_modify_metadata_all_vars_as_string
      sfc = new_controller

      # A Proc to count N of string variables.
      string_vars_n = lambda { sfc.variables.count { |v| v.is_string_var } }

      msg  = 'Every variable should be a string variable: '
      orig = string_vars_n.call
      sfc.all_vars_as_string = true
      sfc.modify_metadata
      assert_not_equal(orig, string_vars_n.call, msg + 'vs orig')
      assert_equal(sfc.variables.size, string_vars_n.call, msg + 'all')
    end

    # With select_vars_by_record_type set, modify_metadata keeps only common
    # variables plus those of the remaining record types; rectangularize
    # switches the option on.
    def test_modify_metadata_select_vars_by_record_type
      sfc = new_controller

      msg = 'Should have no effect if all record types are still present.'
      sfc.select_vars_by_record_type = true
      sfc.modify_metadata
      assert_equal(VARS_ALL.size, sfc.variables.size, msg)

      msg = 'Just person variables.'
      sfc.record_types = ['P']
      sfc.modify_metadata
      assert_equal(VARS_C.size + VARS_P.size, sfc.variables.size, msg)

      msg = 'Just common variables if there are no record types.'
      sfc.record_types = []
      sfc.modify_metadata
      assert_equal(VARS_C.size, sfc.variables.size, msg)

      msg = 'The rectangularize option requires select_vars_by_record_type.'
      sfc = new_controller
      sfc.rectangularize = true
      assert_equal(false, sfc.select_vars_by_record_type, msg)
      sfc.modify_metadata
      assert_equal(true, sfc.select_vars_by_record_type, msg)
    end

    # Each scenario breaks one piece of metadata on a fresh controller and
    # expects syntax generation to raise RuntimeError.
    def test_validate_metadata
      msg = 'Invalid metadata: no variables'
      sfc = new_controller
      sfc.clear_variables
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: no output formats'
      sfc = new_controller
      sfc.output_formats = []
      assert_raise(RuntimeError, msg) { sfc.generate_syntax_files }

      msg = 'Invalid metadata: hier without any record types'
      sfc = new_controller
      sfc.record_types = []
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: rectangularize without hier'
      sfc = new_controller
      sfc.rectangularize = true
      sfc.data_structure = 'rect'
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: hier without a record type variable'
      sfc = new_controller
      sfc.record_type_var_name = ''
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      msg = 'Invalid metadata: hier with a variable that lacks a record type'
      sfc = new_controller
      sfc.get_var_by_name('SEX').record_type = ''
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }

      # Negative, non-numeric and zero values are each rejected.
      msg     = 'Invalid metadata: variable with invalid start_column'
      sfc     = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      [-1, 'a', 0].each do |bad|
        sex_var.start_column = bad
        assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
      end

      msg     = 'Invalid metadata: variable with invalid width'
      sfc     = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      [-1, 'a', 0].each do |bad|
        sex_var.width = bad
        assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
      end

      msg     = 'Invalid metadata: variable with implied_decimals'
      sfc     = new_controller
      sex_var = sfc.get_var_by_name('SEX')
      sex_var.implied_decimals = -1
      assert_raise(RuntimeError, msg) { sfc.syntax('spss') }
    end

    # rec_type_lookup_hash maps every record type to 0.
    def test_rec_type_lookup_hash
      sfc = new_controller

      msg = 'Compare against hardcoded result.'
      assert_equal({'P' => 0, 'H' => 0}, sfc.rec_type_lookup_hash, msg)

      msg = 'Should return empty hash if there are no record types.'
      sfc.record_types = []
      assert_equal({}, sfc.rec_type_lookup_hash, msg)
    end

  end
end