stats_package_syntax_file_generator 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README +128 -0
- data/lib/stats_package_syntax_file_generator.rb +19 -0
- data/lib/stats_package_syntax_file_generator/controller.rb +323 -0
- data/lib/stats_package_syntax_file_generator/maker.rb +126 -0
- data/lib/stats_package_syntax_file_generator/maker_sas.rb +306 -0
- data/lib/stats_package_syntax_file_generator/maker_spss.rb +194 -0
- data/lib/stats_package_syntax_file_generator/maker_stata.rb +300 -0
- data/lib/stats_package_syntax_file_generator/maker_sts.rb +181 -0
- data/lib/stats_package_syntax_file_generator/value.rb +29 -0
- data/lib/stats_package_syntax_file_generator/variable.rb +56 -0
- data/tests/input_all_vars.yaml +2012 -0
- data/tests/input_controller.yaml +13 -0
- data/tests/setup.rb +103 -0
- data/tests/tc_controller.rb +378 -0
- data/tests/tc_maker.rb +172 -0
- data/tests/tc_maker_sas.rb +251 -0
- data/tests/tc_maker_spss.rb +121 -0
- data/tests/tc_maker_stata.rb +224 -0
- data/tests/tc_maker_sts.rb +190 -0
- data/tests/tc_value.rb +23 -0
- data/tests/tc_variable.rb +53 -0
- data/tests/ts_all.rb +20 -0
- metadata +67 -0
@@ -0,0 +1,300 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
  # Maker subclass whose #syntax returns the lines of a Stata command file
  # ("infix", "label var", "label define", ...) built from the metadata
  # exposed by the controller object held in @sfc.
  #
  # Helpers such as blank, label_trunc, labelable_values, max_value_length,
  # val_q and val_as_s are not defined here; presumably they come from Maker.
  class MakerSTATA < Maker

    # Prepares the sprintf templates and constants used by the syn_* methods.
    # Template column widths are derived from @sfc.max_var_name_length and
    # @sfc.max_col_loc_width.
    def initialize(sfc, syntax_type)
      super

      name_width = @sfc.max_var_name_length
      loc_width  = 2 * @sfc.max_col_loc_width + 1

      @var_loc_format     = " %-7s %-#{name_width}s %-#{loc_width}s %s"
      @var_lab_format     = "label var %-#{name_width}s %s"
      @infix_format       = "%#{loc_width + name_width + 4}s"
      @replace_format     = "replace %-#{name_width}s = %-#{name_width}s / %d"
      @display_format     = "format %-#{name_width}s %%%d.%df"
      @cmd_end            = ''
      @cmd_continue       = ' ///'
      @var_label_max_leng = 80   # limit handed to label_trunc for variable labels
      @val_label_max_leng = 244  # limit handed to label_trunc for value labels
      @sort_var_stem      = '_line_num'
    end

    # Returns the complete syntax file as a flat array of lines: opening
    # comments, "set more off", then each section followed by a blank,
    # and finally the closing comments.
    def syntax
      out = []
      out << comments_start << 'set more off' << blank
      out << syn_df << blank
      out << syn_convert_implied_decim << blank
      out << syn_display_format << blank
      out << syn_var_labs << blank
      out << syn_val_labs << blank
      out << comments_end
      out.flatten
    end

    # Parent's opening comment lines, rewritten as "* " comments.
    def comments_start
      convert_to_comments(super)
    end

    # Parent's closing comment lines, rewritten as "* " comments.
    def comments_end
      convert_to_comments(super)
    end

    # Prefixes every line with "* " and appends a blank; an empty input
    # yields an empty array (no trailing blank).
    def convert_to_comments(lines)
      return [] if lines.empty?

      lines.map { |ln| '* ' + ln }.push(blank).flatten
    end

    # Data-definition section: hierarchical variant when the data structure
    # is 'hier', rectangular variant otherwise.
    def syn_df
      if @sfc.data_structure == 'hier'
        syn_dfh
      else
        syn_dfr
      end
    end

    # Rectangular data: a single infix command covering every variable.
    def syn_dfr
      syn_infix(@sfc.variables)
    end

    # Hierarchical data: one infix block per record type, then the commands
    # that combine the per-record-type temp files.
    def syn_dfh
      [syn_dfh_infix_blocks, syn_dfh_combine].flatten
    end

    # A full infix command (start, one location line per variable, end)
    # for the given variables.
    def syn_infix(var_list)
      [syn_infix_start, syn_infix_var_locs(var_list), syn_infix_end].flatten
    end

    # "clear" plus the opening "quietly infix" line, padded so that its
    # continuation marker is laid out by @infix_format.
    def syn_infix_start
      opener = sprintf(@infix_format, @cmd_continue)
      ['clear', "quietly infix#{opener}"]
    end

    # One formatted line per variable: storage type, lower-cased name,
    # column locations and the continuation marker.
    def syn_infix_var_locs(var_list)
      var_list.map do |var|
        sprintf(
          @var_loc_format,
          var_fmt(var),
          var.name.downcase,
          var.column_locations_as_s,
          @cmd_continue
        )
      end
    end

    # Closing line of the infix command, naming the quoted data file.
    def syn_infix_end
      " using #{q(@sfc.data_file_name)}"
    end

    # For every record type that has at least one variable: an infix
    # command followed by the block-ending commands for that record type.
    def syn_dfh_infix_blocks
      blocks = @sfc.record_types.map do |rt|
        rt_vars = @sfc.get_vars_by_record_type(rt)
        rt_vars.size > 0 ? [syn_infix(rt_vars), syn_dfh_infix_block_end(rt)] : []
      end
      blocks.flatten
    end

    # Commands that follow a record type's infix: generate the sort
    # variable(s), drop rows of other record types, sort, save to the
    # record type's temp file, then a blank.
    def syn_dfh_infix_block_end(rt)
      [
        syn_dfh_infix_gen,
        "drop if #{rt_ne_statement(rt)}",
        "sort #{sort_vars.join(' ')}",
        "save #{temp_file_name(rt)}",
        blank,
      ].flatten
    end

    # "gen" commands for the sort variable(s). When rectangularizing, each
    # record type gets its own sort variable plus a "replace" that copies the
    # previous row's value on rows of a different record type.
    def syn_dfh_infix_gen
      return ["gen #{@sort_var_stem} = _n"] unless @sfc.rectangularize

      keys = sort_vars
      gens = keys.map { |key| "gen #{key} = _n" }
      carries = keys.zip(@sfc.record_types).map do |key, rt|
        "replace #{key} = #{key}[_n - 1] if _n > 1 & " + rt_ne_statement(rt)
      end
      gens + carries
    end

    # Commands that stitch the temp files back together: clear, combine,
    # sort/drop the sort variables, erase the temp files.
    def syn_dfh_combine
      [
        'clear',
        syn_dfh_combine_append,
        syn_dfh_combine_save,
        syn_dfh_combine_erase,
      ].flatten
    end

    # "use" the first temp file, then either "append using" each remaining
    # file (non-rectangularized) or, working through the files in reverse
    # order, "merge m:1" each one on its sort variable (rectangularized).
    def syn_dfh_combine_append
      files = temp_file_names

      unless @sfc.rectangularize
        first, *others = files
        return ['use ' + first] + others.map { |file| 'append using ' + file }
      end

      keys = sort_vars.reverse.drop(1)
      first, *others = files.reverse
      cmds = ['use ' + first]
      keys.zip(others).each do |key, file|
        cmds << 'merge m:1 ' + key + ' using ' + file + ', keep(master match)'
        cmds << 'drop _merge'
      end
      cmds
    end

    # Sort on the sort variable(s), then drop them.
    def syn_dfh_combine_save
      keys = sort_vars.join(' ')
      ["sort #{keys}", "drop #{keys}"]
    end

    # One "erase" command per temp file.
    def syn_dfh_combine_erase
      temp_file_names.map { |file| 'erase ' + file }
    end

    # One "replace" line per variable with implied decimals, dividing the
    # variable by 10 raised to its number of implied decimals.
    def syn_convert_implied_decim
      @sfc.variables
          .select { |var| var.implied_decimals > 0 }
          .map do |var|
            name = var.name.downcase
            sprintf(@replace_format, name, name, 10 ** var.implied_decimals)
          end
    end

    # One "format" line per variable whose storage type is double or float,
    # using the variable's width and implied decimals.
    def syn_display_format
      @sfc.variables
          .select { |var| %w[double float].include?(var_fmt(var)) }
          .map do |var|
            sprintf(@display_format, var.name.downcase, var.width, var.implied_decimals)
          end
    end

    # One "label var" line per variable. With no (or an empty) argument the
    # variables come from @sfc.get_vars_with_var_labels.
    def syn_var_labs(var_list = [])
      var_list = @sfc.get_vars_with_var_labels if var_list.empty?
      var_list.map do |var|
        sprintf(
          @var_lab_format,
          var.name.downcase,
          q(label_trunc(var.label, @var_label_max_leng))
        )
      end
    end

    # For every non-string variable that has values: its "label define"
    # lines, a "label values" line attaching the label set, and a blank.
    def syn_val_labs
      numeric_vars = @sfc.get_vars_with_values.reject { |var| var.is_string_var }
      return [] if numeric_vars.empty?

      numeric_vars.map do |var|
        [
          syn_val_labs_for_var(var),
          "label values " + var.name.downcase + ' ' + label_handle(var),
          blank,
        ]
      end.flatten
    end

    # "label define" lines for one variable; every line after the first
    # carries the ", add" suffix.
    def syn_val_labs_for_var(var)
      val_list = labelable_values(var)
      return [] if val_list.empty?

      width    = max_value_length(var, val_list)
      template = "label define %s %-#{width}s %s%s"
      suffix   = ''
      val_list.map do |val|
        text = label_trunc(val.label, @val_label_max_leng)
        # stata doesn't like blank value labels
        text = val.value if text.nil? || text.strip.empty?
        line = sprintf(template, label_handle(var), val.value, q(text), suffix)
        suffix = ', add'
        line
      end
    end

    # Wraps the string form of s in `" ... "' delimiters.
    def q(s)
      "`\"#{s}\"'"
    end

    # Storage type name for a variable: str, double, float, or an integer
    # type chosen by width (byte <= 2, int <= 4, long <= 7, else double).
    def var_fmt(var)
      if var.is_string_var
        'str'
      elsif var.is_double_var
        'double'
      elsif var.implied_decimals > 0
        'float'
      elsif var.width <= 2
        'byte'
      elsif var.width <= 4
        'int'
      elsif var.width <= 7
        'long'
      else
        'double'
      end
    end

    # Temp file names for the record types that have at least one variable.
    def temp_file_names
      @sfc.record_types
          .select { |rt| @sfc.get_vars_by_record_type(rt).size > 0 }
          .map { |rt| temp_file_name(rt) }
    end

    # Temp .dta file name for one record type.
    def temp_file_name(rt)
      '__temp_ipums_hier_' + rt + '.dta'
    end

    # Name of the value-label set for a variable.
    def label_handle(var)
      var.name.downcase + '_lbl'
    end

    # Sort variable names: just the stem, or stem + record type for every
    # record type when rectangularizing.
    def sort_vars
      if @sfc.rectangularize
        @sfc.record_types.map { |rt| @sort_var_stem + rt }
      else
        [@sort_var_stem]
      end
    end

    # "<record type variable> != <quoted value>" condition for record type rt.
    def rt_ne_statement(rt)
      rt_var = @sfc.record_type_var
      rt_var.name.downcase + ' != ' + val_q(rt_var, val_as_s(rt_var, rt))
    end

  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
  # Maker subclass whose #syntax returns the lines of a Stat/Transfer (STS)
  # schema: a fixed-format VARIABLES section followed by VARIABLE LABELS and
  # VALUE LABELS sections, built from the controller object held in @sfc.
  #
  # Helpers such as blank, syntax_end, labelable_values, max_value_length and
  # val_as_s are not defined here; presumably they come from Maker.
  class MakerSTS < Maker

    # Prepares the sprintf templates (padded to the longest variable name)
    # and caches the variables that have at least one STS-supported value.
    def initialize(sfc, syntax_type)
      super

      m = @sfc.max_var_name_length
      @var_lab_format = " %-#{m}s %s"
      @var_loc_format = " %-#{m}s %s %s"

      @vars_with_values = get_vars_with_sts_supported_values # cache
    end

    # Returns the whole schema as a flat array of lines.
    def syntax
      [
        syn_df,
        syn_var_labs,
        syn_val_labs,
      ].flatten
    end

    # Prefixes every line with "// " and appends a blank; an empty input
    # yields an empty array.
    def convert_to_comments(lines)
      return [] if lines.empty?

      [
        lines.map { |ln| '// ' + ln },
        blank,
      ].flatten
    end

    # Data-definition section: header, variable locations, section end.
    def syn_df
      [
        syn_df_start,
        syn_var_locations(@sfc.variables),
        syntax_end,
      ].flatten
    end

    # Header lines; for hierarchical data an explanatory comment is added.
    # The result may be nested -- syn_df flattens it.
    def syn_df_start
      ['FORMAT fixed', '', (@sfc.data_structure == 'hier') ? hier_fyi : '']
    end

    # Comment block explaining that hierarchical data is not directly
    # supported.
    def hier_fyi
      convert_to_comments([ '',
        'Hierarchical data structures are not directly supported by Stat/Transfer.',
        'Please see the README for the stats_package_syntax_file_generator gem for more information.', ''
      ])
    end

    # 'VARIABLES' followed by one line per variable: name, column location
    # (with format) and, where applicable, the value-label reference.
    def syn_var_locations(var_list)
      [
        'VARIABLES',
        var_list.map { |v| sprintf @var_loc_format, v.name, var_loc_with_fmt(v), var_val_lbl_id(v) }
      ].flatten
    end

    # "\NAME" reference to the variable's value-label set, or '' when the
    # variable has no STS-supported values.
    def var_val_lbl_id(var)
      return '' unless @vars_with_values.include?(var)

      '\\' + var.name
    end

    # VARIABLE LABELS section for the given variables (default: those
    # returned by @sfc.get_vars_with_var_labels). Variables whose label is
    # nil or blank are left out; returns [] when nothing remains.
    def syn_var_labs(var_list = [])
      var_list = @sfc.get_vars_with_var_labels if var_list.empty?
      var_list = var_list.select { |var| supported_var_label?(var) }
      return [] if var_list.empty?

      [
        'VARIABLE LABELS',
        var_list.map { |var| syn_var_lab_for_var(var) },
        syntax_end,
      ].flatten
    end

    # One variable-label line: name and quoted, newline-escaped label.
    def syn_var_lab_for_var(var)
      sprintf @var_lab_format, var.name, esc(q(var.label))
    end

    # VALUE LABELS section for the cached variables; [] when there are none.
    def syn_val_labs
      var_list = @vars_with_values
      return [] if var_list.empty?

      [
        'VALUE LABELS',
        syn_val_labs_for_var_list(var_list),
        syntax_end,
      ].flatten
    end

    # Value-label lines for each variable (one nested array per variable).
    def syn_val_labs_for_var_list(var_list)
      var_list.map { |var| syn_val_labs_for_var(var) }
    end

    # "\NAME" header followed by one line per labelable value. Unsupported
    # values produce an explanatory comment line instead of a label line.
    def syn_val_labs_for_var(var)
      val_list = labelable_values(var)
      return [] if val_list.empty?

      # Only supported values are printed as codes, so only they set the width.
      m = max_value_length(var, val_list.select { |x| supported_val?(x) })
      value_format = " %-#{m}s %s"
      [
        syn_val_labs_for_var_start(var),
        val_list.map { |val| syn_val_lab_for_val(var, val, value_format) }
      ].flatten
    end

    # Header line that opens a variable's value-label set.
    def syn_val_labs_for_var_start(var)
      ' \\' + var.name
    end

    # One value-label line, or a "// ..." comment when the value or its
    # label is not STS compatible.
    def syn_val_lab_for_val(var, val, fmt)
      return explain_skipped_value(val) unless supported_val?(val)

      sprintf fmt, sts_val_q(var, val_as_s(var, val.value.to_s)), esc(q(val.label))
    end

    # value codes (aka value values) need to be quoted with single quotes if they are strings
    def sts_val_q(var, v)
      var.is_string_var ? "'#{v}'" : v.to_s
    end

    # Column location plus format suffix. Variables with implied decimals
    # use only the start column, since the width is carried by the (Fw.d)
    # format.
    def var_loc_with_fmt(var)
      return var.column_locations_as_s + var_fmt(var) unless var.implied_decimals > 0

      var.start_column.to_s + var_fmt(var)
    end

    # Format suffix: ' (A)' for strings, ' (Fw.d)' for implied decimals,
    # otherwise ''.
    def var_fmt(var)
      return ' (A)' if var.is_string_var
      return '' unless var.implied_decimals > 0

      ' (F' + var.width.to_s + '.' + var.implied_decimals.to_s + ')'
    end

    # Double-quotes s, replacing embedded double quotes with two single
    # quotes.
    def q(s)
      '"' + s.to_s.gsub('"', '\'\'') + '"'
    end

    # Replaces newlines so that a label stays on one output line.
    def esc(s)
      s.gsub(/\n/, " [New line.] ")
    end

    # Comment line explaining why a value was skipped. The label is checked
    # first, then the value code; the generic message is only a fallback.
    # Both specific messages must `return`: without it the value-code message
    # was computed and discarded, and "// Skipping" was always returned.
    def explain_skipped_value(val)
      return "// Value label for '#{val.value}' is not STS compatible -- skipping" unless supported_val_label?(val)
      return "// Value '#{val.value}' is not STS compatible -- skipping" unless supported_val_value?(val)

      "// Skipping"
    end

    # Stat/Transfer does not like blank value labels
    def get_vars_with_sts_supported_values
      @sfc.get_vars_with_values.select do |var|
        sts_supported_values(var).size > 0
      end
    end

    # The variable's values that are STS compatible ([] for a nil variable
    # or a variable without values).
    def sts_supported_values(var)
      return [] if var.nil? || var.values.nil?

      var.values.select { |val| supported_val?(val) }
    end

    # True when both the value's label and its code are STS compatible.
    def supported_val?(val)
      supported_val_label?(val) && supported_val_value?(val)
    end

    # True when the value has a non-blank label.
    def supported_val_label?(val)
      !val.nil? && !val.label.nil? && !val.label.strip.empty?
    end

    # True when the value code consists solely of letters, digits, '-', '_'
    # and '.'. The pattern is anchored with \A and \z so the WHOLE string
    # must match; ^ and $ match per line and would accept codes containing
    # a newline, e.g. "1\n;junk".
    def supported_val_value?(val)
      !val.nil? && !val.value.nil? && !!(val.value.to_s =~ /\A[A-Za-z0-9\-\_\.]+\z/)
    end

    # True when the variable has a non-blank label.
    def supported_var_label?(var)
      !var.nil? && !var.label.nil? && !var.label.strip.empty?
    end

  end
end
|