stats_package_syntax_file_generator 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README +128 -0
- data/lib/stats_package_syntax_file_generator.rb +19 -0
- data/lib/stats_package_syntax_file_generator/controller.rb +323 -0
- data/lib/stats_package_syntax_file_generator/maker.rb +126 -0
- data/lib/stats_package_syntax_file_generator/maker_sas.rb +306 -0
- data/lib/stats_package_syntax_file_generator/maker_spss.rb +194 -0
- data/lib/stats_package_syntax_file_generator/maker_stata.rb +300 -0
- data/lib/stats_package_syntax_file_generator/maker_sts.rb +181 -0
- data/lib/stats_package_syntax_file_generator/value.rb +29 -0
- data/lib/stats_package_syntax_file_generator/variable.rb +56 -0
- data/tests/input_all_vars.yaml +2012 -0
- data/tests/input_controller.yaml +13 -0
- data/tests/setup.rb +103 -0
- data/tests/tc_controller.rb +378 -0
- data/tests/tc_maker.rb +172 -0
- data/tests/tc_maker_sas.rb +251 -0
- data/tests/tc_maker_spss.rb +121 -0
- data/tests/tc_maker_stata.rb +224 -0
- data/tests/tc_maker_sts.rb +190 -0
- data/tests/tc_value.rb +23 -0
- data/tests/tc_variable.rb +53 -0
- data/tests/ts_all.rb +20 -0
- metadata +67 -0
@@ -0,0 +1,300 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
|
7
|
+
class MakerSTATA < Maker
|
8
|
+
|
9
|
+
# Delegates to the parent constructor, then prepares the sprintf templates,
# command terminators, label length limits and sort-variable stem used by the
# Stata syntax methods of this class.
def initialize(sfc, syntax_type)
  super

  name_width = @sfc.max_var_name_length
  loc_width  = 2 * @sfc.max_col_loc_width + 1

  # Plain settings.
  @cmd_end            = ''
  @cmd_continue       = ' ///'
  @sort_var_stem      = '_line_num'
  @var_label_max_leng = 80
  @val_label_max_leng = 244

  # Templates whose column widths depend on the variables being described.
  @var_loc_format = " %-7s %-#{name_width}s %-#{loc_width}s %s"
  @var_lab_format = "label var %-#{name_width}s %s"
  @infix_format   = "%#{loc_width + name_width + 4}s"
  @replace_format = "replace %-#{name_width}s = %-#{name_width}s / %d"
  @display_format = "format %-#{name_width}s %%%d.%df"
end
|
26
|
+
|
27
|
+
# Assembles the complete Stata syntax file as a flat array of lines:
# opening comments, 'set more off', then each section followed by a blank
# line, and finally the closing comments.
def syntax
  section_builders = [
    :syn_df,
    :syn_convert_implied_decim,
    :syn_display_format,
    :syn_var_labs,
    :syn_val_labs
  ]

  lines = [comments_start, 'set more off', blank]
  section_builders.each do |builder|
    lines << send(builder)
    lines << blank
  end
  lines << comments_end
  lines.flatten
end
|
46
|
+
|
47
|
+
# Takes the parent's opening comment lines and renders them as Stata comments.
def comments_start
  inherited_lines = super
  convert_to_comments(inherited_lines)
end
|
50
|
+
|
51
|
+
# Takes the parent's closing comment lines and renders them as Stata comments.
def comments_end
  inherited_lines = super
  convert_to_comments(inherited_lines)
end
|
54
|
+
|
55
|
+
# Prefixes every line with '* ' and appends a blank line.
# An empty input yields an empty array (no trailing blank).
def convert_to_comments(lines)
  return [] if lines.empty?

  commented = lines.map { |text| '* ' + text }
  commented << blank
  commented.flatten
end
|
62
|
+
|
63
|
+
# Chooses the data-definition builder: the hierarchical one when the data
# structure is 'hier', the rectangular one otherwise.
def syn_df
  return syn_dfh if @sfc.data_structure == 'hier'

  syn_dfr
end
|
66
|
+
|
67
|
+
# Rectangular data definition: a single infix statement over all variables.
def syn_dfr
  all_vars = @sfc.variables
  syn_infix(all_vars)
end
|
70
|
+
|
71
|
+
# Hierarchical data definition: the per-record-type infix blocks followed by
# the commands that combine them.
def syn_dfh
  parts = []
  parts << syn_dfh_infix_blocks
  parts << syn_dfh_combine
  parts.flatten
end
|
78
|
+
|
79
|
+
# Builds one complete infix statement for the given variables:
# opening lines, one location line per variable, then the closing line.
def syn_infix(var_list)
  opening   = syn_infix_start
  locations = syn_infix_var_locs(var_list)
  closing   = syn_infix_end
  [opening, locations, closing].flatten
end
|
87
|
+
|
88
|
+
# Opening lines of an infix statement: 'clear', then 'quietly infix' with the
# continuation marker formatted through @infix_format.
def syn_infix_start
  continuation = sprintf(@infix_format, @cmd_continue)
  infix_line = 'quietly infix' + continuation
  ['clear', infix_line]
end
|
94
|
+
|
95
|
+
# One formatted line per variable: storage type, lower-cased name, column
# locations and the continuation marker, laid out with @var_loc_format.
def syn_infix_var_locs(var_list)
  var_list.map do |var|
    fields = [
      var_fmt(var),
      var.name.downcase,
      var.column_locations_as_s,
      @cmd_continue
    ]
    sprintf(@var_loc_format, *fields)
  end
end
|
104
|
+
|
105
|
+
# Closing line of an infix statement: the 'using' clause with the quoted
# data file name.
def syn_infix_end
  quoted_file = q(@sfc.data_file_name)
  ' using ' + quoted_file
end
|
108
|
+
|
109
|
+
# For each record type that has at least one variable, emits an infix
# statement for those variables followed by that record type's block-ending
# commands. Record types without variables contribute nothing.
def syn_dfh_infix_blocks
  per_type = @sfc.record_types.map do |rec_type|
    rec_vars = @sfc.get_vars_by_record_type(rec_type)
    next [] unless rec_vars.size > 0

    [syn_infix(rec_vars), syn_dfh_infix_block_end(rec_type)]
  end
  per_type.flatten
end
|
120
|
+
|
121
|
+
# Commands that follow the infix statement of one record type: generate the
# sort variable(s), drop rows of other record types, sort, save to the
# record type's temp file, then a blank line.
def syn_dfh_infix_block_end(rt)
  lines = [syn_dfh_infix_gen]
  lines << ('drop if ' + rt_ne_statement(rt))
  lines << ('sort ' + sort_vars.join(' '))
  lines << ('save ' + temp_file_name(rt))
  lines << blank
  lines.flatten
end
|
131
|
+
|
132
|
+
# Commands that create the sort variable(s).
# Without rectangularization: a single 'gen <stem> = _n'.
# With rectangularization: one 'gen' per sort variable, then one 'replace'
# per (sort variable, record type) pair that copies the previous row's value
# on rows matching the record type's not-equal condition.
def syn_dfh_infix_gen
  return ["gen #{@sort_var_stem} = _n"] unless @sfc.rectangularize

  counters = sort_vars
  gen_cmds = counters.map { |name| "gen #{name} = _n" }
  carry_cmds = counters.zip(@sfc.record_types).map do |name, rec_type|
    'replace ' + name + ' = ' + name + '[_n - 1] if _n > 1 & ' + rt_ne_statement(rec_type)
  end
  gen_cmds + carry_cmds
end
|
148
|
+
|
149
|
+
# Commands that recombine the per-record-type temp files: 'clear', the
# append/merge commands, the final sort/drop, and the temp-file erasures.
def syn_dfh_combine
  steps = ['clear']
  steps << syn_dfh_combine_append
  steps << syn_dfh_combine_save
  steps << syn_dfh_combine_erase
  steps.flatten
end
|
158
|
+
|
159
|
+
# Commands that load the temp files back into one data set.
# Without rectangularization: 'use' the first temp file and 'append' the rest.
# With rectangularization: both lists are reversed, the first (reversed) file
# is loaded with 'use', and each remaining file is merged m:1 on the matching
# sort variable (the first reversed sort variable is skipped), dropping
# _merge after every merge.
def syn_dfh_combine_append
  files = temp_file_names

  unless @sfc.rectangularize
    first_file, *other_files = files
    return ['use ' + first_file] + other_files.map { |file| 'append using ' + file }
  end

  merge_keys = sort_vars.reverse.drop(1)
  first_file, *other_files = files.reverse
  cmds = ['use ' + first_file]
  merge_keys.zip(other_files).each do |key, file|
    cmds << ('merge m:1 ' + key + ' using ' + file + ', keep(master match)')
    cmds << 'drop _merge'
  end
  cmds
end
|
177
|
+
|
178
|
+
# Final ordering step: sort by the sort variable(s), then drop them.
def syn_dfh_combine_save
  %w[sort drop].map do |command|
    command + ' ' + sort_vars.join(' ')
  end
end
|
184
|
+
|
185
|
+
# One 'erase' command per temp file.
def syn_dfh_combine_erase
  files = temp_file_names
  files.map { |file| 'erase ' + file }
end
|
188
|
+
|
189
|
+
# For every variable with implied decimals, emits a 'replace' command that
# divides the variable by 10 raised to its number of implied decimals.
# Returns an empty array when no variable has implied decimals.
def syn_convert_implied_decim
  decimal_vars = @sfc.variables.select { |candidate| candidate.implied_decimals > 0 }
  decimal_vars.map do |dec_var|
    stata_name = dec_var.name.downcase
    divisor = 10**dec_var.implied_decimals
    sprintf(@replace_format, stata_name, stata_name, divisor)
  end
end
|
197
|
+
|
198
|
+
# Emits a 'format' command for every variable whose storage type is
# 'double' or 'float', using the variable's width and implied decimals.
# Returns an empty array when there is no such variable.
def syn_display_format
  real_types = %w[double float]
  real_vars = @sfc.variables.select { |candidate| real_types.include?(var_fmt(candidate)) }
  real_vars.map do |real_var|
    sprintf(@display_format, real_var.name.downcase, real_var.width, real_var.implied_decimals)
  end
end
|
209
|
+
|
210
|
+
# Emits one 'label var' command per variable. When no list is given (or it is
# empty) the variables that have labels are fetched from the controller.
# Labels are truncated to @var_label_max_leng and quoted.
def syn_var_labs(var_list = [])
  var_list = @sfc.get_vars_with_var_labels if var_list.empty?

  var_list.map do |labelled|
    stata_name = labelled.name.downcase
    quoted_label = q(label_trunc(labelled.label, @var_label_max_leng))
    sprintf(@var_lab_format, stata_name, quoted_label)
  end
end
|
219
|
+
|
220
|
+
# For every non-string variable that has values, emits its 'label define'
# commands, a 'label values' command attaching the label set, and a blank
# line. Returns an empty array when no such variable exists.
def syn_val_labs
  numeric_vars = @sfc.get_vars_with_values.reject { |candidate| candidate.is_string_var }

  per_var = numeric_vars.map do |num_var|
    section = [syn_val_labs_for_var(num_var)]
    section << ('label values ' + num_var.name.downcase + ' ' + label_handle(num_var))
    section << blank
    section
  end
  per_var.flatten
end
|
232
|
+
|
233
|
+
# Emits one 'label define' command per labelable value of the variable.
# Every command after the first carries the ', add' option. Labels are
# truncated to @val_label_max_leng; a nil or whitespace-only label is
# replaced by the value itself.
def syn_val_labs_for_var(var)
  val_list = labelable_values(var)
  return [] if val_list.empty?

  value_width = max_value_length(var, val_list)
  template = "label define %s %-#{value_width}s %s%s"

  val_list.each_with_index.map do |val, position|
    label_text = label_trunc(val.label, @val_label_max_leng)
    # Stata rejects blank value labels, so fall back to the value.
    label_text = val.value if label_text.nil? || label_text.strip.empty?
    suffix = position.zero? ? '' : ', add'
    sprintf(template, label_handle(var), val.value, q(label_text), suffix)
  end
end
|
255
|
+
|
256
|
+
# Wraps the string form of +s+ in Stata compound double quotes: `"..."'
def q(s)
  "`\"#{s}\"'"
end
|
259
|
+
|
260
|
+
# Picks the Stata storage type for a variable. String and double flags are
# checked first, then implied decimals ('float'), then the width:
# up to 2 => 'byte', up to 4 => 'int', up to 7 => 'long', wider => 'double'.
def var_fmt(var)
  if var.is_string_var
    'str'
  elsif var.is_double_var
    'double'
  elsif var.implied_decimals > 0
    'float'
  elsif var.width <= 2
    'byte'
  elsif var.width <= 4
    'int'
  elsif var.width <= 7
    'long'
  else
    'double'
  end
end
|
271
|
+
|
272
|
+
# Temp file names for every record type that has at least one variable,
# in record-type order.
def temp_file_names
  populated_types = @sfc.record_types.select do |rec_type|
    @sfc.get_vars_by_record_type(rec_type).size > 0
  end
  populated_types.map { |rec_type| temp_file_name(rec_type) }
end
|
280
|
+
|
281
|
+
# Name of the temporary .dta file used for one record type.
#
# The record type is interpolated rather than concatenated, so a non-String
# record type (for example an Integer) no longer raises TypeError.
def temp_file_name(rt)
  "__temp_ipums_hier_#{rt}.dta"
end
|
284
|
+
|
285
|
+
# Name of the value-label set for a variable: lower-cased name plus '_lbl'.
def label_handle(var)
  "#{var.name.downcase}_lbl"
end
|
288
|
+
|
289
|
+
# Names of the helper sort variables.
# Without rectangularization there is a single variable named by the stem;
# with it there is one per record type, named stem + record type.
#
# The record type is interpolated rather than concatenated, so non-String
# record types (for example Integers) no longer raise TypeError.
def sort_vars
  return [@sort_var_stem] unless @sfc.rectangularize

  @sfc.record_types.map { |rec_type| "#{@sort_var_stem}#{rec_type}" }
end
|
293
|
+
|
294
|
+
# Builds the condition '<record type variable> != <quoted record type>',
# using the controller's record-type variable (name lower-cased).
def rt_ne_statement(rt)
  type_var = @sfc.record_type_var
  quoted_value = val_q(type_var, val_as_s(type_var, rt))
  type_var.name.downcase + ' != ' + quoted_value
end
|
298
|
+
|
299
|
+
end
|
300
|
+
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
|
7
|
+
class MakerSTS < Maker
|
8
|
+
|
9
|
+
# Delegates to the parent constructor, then prepares the sprintf templates
# for variable labels and locations and caches the variables that have
# STS-supported values.
def initialize(sfc, syntax_type)
  super

  name_column = " %-#{@sfc.max_var_name_length}s"
  @var_lab_format = name_column + ' %s'
  @var_loc_format = name_column + ' %s %s'

  # Computed once here and reused by the value-label methods.
  @vars_with_values = get_vars_with_sts_supported_values
end
|
18
|
+
|
19
|
+
# Assembles the complete syntax as a flat array of lines: data definition,
# variable labels, then value labels.
def syntax
  sections = []
  sections << syn_df
  sections << syn_var_labs
  sections << syn_val_labs
  sections.flatten
end
|
27
|
+
|
28
|
+
# Prefixes every line with '// ' and appends a blank line.
# An empty input yields an empty array (no trailing blank).
def convert_to_comments(lines)
  return [] if lines.empty?

  commented = lines.map { |text| '// ' + text }
  commented << blank
  commented.flatten
end
|
35
|
+
|
36
|
+
# Data definition section: the opening lines, the VARIABLES block for all
# variables, then the section terminator.
def syn_df
  header    = syn_df_start
  locations = syn_var_locations(@sfc.variables)
  footer    = syntax_end
  [header, locations, footer].flatten
end
|
44
|
+
|
45
|
+
# Opening lines of the data definition. For hierarchical data the third
# element is the explanatory comment block; otherwise it is an empty string.
def syn_df_start
  note =
    if @sfc.data_structure == 'hier'
      hier_fyi
    else
      ''
    end
  ['FORMAT fixed', '', note]
end
|
48
|
+
|
49
|
+
# Comment block telling the reader that hierarchical data is not directly
# supported by Stat/Transfer.
def hier_fyi
  notice = [
    '',
    'Hierarchical data structures are not directly supported by Stat/Transfer.',
    'Please see the README for the stats_package_syntax_file_generator gem for more information.',
    ''
  ]
  convert_to_comments(notice)
end
|
55
|
+
|
56
|
+
# The VARIABLES block: a header line followed by one line per variable with
# its name, location (plus format) and value-label reference.
def syn_var_locations(var_list)
  rows = var_list.map do |v|
    sprintf(@var_loc_format, v.name, var_loc_with_fmt(v), var_val_lbl_id(v))
  end
  ['VARIABLES', rows].flatten
end
|
63
|
+
|
64
|
+
# Value-label reference for a variable: a backslash followed by its name
# when the variable is among the cached variables with supported values,
# otherwise an empty string.
def var_val_lbl_id(var)
  @vars_with_values.include?(var) ? '\\' + var.name : ''
end
|
68
|
+
|
69
|
+
# The VARIABLE LABELS block. When no list is given (or it is empty) the
# labelled variables are fetched from the controller. Variables whose label
# is not supported are left out; if none remain the result is empty.
def syn_var_labs(var_list = [])
  var_list = @sfc.get_vars_with_var_labels if var_list.empty?
  usable = var_list.select { |candidate| supported_var_label?(candidate) }
  return [] if usable.empty?

  label_lines = usable.map { |labelled| syn_var_lab_for_var(labelled) }
  ['VARIABLE LABELS', label_lines, syntax_end].flatten
end
|
80
|
+
|
81
|
+
# One variable-label line: the variable name and its quoted, escaped label.
def syn_var_lab_for_var(var)
  label_text = esc(q(var.label))
  sprintf(@var_lab_format, var.name, label_text)
end
|
84
|
+
|
85
|
+
# The VALUE LABELS block for the cached variables with supported values.
# Returns an empty array when there are none.
def syn_val_labs
  labelled_vars = @vars_with_values
  return [] if labelled_vars.empty?

  body = syn_val_labs_for_var_list(labelled_vars)
  ['VALUE LABELS', body, syntax_end].flatten
end
|
95
|
+
|
96
|
+
# Collects the value-label lines of each variable, one entry per variable
# (the result is not flattened here).
def syn_val_labs_for_var_list(var_list)
  var_list.each_with_object([]) do |var, collected|
    collected << syn_val_labs_for_var(var)
  end
end
|
99
|
+
|
100
|
+
# Value-label lines for one variable: a header line followed by one line per
# labelable value. The value column width is computed from the supported
# values only; every labelable value still gets a line.
def syn_val_labs_for_var(var)
  val_list = labelable_values(var)
  return [] if val_list.empty?

  supported = val_list.select { |candidate| supported_val?(candidate) }
  value_width = max_value_length(var, supported)
  row_format = " %-#{value_width}s %s"

  header = syn_val_labs_for_var_start(var)
  rows = val_list.map { |val| syn_val_lab_for_val(var, val, row_format) }
  [header, rows].flatten
end
|
112
|
+
|
113
|
+
# Header line of a variable's value-label block: space, backslash, name.
def syn_val_labs_for_var_start(var)
  header_prefix = ' \\'
  header_prefix + var.name
end
|
116
|
+
|
117
|
+
# One value-label line: the (possibly quoted) value code and its quoted,
# escaped label, laid out with +fmt+. Unsupported values yield an
# explanatory comment line instead.
def syn_val_lab_for_val(var, val, fmt)
  return explain_skipped_value(val) unless supported_val?(val)

  code = sts_val_q(var, val_as_s(var, val.value.to_s))
  label_text = esc(q(val.label))
  sprintf(fmt, code, label_text)
end
|
121
|
+
|
122
|
+
# Value codes of string variables must be wrapped in single quotes; codes of
# other variables are emitted as plain text.
def sts_val_q(var, v)
  if var.is_string_var
    "'#{v}'"
  else
    v.to_s
  end
end
|
126
|
+
|
127
|
+
# Location text for a variable followed by its format suffix. Variables with
# implied decimals use only the start column; all others use the full
# column-locations string.
def var_loc_with_fmt(var)
  location =
    if var.implied_decimals > 0
      var.start_column.to_s
    else
      var.column_locations_as_s
    end
  location + var_fmt(var)
end
|
131
|
+
|
132
|
+
# Format suffix for a variable: ' (A)' for string variables,
# ' (F<width>.<decimals>)' for variables with implied decimals, and an
# empty string for everything else.
def var_fmt(var)
  if var.is_string_var
    ' (A)'
  elsif var.implied_decimals > 0
    " (F#{var.width}.#{var.implied_decimals})"
  else
    ''
  end
end
|
137
|
+
|
138
|
+
# Wraps the string form of +s+ in double quotes, replacing each embedded
# double quote with two single quotes.
def q(s)
  inner = s.to_s.gsub('"', "''")
  %("#{inner}")
end
|
141
|
+
|
142
|
+
# Replaces every newline in +s+ with the marker ' [New line.] '.
def esc(s)
  newline_marker = ' [New line.] '
  s.gsub("\n", newline_marker)
end
|
145
|
+
|
146
|
+
# Returns the comment line explaining why a value was left out of the
# value labels.
#
# The unsupported-label message takes priority, then the unsupported-value
# message; '// Skipping' is the fallback when neither check fails.
#
# Fix: the unsupported-value message used to be evaluated without a
# +return+, so it was discarded and the generic '// Skipping' was returned.
def explain_skipped_value(val)
  unless supported_val_label?(val)
    return "// Value label for '#{val.value}' is not STS compatible -- skipping"
  end
  unless supported_val_value?(val)
    return "// Value '#{val.value}' is not STS compatible -- skipping"
  end

  '// Skipping'
end
|
151
|
+
|
152
|
+
# Stat/Transfer does not like blank value labels, so only variables that
# have at least one STS-supported value are kept.
def get_vars_with_sts_supported_values
  @sfc.get_vars_with_values.reject do |var|
    sts_supported_values(var).empty?
  end
end
|
158
|
+
|
159
|
+
# The values of a variable that are STS-supported. A nil variable or a nil
# values list yields an empty array.
def sts_supported_values(var)
  return [] if var.nil?

  candidates = var.values
  return [] if candidates.nil?

  candidates.select { |val| supported_val?(val) }
end
|
163
|
+
|
164
|
+
# A value is supported only when both its label and its value code are.
def supported_val?(val)
  return false unless supported_val_label?(val)

  supported_val_value?(val)
end
|
167
|
+
|
168
|
+
# True when the value is present and its label is neither nil nor blank
# after stripping whitespace.
def supported_val_label?(val)
  return false if val.nil? || val.label.nil?

  !val.label.strip.empty?
end
|
171
|
+
|
172
|
+
# True when the value is present and its code, as a string, consists only of
# letters, digits, hyphens, underscores and periods.
#
# The pattern is anchored with \A and \z so the WHOLE string must match.
# The previous ^...$ anchors matched per line, which let a code containing a
# newline pass as long as any single line was valid.
def supported_val_value?(val)
  return false if val.nil? || val.value.nil?

  !!(val.value.to_s =~ /\A[A-Za-z0-9\-_.]+\z/)
end
|
175
|
+
|
176
|
+
# True when the variable is present and its label is neither nil nor blank
# after stripping whitespace.
def supported_var_label?(var)
  return false if var.nil? || var.label.nil?

  !var.label.strip.empty?
end
|
179
|
+
|
180
|
+
end
|
181
|
+
end
|