stats_package_syntax_file_generator 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,300 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ module StatsPackageSyntaxFileGenerator
7
+ class MakerSTATA < Maker
8
+
9
# Sets up the Stata maker: defers to the parent initializer, then caches the
# sprintf templates, command punctuation, and label-length limits that the
# syntax-building methods read from instance variables.
#
# sfc         - controller object; @sfc (presumably assigned by the parent
#               initializer) is queried for max_var_name_length and
#               max_col_loc_width.
# syntax_type - passed through to the parent initializer unchanged.
def initialize(sfc, syntax_type)
  super

  name_width = @sfc.max_var_name_length
  loc_width  = 2 * @sfc.max_col_loc_width + 1

  # Command punctuation: no terminator, ' ///' marks a continued line.
  @cmd_end      = ''
  @cmd_continue = ' ///'

  # Limits handed to label_trunc for variable and value labels.
  @var_label_max_leng = 80
  @val_label_max_leng = 244

  # Stem of the helper variable(s) generated for hierarchical files.
  @sort_var_stem = '_line_num'

  # sprintf templates, padded to the widest variable name / column location.
  @var_loc_format = " %-7s %-#{name_width}s %-#{loc_width}s %s"
  @var_lab_format = "label var %-#{name_width}s %s"
  @infix_format   = "%#{loc_width + name_width + 4}s"
  @replace_format = "replace %-#{name_width}s = %-#{name_width}s / %d"
  @display_format = "format %-#{name_width}s %%%d.%df"
end
26
+
27
# Assembles the complete Stata syntax file as a flat array of lines:
# leading comments, 'set more off', then each section (data definition,
# implied-decimal conversion, display formats, variable labels, value
# labels) followed by a blank line, and finally the closing comments.
def syntax
  lines = []
  lines << comments_start << 'set more off' << blank
  lines << syn_df << blank
  lines << syn_convert_implied_decim << blank
  lines << syn_display_format << blank
  lines << syn_var_labs << blank
  lines << syn_val_labs << blank
  lines << comments_end
  lines.flatten
end
46
+
47
# Takes the opening comment lines produced by the parent class and turns
# them into Stata comment lines via convert_to_comments.
def comments_start
  inherited_lines = super
  convert_to_comments(inherited_lines)
end
50
+
51
# Takes the closing comment lines produced by the parent class and turns
# them into Stata comment lines via convert_to_comments.
def comments_end
  inherited_lines = super
  convert_to_comments(inherited_lines)
end
54
+
55
# Prefixes every line with '* ' and appends one blank line.
#
# lines - array of strings to comment out.
#
# Returns a flat array; an empty input yields [] (no trailing blank).
def convert_to_comments(lines)
  return [] if lines.empty?

  commented = lines.map { |text| '* ' + text }
  commented << blank
  commented.flatten
end
62
+
63
# Data-definition section: hierarchical files ('hier') go through syn_dfh,
# everything else through syn_dfr.
def syn_df
  if @sfc.data_structure == 'hier'
    syn_dfh
  else
    syn_dfr
  end
end
66
+
67
# Data definition for a non-hierarchical file: a single infix command
# covering every variable known to @sfc.
def syn_dfr
  all_vars = @sfc.variables
  syn_infix(all_vars)
end
70
+
71
# Data definition for a hierarchical file: the per-record-type infix blocks
# followed by the commands that combine the temporary files.
def syn_dfh
  parts = []
  parts << syn_dfh_infix_blocks
  parts << syn_dfh_combine
  parts.flatten
end
78
+
79
# Builds one complete infix command for the given variables: opening
# lines, one location line per variable, then the 'using' line.
#
# var_list - variables to read with this command.
#
# Returns a flat array of lines.
def syn_infix(var_list)
  lines = []
  lines << syn_infix_start
  lines << syn_infix_var_locs(var_list)
  lines << syn_infix_end
  lines.flatten
end
87
+
88
# Opening lines of an infix command: 'clear', then 'quietly infix' with the
# continuation marker right-justified through @infix_format.
def syn_infix_start
  padded_continue = format(@infix_format, @cmd_continue)
  ['clear', "quietly infix#{padded_continue}"]
end
94
+
95
# One infix location line per variable, rendered with @var_loc_format from:
# storage type (var_fmt), lower-cased name, column locations, and the
# continuation marker.
#
# var_list - variables to describe.
def syn_infix_var_locs(var_list)
  var_list.map do |var|
    columns = [
      var_fmt(var),
      var.name.downcase,
      var.column_locations_as_s,
      @cmd_continue,
    ]
    format(@var_loc_format, *columns)
  end
end
104
+
105
# Final line of an infix command: ' using ' plus the quoted data file name.
def syn_infix_end
  quoted_path = q(@sfc.data_file_name)
  ' using ' + quoted_path
end
108
+
109
# For every record type that has at least one variable, emits an infix
# command for that type's variables followed by the block-ending commands
# from syn_dfh_infix_block_end. Record types without variables are skipped.
#
# Returns a flat array of lines.
def syn_dfh_infix_blocks
  blocks = @sfc.record_types.map do |rt|
    rt_vars = @sfc.get_vars_by_record_type(rt)
    next unless rt_vars.size > 0

    [syn_infix(rt_vars), syn_dfh_infix_block_end(rt)]
  end
  # Drop the nil placeholders left by skipped record types, then flatten.
  blocks.compact.flatten
end
120
+
121
# Commands that follow one record type's infix command: generate the sort
# variable(s), drop rows belonging to other record types, sort, save to the
# record type's temporary file, and leave a blank line.
#
# rt - the record type this block belongs to.
#
# Returns a flat array of lines.
def syn_dfh_infix_block_end(rt)
  lines = []
  lines << syn_dfh_infix_gen
  lines << 'drop if ' + rt_ne_statement(rt)
  lines << 'sort ' + sort_vars.join(' ')
  lines << 'save ' + temp_file_name(rt)
  lines << blank
  lines.flatten
end
131
+
132
# Commands that create the sort variable(s) after an infix.
#
# Without rectangularization: a single 'gen <stem> = _n'.
# With rectangularization: one 'gen' per sort variable, then one 'replace'
# per (sort variable, record type) pair that copies the previous row's value
# into rows that are not of that record type.
#
# Returns a flat array of lines.
def syn_dfh_infix_gen
  return ["gen #{@sort_var_stem} = _n"] unless @sfc.rectangularize

  names = sort_vars
  gen_cmds = names.map { |name| "gen #{name} = _n" }
  carry_cmds = names.zip(@sfc.record_types).map do |name, rt|
    'replace ' + name + ' = ' + name + '[_n - 1] if _n > 1 & ' + rt_ne_statement(rt)
  end
  [gen_cmds, carry_cmds].flatten
end
148
+
149
# Commands that stitch the per-record-type temporary files back together:
# 'clear', the append/merge commands, the final sort/drop, and the erase
# commands for the temporary files.
#
# Returns a flat array of lines.
def syn_dfh_combine
  lines = ['clear']
  lines << syn_dfh_combine_append
  lines << syn_dfh_combine_save
  lines << syn_dfh_combine_erase
  lines.flatten
end
158
+
159
# Commands that load the first temporary file and bring in the others.
#
# Without rectangularization: 'use' the first file, 'append using' the rest.
# With rectangularization: both the sort variables and the files are walked
# in reverse; the last file is loaded with 'use', and each remaining file is
# merged m:1 on its paired sort variable, dropping _merge after every merge.
#
# NOTE(review): sort_vars has one entry per record type, while
# temp_file_names skips record types without variables, so the pairing
# below presumably assumes every record type has variables -- confirm.
#
# Returns an array of lines.
def syn_dfh_combine_append
  files = temp_file_names

  unless @sfc.rectangularize
    cmds = ['use ' + files.shift]
    files.each { |file| cmds << 'append using ' + file }
    return cmds
  end

  keys  = sort_vars.reverse
  files = files.reverse
  keys.shift # the sort variable of the file loaded with 'use' is not a merge key
  cmds = ['use ' + files.shift]
  keys.zip(files).each do |key, file|
    cmds << 'merge m:1 ' + key + ' using ' + file + ', keep(master match)'
    cmds << 'drop _merge'
  end
  cmds
end
177
+
178
# Final clean-up of the combined data set: sort by the sort variable(s),
# then drop them.
def syn_dfh_combine_save
  joined = sort_vars.join(' ')
  ['sort ' + joined, 'drop ' + joined]
end
184
+
185
# One 'erase' command per temporary file.
def syn_dfh_combine_erase
  temp_file_names.map do |file|
    'erase ' + file
  end
end
188
+
189
# One 'replace' command (per @replace_format) for every variable with
# implied decimals, dividing the variable by 10 ** implied_decimals.
#
# Returns [] when no variable has implied decimals.
def syn_convert_implied_decim
  scaled = @sfc.variables.select { |var| var.implied_decimals > 0 }
  scaled.map do |var|
    name = var.name.downcase
    divisor = 10 ** var.implied_decimals
    format(@replace_format, name, name, divisor)
  end
end
197
+
198
# One 'format' command (per @display_format) for every variable whose
# storage type, as reported by var_fmt, is 'double' or 'float'. The display
# width is the variable's width and the decimals its implied decimals.
#
# Returns [] when no variable qualifies.
def syn_display_format
  floating = @sfc.variables.select do |var|
    ['double', 'float'].include?(var_fmt(var))
  end
  floating.map do |var|
    format(@display_format, var.name.downcase, var.width, var.implied_decimals)
  end
end
209
+
210
# 'label var' commands (per @var_lab_format) for the given variables, with
# each label truncated to @var_label_max_leng and quoted.
#
# var_list - variables to label; when empty, falls back to
#            @sfc.get_vars_with_var_labels.
#
# Returns [] when there is nothing to label.
def syn_var_labs(var_list = [])
  var_list = @sfc.get_vars_with_var_labels if var_list.empty?
  return [] if var_list.empty?

  var_list.map do |var|
    shortened = label_trunc(var.label, @var_label_max_leng)
    format(@var_lab_format, var.name.downcase, q(shortened))
  end
end
219
+
220
# Value-label section: for every non-string variable that has values, the
# 'label define' lines for that variable, a 'label values' line attaching
# the label set to the variable, and a blank line.
#
# Returns a flat array of lines, or [] when no variable qualifies.
def syn_val_labs
  numeric_vars = @sfc.get_vars_with_values.reject { |var| var.is_string_var }
  return [] if numeric_vars.empty?

  sections = numeric_vars.map do |var|
    attach_cmd = "label values " + var.name.downcase + ' ' + label_handle(var)
    [syn_val_labs_for_var(var), attach_cmd, blank]
  end
  sections.flatten
end
232
+
233
# 'label define' lines for one variable, one per labelable value. The first
# line creates the label set; every later line carries ', add'.
#
# Labels are truncated to @val_label_max_leng. Stata does not accept blank
# value labels, so a nil or whitespace-only label is replaced by the value
# itself.
#
# var - the variable whose values are being labelled.
#
# Returns [] when the variable has no labelable values.
def syn_val_labs_for_var(var)
  values = labelable_values(var)
  return [] if values.empty?

  width = max_value_length(var, values)
  line_format = "label define %s %-#{width}s %s%s"

  values.each_with_index.map do |val, index|
    text = label_trunc(val.label, @val_label_max_leng)
    text = val.value if text.nil? || text.strip.empty?
    suffix = index.zero? ? '' : ', add'
    format(line_format, label_handle(var), val.value, q(text), suffix)
  end
end
255
+
256
# Wraps the string form of s in Stata compound double quotes: `"..."'
def q(s)
  "`\"#{s}\"'"
end
259
+
260
# Picks the Stata storage type for a variable. Checks run in this order:
# string flag, double flag, implied decimals, then width
# (<= 2 byte, <= 4 int, <= 7 long); anything wider is 'double'.
#
# var - object responding to is_string_var, is_double_var,
#       implied_decimals and width.
def var_fmt(var)
  if var.is_string_var
    'str'
  elsif var.is_double_var
    'double'
  elsif var.implied_decimals > 0
    'float'
  elsif var.width <= 2
    'byte'
  elsif var.width <= 4
    'int'
  elsif var.width <= 7
    'long'
  else
    'double'
  end
end
271
+
272
# Temporary file names for every record type that has at least one
# variable, in record-type order.
def temp_file_names
  populated = @sfc.record_types.select do |rt|
    @sfc.get_vars_by_record_type(rt).size > 0
  end
  populated.map { |rt| temp_file_name(rt) }
end
280
+
281
# Name of the temporary .dta file used for one record type.
#
# rt - record type, as a String.
def temp_file_name(rt)
  stem = '__temp_ipums_hier_' + rt
  stem + '.dta'
end
284
+
285
# Name of the value-label set for a variable: lower-cased variable name
# followed by '_lbl'.
def label_handle(var)
  "#{var.name.downcase}_lbl"
end
288
+
289
# Names of the helper sort variables: just the stem when the file is not
# being rectangularized, otherwise the stem suffixed with each record type.
def sort_vars
  if @sfc.rectangularize
    @sfc.record_types.map { |rt| @sort_var_stem + rt }
  else
    [@sort_var_stem]
  end
end
293
+
294
# Builds the '<record type variable> != <value>' condition for a record
# type, with the value formatted by val_as_s and quoted by val_q.
#
# rt - the record type to compare against.
def rt_ne_statement(rt)
  rt_var = @sfc.record_type_var
  formatted = val_as_s(rt_var, rt)
  quoted = val_q(rt_var, formatted)
  rt_var.name.downcase + ' != ' + quoted
end
298
+
299
+ end
300
+ end
@@ -0,0 +1,181 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
+ module StatsPackageSyntaxFileGenerator
7
+ class MakerSTS < Maker
8
+
9
# Sets up the Stat/Transfer (STS) maker: defers to the parent initializer,
# builds the sprintf templates padded to the longest variable name, and
# caches the variables that have at least one STS-supported value.
#
# sfc         - controller object; @sfc (presumably assigned by the parent
#               initializer) is queried for max_var_name_length.
# syntax_type - passed through to the parent initializer unchanged.
def initialize(sfc, syntax_type)
  super

  name_width = @sfc.max_var_name_length
  @var_lab_format = " %-#{name_width}s %s"
  @var_loc_format = " %-#{name_width}s %s %s"

  # Computed once here; read by var_val_lbl_id and syn_val_labs.
  @vars_with_values = get_vars_with_sts_supported_values
end
18
+
19
# Assembles the complete STS schema as a flat array of lines: data
# definition, variable labels, value labels.
def syntax
  lines = []
  lines << syn_df
  lines << syn_var_labs
  lines << syn_val_labs
  lines.flatten
end
27
+
28
# Prefixes every line with '// ' and appends one blank line.
#
# lines - array of strings to comment out.
#
# Returns a flat array; an empty input yields [] (no trailing blank).
def convert_to_comments(lines)
  return [] if lines.empty?

  commented = lines.map { |text| '// ' + text }
  commented << blank
  commented.flatten
end
35
+
36
# Data-definition section: the header lines, the VARIABLES block for every
# variable known to @sfc, and the section terminator from syntax_end.
#
# Returns a flat array of lines.
def syn_df
  lines = []
  lines << syn_df_start
  lines << syn_var_locations(@sfc.variables)
  lines << syntax_end
  lines.flatten
end
44
+
45
# Header of the data definition: 'FORMAT fixed', an empty line, and then
# either the hierarchical-data notice (for 'hier' files) or an empty string.
# The notice is left as a nested array.
def syn_df_start
  notice = if @sfc.data_structure == 'hier'
             hier_fyi
           else
             ''
           end
  ['FORMAT fixed', '', notice]
end
48
+
49
# Commented-out notice telling the reader that hierarchical data is not
# directly supported by Stat/Transfer, padded with an empty line on each
# side before commenting.
def hier_fyi
  notice = [
    '',
    'Hierarchical data structures are not directly supported by Stat/Transfer.',
    'Please see the README for the stats_package_syntax_file_generator gem for more information.',
    '',
  ]
  convert_to_comments(notice)
end
55
+
56
# The VARIABLES block: a 'VARIABLES' header followed by one line per
# variable (per @var_loc_format) giving its name, its location with format
# suffix, and its value-label reference (possibly empty).
#
# var_list - variables to describe.
#
# Returns a flat array of lines.
def syn_var_locations(var_list)
  rows = var_list.map do |var|
    format(@var_loc_format, var.name, var_loc_with_fmt(var), var_val_lbl_id(var))
  end
  ['VARIABLES', rows].flatten
end
63
+
64
# Value-label reference for a variable: a backslash followed by the
# variable name when the variable is among the cached @vars_with_values,
# otherwise an empty string.
def var_val_lbl_id(var)
  if @vars_with_values.include?(var)
    '\\' + var.name
  else
    ''
  end
end
68
+
69
# The VARIABLE LABELS block: header, one line per variable with a
# supported label, and the section terminator from syntax_end.
#
# var_list - variables to label; when empty, falls back to
#            @sfc.get_vars_with_var_labels. Variables failing
#            supported_var_label? are left out.
#
# Returns a flat array of lines, or [] when nothing remains to label.
def syn_var_labs(var_list = [])
  var_list = @sfc.get_vars_with_var_labels if var_list.empty?
  labelled = var_list.select { |var| supported_var_label?(var) }
  return [] if labelled.empty?

  rows = labelled.map { |var| syn_var_lab_for_var(var) }
  ['VARIABLE LABELS', rows, syntax_end].flatten
end
80
+
81
# One VARIABLE LABELS line (per @var_lab_format): the variable name and its
# label, quoted with q and then passed through esc.
def syn_var_lab_for_var(var)
  quoted_label = esc(q(var.label))
  format(@var_lab_format, var.name, quoted_label)
end
84
+
85
# The VALUE LABELS block for the cached @vars_with_values: header, the
# per-variable label lines, and the section terminator from syntax_end.
#
# Returns a flat array of lines, or [] when no variable has supported values.
def syn_val_labs
  labelled_vars = @vars_with_values
  return [] if labelled_vars.empty?

  lines = ['VALUE LABELS']
  lines << syn_val_labs_for_var_list(labelled_vars)
  lines << syntax_end
  lines.flatten
end
95
+
96
# Value-label lines for each variable in var_list, one nested array per
# variable (not flattened here).
def syn_val_labs_for_var_list(var_list)
  var_list.map do |var|
    syn_val_labs_for_var(var)
  end
end
99
+
100
# Value-label lines for one variable: the '\NAME' header followed by one
# line per labelable value. The value column is padded to the longest
# STS-supported value; unsupported values still produce a line (an
# explanatory comment from syn_val_lab_for_val).
#
# var - the variable whose values are being labelled.
#
# Returns a flat array of lines, or [] when there are no labelable values.
def syn_val_labs_for_var(var)
  values = labelable_values(var)
  return [] if values.empty?

  supported = values.select { |val| supported_val?(val) }
  width = max_value_length(var, supported)
  row_format = " %-#{width}s %s"

  header = syn_val_labs_for_var_start(var)
  rows = values.map { |val| syn_val_lab_for_val(var, val, row_format) }
  [header, rows].flatten
end
112
+
113
# Header line of a variable's value-label group: a space, a backslash, and
# the variable name.
def syn_val_labs_for_var_start(var)
  prefix = ' \\'
  prefix + var.name
end
116
+
117
# One value-label line. Unsupported values yield the explanatory comment
# from explain_skipped_value; supported ones are rendered with fmt from the
# (possibly quoted) value code and the quoted, escaped label.
#
# var - variable owning the value.
# val - value object responding to value and label.
# fmt - sprintf template taking the value code and the label.
def syn_val_lab_for_val(var, val, fmt)
  return explain_skipped_value(val) unless supported_val?(val)

  code = sts_val_q(var, val_as_s(var, val.value.to_s))
  label = esc(q(val.label))
  format(fmt, code, label)
end
121
+
122
# Value codes (aka value values) must be wrapped in single quotes when the
# variable is a string variable; otherwise the code is returned as a plain
# string.
#
# var - variable owning the value; only is_string_var is consulted.
# v   - the value code.
def sts_val_q(var, v)
  return v.to_s unless var.is_string_var

  "'" + v.to_s + "'"
end
126
+
127
# Location of a variable followed by its format suffix. Variables with
# implied decimals use only their start column; all others use their full
# column-locations string.
def var_loc_with_fmt(var)
  location = if var.implied_decimals > 0
               var.start_column.to_s
             else
               var.column_locations_as_s
             end
  location + var_fmt(var)
end
131
+
132
# Format suffix for a variable's location: ' (A)' for string variables,
# ' (F<width>.<decimals>)' for variables with implied decimals, and an
# empty string otherwise.
def var_fmt(var)
  if var.is_string_var
    ' (A)'
  elsif var.implied_decimals > 0
    " (F#{var.width}.#{var.implied_decimals})"
  else
    ''
  end
end
137
+
138
# Wraps the string form of s in double quotes, replacing every embedded
# double quote with two single quotes.
def q(s)
  escaped = s.to_s.gsub('"', "''")
  "\"#{escaped}\""
end
141
+
142
# Replaces every newline in s with the marker ' [New line.] '.
def esc(s)
  newline_marker = " [New line.] "
  s.gsub("\n", newline_marker)
end
145
+
146
# Returns the STS comment line explaining why a value was left out of the
# value labels.
#
# val - value object responding to value and label.
#
# The label is checked first, then the value code; '// Skipping' is the
# fallback when neither check explains the skip.
def explain_skipped_value(val)
  unless supported_val_label?(val)
    return "// Value label for '#{val.value}' is not STS compatible -- skipping"
  end

  # This branch previously lacked `return`, so its message was computed and
  # discarded and callers always got the generic '// Skipping' line.
  unless supported_val_value?(val)
    return "// Value '#{val.value}' is not STS compatible -- skipping"
  end

  "// Skipping"
end
151
+
152
# Stat/Transfer does not like blank value labels, so only variables with at
# least one STS-supported value are kept from @sfc.get_vars_with_values.
def get_vars_with_sts_supported_values
  @sfc.get_vars_with_values.reject do |var|
    sts_supported_values(var).empty?
  end
end
158
+
159
# The subset of a variable's values that pass supported_val?.
#
# Returns [] when var is nil or has no values collection.
def sts_supported_values(var)
  candidates = var.nil? ? nil : var.values
  return [] if candidates.nil?

  candidates.select { |val| supported_val?(val) }
end
163
+
164
# True when both the value's label and its value code are STS-supported.
# The value code is only checked once the label has passed.
def supported_val?(val)
  return false unless supported_val_label?(val)

  supported_val_value?(val)
end
167
+
168
# True when val is present and its label is neither nil nor blank
# (whitespace-only counts as blank).
def supported_val_label?(val)
  return false if val.nil?

  text = val.label
  return false if text.nil?

  !text.strip.empty?
end
171
+
172
# True when val is present and its value code, as a string, consists solely
# of ASCII letters, digits, '-', '_' and '.'.
#
# The pattern is anchored with \A and \z so the WHOLE string must match.
# The earlier ^/$ anchors match per line, which let multi-line codes such as
# "1\n;x" (or a code with a trailing newline) through.
def supported_val_value?(val)
  return false if val.nil? || val.value.nil?

  !(val.value.to_s =~ /\A[A-Za-z0-9\-_.]+\z/).nil?
end
175
+
176
# True when var is present and its label is neither nil nor blank
# (whitespace-only counts as blank).
def supported_var_label?(var)
  return false if var.nil?

  text = var.label
  return false if text.nil?

  !text.strip.empty?
end
179
+
180
+ end
181
+ end