stats_package_syntax_file_generator 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,300 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
module StatsPackageSyntaxFileGenerator
  # Builds the lines of a Stata syntax file from the metadata exposed by the
  # controller object held in @sfc.
  #
  # Helpers such as +blank+, +label_trunc+, +labelable_values+,
  # +max_value_length+, +val_q+ and +val_as_s+ are not defined in this class;
  # presumably they are inherited from Maker -- confirm against that class.
  class MakerSTATA < Maker

    # Prepares the sprintf templates and Stata-specific settings used by the
    # rest of the class. Both arguments are forwarded unchanged to
    # Maker#initialize by the bare +super+; @sfc is read immediately
    # afterwards, so it is expected to be assigned there.
    def initialize(sfc, syntax_type)
      super

      mx_var = @sfc.max_var_name_length
      # NOTE(review): looks like room for a "start-end" column range -- confirm.
      mx_col = 2 * @sfc.max_col_loc_width + 1

      @var_loc_format     = " %-7s %-#{mx_var}s %-#{mx_col}s %s"
      @var_lab_format     = "label var %-#{mx_var}s %s"
      @infix_format       = "%#{mx_col + mx_var + 4}s"
      @replace_format     = "replace %-#{mx_var}s = %-#{mx_var}s / %d"
      @display_format     = "format %-#{mx_var}s %%%d.%df"
      @cmd_end            = ''
      @cmd_continue       = ' ///'
      @var_label_max_leng = 80   # truncation length applied to variable labels
      @val_label_max_leng = 244  # truncation length applied to value labels
      @sort_var_stem      = '_line_num'
    end

    # Returns the whole syntax file as a flat array of lines.
    def syntax
      sections = [
        comments_start,
        'set more off',
        blank,
        syn_df,
        blank,
        syn_convert_implied_decim,
        blank,
        syn_display_format,
        blank,
        syn_var_labs,
        blank,
        syn_val_labs,
        blank,
        comments_end,
      ]
      sections.flatten
    end

    # Leading comment lines from the superclass, rewritten as Stata comments.
    def comments_start
      convert_to_comments(super)
    end

    # Trailing comment lines from the superclass, rewritten as Stata comments.
    def comments_end
      convert_to_comments(super)
    end

    # Prefixes every line with '* ' and appends +blank+.
    # Returns [] when +lines+ is empty.
    def convert_to_comments(lines)
      return [] if lines.empty?
      [lines.map { |ln| '* ' + ln }, blank].flatten
    end

    # Data-reading commands: hierarchical variant when the data structure is
    # 'hier', rectangular variant otherwise.
    def syn_df
      @sfc.data_structure == 'hier' ? syn_dfh : syn_dfr
    end

    # Rectangular data: a single infix command covering all variables.
    def syn_dfr
      syn_infix(@sfc.variables)
    end

    # Hierarchical data: one infix block per record type, then the commands
    # that combine the per-record-type temporary files.
    def syn_dfh
      [syn_dfh_infix_blocks, syn_dfh_combine].flatten
    end

    # A complete infix command (start lines, one line per variable, using
    # clause) for +var_list+.
    def syn_infix(var_list)
      [
        syn_infix_start,
        syn_infix_var_locs(var_list),
        syn_infix_end,
      ].flatten
    end

    # Opening lines of an infix command. The continuation marker is
    # right-aligned through @infix_format.
    def syn_infix_start
      [
        'clear',
        'quietly infix' + sprintf(@infix_format, @cmd_continue),
      ]
    end

    # One continued line per variable: storage type, lower-cased name and
    # column locations.
    def syn_infix_var_locs(var_list)
      var_list.map do |v|
        sprintf(
          @var_loc_format,
          var_fmt(v),
          v.name.downcase,
          v.column_locations_as_s,
          @cmd_continue
        )
      end
    end

    # Closing 'using' clause naming the quoted data file.
    def syn_infix_end
      ' using ' + q(@sfc.data_file_name)
    end

    # For every record type that has at least one variable: an infix command
    # followed by the lines that filter, sort and save that record type.
    def syn_dfh_infix_blocks
      blocks = []
      @sfc.record_types.each do |rt|
        var_list = @sfc.get_vars_by_record_type(rt)
        next unless var_list.size > 0
        blocks.push(syn_infix(var_list), syn_dfh_infix_block_end(rt))
      end
      blocks.flatten
    end

    # Lines that follow the infix command of one record type: generate the
    # sort variable(s), drop rows of other record types, sort, and save to the
    # record type's temporary file.
    def syn_dfh_infix_block_end(rt)
      [
        syn_dfh_infix_gen,
        'drop if ' + rt_ne_statement(rt),
        'sort ' + sort_vars.join(' '),
        'save ' + temp_file_name(rt),
        blank,
      ].flatten
    end

    # Commands creating the sort variable(s).
    #
    # Without rectangularization a single variable holding _n is generated.
    # With rectangularization one variable per record type is generated and
    # then overwritten with the previous row's value on every row that is not
    # of that record type.
    #
    # Always returns an Array (the original ended in +flatten!+, which can
    # return nil when nothing is flattened).
    def syn_dfh_infix_gen
      return ["gen #{@sort_var_stem} = _n"] unless @sfc.rectangularize

      sv = sort_vars
      gens = sv.map { |v| "gen #{v} = _n" }
      fills = sv.zip(@sfc.record_types).map do |name, rt|
        'replace ' + name + ' = ' + name + '[_n - 1] if _n > 1 & ' + rt_ne_statement(rt)
      end
      gens + fills
    end

    # Commands that rebuild one data set from the temporary files and then
    # clean up.
    def syn_dfh_combine
      [
        'clear',
        syn_dfh_combine_append,
        syn_dfh_combine_save,
        syn_dfh_combine_erase,
      ].flatten
    end

    # Commands that load the temporary files back into one data set.
    #
    # Rectangularized: start from the temporary file of the last record type
    # that has variables and merge each earlier one on its own sort variable.
    # Otherwise: load the first temporary file and append the rest.
    #
    # The merge key and the temporary file are derived from the same record
    # type. The original zipped sort_vars (all record types) with
    # temp_file_names (only record types with variables), which misaligned the
    # pairs whenever a record type had no variables.
    def syn_dfh_combine_append
      if @sfc.rectangularize
        rts = @sfc.record_types.select { |rt| @sfc.get_vars_by_record_type(rt).size > 0 }
        base, *others = rts.reverse
        cmds = ['use ' + temp_file_name(base)]
        others.each do |rt|
          cmds.push 'merge m:1 ' + @sort_var_stem + rt + ' using ' + temp_file_name(rt) + ', keep(master match)'
          cmds.push 'drop _merge'
        end
        cmds
      else
        first, *rest = temp_file_names
        ['use ' + first] + rest.map { |t| 'append using ' + t }
      end
    end

    # Restores the ordering given by the sort variable(s), then drops them.
    def syn_dfh_combine_save
      vars = sort_vars.join(' ')
      [
        'sort ' + vars,
        'drop ' + vars,
      ]
    end

    # One 'erase' command per temporary file.
    def syn_dfh_combine_erase
      temp_file_names.map { |t| 'erase ' + t }
    end

    # 'replace' commands dividing each variable that has implied decimals by
    # the matching power of ten.
    def syn_convert_implied_decim
      @sfc.variables.select { |var| var.implied_decimals > 0 }.map do |var|
        v = var.name.downcase
        sprintf(@replace_format, v, v, 10 ** var.implied_decimals)
      end
    end

    # 'format' commands for variables stored as double or float.
    def syn_display_format
      @sfc.variables.select { |var| %w[double float].include?(var_fmt(var)) }.map do |var|
        sprintf(@display_format, var.name.downcase, var.width, var.implied_decimals)
      end
    end

    # 'label var' commands. When +var_list+ is empty, the variables returned
    # by @sfc.get_vars_with_var_labels are used instead.
    def syn_var_labs(var_list = [])
      var_list = @sfc.get_vars_with_var_labels if var_list.empty?
      return [] if var_list.empty?
      var_list.map do |var|
        sprintf(
          @var_lab_format,
          var.name.downcase,
          q(label_trunc(var.label, @var_label_max_leng))
        )
      end
    end

    # Value-label definitions plus the 'label values' command for every
    # non-string variable returned by @sfc.get_vars_with_values.
    def syn_val_labs
      var_list = @sfc.get_vars_with_values.reject { |var| var.is_string_var }
      return [] if var_list.empty?
      per_var = var_list.map do |var|
        [
          syn_val_labs_for_var(var),
          "label values " + var.name.downcase + ' ' + label_handle(var),
          blank,
        ]
      end
      per_var.flatten
    end

    # 'label define' commands for one variable. Every command after the first
    # carries ', add' so it extends the same label set.
    def syn_val_labs_for_var(var)
      val_list = labelable_values(var)
      return [] if val_list.empty?
      m = max_value_length(var, val_list)
      value_format = "label define %s %-#{m}s %s%s"
      val_list.each_with_index.map do |val, i|
        label_truncated = label_trunc(val.label, @val_label_max_leng)
        # stata doesn't like blank value labels; fall back to the value itself
        label_truncated = val.value if label_truncated.nil? || label_truncated.strip.empty?
        sprintf(
          value_format,
          label_handle(var),
          val.value,
          q(label_truncated),
          i.zero? ? '' : ', add'
        )
      end
    end

    # Wraps +s+ in Stata compound double quotes: `"..."'
    def q(s)
      '`"' + s.to_s + '"\''
    end

    # Storage type used for +var+ in the infix command.
    def var_fmt(var)
      return 'str'    if var.is_string_var
      return 'double' if var.is_double_var
      return 'float'  if var.implied_decimals > 0
      return 'byte'   if var.width <= 2
      return 'int'    if var.width <= 4
      return 'long'   if var.width <= 7
      'double'
    end

    # Temporary file names for the record types that have at least one
    # variable, in record-type order.
    def temp_file_names
      with_vars = @sfc.record_types.select { |rt| @sfc.get_vars_by_record_type(rt).size > 0 }
      with_vars.map { |rt| temp_file_name(rt) }
    end

    # Temporary .dta file name for record type +rt+.
    def temp_file_name(rt)
      '__temp_ipums_hier_' + rt + '.dta'
    end

    # Name of the value-label set belonging to +var+.
    def label_handle(var)
      var.name.downcase + '_lbl'
    end

    # Sort variable names: just the stem when not rectangularizing, otherwise
    # the stem suffixed with each record type.
    def sort_vars
      return [@sort_var_stem] unless @sfc.rectangularize
      @sfc.record_types.map { |rt| @sort_var_stem + rt }
    end

    # Expression that is true for rows whose record-type variable differs
    # from +rt+.
    def rt_ne_statement(rt)
      rt_var = @sfc.record_type_var
      rt_var.name.downcase + ' != ' + val_q(rt_var, val_as_s(rt_var, rt))
    end

  end
end
@@ -0,0 +1,181 @@
1
+ # This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
2
+ # For copyright and licensing information, see the NOTICE and LICENSE files
3
+ # in this project's top-level directory, and also on-line at:
4
+ # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
+
6
module StatsPackageSyntaxFileGenerator
  # Builds the lines of a Stat/Transfer schema (STS) file from the metadata
  # exposed by the controller object held in @sfc.
  #
  # Helpers such as +blank+, +syntax_end+, +labelable_values+,
  # +max_value_length+ and +val_as_s+ are not defined in this class;
  # presumably they are inherited from Maker -- confirm against that class.
  class MakerSTS < Maker

    # Prepares the sprintf templates and caches the variables that have at
    # least one STS-compatible value. Both arguments are forwarded unchanged
    # to Maker#initialize by the bare +super+.
    def initialize(sfc, syntax_type)
      super

      m = @sfc.max_var_name_length
      @var_lab_format = " %-#{m}s %s"
      @var_loc_format = " %-#{m}s %s %s"

      @vars_with_values = get_vars_with_sts_supported_values # cache
    end

    # Returns the whole schema file as a flat array of lines.
    def syntax
      [syn_df, syn_var_labs, syn_val_labs].flatten
    end

    # Prefixes every line with '// ' and appends +blank+.
    # Returns [] when +lines+ is empty.
    def convert_to_comments(lines)
      return [] if lines.empty?
      [lines.map { |ln| '// ' + ln }, blank].flatten
    end

    # Format header, the VARIABLES section and the section terminator.
    def syn_df
      [
        syn_df_start,
        syn_var_locations(@sfc.variables),
        syntax_end,
      ].flatten
    end

    # Header lines. For hierarchical data a commented notice is included;
    # otherwise an empty string takes its place.
    def syn_df_start
      notice = @sfc.data_structure == 'hier' ? hier_fyi : ''
      ['FORMAT fixed', '', notice]
    end

    # Commented notice explaining that hierarchical data is not directly
    # supported.
    def hier_fyi
      convert_to_comments([
        '',
        'Hierarchical data structures are not directly supported by Stat/Transfer.',
        'Please see the README for the stats_package_syntax_file_generator gem for more information.',
        '',
      ])
    end

    # 'VARIABLES' followed by one line per variable: name, column location
    # with format, and the value-label id (if any).
    def syn_var_locations(var_list)
      rows = var_list.map do |v|
        sprintf(@var_loc_format, v.name, var_loc_with_fmt(v), var_val_lbl_id(v))
      end
      ['VARIABLES', rows].flatten
    end

    # Backslash-prefixed value-label id for +var+, or '' when the variable is
    # not among the cached variables with supported values.
    def var_val_lbl_id(var)
      return '' unless @vars_with_values.include?(var)
      '\\' + var.name
    end

    # 'VARIABLE LABELS' section. When +var_list+ is empty, the variables
    # returned by @sfc.get_vars_with_var_labels are used; variables whose
    # label is nil or blank are left out.
    def syn_var_labs(var_list = [])
      var_list = @sfc.get_vars_with_var_labels if var_list.empty?
      var_list = var_list.select { |var| supported_var_label?(var) }
      return [] if var_list.empty?
      [
        'VARIABLE LABELS',
        var_list.map { |var| syn_var_lab_for_var(var) },
        syntax_end,
      ].flatten
    end

    # One variable-label line: name plus quoted, newline-escaped label.
    def syn_var_lab_for_var(var)
      sprintf(@var_lab_format, var.name, esc(q(var.label)))
    end

    # 'VALUE LABELS' section for the cached variables with supported values.
    def syn_val_labs
      var_list = @vars_with_values
      return [] if var_list.empty?
      [
        'VALUE LABELS',
        syn_val_labs_for_var_list(var_list),
        syntax_end,
      ].flatten
    end

    # Value-label lines for each variable (one nested array per variable).
    def syn_val_labs_for_var_list(var_list)
      var_list.map { |var| syn_val_labs_for_var(var) }
    end

    # Label-id header plus one line per labelable value of +var+.
    # Unsupported values become explanatory comment lines; the column width is
    # computed from the supported values only.
    def syn_val_labs_for_var(var)
      val_list = labelable_values(var)
      return [] if val_list.empty?

      m = max_value_length(var, val_list.select { |x| supported_val?(x) })
      value_format = " %-#{m}s %s"
      [
        syn_val_labs_for_var_start(var),
        val_list.map { |val| syn_val_lab_for_val(var, val, value_format) },
      ].flatten
    end

    # Header line introducing the value labels of +var+.
    def syn_val_labs_for_var_start(var)
      ' \\' + var.name
    end

    # A single value-label line, or a comment explaining why the value was
    # skipped.
    def syn_val_lab_for_val(var, val, fmt)
      return explain_skipped_value(val) unless supported_val?(val)
      sprintf(fmt, sts_val_q(var, val_as_s(var, val.value.to_s)), esc(q(val.label)))
    end

    # value codes (aka value values) need to be quoted with single quotes if they are strings
    def sts_val_q(var, v)
      var.is_string_var ? "'#{v}'" : v.to_s
    end

    # Column location followed by the format suffix. Variables with implied
    # decimals use only the start column (their format carries the width).
    def var_loc_with_fmt(var)
      return var.column_locations_as_s + var_fmt(var) unless var.implied_decimals > 0
      var.start_column.to_s + var_fmt(var)
    end

    # Format suffix: ' (A)' for strings, ' (Fw.d)' for variables with implied
    # decimals, '' otherwise.
    def var_fmt(var)
      return ' (A)' if var.is_string_var
      return '' unless var.implied_decimals > 0
      ' (F' + var.width.to_s + '.' + var.implied_decimals.to_s + ')'
    end

    # Wraps +s+ in double quotes, replacing each embedded double quote with
    # two single quotes.
    def q(s)
      '"' + s.to_s.gsub('"', '\'\'') + '"'
    end

    # Replaces newlines so the text stays on one output line.
    def esc(s)
      s.gsub(/\n/, " [New line.] ")
    end

    # Comment line stating why +val+ was skipped.
    #
    # The second branch now returns its message; the original was missing the
    # +return+, so the method always fell through to "// Skipping" whenever
    # the label was fine. The value is passed through +esc+ so that a value
    # containing a newline cannot break the one-line comment.
    def explain_skipped_value(val)
      shown = esc(val.value.to_s)
      return "// Value label for '#{shown}' is not STS compatible -- skipping" unless supported_val_label?(val)
      return "// Value '#{shown}' is not STS compatible -- skipping" unless supported_val_value?(val)
      "// Skipping"
    end

    # Stat/Transfer does not like blank value labels
    # Returns the variables that have at least one supported value.
    def get_vars_with_sts_supported_values
      @sfc.get_vars_with_values.select do |var|
        !sts_supported_values(var).empty?
      end
    end

    # The values of +var+ that pass supported_val?; [] when +var+ or its
    # value list is nil.
    def sts_supported_values(var)
      return [] if var.nil? || var.values.nil?
      var.values.select { |val| supported_val?(val) }
    end

    # True when both the label and the value of +val+ are usable.
    def supported_val?(val)
      supported_val_label?(val) && supported_val_value?(val)
    end

    # True when +val+ has a label that is neither nil nor blank.
    def supported_val_label?(val)
      !val.nil? && !val.label.nil? && !val.label.strip.empty?
    end

    # True when the value consists solely of letters, digits, '-', '_' and
    # '.'. Anchored with \A and \z so the WHOLE string must match; ^ and $
    # match per line and would accept a value containing a newline.
    def supported_val_value?(val)
      !val.nil? && !val.value.nil? && !!(val.value.to_s =~ /\A[A-Za-z0-9\-_.]+\z/)
    end

    # True when +var+ has a label that is neither nil nor blank.
    def supported_var_label?(var)
      !var.nil? && !var.label.nil? && !var.label.strip.empty?
    end

  end
end