stats_package_syntax_file_generator 1.0.4 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README +2 -1
- data/lib/stats_package_syntax_file_generator.rb +1 -0
- data/lib/syntax_file/controller.rb +214 -220
- data/lib/syntax_file/maker.rb +92 -92
- data/lib/syntax_file/maker_rddi.rb +55 -0
- data/lib/syntax_file/maker_sas.rb +204 -205
- data/lib/syntax_file/maker_spss.rb +143 -143
- data/lib/syntax_file/maker_stata.rb +211 -192
- data/lib/syntax_file/maker_sts.rb +120 -127
- data/lib/syntax_file/value.rb +15 -16
- data/lib/syntax_file/variable.rb +40 -41
- data/tests/input_all_vars.yaml +1 -1
- data/tests/input_controller.yaml +1 -1
- data/tests/setup.rb +8 -8
- data/tests/tc_controller.rb +1 -1
- data/tests/tc_maker.rb +1 -1
- data/tests/tc_maker_rddi.rb +33 -0
- data/tests/tc_maker_stata.rb +1 -1
- data/tests/tc_maker_sts.rb +0 -1
- data/tests/ts_all.rb +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 979787a0351769e1f7ba25a38e169a8f8a13ae0091e7b5faad0a858b8d501cb7
|
4
|
+
data.tar.gz: 79ba66410caaed9dec8d450ae28137a8328405987985cf9d472ecff8b6929873
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07c600f733ed4bef35d689d3486f51dbac9e914a774ae0e442b1c900a3e74cc8b82afe49bba534a655671d12a217dd9393c0a9611430f4b0ad9da1687ffe425e
|
7
|
+
data.tar.gz: 5f5c54efe960a92abe98f808e2c8829efca9e03eddef54705a5d04c9e339246653a628e5b1bed059cc3153ebd53fee438d14be50866d8543ecef180c47bc5c14
|
data/README
CHANGED
@@ -4,7 +4,7 @@ This gem produces statistical package syntax files for fixed-column data files.
|
|
4
4
|
SPSS
|
5
5
|
Stata
|
6
6
|
Stat/Transfer STS metadata files
|
7
|
-
|
7
|
+
R (via the ipumsr package which depends on IPUMS DDIs)
|
8
8
|
|
9
9
|
Metadata can be supplied to the Controller in two general ways:
|
10
10
|
|
@@ -89,6 +89,7 @@ Class overview:
|
|
89
89
|
Maker_SPSS
|
90
90
|
Maker_STATA
|
91
91
|
Maker_STS
|
92
|
+
Maker_RDDI
|
92
93
|
|
93
94
|
- Classes responsible for creating syntax.
|
94
95
|
|
@@ -4,282 +4,276 @@
|
|
4
4
|
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
5
|
|
6
6
|
module SyntaxFile
|
7
|
-
class Controller
|
8
|
-
|
9
|
-
VERSION = "1.
|
10
|
-
|
11
|
-
ATTR = {
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
}
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
7
|
+
class Controller
|
8
|
+
|
9
|
+
VERSION = "1.1.3"
|
10
|
+
|
11
|
+
ATTR = {
|
12
|
+
:project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
|
13
|
+
:caller => { :req => false, :rw => 'rw', :def => '', :yaml => true },
|
14
|
+
:data_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
|
15
|
+
:data_file_name => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true },
|
16
|
+
:output_formats => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
|
17
|
+
:output_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
|
18
|
+
:output_file_stem => { :req => false, :rw => 'rw', :def => '%s', :yaml => true },
|
19
|
+
:output_file_ext => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
|
20
|
+
:output_overwrite => { :req => false, :rw => 'rw', :def => false, :yaml => true },
|
21
|
+
:data_structure => { :req => false, :rw => 'rw', :def => 'rect', :yaml => true },
|
22
|
+
:record_types => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
|
23
|
+
:record_type_var_name => { :req => false, :rw => 'rw', :def => '', :yaml => true },
|
24
|
+
:rectangularize => { :req => false, :rw => 'rw', :def => false, :yaml => true },
|
25
|
+
:all_vars_as_string => { :req => false, :rw => 'rw', :def => false, :yaml => true },
|
26
|
+
:select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false, :yaml => true },
|
27
|
+
:variables => { :req => false, :rw => 'r', :def => nil, :yaml => false },
|
28
|
+
:yaml_files => { :req => false, :rw => 'r', :def => nil, :yaml => false },
|
29
|
+
:output_encoding => { :req => false, :rw => 'r', :def => "iso-8859-1",:yaml => true },
|
30
|
+
}
|
31
|
+
|
32
|
+
ATTR.each_key do |k|
|
33
|
+
attr_reader k if ATTR[k][:rw].include? 'r'
|
34
|
+
attr_writer k if ATTR[k][:rw].include? 'w'
|
35
|
+
end
|
35
36
|
|
36
|
-
def initialize
|
37
|
-
|
38
|
-
raise(ArgumentError, "Missing required parameter: '#{k}'.") if
|
39
|
-
ATTR[k][:req] and not args.has_key?(k)
|
37
|
+
def initialize(args = {})
|
38
|
+
ATTR.each_key { |k|
|
39
|
+
raise(ArgumentError, "Missing required parameter: '#{k}'.") if ATTR[k][:req] and not args.has_key?(k)
|
40
40
|
v = args.has_key?(k) ? args[k] : ATTR[k][:def]
|
41
41
|
instance_variable_set("@#{k}".to_sym, v)
|
42
|
-
|
42
|
+
}
|
43
43
|
|
44
|
-
|
45
|
-
'sas'
|
46
|
-
'spss'
|
44
|
+
@output_file_ext = {
|
45
|
+
'sas' => '.sas',
|
46
|
+
'spss' => '.sps',
|
47
47
|
'stata' => '.do',
|
48
|
-
'sts'
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
48
|
+
'sts' => '.sts',
|
49
|
+
'rddi' => '.R'
|
50
|
+
} if @output_file_ext.nil?
|
51
|
+
@output_formats = [] if @output_formats.nil?
|
52
|
+
@record_types = [] if @record_types.nil?
|
53
|
+
@variables = [] if @variables.nil?
|
54
|
+
@yaml_files = [] if @yaml_files.nil?
|
55
|
+
read_metadata_from_yaml
|
56
|
+
end
|
57
57
|
|
58
|
-
# Methods to import metadata from YAML files into the Controller object.
|
58
|
+
# Methods to import metadata from YAML files into the Controller object.
|
59
59
|
|
60
|
-
def yaml_files=
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
60
|
+
def yaml_files=(file_names)
|
61
|
+
# Caller can supply a file name or an array of file names.
|
62
|
+
@yaml_files = file_names.to_a
|
63
|
+
read_metadata_from_yaml
|
64
|
+
end
|
65
65
|
|
66
|
-
def read_metadata_from_yaml
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
66
|
+
def read_metadata_from_yaml
|
67
|
+
return if @yaml_files.empty?
|
68
|
+
md = {}
|
69
|
+
@yaml_files.each { |f| md.merge! YAML.load_file(f) }
|
70
|
+
md = symbolize_keys(md)
|
71
|
+
load_yaml_md(md)
|
72
|
+
end
|
73
73
|
|
74
|
-
def load_yaml_md
|
75
|
-
|
76
|
-
|
74
|
+
def load_yaml_md(md)
|
75
|
+
# Uses metadata from yaml to set metadata-related instance variables.
|
76
|
+
ATTR.each_key do |k|
|
77
77
|
next unless md.has_key?(k) and ATTR[k][:yaml]
|
78
78
|
instance_variable_set("@#{k}".to_sym, md[k])
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
79
|
+
end
|
80
|
+
return unless md.has_key?(:variables)
|
81
|
+
@variables = []
|
82
|
+
return unless md[:variables].size > 0
|
83
|
+
md[:variables].each do |md_var|
|
84
84
|
vals = md_var.delete(:values)
|
85
85
|
var = add_variable(md_var)
|
86
86
|
vals.each { |v| var.add_value(v) } unless vals.nil?
|
87
|
+
end
|
87
88
|
end
|
88
|
-
end
|
89
89
|
|
90
|
-
def symbolize_keys
|
91
|
-
|
92
|
-
|
93
|
-
h.inject({}) { |return_hash,(k,v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
|
94
|
-
|
90
|
+
def symbolize_keys(h)
|
91
|
+
# Recursively converts hash keys from strings to symbols.
|
92
|
+
if h.instance_of? Hash
|
93
|
+
h.inject({}) { |return_hash, (k, v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
|
94
|
+
elsif h.instance_of? Array
|
95
95
|
h.map { |v| symbolize_keys(v) }
|
96
|
-
|
96
|
+
else
|
97
97
|
h
|
98
|
+
end
|
98
99
|
end
|
99
|
-
end
|
100
100
|
|
101
|
-
# Methods to add or get variables.
|
101
|
+
# Methods to add or get variables.
|
102
102
|
|
103
|
-
def add_variable
|
104
|
-
|
105
|
-
|
106
|
-
end
|
103
|
+
def add_variable(args)
|
104
|
+
@variables.push Variable.new(args)
|
105
|
+
@variables[-1]
|
106
|
+
end
|
107
107
|
|
108
|
-
def clear_variables
|
109
|
-
|
110
|
-
end
|
108
|
+
def clear_variables
|
109
|
+
@variables = []
|
110
|
+
end
|
111
111
|
|
112
|
-
def get_var_by_name
|
113
|
-
|
114
|
-
end
|
112
|
+
def get_var_by_name(n)
|
113
|
+
@variables.find { |v| v.name == n }
|
114
|
+
end
|
115
115
|
|
116
|
-
def get_vars_by_record_type
|
117
|
-
|
118
|
-
end
|
116
|
+
def get_vars_by_record_type(rt)
|
117
|
+
@variables.find_all { |v| v.record_type == rt or v.is_common_var }
|
118
|
+
end
|
119
119
|
|
120
|
-
def get_vars_with_var_labels
|
121
|
-
|
122
|
-
end
|
120
|
+
def get_vars_with_var_labels
|
121
|
+
@variables.find_all { |v| v.label.length > 0 }
|
122
|
+
end
|
123
123
|
|
124
|
-
def get_vars_with_values
|
125
|
-
|
124
|
+
def get_vars_with_values
|
125
|
+
@variables.find_all { |var|
|
126
126
|
var.values.size > 0 and
|
127
|
-
|
128
|
-
|
129
|
-
end
|
127
|
+
not var.suppress_labels
|
128
|
+
}
|
129
|
+
end
|
130
130
|
|
131
|
-
def get_big_nums
|
132
|
-
|
131
|
+
def get_big_nums
|
132
|
+
@variables.find_all { |var|
|
133
133
|
var.width > 8 and
|
134
|
-
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
|
139
|
-
def record_type_var
|
140
|
-
get_var_by_name(@record_type_var_name)
|
141
|
-
end
|
134
|
+
not var.is_string_var
|
135
|
+
}
|
136
|
+
end
|
142
137
|
|
138
|
+
def record_type_var
|
139
|
+
get_var_by_name(@record_type_var_name)
|
140
|
+
end
|
143
141
|
|
144
|
-
# Methods for adding values to variables.
|
142
|
+
# Methods for adding values to variables.
|
145
143
|
|
146
|
-
def add_value
|
147
|
-
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
|
-
def new_values (*vals)
|
152
|
-
vals.flatten!
|
153
|
-
vals.map { |v| Value.new(v) }
|
154
|
-
end
|
144
|
+
def add_value(args)
|
145
|
+
@variables[-1].values.push Value.new(args)
|
146
|
+
@variables[-1].values[-1]
|
147
|
+
end
|
155
148
|
|
149
|
+
def new_values(*vals)
|
150
|
+
vals.flatten!
|
151
|
+
vals.map { |v| Value.new(v) }
|
152
|
+
end
|
156
153
|
|
157
|
-
# Methods for record types.
|
154
|
+
# Methods for record types.
|
158
155
|
|
159
|
-
def is_last_record_type
|
160
|
-
|
161
|
-
|
162
|
-
end
|
163
|
-
|
164
|
-
def rec_types_except_last
|
165
|
-
r = Array.new(@record_types)
|
166
|
-
r.pop
|
167
|
-
r
|
168
|
-
end
|
156
|
+
def is_last_record_type(rt)
|
157
|
+
return true if @record_types.size > 0 and @record_types[-1] == rt
|
158
|
+
return false
|
159
|
+
end
|
169
160
|
|
161
|
+
def rec_types_except_last
|
162
|
+
r = Array.new(@record_types)
|
163
|
+
r.pop
|
164
|
+
r
|
165
|
+
end
|
170
166
|
|
171
|
-
# Helper methods.
|
167
|
+
# Helper methods.
|
172
168
|
|
173
|
-
def max_var_name_length
|
174
|
-
|
175
|
-
|
176
|
-
end
|
169
|
+
def max_var_name_length
|
170
|
+
return 0 if @variables.empty?
|
171
|
+
@variables.map { |v| v.name.length }.max
|
172
|
+
end
|
177
173
|
|
178
|
-
def max_col_loc_width
|
179
|
-
|
180
|
-
|
181
|
-
end
|
174
|
+
def max_col_loc_width
|
175
|
+
return 0 if @variables.empty?
|
176
|
+
@variables.map { |v| v.end_column.to_s.length }.max
|
177
|
+
end
|
182
178
|
|
183
|
-
def data_file_name_stem
|
184
|
-
|
185
|
-
end
|
179
|
+
def data_file_name_stem
|
180
|
+
File.basename(@data_file_name, '.*')
|
181
|
+
end
|
186
182
|
|
187
|
-
def rec_type_lookup_hash
|
188
|
-
|
189
|
-
end
|
183
|
+
def rec_type_lookup_hash
|
184
|
+
Hash[* @record_types.map { |rt| [rt, 0] }.flatten]
|
185
|
+
end
|
190
186
|
|
191
|
-
def last_column_used
|
192
|
-
|
193
|
-
|
194
|
-
end
|
187
|
+
def last_column_used
|
188
|
+
return 0 if @variables.empty?
|
189
|
+
@variables.map { |v| v.end_column }.max
|
190
|
+
end
|
195
191
|
|
196
|
-
# Output methods.
|
192
|
+
# Output methods.
|
197
193
|
|
198
|
-
def to_s
|
199
|
-
|
200
|
-
end
|
194
|
+
def to_s
|
195
|
+
YAML.dump(self)
|
196
|
+
end
|
201
197
|
|
202
|
-
def generate_syntax_files
|
203
|
-
|
204
|
-
|
205
|
-
end
|
198
|
+
def generate_syntax_files
|
199
|
+
bad_metadata('no output formats') if @output_formats.empty?
|
200
|
+
@output_formats.each { |t| generate_syntax_file(t) }
|
201
|
+
end
|
206
202
|
|
207
|
-
def generate_syntax_file
|
208
|
-
|
209
|
-
|
210
|
-
|
203
|
+
def generate_syntax_file(syntax_type)
|
204
|
+
msg = "output directory does not exist => #{@output_dir_name}"
|
205
|
+
bad_metadata(msg) unless File.directory?(@output_dir_name)
|
206
|
+
file_name = File.join(
|
211
207
|
@output_dir_name,
|
212
208
|
sprintf(@output_file_stem, data_file_name_stem) + @output_file_ext[syntax_type]
|
213
|
-
|
214
|
-
|
209
|
+
)
|
210
|
+
if File.file?(file_name) and not @output_overwrite
|
215
211
|
$stderr.puts "Skipping file that aready exists => #{file_name}."
|
216
|
-
|
212
|
+
else
|
217
213
|
if RUBY_VERSION.start_with? "1.8"
|
218
214
|
File.open(file_name, 'w') { |f| f.puts syntax(syntax_type) }
|
219
215
|
else
|
220
|
-
File.open(file_name,
|
216
|
+
File.open(file_name, "w:#{self.output_encoding}") { |f|
|
221
217
|
|
222
|
-
|
223
|
-
|
218
|
+
lines = syntax(syntax_type)
|
219
|
+
lines.each do |line|
|
224
220
|
begin
|
225
|
-
|
226
|
-
rescue Exception=>msg
|
227
|
-
|
221
|
+
f.puts line.rstrip.encode(self.output_encoding, line.encoding.to_s, :invalid => :replace, :undef => :replace, :replace => '?')
|
222
|
+
rescue Exception => msg
|
223
|
+
puts "Failed encoding on line #{line} #{msg}"
|
228
224
|
end
|
229
|
-
|
225
|
+
end
|
230
226
|
}
|
231
227
|
end
|
232
228
|
|
229
|
+
end
|
233
230
|
end
|
234
|
-
end
|
235
|
-
|
236
|
-
def syntax (syntax_type)
|
237
|
-
validate_metadata(:minimal => true)
|
238
|
-
modify_metadata
|
239
|
-
validate_metadata
|
240
231
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
232
|
+
def syntax(syntax_type)
|
233
|
+
validate_metadata(:minimal => true)
|
234
|
+
modify_metadata
|
235
|
+
validate_metadata
|
245
236
|
|
237
|
+
maker_class = 'Maker' + syntax_type.upcase
|
238
|
+
syntax_maker = eval(maker_class).new(self, syntax_type)
|
239
|
+
syntax_maker.syntax
|
240
|
+
end
|
246
241
|
|
247
|
-
# Before generating syntax, we need to handle some controller-level
|
248
|
-
# options that require global modification of the metadata.
|
242
|
+
# Before generating syntax, we need to handle some controller-level
|
243
|
+
# options that require global modification of the metadata.
|
249
244
|
|
250
|
-
def modify_metadata
|
251
|
-
|
252
|
-
|
245
|
+
def modify_metadata
|
246
|
+
# Force all variables to be strings.
|
247
|
+
if @all_vars_as_string
|
253
248
|
@variables.each do |var|
|
254
|
-
|
255
|
-
|
256
|
-
|
249
|
+
var.is_string_var = true
|
250
|
+
var.is_double_var = false
|
251
|
+
var.implied_decimals = 0
|
257
252
|
end
|
258
|
-
|
253
|
+
end
|
259
254
|
|
260
|
-
|
261
|
-
|
262
|
-
|
255
|
+
# If the user wants to rectangularize hierarchical data, the
|
256
|
+
# select_vars_by_record_type option is required.
|
257
|
+
@select_vars_by_record_type = true if @rectangularize
|
263
258
|
|
264
|
-
|
265
|
-
|
259
|
+
# Remove any variables not belonging to the declared record types.
|
260
|
+
if @select_vars_by_record_type
|
266
261
|
rt_lookup = rec_type_lookup_hash()
|
267
262
|
@variables = @variables.find_all { |var| var.is_common_var or rt_lookup[var.record_type] }
|
263
|
+
end
|
268
264
|
end
|
269
|
-
end
|
270
|
-
|
271
265
|
|
272
|
-
# Before generating syntax, run a sanity check on the metadata.
|
266
|
+
# Before generating syntax, run a sanity check on the metadata.
|
273
267
|
|
274
|
-
def validate_metadata
|
275
|
-
|
268
|
+
def validate_metadata(check = {})
|
269
|
+
bad_metadata('no variables') if @variables.empty?
|
276
270
|
|
277
|
-
|
271
|
+
if @rectangularize
|
278
272
|
msg = 'the rectangularize option requires data_structure=hier'
|
279
273
|
bad_metadata(msg) unless @data_structure == 'hier'
|
280
|
-
|
274
|
+
end
|
281
275
|
|
282
|
-
|
276
|
+
if @data_structure == 'hier' or @select_vars_by_record_type
|
283
277
|
bad_metadata('no record types') if @record_types.empty?
|
284
278
|
|
285
279
|
msg = 'record types must be unique'
|
@@ -290,34 +284,34 @@ def validate_metadata (check = {})
|
|
290
284
|
|
291
285
|
msg = 'with no common variables, every record type needs at least one variable ('
|
292
286
|
if @variables.find { |var| var.is_common_var }.nil?
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
287
|
+
@record_types.each do |rt|
|
288
|
+
next if get_vars_by_record_type(rt).size > 0
|
289
|
+
bad_metadata(msg + rt + ')')
|
290
|
+
end
|
297
291
|
end
|
298
|
-
|
292
|
+
end
|
299
293
|
|
300
|
-
|
294
|
+
if @data_structure == 'hier'
|
301
295
|
bad_metadata('no record type variable') if record_type_var.nil?
|
302
|
-
|
296
|
+
end
|
303
297
|
|
304
|
-
|
298
|
+
return if check[:minimal]
|
305
299
|
|
306
|
-
|
307
|
-
v.start_column
|
308
|
-
v.width
|
300
|
+
@variables.each do |v|
|
301
|
+
v.start_column = v.start_column.to_i
|
302
|
+
v.width = v.width.to_i
|
309
303
|
v.implied_decimals = v.implied_decimals.to_i
|
310
|
-
bad_metadata("#{v.name}, start_column"
|
311
|
-
bad_metadata("#{v.name}, width"
|
304
|
+
bad_metadata("#{v.name}, start_column") unless v.start_column > 0
|
305
|
+
bad_metadata("#{v.name}, width") unless v.width > 0
|
312
306
|
bad_metadata("#{v.name}, implied_decimals") unless v.implied_decimals >= 0
|
307
|
+
end
|
313
308
|
end
|
314
|
-
end
|
315
309
|
|
316
|
-
def bad_metadata
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
end
|
310
|
+
def bad_metadata(msg)
|
311
|
+
msg = 'Invalid metadata: ' + msg
|
312
|
+
abort(msg) if @caller == 'vb' or @caller == 'dcp'
|
313
|
+
raise(RuntimeError, msg)
|
314
|
+
end
|
321
315
|
|
322
|
-
end
|
316
|
+
end
|
323
317
|
end
|