stats_package_syntax_file_generator 1.0.4 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 01e673e91ed9befd7e300e9790ed3aa788a51f68
4
- data.tar.gz: d776cf809e2c21006fd1f576b559ba2b5cb7def6
2
+ SHA256:
3
+ metadata.gz: 979787a0351769e1f7ba25a38e169a8f8a13ae0091e7b5faad0a858b8d501cb7
4
+ data.tar.gz: 79ba66410caaed9dec8d450ae28137a8328405987985cf9d472ecff8b6929873
5
5
  SHA512:
6
- metadata.gz: 0aca70ce0c9c0f908c905ac366377b08d439a6f02b57c4610e2d7d01f7fc70de1384378a66464fb70b9c31d8de44f105cc37165996251c459bebc3571701a9fb
7
- data.tar.gz: d01c808fff405a9ed202282f601c9025a90f9c03872371e076ac6bdae5c54e8e9a92f9e77f9fa3b6f1c545ac040da0f4c4ae6824f0193fefd1359d17281444d3
6
+ metadata.gz: 07c600f733ed4bef35d689d3486f51dbac9e914a774ae0e442b1c900a3e74cc8b82afe49bba534a655671d12a217dd9393c0a9611430f4b0ad9da1687ffe425e
7
+ data.tar.gz: 5f5c54efe960a92abe98f808e2c8829efca9e03eddef54705a5d04c9e339246653a628e5b1bed059cc3153ebd53fee438d14be50866d8543ecef180c47bc5c14
data/README CHANGED
@@ -4,7 +4,7 @@ This gem produces statistical package syntax files for fixed-column data files.
4
4
  SPSS
5
5
  Stata
6
6
  Stat/Transfer STS metadata files
7
-
7
+ R (via the ipumsr package which depends on IPUMS DDIs)
8
8
 
9
9
  Metadata can be supplied to the Controller in two general ways:
10
10
 
@@ -89,6 +89,7 @@ Class overview:
89
89
  Maker_SPSS
90
90
  Maker_STATA
91
91
  Maker_STS
92
+ Maker_RDDI
92
93
 
93
94
  - Classes responsible for creating syntax.
94
95
 
@@ -14,6 +14,7 @@ require 'yaml'
14
14
  maker_spss
15
15
  maker_stata
16
16
  maker_sts
17
+ maker_rddi
17
18
  ).each do |f|
18
19
  require File.expand_path(File.join(File.dirname(__FILE__), 'syntax_file', f))
19
20
  end
@@ -4,282 +4,276 @@
4
4
  # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
5
 
6
6
  module SyntaxFile
7
- class Controller
8
-
9
- VERSION = "1.0.4"
10
-
11
- ATTR = {
12
- :project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
13
- :caller => { :req => false, :rw => 'rw', :def => '', :yaml => true },
14
- :data_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
15
- :data_file_name => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true },
16
- :output_formats => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
17
- :output_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
18
- :output_file_stem => { :req => false, :rw => 'rw', :def => '%s', :yaml => true },
19
- :output_file_ext => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
20
- :output_overwrite => { :req => false, :rw => 'rw', :def => false, :yaml => true },
21
- :data_structure => { :req => false, :rw => 'rw', :def => 'rect', :yaml => true },
22
- :record_types => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
23
- :record_type_var_name => { :req => false, :rw => 'rw', :def => '', :yaml => true },
24
- :rectangularize => { :req => false, :rw => 'rw', :def => false, :yaml => true },
25
- :all_vars_as_string => { :req => false, :rw => 'rw', :def => false, :yaml => true },
26
- :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false, :yaml => true },
27
- :variables => { :req => false, :rw => 'r', :def => nil, :yaml => false },
28
- :yaml_files => { :req => false, :rw => 'r', :def => nil, :yaml => false },
29
- }
30
-
31
- ATTR.each_key do |k|
32
- attr_reader k if ATTR[k][:rw].include? 'r'
33
- attr_writer k if ATTR[k][:rw].include? 'w'
34
- end
7
+ class Controller
8
+
9
+ VERSION = "1.1.3"
10
+
11
+ ATTR = {
12
+ :project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
13
+ :caller => { :req => false, :rw => 'rw', :def => '', :yaml => true },
14
+ :data_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
15
+ :data_file_name => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true },
16
+ :output_formats => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
17
+ :output_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
18
+ :output_file_stem => { :req => false, :rw => 'rw', :def => '%s', :yaml => true },
19
+ :output_file_ext => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
20
+ :output_overwrite => { :req => false, :rw => 'rw', :def => false, :yaml => true },
21
+ :data_structure => { :req => false, :rw => 'rw', :def => 'rect', :yaml => true },
22
+ :record_types => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
23
+ :record_type_var_name => { :req => false, :rw => 'rw', :def => '', :yaml => true },
24
+ :rectangularize => { :req => false, :rw => 'rw', :def => false, :yaml => true },
25
+ :all_vars_as_string => { :req => false, :rw => 'rw', :def => false, :yaml => true },
26
+ :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false, :yaml => true },
27
+ :variables => { :req => false, :rw => 'r', :def => nil, :yaml => false },
28
+ :yaml_files => { :req => false, :rw => 'r', :def => nil, :yaml => false },
29
+ :output_encoding => { :req => false, :rw => 'r', :def => "iso-8859-1",:yaml => true },
30
+ }
31
+
32
+ ATTR.each_key do |k|
33
+ attr_reader k if ATTR[k][:rw].include? 'r'
34
+ attr_writer k if ATTR[k][:rw].include? 'w'
35
+ end
35
36
 
36
- def initialize (args = {})
37
- ATTR.each_key { |k|
38
- raise(ArgumentError, "Missing required parameter: '#{k}'.") if
39
- ATTR[k][:req] and not args.has_key?(k)
37
+ def initialize(args = {})
38
+ ATTR.each_key { |k|
39
+ raise(ArgumentError, "Missing required parameter: '#{k}'.") if ATTR[k][:req] and not args.has_key?(k)
40
40
  v = args.has_key?(k) ? args[k] : ATTR[k][:def]
41
41
  instance_variable_set("@#{k}".to_sym, v)
42
- }
42
+ }
43
43
 
44
- @output_file_ext = {
45
- 'sas' => '.sas',
46
- 'spss' => '.sps',
44
+ @output_file_ext = {
45
+ 'sas' => '.sas',
46
+ 'spss' => '.sps',
47
47
  'stata' => '.do',
48
- 'sts' => '.sts'
49
- } if @output_file_ext.nil?
50
- @output_formats = [] if @output_formats.nil?
51
- @record_types = [] if @record_types.nil?
52
- @variables = [] if @variables.nil?
53
- @yaml_files = [] if @yaml_files.nil?
54
- read_metadata_from_yaml
55
- end
56
-
48
+ 'sts' => '.sts',
49
+ 'rddi' => '.R'
50
+ } if @output_file_ext.nil?
51
+ @output_formats = [] if @output_formats.nil?
52
+ @record_types = [] if @record_types.nil?
53
+ @variables = [] if @variables.nil?
54
+ @yaml_files = [] if @yaml_files.nil?
55
+ read_metadata_from_yaml
56
+ end
57
57
 
58
- # Methods to import metadata from YAML files into the Controller object.
58
+ # Methods to import metadata from YAML files into the Controller object.
59
59
 
60
- def yaml_files= (file_names)
61
- # Caller can supply a file name or an array of file names.
62
- @yaml_files = file_names.to_a
63
- read_metadata_from_yaml
64
- end
60
+ def yaml_files=(file_names)
61
+ # Caller can supply a file name or an array of file names.
62
+ @yaml_files = file_names.to_a
63
+ read_metadata_from_yaml
64
+ end
65
65
 
66
- def read_metadata_from_yaml
67
- return if @yaml_files.empty?
68
- md = {}
69
- @yaml_files.each { |f| md.merge! YAML.load_file(f) }
70
- md = symbolize_keys(md)
71
- load_yaml_md(md)
72
- end
66
+ def read_metadata_from_yaml
67
+ return if @yaml_files.empty?
68
+ md = {}
69
+ @yaml_files.each { |f| md.merge! YAML.load_file(f) }
70
+ md = symbolize_keys(md)
71
+ load_yaml_md(md)
72
+ end
73
73
 
74
- def load_yaml_md (md)
75
- # Uses metadata from yaml to set metadata-related instance variables.
76
- ATTR.each_key do |k|
74
+ def load_yaml_md(md)
75
+ # Uses metadata from yaml to set metadata-related instance variables.
76
+ ATTR.each_key do |k|
77
77
  next unless md.has_key?(k) and ATTR[k][:yaml]
78
78
  instance_variable_set("@#{k}".to_sym, md[k])
79
- end
80
- return unless md.has_key?(:variables)
81
- @variables = []
82
- return unless md[:variables].size > 0
83
- md[:variables].each do |md_var|
79
+ end
80
+ return unless md.has_key?(:variables)
81
+ @variables = []
82
+ return unless md[:variables].size > 0
83
+ md[:variables].each do |md_var|
84
84
  vals = md_var.delete(:values)
85
85
  var = add_variable(md_var)
86
86
  vals.each { |v| var.add_value(v) } unless vals.nil?
87
+ end
87
88
  end
88
- end
89
89
 
90
- def symbolize_keys (h)
91
- # Recursively converts hash keys from strings to symbols.
92
- if h.instance_of? Hash
93
- h.inject({}) { |return_hash,(k,v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
94
- elsif h.instance_of? Array
90
+ def symbolize_keys(h)
91
+ # Recursively converts hash keys from strings to symbols.
92
+ if h.instance_of? Hash
93
+ h.inject({}) { |return_hash, (k, v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
94
+ elsif h.instance_of? Array
95
95
  h.map { |v| symbolize_keys(v) }
96
- else
96
+ else
97
97
  h
98
+ end
98
99
  end
99
- end
100
100
 
101
- # Methods to add or get variables.
101
+ # Methods to add or get variables.
102
102
 
103
- def add_variable (args)
104
- @variables.push Variable.new(args)
105
- @variables[-1]
106
- end
103
+ def add_variable(args)
104
+ @variables.push Variable.new(args)
105
+ @variables[-1]
106
+ end
107
107
 
108
- def clear_variables
109
- @variables = []
110
- end
108
+ def clear_variables
109
+ @variables = []
110
+ end
111
111
 
112
- def get_var_by_name (n)
113
- @variables.find { |v| v.name == n }
114
- end
112
+ def get_var_by_name(n)
113
+ @variables.find { |v| v.name == n }
114
+ end
115
115
 
116
- def get_vars_by_record_type (rt)
117
- @variables.find_all { |v| v.record_type == rt or v.is_common_var }
118
- end
116
+ def get_vars_by_record_type(rt)
117
+ @variables.find_all { |v| v.record_type == rt or v.is_common_var }
118
+ end
119
119
 
120
- def get_vars_with_var_labels
121
- @variables.find_all { |v| v.label.length > 0 }
122
- end
120
+ def get_vars_with_var_labels
121
+ @variables.find_all { |v| v.label.length > 0 }
122
+ end
123
123
 
124
- def get_vars_with_values
125
- @variables.find_all { |var|
124
+ def get_vars_with_values
125
+ @variables.find_all { |var|
126
126
  var.values.size > 0 and
127
- not var.suppress_labels
128
- }
129
- end
127
+ not var.suppress_labels
128
+ }
129
+ end
130
130
 
131
- def get_big_nums
132
- @variables.find_all { |var|
131
+ def get_big_nums
132
+ @variables.find_all { |var|
133
133
  var.width > 8 and
134
- not var.is_string_var
135
- }
136
- end
137
-
138
-
139
- def record_type_var
140
- get_var_by_name(@record_type_var_name)
141
- end
134
+ not var.is_string_var
135
+ }
136
+ end
142
137
 
138
+ def record_type_var
139
+ get_var_by_name(@record_type_var_name)
140
+ end
143
141
 
144
- # Methods for adding values to variables.
142
+ # Methods for adding values to variables.
145
143
 
146
- def add_value (args)
147
- @variables[-1].values.push Value.new(args)
148
- @variables[-1].values[-1]
149
- end
150
-
151
- def new_values (*vals)
152
- vals.flatten!
153
- vals.map { |v| Value.new(v) }
154
- end
144
+ def add_value(args)
145
+ @variables[-1].values.push Value.new(args)
146
+ @variables[-1].values[-1]
147
+ end
155
148
 
149
+ def new_values(*vals)
150
+ vals.flatten!
151
+ vals.map { |v| Value.new(v) }
152
+ end
156
153
 
157
- # Methods for record types.
154
+ # Methods for record types.
158
155
 
159
- def is_last_record_type (rt)
160
- return true if @record_types.size > 0 and @record_types[-1] == rt
161
- return false
162
- end
163
-
164
- def rec_types_except_last
165
- r = Array.new(@record_types)
166
- r.pop
167
- r
168
- end
156
+ def is_last_record_type(rt)
157
+ return true if @record_types.size > 0 and @record_types[-1] == rt
158
+ return false
159
+ end
169
160
 
161
+ def rec_types_except_last
162
+ r = Array.new(@record_types)
163
+ r.pop
164
+ r
165
+ end
170
166
 
171
- # Helper methods.
167
+ # Helper methods.
172
168
 
173
- def max_var_name_length
174
- return 0 if @variables.empty?
175
- @variables.map { |v| v.name.length }.max
176
- end
169
+ def max_var_name_length
170
+ return 0 if @variables.empty?
171
+ @variables.map { |v| v.name.length }.max
172
+ end
177
173
 
178
- def max_col_loc_width
179
- return 0 if @variables.empty?
180
- @variables.map { |v| v.end_column.to_s.length }.max
181
- end
174
+ def max_col_loc_width
175
+ return 0 if @variables.empty?
176
+ @variables.map { |v| v.end_column.to_s.length }.max
177
+ end
182
178
 
183
- def data_file_name_stem
184
- File.basename(@data_file_name, '.*')
185
- end
179
+ def data_file_name_stem
180
+ File.basename(@data_file_name, '.*')
181
+ end
186
182
 
187
- def rec_type_lookup_hash
188
- Hash[ * @record_types.map { |rt| [rt, 0] }.flatten ]
189
- end
183
+ def rec_type_lookup_hash
184
+ Hash[* @record_types.map { |rt| [rt, 0] }.flatten]
185
+ end
190
186
 
191
- def last_column_used
192
- return 0 if @variables.empty?
193
- @variables.map { |v| v.end_column }.max
194
- end
187
+ def last_column_used
188
+ return 0 if @variables.empty?
189
+ @variables.map { |v| v.end_column }.max
190
+ end
195
191
 
196
- # Output methods.
192
+ # Output methods.
197
193
 
198
- def to_s
199
- YAML.dump(self)
200
- end
194
+ def to_s
195
+ YAML.dump(self)
196
+ end
201
197
 
202
- def generate_syntax_files
203
- bad_metadata('no output formats')if @output_formats.empty?
204
- @output_formats.each { |t| generate_syntax_file(t) }
205
- end
198
+ def generate_syntax_files
199
+ bad_metadata('no output formats') if @output_formats.empty?
200
+ @output_formats.each { |t| generate_syntax_file(t) }
201
+ end
206
202
 
207
- def generate_syntax_file (syntax_type)
208
- msg = "output directory does not exist => #{@output_dir_name}"
209
- bad_metadata(msg) unless File.directory?(@output_dir_name)
210
- file_name = File.join(
203
+ def generate_syntax_file(syntax_type)
204
+ msg = "output directory does not exist => #{@output_dir_name}"
205
+ bad_metadata(msg) unless File.directory?(@output_dir_name)
206
+ file_name = File.join(
211
207
  @output_dir_name,
212
208
  sprintf(@output_file_stem, data_file_name_stem) + @output_file_ext[syntax_type]
213
- )
214
- if File.file?(file_name) and not @output_overwrite
209
+ )
210
+ if File.file?(file_name) and not @output_overwrite
215
211
  $stderr.puts "Skipping file that aready exists => #{file_name}."
216
- else
212
+ else
217
213
  if RUBY_VERSION.start_with? "1.8"
218
214
  File.open(file_name, 'w') { |f| f.puts syntax(syntax_type) }
219
215
  else
220
- File.open(file_name, 'w:iso-8859-1') { |f|
216
+ File.open(file_name, "w:#{self.output_encoding}") { |f|
221
217
 
222
- lines = syntax(syntax_type)
223
- lines.each do |line|
218
+ lines = syntax(syntax_type)
219
+ lines.each do |line|
224
220
  begin
225
- f.puts line.rstrip.encode('iso-8859-1', line.encoding.to_s,{:invalid=>:replace, :undef=>:replace,:replace => '?'})
226
- rescue Exception=>msg
227
- puts "Failed encoding on line #{line} #{msg}"
221
+ f.puts line.rstrip.encode(self.output_encoding, line.encoding.to_s, :invalid => :replace, :undef => :replace, :replace => '?')
222
+ rescue Exception => msg
223
+ puts "Failed encoding on line #{line} #{msg}"
228
224
  end
229
- end
225
+ end
230
226
  }
231
227
  end
232
228
 
229
+ end
233
230
  end
234
- end
235
-
236
- def syntax (syntax_type)
237
- validate_metadata(:minimal => true)
238
- modify_metadata
239
- validate_metadata
240
231
 
241
- maker_class = 'Maker' + syntax_type.upcase
242
- syntax_maker = eval(maker_class).new(self, syntax_type)
243
- syntax_maker.syntax
244
- end
232
+ def syntax(syntax_type)
233
+ validate_metadata(:minimal => true)
234
+ modify_metadata
235
+ validate_metadata
245
236
 
237
+ maker_class = 'Maker' + syntax_type.upcase
238
+ syntax_maker = eval(maker_class).new(self, syntax_type)
239
+ syntax_maker.syntax
240
+ end
246
241
 
247
- # Before generating syntax, we need to handle some controller-level
248
- # options that require global modification of the metadata.
242
+ # Before generating syntax, we need to handle some controller-level
243
+ # options that require global modification of the metadata.
249
244
 
250
- def modify_metadata
251
- # Force all variables to be strings.
252
- if @all_vars_as_string
245
+ def modify_metadata
246
+ # Force all variables to be strings.
247
+ if @all_vars_as_string
253
248
  @variables.each do |var|
254
- var.is_string_var = true
255
- var.is_double_var = false
256
- var.implied_decimals = 0
249
+ var.is_string_var = true
250
+ var.is_double_var = false
251
+ var.implied_decimals = 0
257
252
  end
258
- end
253
+ end
259
254
 
260
- # If the user wants to rectangularize hierarchical data, the
261
- # select_vars_by_record_type option is required.
262
- @select_vars_by_record_type = true if @rectangularize
255
+ # If the user wants to rectangularize hierarchical data, the
256
+ # select_vars_by_record_type option is required.
257
+ @select_vars_by_record_type = true if @rectangularize
263
258
 
264
- # Remove any variables not belonging to the declared record types.
265
- if @select_vars_by_record_type
259
+ # Remove any variables not belonging to the declared record types.
260
+ if @select_vars_by_record_type
266
261
  rt_lookup = rec_type_lookup_hash()
267
262
  @variables = @variables.find_all { |var| var.is_common_var or rt_lookup[var.record_type] }
263
+ end
268
264
  end
269
- end
270
-
271
265
 
272
- # Before generating syntax, run a sanity check on the metadata.
266
+ # Before generating syntax, run a sanity check on the metadata.
273
267
 
274
- def validate_metadata (check = {})
275
- bad_metadata('no variables') if @variables.empty?
268
+ def validate_metadata(check = {})
269
+ bad_metadata('no variables') if @variables.empty?
276
270
 
277
- if @rectangularize
271
+ if @rectangularize
278
272
  msg = 'the rectangularize option requires data_structure=hier'
279
273
  bad_metadata(msg) unless @data_structure == 'hier'
280
- end
274
+ end
281
275
 
282
- if @data_structure == 'hier' or @select_vars_by_record_type
276
+ if @data_structure == 'hier' or @select_vars_by_record_type
283
277
  bad_metadata('no record types') if @record_types.empty?
284
278
 
285
279
  msg = 'record types must be unique'
@@ -290,34 +284,34 @@ def validate_metadata (check = {})
290
284
 
291
285
  msg = 'with no common variables, every record type needs at least one variable ('
292
286
  if @variables.find { |var| var.is_common_var }.nil?
293
- @record_types.each do |rt|
294
- next if get_vars_by_record_type(rt).size > 0
295
- bad_metadata(msg + rt + ')')
296
- end
287
+ @record_types.each do |rt|
288
+ next if get_vars_by_record_type(rt).size > 0
289
+ bad_metadata(msg + rt + ')')
290
+ end
297
291
  end
298
- end
292
+ end
299
293
 
300
- if @data_structure == 'hier'
294
+ if @data_structure == 'hier'
301
295
  bad_metadata('no record type variable') if record_type_var.nil?
302
- end
296
+ end
303
297
 
304
- return if check[:minimal]
298
+ return if check[:minimal]
305
299
 
306
- @variables.each do |v|
307
- v.start_column = v.start_column.to_i
308
- v.width = v.width.to_i
300
+ @variables.each do |v|
301
+ v.start_column = v.start_column.to_i
302
+ v.width = v.width.to_i
309
303
  v.implied_decimals = v.implied_decimals.to_i
310
- bad_metadata("#{v.name}, start_column" ) unless v.start_column > 0
311
- bad_metadata("#{v.name}, width" ) unless v.width > 0
304
+ bad_metadata("#{v.name}, start_column") unless v.start_column > 0
305
+ bad_metadata("#{v.name}, width") unless v.width > 0
312
306
  bad_metadata("#{v.name}, implied_decimals") unless v.implied_decimals >= 0
307
+ end
313
308
  end
314
- end
315
309
 
316
- def bad_metadata (msg)
317
- msg = 'Invalid metadata: ' + msg
318
- abort(msg) if @caller == 'vb' or @caller == 'dcp'
319
- raise(RuntimeError, msg)
320
- end
310
+ def bad_metadata(msg)
311
+ msg = 'Invalid metadata: ' + msg
312
+ abort(msg) if @caller == 'vb' or @caller == 'dcp'
313
+ raise(RuntimeError, msg)
314
+ end
321
315
 
322
- end
316
+ end
323
317
  end