stats_package_syntax_file_generator 1.0.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- MTAyOGExOTg0NWVjNWRhNjc2N2M1NTkzMzdlY2QwMDc2NzY0NGNlYg==
5
- data.tar.gz: !binary |-
6
- MmE4N2Y0N2QxYzRhMDgyYTMxMzg0MTY2OGRjZDY3NjhlYzkyYTFhNA==
2
+ SHA256:
3
+ metadata.gz: 2006302f8dde73e249ee6175ef65b7c41439a14911f515f82f32f6f7dbf5403b
4
+ data.tar.gz: 4e3eaf39e8537c7e49ac3ea01a37c2e4440d4a07e80d5d8810b103e02257ec8d
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- MGZjNTQ4NjlhNTQwNjE1NDlmYTM5MTc2NmViNzc1MTZiODY0YzQyYTllODQx
10
- ODk3ZDE5ZGJjZTk5ZWU0MGVjZmFhZWFhMjc0NjdmNTZiOGQ0MTE2MjViYTY0
11
- NWFjOTEzNzdjNDNjY2JmYjdiMjAxNmFjZWQxMWRiMDI1ZTdjMzg=
12
- data.tar.gz: !binary |-
13
- OWIxMjQwNGNlNzY1ODI5MjI0YmFkYWY1ZDQ3MzI3MzVmY2ExMjA0ZjNhMmVk
14
- MmIxNGEwMTZhNjQ1MjcyZTc5YTFhMGE3NGU1YzRiMDA1Yzg0NDUyZGJjODE0
15
- OWU2NjlmN2Y2MWNmMGVlYzRjMzEzNmNhZmJhOTFhYzZhMWMyOWQ=
6
+ metadata.gz: 68dd339e0624e374c374d7c006a7747dcf3800b9661e976594a6f005acdb46a00d972ab1cea25becc3ddb4c945672b3cf15bd3ede23487f21bdd5e0f3685cb5e
7
+ data.tar.gz: 553b2e56987e63490a7684c00e2bd3a12b4b18027a7fd60343e0d7089efc00f75f6385a49c94cd8b4301fb489b04e469819897e9c3cb815c9baafa1255ac1170
data/README CHANGED
@@ -4,7 +4,7 @@ This gem produces statistical package syntax files for fixed-column data files.
4
4
  SPSS
5
5
  Stata
6
6
  Stat/Transfer STS metadata files
7
-
7
+ R (via the ipumsr package which depends on IPUMS DDIs)
8
8
 
9
9
  Metadata can be supplied to the Controller in two general ways:
10
10
 
@@ -89,6 +89,7 @@ Class overview:
89
89
  Maker_SPSS
90
90
  Maker_STATA
91
91
  Maker_STS
92
+ Maker_RDDI
92
93
 
93
94
  - Classes responsible for creating syntax.
94
95
 
@@ -14,6 +14,7 @@ require 'yaml'
14
14
  maker_spss
15
15
  maker_stata
16
16
  maker_sts
17
+ maker_rddi
17
18
  ).each do |f|
18
19
  require File.expand_path(File.join(File.dirname(__FILE__), 'syntax_file', f))
19
20
  end
@@ -4,283 +4,276 @@
4
4
  # https://github.com/mnpopcenter/stats_package_syntax_file_generator
5
5
 
6
6
  module SyntaxFile
7
- class Controller
8
-
9
- VERSION = "1.0.6"
10
-
11
- ATTR = {
12
- :project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
13
- :caller => { :req => false, :rw => 'rw', :def => '', :yaml => true },
14
- :data_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
15
- :data_file_name => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true },
16
- :output_formats => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
17
- :output_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
18
- :output_file_stem => { :req => false, :rw => 'rw', :def => '%s', :yaml => true },
19
- :output_file_ext => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
20
- :output_overwrite => { :req => false, :rw => 'rw', :def => false, :yaml => true },
21
- :data_structure => { :req => false, :rw => 'rw', :def => 'rect', :yaml => true },
22
- :record_types => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
23
- :record_type_var_name => { :req => false, :rw => 'rw', :def => '', :yaml => true },
24
- :rectangularize => { :req => false, :rw => 'rw', :def => false, :yaml => true },
25
- :all_vars_as_string => { :req => false, :rw => 'rw', :def => false, :yaml => true },
26
- :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false, :yaml => true },
27
- :variables => { :req => false, :rw => 'r', :def => nil, :yaml => false },
28
- :yaml_files => { :req => false, :rw => 'r', :def => nil, :yaml => false },
29
- :output_encoding => { :req => false, :rw => 'r', :def => "iso-8859-1",:yaml => true },
30
- }
31
-
32
- ATTR.each_key do |k|
33
- attr_reader k if ATTR[k][:rw].include? 'r'
34
- attr_writer k if ATTR[k][:rw].include? 'w'
35
- end
7
+ class Controller
8
+
9
+ VERSION = "1.1.7"
10
+
11
+ ATTR = {
12
+ :project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
13
+ :caller => { :req => false, :rw => 'rw', :def => '', :yaml => true },
14
+ :data_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
15
+ :data_file_name => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true },
16
+ :output_formats => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
17
+ :output_dir_name => { :req => false, :rw => 'rw', :def => '.', :yaml => true },
18
+ :output_file_stem => { :req => false, :rw => 'rw', :def => '%s', :yaml => true },
19
+ :output_file_ext => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
20
+ :output_overwrite => { :req => false, :rw => 'rw', :def => false, :yaml => true },
21
+ :data_structure => { :req => false, :rw => 'rw', :def => 'rect', :yaml => true },
22
+ :record_types => { :req => false, :rw => 'rw', :def => nil, :yaml => true },
23
+ :record_type_var_name => { :req => false, :rw => 'rw', :def => '', :yaml => true },
24
+ :rectangularize => { :req => false, :rw => 'rw', :def => false, :yaml => true },
25
+ :all_vars_as_string => { :req => false, :rw => 'rw', :def => false, :yaml => true },
26
+ :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false, :yaml => true },
27
+ :variables => { :req => false, :rw => 'r', :def => nil, :yaml => false },
28
+ :yaml_files => { :req => false, :rw => 'r', :def => nil, :yaml => false },
29
+ :output_encoding => { :req => false, :rw => 'r', :def => "iso-8859-1",:yaml => true },
30
+ }
31
+
32
+ ATTR.each_key do |k|
33
+ attr_reader k if ATTR[k][:rw].include? 'r'
34
+ attr_writer k if ATTR[k][:rw].include? 'w'
35
+ end
36
36
 
37
- def initialize (args = {})
38
- ATTR.each_key { |k|
39
- raise(ArgumentError, "Missing required parameter: '#{k}'.") if
40
- ATTR[k][:req] and not args.has_key?(k)
37
+ def initialize(args = {})
38
+ ATTR.each_key { |k|
39
+ raise(ArgumentError, "Missing required parameter: '#{k}'.") if ATTR[k][:req] and not args.has_key?(k)
41
40
  v = args.has_key?(k) ? args[k] : ATTR[k][:def]
42
41
  instance_variable_set("@#{k}".to_sym, v)
43
- }
42
+ }
44
43
 
45
- @output_file_ext = {
46
- 'sas' => '.sas',
47
- 'spss' => '.sps',
44
+ @output_file_ext = {
45
+ 'sas' => '.sas',
46
+ 'spss' => '.sps',
48
47
  'stata' => '.do',
49
- 'sts' => '.sts'
50
- } if @output_file_ext.nil?
51
- @output_formats = [] if @output_formats.nil?
52
- @record_types = [] if @record_types.nil?
53
- @variables = [] if @variables.nil?
54
- @yaml_files = [] if @yaml_files.nil?
55
- read_metadata_from_yaml
56
- end
57
-
48
+ 'sts' => '.sts',
49
+ 'rddi' => '.R'
50
+ } if @output_file_ext.nil?
51
+ @output_formats = [] if @output_formats.nil?
52
+ @record_types = [] if @record_types.nil?
53
+ @variables = [] if @variables.nil?
54
+ @yaml_files = [] if @yaml_files.nil?
55
+ read_metadata_from_yaml
56
+ end
58
57
 
59
- # Methods to import metadata from YAML files into the Controller object.
58
+ # Methods to import metadata from YAML files into the Controller object.
60
59
 
61
- def yaml_files= (file_names)
62
- # Caller can supply a file name or an array of file names.
63
- @yaml_files = file_names.to_a
64
- read_metadata_from_yaml
65
- end
60
+ def yaml_files=(file_names)
61
+ # Caller can supply a file name or an array of file names.
62
+ @yaml_files = file_names.to_a
63
+ read_metadata_from_yaml
64
+ end
66
65
 
67
- def read_metadata_from_yaml
68
- return if @yaml_files.empty?
69
- md = {}
70
- @yaml_files.each { |f| md.merge! YAML.load_file(f) }
71
- md = symbolize_keys(md)
72
- load_yaml_md(md)
73
- end
66
+ def read_metadata_from_yaml
67
+ return if @yaml_files.empty?
68
+ md = {}
69
+ @yaml_files.each { |f| md.merge! YAML.load_file(f) }
70
+ md = symbolize_keys(md)
71
+ load_yaml_md(md)
72
+ end
74
73
 
75
- def load_yaml_md (md)
76
- # Uses metadata from yaml to set metadata-related instance variables.
77
- ATTR.each_key do |k|
74
+ def load_yaml_md(md)
75
+ # Uses metadata from yaml to set metadata-related instance variables.
76
+ ATTR.each_key do |k|
78
77
  next unless md.has_key?(k) and ATTR[k][:yaml]
79
78
  instance_variable_set("@#{k}".to_sym, md[k])
80
- end
81
- return unless md.has_key?(:variables)
82
- @variables = []
83
- return unless md[:variables].size > 0
84
- md[:variables].each do |md_var|
79
+ end
80
+ return unless md.has_key?(:variables)
81
+ @variables = []
82
+ return unless md[:variables].size > 0
83
+ md[:variables].each do |md_var|
85
84
  vals = md_var.delete(:values)
86
85
  var = add_variable(md_var)
87
86
  vals.each { |v| var.add_value(v) } unless vals.nil?
87
+ end
88
88
  end
89
- end
90
89
 
91
- def symbolize_keys (h)
92
- # Recursively converts hash keys from strings to symbols.
93
- if h.instance_of? Hash
94
- h.inject({}) { |return_hash,(k,v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
95
- elsif h.instance_of? Array
90
+ def symbolize_keys(h)
91
+ # Recursively converts hash keys from strings to symbols.
92
+ if h.instance_of? Hash
93
+ h.inject({}) { |return_hash, (k, v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
94
+ elsif h.instance_of? Array
96
95
  h.map { |v| symbolize_keys(v) }
97
- else
96
+ else
98
97
  h
98
+ end
99
99
  end
100
- end
101
100
 
102
- # Methods to add or get variables.
101
+ # Methods to add or get variables.
103
102
 
104
- def add_variable (args)
105
- @variables.push Variable.new(args)
106
- @variables[-1]
107
- end
103
+ def add_variable(args)
104
+ @variables.push Variable.new(args)
105
+ @variables[-1]
106
+ end
108
107
 
109
- def clear_variables
110
- @variables = []
111
- end
108
+ def clear_variables
109
+ @variables = []
110
+ end
112
111
 
113
- def get_var_by_name (n)
114
- @variables.find { |v| v.name == n }
115
- end
112
+ def get_var_by_name(n)
113
+ @variables.find { |v| v.name == n }
114
+ end
116
115
 
117
- def get_vars_by_record_type (rt)
118
- @variables.find_all { |v| v.record_type == rt or v.is_common_var }
119
- end
116
+ def get_vars_by_record_type(rt)
117
+ @variables.find_all { |v| v.record_type == rt or v.is_common_var }
118
+ end
120
119
 
121
- def get_vars_with_var_labels
122
- @variables.find_all { |v| v.label.length > 0 }
123
- end
120
+ def get_vars_with_var_labels
121
+ @variables.find_all { |v| v.label.length > 0 }
122
+ end
124
123
 
125
- def get_vars_with_values
126
- @variables.find_all { |var|
124
+ def get_vars_with_values
125
+ @variables.find_all { |var|
127
126
  var.values.size > 0 and
128
- not var.suppress_labels
129
- }
130
- end
127
+ not var.suppress_labels
128
+ }
129
+ end
131
130
 
132
- def get_big_nums
133
- @variables.find_all { |var|
131
+ def get_big_nums
132
+ @variables.find_all { |var|
134
133
  var.width > 8 and
135
- not var.is_string_var
136
- }
137
- end
138
-
139
-
140
- def record_type_var
141
- get_var_by_name(@record_type_var_name)
142
- end
134
+ not var.is_string_var
135
+ }
136
+ end
143
137
 
138
+ def record_type_var
139
+ get_var_by_name(@record_type_var_name)
140
+ end
144
141
 
145
- # Methods for adding values to variables.
142
+ # Methods for adding values to variables.
146
143
 
147
- def add_value (args)
148
- @variables[-1].values.push Value.new(args)
149
- @variables[-1].values[-1]
150
- end
151
-
152
- def new_values (*vals)
153
- vals.flatten!
154
- vals.map { |v| Value.new(v) }
155
- end
144
+ def add_value(args)
145
+ @variables[-1].values.push Value.new(args)
146
+ @variables[-1].values[-1]
147
+ end
156
148
 
149
+ def new_values(*vals)
150
+ vals.flatten!
151
+ vals.map { |v| Value.new(v) }
152
+ end
157
153
 
158
- # Methods for record types.
154
+ # Methods for record types.
159
155
 
160
- def is_last_record_type (rt)
161
- return true if @record_types.size > 0 and @record_types[-1] == rt
162
- return false
163
- end
164
-
165
- def rec_types_except_last
166
- r = Array.new(@record_types)
167
- r.pop
168
- r
169
- end
156
+ def is_last_record_type(rt)
157
+ return true if @record_types.size > 0 and @record_types[-1] == rt
158
+ return false
159
+ end
170
160
 
161
+ def rec_types_except_last
162
+ r = Array.new(@record_types)
163
+ r.pop
164
+ r
165
+ end
171
166
 
172
- # Helper methods.
167
+ # Helper methods.
173
168
 
174
- def max_var_name_length
175
- return 0 if @variables.empty?
176
- @variables.map { |v| v.name.length }.max
177
- end
169
+ def max_var_name_length
170
+ return 0 if @variables.empty?
171
+ @variables.map { |v| v.name.length }.max
172
+ end
178
173
 
179
- def max_col_loc_width
180
- return 0 if @variables.empty?
181
- @variables.map { |v| v.end_column.to_s.length }.max
182
- end
174
+ def max_col_loc_width
175
+ return 0 if @variables.empty?
176
+ @variables.map { |v| v.end_column.to_s.length }.max
177
+ end
183
178
 
184
- def data_file_name_stem
185
- File.basename(@data_file_name, '.*')
186
- end
179
+ def data_file_name_stem
180
+ File.basename(@data_file_name, '.*')
181
+ end
187
182
 
188
- def rec_type_lookup_hash
189
- Hash[ * @record_types.map { |rt| [rt, 0] }.flatten ]
190
- end
183
+ def rec_type_lookup_hash
184
+ Hash[* @record_types.map { |rt| [rt, 0] }.flatten]
185
+ end
191
186
 
192
- def last_column_used
193
- return 0 if @variables.empty?
194
- @variables.map { |v| v.end_column }.max
195
- end
187
+ def last_column_used
188
+ return 0 if @variables.empty?
189
+ @variables.map { |v| v.end_column }.max
190
+ end
196
191
 
197
- # Output methods.
192
+ # Output methods.
198
193
 
199
- def to_s
200
- YAML.dump(self)
201
- end
194
+ def to_s
195
+ YAML.dump(self)
196
+ end
202
197
 
203
- def generate_syntax_files
204
- bad_metadata('no output formats')if @output_formats.empty?
205
- @output_formats.each { |t| generate_syntax_file(t) }
206
- end
198
+ def generate_syntax_files
199
+ bad_metadata('no output formats') if @output_formats.empty?
200
+ @output_formats.each { |t| generate_syntax_file(t) }
201
+ end
207
202
 
208
- def generate_syntax_file (syntax_type)
209
- msg = "output directory does not exist => #{@output_dir_name}"
210
- bad_metadata(msg) unless File.directory?(@output_dir_name)
211
- file_name = File.join(
203
+ def generate_syntax_file(syntax_type)
204
+ msg = "output directory does not exist => #{@output_dir_name}"
205
+ bad_metadata(msg) unless File.directory?(@output_dir_name)
206
+ file_name = File.join(
212
207
  @output_dir_name,
213
208
  sprintf(@output_file_stem, data_file_name_stem) + @output_file_ext[syntax_type]
214
- )
215
- if File.file?(file_name) and not @output_overwrite
209
+ )
210
+ if File.file?(file_name) and not @output_overwrite
216
211
  $stderr.puts "Skipping file that aready exists => #{file_name}."
217
- else
212
+ else
218
213
  if RUBY_VERSION.start_with? "1.8"
219
214
  File.open(file_name, 'w') { |f| f.puts syntax(syntax_type) }
220
215
  else
221
216
  File.open(file_name, "w:#{self.output_encoding}") { |f|
222
217
 
223
- lines = syntax(syntax_type)
224
- lines.each do |line|
218
+ lines = syntax(syntax_type)
219
+ lines.each do |line|
225
220
  begin
226
- f.puts line.rstrip.encode(self.output_encoding, line.encoding.to_s,{:invalid=>:replace, :undef=>:replace,:replace => '?'})
227
- rescue Exception=>msg
228
- puts "Failed encoding on line #{line} #{msg}"
221
+ f.puts line.rstrip.encode(self.output_encoding, line.encoding.to_s, :invalid => :replace, :undef => :replace, :replace => '?')
222
+ rescue Exception => msg
223
+ puts "Failed encoding on line #{line} #{msg}"
229
224
  end
230
- end
225
+ end
231
226
  }
232
227
  end
233
228
 
229
+ end
234
230
  end
235
- end
236
-
237
- def syntax (syntax_type)
238
- validate_metadata(:minimal => true)
239
- modify_metadata
240
- validate_metadata
241
231
 
242
- maker_class = 'Maker' + syntax_type.upcase
243
- syntax_maker = eval(maker_class).new(self, syntax_type)
244
- syntax_maker.syntax
245
- end
232
+ def syntax(syntax_type)
233
+ validate_metadata(:minimal => true)
234
+ modify_metadata
235
+ validate_metadata
246
236
 
237
+ maker_class = 'Maker' + syntax_type.upcase
238
+ syntax_maker = eval(maker_class).new(self, syntax_type)
239
+ syntax_maker.syntax
240
+ end
247
241
 
248
- # Before generating syntax, we need to handle some controller-level
249
- # options that require global modification of the metadata.
242
+ # Before generating syntax, we need to handle some controller-level
243
+ # options that require global modification of the metadata.
250
244
 
251
- def modify_metadata
252
- # Force all variables to be strings.
253
- if @all_vars_as_string
245
+ def modify_metadata
246
+ # Force all variables to be strings.
247
+ if @all_vars_as_string
254
248
  @variables.each do |var|
255
- var.is_string_var = true
256
- var.is_double_var = false
257
- var.implied_decimals = 0
249
+ var.is_string_var = true
250
+ var.is_double_var = false
251
+ var.implied_decimals = 0
258
252
  end
259
- end
253
+ end
260
254
 
261
- # If the user wants to rectangularize hierarchical data, the
262
- # select_vars_by_record_type option is required.
263
- @select_vars_by_record_type = true if @rectangularize
255
+ # If the user wants to rectangularize hierarchical data, the
256
+ # select_vars_by_record_type option is required.
257
+ @select_vars_by_record_type = true if @rectangularize
264
258
 
265
- # Remove any variables not belonging to the declared record types.
266
- if @select_vars_by_record_type
259
+ # Remove any variables not belonging to the declared record types.
260
+ if @select_vars_by_record_type
267
261
  rt_lookup = rec_type_lookup_hash()
268
262
  @variables = @variables.find_all { |var| var.is_common_var or rt_lookup[var.record_type] }
263
+ end
269
264
  end
270
- end
271
-
272
265
 
273
- # Before generating syntax, run a sanity check on the metadata.
266
+ # Before generating syntax, run a sanity check on the metadata.
274
267
 
275
- def validate_metadata (check = {})
276
- bad_metadata('no variables') if @variables.empty?
268
+ def validate_metadata(check = {})
269
+ bad_metadata('no variables') if @variables.empty?
277
270
 
278
- if @rectangularize
271
+ if @rectangularize
279
272
  msg = 'the rectangularize option requires data_structure=hier'
280
273
  bad_metadata(msg) unless @data_structure == 'hier'
281
- end
274
+ end
282
275
 
283
- if @data_structure == 'hier' or @select_vars_by_record_type
276
+ if @data_structure == 'hier' or @select_vars_by_record_type
284
277
  bad_metadata('no record types') if @record_types.empty?
285
278
 
286
279
  msg = 'record types must be unique'
@@ -291,34 +284,34 @@ def validate_metadata (check = {})
291
284
 
292
285
  msg = 'with no common variables, every record type needs at least one variable ('
293
286
  if @variables.find { |var| var.is_common_var }.nil?
294
- @record_types.each do |rt|
295
- next if get_vars_by_record_type(rt).size > 0
296
- bad_metadata(msg + rt + ')')
297
- end
287
+ @record_types.each do |rt|
288
+ next if get_vars_by_record_type(rt).size > 0
289
+ bad_metadata(msg + rt + ')')
290
+ end
298
291
  end
299
- end
292
+ end
300
293
 
301
- if @data_structure == 'hier'
294
+ if @data_structure == 'hier'
302
295
  bad_metadata('no record type variable') if record_type_var.nil?
303
- end
296
+ end
304
297
 
305
- return if check[:minimal]
298
+ return if check[:minimal]
306
299
 
307
- @variables.each do |v|
308
- v.start_column = v.start_column.to_i
309
- v.width = v.width.to_i
300
+ @variables.each do |v|
301
+ v.start_column = v.start_column.to_i
302
+ v.width = v.width.to_i
310
303
  v.implied_decimals = v.implied_decimals.to_i
311
- bad_metadata("#{v.name}, start_column" ) unless v.start_column > 0
312
- bad_metadata("#{v.name}, width" ) unless v.width > 0
304
+ bad_metadata("#{v.name}, start_column") unless v.start_column > 0
305
+ bad_metadata("#{v.name}, width") unless v.width > 0
313
306
  bad_metadata("#{v.name}, implied_decimals") unless v.implied_decimals >= 0
307
+ end
314
308
  end
315
- end
316
309
 
317
- def bad_metadata (msg)
318
- msg = 'Invalid metadata: ' + msg
319
- abort(msg) if @caller == 'vb' or @caller == 'dcp'
320
- raise(RuntimeError, msg)
321
- end
310
+ def bad_metadata(msg)
311
+ msg = 'Invalid metadata: ' + msg
312
+ abort(msg) if @caller == 'vb' or @caller == 'dcp'
313
+ raise(RuntimeError, msg)
314
+ end
322
315
 
323
- end
316
+ end
324
317
  end