stats_package_syntax_file_generator 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README +128 -0
- data/lib/stats_package_syntax_file_generator.rb +19 -0
- data/lib/stats_package_syntax_file_generator/controller.rb +323 -0
- data/lib/stats_package_syntax_file_generator/maker.rb +126 -0
- data/lib/stats_package_syntax_file_generator/maker_sas.rb +306 -0
- data/lib/stats_package_syntax_file_generator/maker_spss.rb +194 -0
- data/lib/stats_package_syntax_file_generator/maker_stata.rb +300 -0
- data/lib/stats_package_syntax_file_generator/maker_sts.rb +181 -0
- data/lib/stats_package_syntax_file_generator/value.rb +29 -0
- data/lib/stats_package_syntax_file_generator/variable.rb +56 -0
- data/tests/input_all_vars.yaml +2012 -0
- data/tests/input_controller.yaml +13 -0
- data/tests/setup.rb +103 -0
- data/tests/tc_controller.rb +378 -0
- data/tests/tc_maker.rb +172 -0
- data/tests/tc_maker_sas.rb +251 -0
- data/tests/tc_maker_spss.rb +121 -0
- data/tests/tc_maker_stata.rb +224 -0
- data/tests/tc_maker_sts.rb +190 -0
- data/tests/tc_value.rb +23 -0
- data/tests/tc_variable.rb +53 -0
- data/tests/ts_all.rb +20 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0f9197c10adf1e85ad5b72ddf53aa414b9e38c08
|
4
|
+
data.tar.gz: c77ce671f48ee5eaa923ce8ea5f21c875f456aa8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c092c5f3ef02f13d9819a78831fd56817a4d753e0bfba16646732845a70488fed496204647fa834e0c02a094b47f5558a31d7c6529f381092bc74e34271df86b
|
7
|
+
data.tar.gz: 0836c59716068a5f5e87e62d6eb6665a69e9152c00b2ed1a2c1dd52c2d0131f04459ed349a495cf659f0933f06b98b26ff9a8e730ceb737c5939868242e05b1a
|
data/README
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
This gem produces statistical package syntax files for fixed-column data files.
|
2
|
+
|
3
|
+
SAS
|
4
|
+
SPSS
|
5
|
+
Stata
|
6
|
+
Stat/Transfer STS metadata files
|
7
|
+
|
8
|
+
|
9
|
+
Metadata can be supplied to the Controller in two general ways:
|
10
|
+
|
11
|
+
- Programmatically, using the API provided by the Controller class.
|
12
|
+
|
13
|
+
- Via one or more YAML files -- typically just one YAML file, but the
|
14
|
+
Controller will accept multiple files and merge their content.
|
15
|
+
|
16
|
+
|
17
|
+
Basic usage:
|
18
|
+
|
19
|
+
require 'stats_package_syntax_file_generator'
|
20
|
+
|
21
|
+
# Supply metadata via one YAML file.
|
22
|
+
sfc = StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => 'metadata.yaml')
|
23
|
+
|
24
|
+
# Or via multiple YAML files.
|
25
|
+
sfc = StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => ['md1.yaml', 'md2.yaml'])
|
26
|
+
|
27
|
+
# Or programmatically.
|
28
|
+
# For a working example, see devel/api_example.rb.
|
29
|
+
|
30
|
+
# Generate all syntax files.
|
31
|
+
sfc.generate_syntax_files
|
32
|
+
|
33
|
+
# Generate a syntax file of a specific TYPE (sas, spss, stata, or sts).
|
34
|
+
sfc.generate_syntax_file('TYPE')
|
35
|
+
|
36
|
+
# Ditto, but get the syntax as a list of strings rather than writing a file.
|
37
|
+
syntax_lines = sfc.syntax('TYPE')
|
38
|
+
|
39
|
+
|
40
|
+
Running tests:
|
41
|
+
|
42
|
+
devel/run_all_checks.sh
|
43
|
+
|
44
|
+
|
45
|
+
Project structure:
|
46
|
+
|
47
|
+
devel/
|
48
|
+
# Developer area.
|
49
|
+
# Various scripts, plus the following:
|
50
|
+
|
51
|
+
input/
|
52
|
+
# YAML metadata used during development
|
53
|
+
# and in end-to-end acceptance testing.
|
54
|
+
|
55
|
+
output_expected/
|
56
|
+
# Acceptance testing: expected output.
|
57
|
+
|
58
|
+
output_result/
|
59
|
+
# Acceptance testing: directory where new output
|
60
|
+
# is written. This output is not included in Git.
|
61
|
+
|
62
|
+
lib/ # The syntax file utility itself.
|
63
|
+
tests/ # Unit test scripts and their YAML metadata.
|
64
|
+
|
65
|
+
|
66
|
+
Class overview:
|
67
|
+
|
68
|
+
Controller
|
69
|
+
|
70
|
+
- The API for the gem. Users of the gem should not need to interact with
|
71
|
+
other classes.
|
72
|
+
|
73
|
+
- Serves as a container for the metadata needed to generate syntax files.
|
74
|
+
|
75
|
+
- Holds various attributes specifying the type of syntax files to
|
76
|
+
generate, the structure of the data files, and so forth.
|
77
|
+
|
78
|
+
Variable
|
79
|
+
Value
|
80
|
+
|
81
|
+
- Metadata classes holding information about variables in the data file.
|
82
|
+
|
83
|
+
- A Controller contains 1+ Variable objects.
|
84
|
+
|
85
|
+
- A Variable contains 0+ Value objects.
|
86
|
+
|
87
|
+
Maker
|
88
|
+
Maker_SAS
|
89
|
+
Maker_SPSS
|
90
|
+
Maker_STATA
|
91
|
+
Maker_STS
|
92
|
+
|
93
|
+
- Classes responsible for creating syntax.
|
94
|
+
|
95
|
+
- Maker provides methods that all classes have in common.
|
96
|
+
|
97
|
+
- The other classes inherit from Maker, overriding behavior as needed.
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
General behavior of the specific Maker classes:
|
102
|
+
|
103
|
+
- The classes all define a primary method: syntax().
|
104
|
+
|
105
|
+
- The syntax() method consists of calls to various other methods responsible
|
106
|
+
for generating sections of the syntax.
|
107
|
+
|
108
|
+
- Methods that generate sub-sections of syntax have names beginning with 'syn_'.
|
109
|
+
|
110
|
+
- All of the syntax-generating methods return a list of strings.
|
111
|
+
|
112
|
+
|
113
|
+
Notes on STS files:
|
114
|
+
|
115
|
+
1. Hierarchical data.
|
116
|
+
Stat/Transfer does not directly support hierarchical data structures. The
|
117
|
+
workaround is to perform multiple invocations of Stat/Transfer, once per record
|
118
|
+
type. Each invocation must instruct Stat/Transfer to perform case selection to
|
119
|
+
filter the records such that a single record type is in scope, and must request
|
120
|
+
the variables appropriate for that record type.
|
121
|
+
|
122
|
+
2. Variable and value labels with quotes.
|
123
|
+
Stat/Transfer does not support escaping quotes within value labels. Therefore,
|
124
|
+
double quotes are represented by a pair of single quotes.
|
125
|
+
|
126
|
+
3. Only alphanumeric values are permitted for value labels.
|
127
|
+
Nonconforming values are skipped, with a comment added to the STS file that
|
128
|
+
documents this action.
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
%w(
|
9
|
+
controller
|
10
|
+
variable
|
11
|
+
value
|
12
|
+
maker
|
13
|
+
maker_sas
|
14
|
+
maker_spss
|
15
|
+
maker_stata
|
16
|
+
maker_sts
|
17
|
+
).each do |f|
|
18
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'stats_package_syntax_file_generator', f))
|
19
|
+
end
|
@@ -0,0 +1,323 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
  # The public API of the gem.
  #
  # A Controller is a container for the metadata needed to generate syntax
  # files: a list of Variable objects plus the options listed in ATTR. The
  # metadata can be supplied through the constructor, through the attribute
  # writers, through add_variable / add_value, or through one or more YAML
  # files (see yaml_files=).
  class Controller

    VERSION = "1.0.1"

    # Attribute table. For every attribute:
    #   :req  - true if the attribute must be passed to Controller.new.
    #   :rw   - 'r' defines a reader, 'w' defines a writer, 'rw' both.
    #   :def  - default used when the attribute is not passed to new.
    #   :yaml - true if load_yaml_md may set the attribute from YAML metadata.
    ATTR = {
      :project                    => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :caller                     => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :data_dir_name              => { :req => false, :rw => 'rw', :def => '.',         :yaml => true  },
      :data_file_name             => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true  },
      :output_formats             => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :output_dir_name            => { :req => false, :rw => 'rw', :def => '.',         :yaml => true  },
      :output_file_stem           => { :req => false, :rw => 'rw', :def => '%s',        :yaml => true  },
      :output_file_ext            => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :output_overwrite           => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :data_structure             => { :req => false, :rw => 'rw', :def => 'rect',      :yaml => true  },
      :record_types               => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :record_type_var_name       => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :rectangularize             => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :all_vars_as_string         => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :variables                  => { :req => false, :rw => 'r',  :def => nil,         :yaml => false },
      :yaml_files                 => { :req => false, :rw => 'r',  :def => nil,         :yaml => false },
    }

    ATTR.each_key do |k|
      attr_reader k if ATTR[k][:rw].include? 'r'
      attr_writer k if ATTR[k][:rw].include? 'w'
    end

    # Builds a controller.
    #
    # args - Hash keyed by the symbols in ATTR. Missing keys fall back to the
    #        ATTR default. :yaml_files may be a single file name or an array
    #        of file names; any files given are read immediately.
    #
    # Raises ArgumentError if an attribute marked :req is missing.
    def initialize(args = {})
      ATTR.each do |name, spec|
        if spec[:req] && !args.has_key?(name)
          raise(ArgumentError, "Missing required parameter: '#{name}'.")
        end
        value = args.has_key?(name) ? args[name] : default_for(name)
        instance_variable_set("@#{name}".to_sym, value)
      end

      if @output_file_ext.nil?
        @output_file_ext = {
          'sas'   => '.sas',
          'spss'  => '.sps',
          'stata' => '.do',
          'sts'   => '.sts'
        }
      end
      @output_formats = [] if @output_formats.nil?
      @record_types   = [] if @record_types.nil?
      @variables      = [] if @variables.nil?
      # Accept nil, one file name, or a list of file names.
      @yaml_files = Array(@yaml_files)
      read_metadata_from_yaml
    end


    # Methods to import metadata from YAML files into the Controller object.

    # Sets the YAML metadata files and loads them right away.
    #
    # file_names - a file name or an array of file names.
    def yaml_files=(file_names)
      # Array() handles both a lone String and an Array; String#to_a no
      # longer exists in Ruby 1.9+.
      @yaml_files = Array(file_names)
      read_metadata_from_yaml
    end

    # Reads every file in @yaml_files, merges their top-level keys (later
    # files win), and applies the result via load_yaml_md. Does nothing when
    # no YAML files are set.
    def read_metadata_from_yaml
      return if @yaml_files.empty?
      md = {}
      @yaml_files.each do |f|
        # NOTE(review): YAML.load_file is used as in the original code; the
        # metadata files are assumed to be trusted input.
        content = YAML.load_file(f)
        next unless content # An empty YAML file contributes nothing.
        md.merge!(content)
      end
      load_yaml_md(symbolize_keys(md))
    end

    # Uses metadata from YAML to set metadata-related instance variables.
    #
    # md - Hash with symbol keys. Only attributes flagged :yaml in ATTR are
    #      copied. If md has a :variables key, the current variables are
    #      replaced by the ones described there; each variable's :values
    #      entries are added to it through Variable#add_value.
    def load_yaml_md(md)
      ATTR.each do |name, spec|
        next unless spec[:yaml] && md.has_key?(name)
        instance_variable_set("@#{name}".to_sym, md[name])
      end
      return unless md.has_key?(:variables)
      @variables = []
      (md[:variables] || []).each do |md_var|
        # Work on a copy so the caller's metadata hash is left untouched.
        var_args = md_var.dup
        vals = var_args.delete(:values)
        var = add_variable(var_args)
        vals.each { |v| var.add_value(v) } unless vals.nil?
      end
    end

    # Recursively converts hash keys from strings to symbols. Arrays are
    # traversed; any other object is returned unchanged.
    def symbolize_keys(h)
      if h.instance_of? Hash
        h.inject({}) do |result, (k, v)|
          result[k.to_sym] = symbolize_keys(v)
          result
        end
      elsif h.instance_of? Array
        h.map { |v| symbolize_keys(v) }
      else
        h
      end
    end


    # Methods to add or get variables.

    # Creates a Variable from args, appends it, and returns it.
    def add_variable(args)
      @variables.push Variable.new(args)
      @variables[-1]
    end

    # Removes all variables.
    def clear_variables
      @variables = []
    end

    # Returns the first variable named n, or nil.
    def get_var_by_name(n)
      @variables.find { |v| v.name == n }
    end

    # Returns the variables of record type rt plus all common variables.
    def get_vars_by_record_type(rt)
      @variables.select { |v| v.record_type == rt or v.is_common_var }
    end

    # Returns the variables that have a non-empty label.
    def get_vars_with_var_labels
      @variables.select { |v| v.label.length > 0 }
    end

    # Returns the variables that have values and do not suppress labels.
    def get_vars_with_values
      @variables.select { |var| var.values.size > 0 and not var.suppress_labels }
    end

    # Returns the non-string variables wider than 8 columns.
    def get_big_nums
      @variables.select { |var| var.width > 8 and not var.is_string_var }
    end

    # Returns the variable named by record_type_var_name, or nil.
    def record_type_var
      get_var_by_name(@record_type_var_name)
    end


    # Methods for adding values to variables.

    # Creates a Value from args, appends it to the most recently added
    # variable, and returns it.
    def add_value(args)
      @variables[-1].values.push Value.new(args)
      @variables[-1].values[-1]
    end

    # Builds Value objects from the given argument hashes (nested arrays are
    # flattened) without attaching them to any variable.
    def new_values(*vals)
      vals.flatten!
      vals.map { |v| Value.new(v) }
    end


    # Methods for record types.

    # True if rt is the last declared record type.
    def is_last_record_type(rt)
      !@record_types.empty? && @record_types[-1] == rt
    end

    # Returns a copy of the record types without the last one.
    def rec_types_except_last
      @record_types[0...-1]
    end


    # Helper methods.

    # Length of the longest variable name (0 without variables).
    def max_var_name_length
      return 0 if @variables.empty?
      @variables.map { |v| v.name.length }.max
    end

    # Number of digits of the widest end column (0 without variables).
    def max_col_loc_width
      return 0 if @variables.empty?
      @variables.map { |v| v.end_column.to_s.length }.max
    end

    # Data file name without directory and extension.
    def data_file_name_stem
      File.basename(@data_file_name, '.*')
    end

    # Returns a hash with one key per record type (each mapped to 0), used
    # for membership lookups.
    def rec_type_lookup_hash
      @record_types.inject({}) do |lookup, rt|
        lookup[rt] = 0
        lookup
      end
    end

    # Highest end column of any variable (0 without variables).
    def last_column_used
      return 0 if @variables.empty?
      @variables.map { |v| v.end_column }.max
    end

    # Output methods.

    # YAML dump of the controller.
    def to_s
      YAML.dump(self)
    end

    # Generates one syntax file per entry in output_formats.
    def generate_syntax_files
      bad_metadata('no output formats') if @output_formats.empty?
      @output_formats.each { |t| generate_syntax_file(t) }
    end

    # Generates the syntax file for syntax_type ('sas', 'spss', ...) in the
    # output directory. An existing file is skipped (with a message on
    # stderr) unless output_overwrite is set.
    #
    # Invalid metadata is reported through bad_metadata.
    def generate_syntax_file(syntax_type)
      msg = "output directory does not exist => #{@output_dir_name}"
      bad_metadata(msg) unless File.directory?(@output_dir_name)

      ext = @output_file_ext[syntax_type]
      bad_metadata("no output file extension for syntax type => #{syntax_type}") if ext.nil?

      file_name = File.join(
        @output_dir_name,
        sprintf(@output_file_stem, data_file_name_stem) + ext
      )
      if File.file?(file_name) and not @output_overwrite
        $stderr.puts "Skipping file that already exists => #{file_name}."
        return
      end

      # Build the syntax before opening the file, so that a metadata error
      # neither creates an empty file nor truncates an existing one.
      lines = syntax(syntax_type)

      if RUBY_VERSION.start_with? "1.8"
        File.open(file_name, 'w') { |f| f.puts lines }
      else
        File.open(file_name, 'w:iso-8859-1') do |f|
          lines.each { |line| write_latin1_line(f, line) }
        end
      end
    end

    # Returns the syntax for syntax_type as a list of strings. The metadata
    # is validated, adjusted by modify_metadata, and validated again before
    # the Maker class for the syntax type is asked to produce the syntax.
    def syntax(syntax_type)
      validate_metadata(:minimal => true)
      modify_metadata
      validate_metadata

      # Look the class up by name instead of eval-ing a caller-supplied
      # string.
      maker_class = StatsPackageSyntaxFileGenerator.const_get('Maker' + syntax_type.to_s.upcase)
      maker_class.new(self, syntax_type).syntax
    end


    # Before generating syntax, we need to handle some controller-level
    # options that require global modification of the metadata.

    def modify_metadata
      # Force all variables to be strings.
      if @all_vars_as_string
        @variables.each do |var|
          var.is_string_var    = true
          var.is_double_var    = false
          var.implied_decimals = 0
        end
      end

      # If the user wants to rectangularize hierarchical data, the
      # select_vars_by_record_type option is required.
      @select_vars_by_record_type = true if @rectangularize

      # Remove any variables not belonging to the declared record types.
      if @select_vars_by_record_type
        rt_lookup = rec_type_lookup_hash
        @variables = @variables.select do |var|
          var.is_common_var or rt_lookup.has_key?(var.record_type)
        end
      end
    end


    # Before generating syntax, run a sanity check on the metadata.
    #
    # check - Hash of options. With :minimal => true only the structural
    #         checks run; otherwise each variable's start_column, width and
    #         implied_decimals are also converted to integers and
    #         range-checked.
    #
    # Problems are reported through bad_metadata.
    def validate_metadata(check = {})
      bad_metadata('no variables') if @variables.empty?

      if @rectangularize && @data_structure != 'hier'
        bad_metadata('the rectangularize option requires data_structure=hier')
      end

      if @data_structure == 'hier' or @select_vars_by_record_type
        bad_metadata('no record types') if @record_types.empty?

        unless @record_types.uniq.size == @record_types.size
          bad_metadata('record types must be unique')
        end

        # to_s lets nil and non-String record types through this check
        # without raising NoMethodError.
        if @variables.any? { |var| var.record_type.to_s.empty? }
          bad_metadata('all variables must have a record type')
        end

        unless @variables.any? { |var| var.is_common_var }
          @record_types.each do |rt|
            next unless get_vars_by_record_type(rt).empty?
            # Interpolate: record types loaded from YAML need not be Strings.
            bad_metadata("with no common variables, every record type needs at least one variable (#{rt})")
          end
        end
      end

      if @data_structure == 'hier'
        bad_metadata('no record type variable') if record_type_var.nil?
      end

      return if check[:minimal]

      @variables.each do |v|
        v.start_column     = v.start_column.to_i
        v.width            = v.width.to_i
        v.implied_decimals = v.implied_decimals.to_i
        bad_metadata("#{v.name}, start_column"    ) unless v.start_column > 0
        bad_metadata("#{v.name}, width"           ) unless v.width > 0
        bad_metadata("#{v.name}, implied_decimals") unless v.implied_decimals >= 0
      end
    end

    # Reports invalid metadata: aborts the process when the caller attribute
    # is 'vb' or 'dcp', otherwise raises RuntimeError.
    def bad_metadata(msg)
      msg = 'Invalid metadata: ' + msg
      abort(msg) if @caller == 'vb' or @caller == 'dcp'
      raise(RuntimeError, msg)
    end

    private

    # Returns the ATTR default for name. String defaults are copied so that
    # mutating one controller's attribute in place cannot alter the default
    # seen by other controllers.
    def default_for(name)
      default = ATTR[name][:def]
      default.is_a?(String) ? default.dup : default
    end

    # Writes one right-stripped line to file, transcoded to ISO-8859-1;
    # characters that cannot be converted become '?'. A line that still
    # fails to encode is reported on stdout and skipped.
    def write_latin1_line(file, line)
      # Options are passed without braces so they are treated as options on
      # every Ruby version.
      file.puts line.rstrip.encode('iso-8859-1', line.encoding.to_s,
                                   :invalid => :replace, :undef => :replace, :replace => '?')
    rescue StandardError => e
      puts "Failed encoding on line #{line} #{e}"
    end

  end
end
|