stats_package_syntax_file_generator 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README +128 -0
- data/lib/stats_package_syntax_file_generator.rb +19 -0
- data/lib/stats_package_syntax_file_generator/controller.rb +323 -0
- data/lib/stats_package_syntax_file_generator/maker.rb +126 -0
- data/lib/stats_package_syntax_file_generator/maker_sas.rb +306 -0
- data/lib/stats_package_syntax_file_generator/maker_spss.rb +194 -0
- data/lib/stats_package_syntax_file_generator/maker_stata.rb +300 -0
- data/lib/stats_package_syntax_file_generator/maker_sts.rb +181 -0
- data/lib/stats_package_syntax_file_generator/value.rb +29 -0
- data/lib/stats_package_syntax_file_generator/variable.rb +56 -0
- data/tests/input_all_vars.yaml +2012 -0
- data/tests/input_controller.yaml +13 -0
- data/tests/setup.rb +103 -0
- data/tests/tc_controller.rb +378 -0
- data/tests/tc_maker.rb +172 -0
- data/tests/tc_maker_sas.rb +251 -0
- data/tests/tc_maker_spss.rb +121 -0
- data/tests/tc_maker_stata.rb +224 -0
- data/tests/tc_maker_sts.rb +190 -0
- data/tests/tc_value.rb +23 -0
- data/tests/tc_variable.rb +53 -0
- data/tests/ts_all.rb +20 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0f9197c10adf1e85ad5b72ddf53aa414b9e38c08
|
4
|
+
data.tar.gz: c77ce671f48ee5eaa923ce8ea5f21c875f456aa8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c092c5f3ef02f13d9819a78831fd56817a4d753e0bfba16646732845a70488fed496204647fa834e0c02a094b47f5558a31d7c6529f381092bc74e34271df86b
|
7
|
+
data.tar.gz: 0836c59716068a5f5e87e62d6eb6665a69e9152c00b2ed1a2c1dd52c2d0131f04459ed349a495cf659f0933f06b98b26ff9a8e730ceb737c5939868242e05b1a
|
data/README
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
This gem produces statistical package syntax files for fixed-column data files.
|
2
|
+
|
3
|
+
SAS
|
4
|
+
SPSS
|
5
|
+
Stata
|
6
|
+
Stat/Transfer STS metadata files
|
7
|
+
|
8
|
+
|
9
|
+
Metadata can be supplied to the Controller in two general ways:
|
10
|
+
|
11
|
+
- Programmatically, using the API provided by the Controller class.
|
12
|
+
|
13
|
+
- Via one or more YAML files -- typically just one YAML file, but the
|
14
|
+
Controller will accept multiple files and merge their content.
|
15
|
+
|
16
|
+
|
17
|
+
Basic usage:
|
18
|
+
|
19
|
+
require 'stats_package_syntax_file_generator'
|
20
|
+
|
21
|
+
# Supply metadata via one YAML file.
|
22
|
+
sfc = StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => 'metadata.yaml')
|
23
|
+
|
24
|
+
# Or via multiple YAML files.
|
25
|
+
sfc = StatsPackageSyntaxFileGenerator::Controller.new(:yaml_files => ['md1.yaml', 'md2.yaml'])
|
26
|
+
|
27
|
+
# Or programmatically.
|
28
|
+
# For a working example, see devel/api_example.rb.
|
29
|
+
|
30
|
+
# Generate all syntax files.
|
31
|
+
sfc.generate_syntax_files
|
32
|
+
|
33
|
+
# Generate a syntax file of a specific TYPE (sas, spss, stata, sts).
|
34
|
+
sfc.generate_syntax_file('TYPE')
|
35
|
+
|
36
|
+
# Ditto, but get the syntax as a list of strings rather than writing a file.
|
37
|
+
syntax_lines = sfc.syntax('TYPE')
|
38
|
+
|
39
|
+
|
40
|
+
Running tests:
|
41
|
+
|
42
|
+
devel/run_all_checks.sh
|
43
|
+
|
44
|
+
|
45
|
+
Project structure:
|
46
|
+
|
47
|
+
devel/
|
48
|
+
# Developer area.
|
49
|
+
# Various scripts, plus the following:
|
50
|
+
|
51
|
+
input/
|
52
|
+
# YAML metadata used during development
|
53
|
+
# and in end-to-end acceptance testing.
|
54
|
+
|
55
|
+
output_expected/
|
56
|
+
# Acceptance testing: expected output.
|
57
|
+
|
58
|
+
output_result/
|
59
|
+
# Acceptance testing: directory where new output
|
60
|
+
# is written. This output is not included in Git.
|
61
|
+
|
62
|
+
lib/ # The syntax file utility itself.
|
63
|
+
tests/ # Unit test scripts and their YAML metadata.
|
64
|
+
|
65
|
+
|
66
|
+
Class overview:
|
67
|
+
|
68
|
+
Controller
|
69
|
+
|
70
|
+
- The API for the gem. Users of the gem should not need to interact with
|
71
|
+
other classes.
|
72
|
+
|
73
|
+
- Serves as a container for the metadata needed to generate syntax files.
|
74
|
+
|
75
|
+
- Holds various attributes specifying the type of syntax files to
|
76
|
+
generate, the structure of the data files, and so forth.
|
77
|
+
|
78
|
+
Variable
|
79
|
+
Value
|
80
|
+
|
81
|
+
- Metadata classes holding information about variables in the data file.
|
82
|
+
|
83
|
+
- A Controller contains 1+ Variable objects.
|
84
|
+
|
85
|
+
- A Variable contains 0+ Value objects.
|
86
|
+
|
87
|
+
Maker
|
88
|
+
Maker_SAS
|
89
|
+
Maker_SPSS
|
90
|
+
Maker_STATA
|
91
|
+
Maker_STS
|
92
|
+
|
93
|
+
- Classes responsible for creating syntax.
|
94
|
+
|
95
|
+
- Maker provides methods that all classes have in common.
|
96
|
+
|
97
|
+
- The other classes inherit from Maker, overriding behavior as needed.
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
General behavior of the specific Maker classes:
|
102
|
+
|
103
|
+
- The classes all define a primary method: syntax().
|
104
|
+
|
105
|
+
- The syntax() method consists of calls to various other methods responsible
|
106
|
+
for generating sections of the syntax.
|
107
|
+
|
108
|
+
- Methods that generate sub-sections of syntax have names beginning with 'syn_'.
|
109
|
+
|
110
|
+
- All of the syntax-generating methods return a list of strings.
|
111
|
+
|
112
|
+
|
113
|
+
Notes on STS files:
|
114
|
+
|
115
|
+
1. Hierarchical data.
|
116
|
+
Stat/Transfer does not directly support hierarchical data structures. The
|
117
|
+
workaround is to perform multiple invocations of Stat/Transfer, once per record
|
118
|
+
type. Each invocation must instruct Stat/Transfer to perform case selection to
|
119
|
+
filter the records such that a single record type is in scope, and must request
|
120
|
+
the variables appropriate for that record type.
|
121
|
+
|
122
|
+
2. Variable and value labels with quotes.
|
123
|
+
Stat/Transfer does not support escaping quotes within value labels. Therefore,
|
124
|
+
double quotes are represented by a pair of single quotes.
|
125
|
+
|
126
|
+
3. Only alphanumeric values are permitted for value labels.
|
127
|
+
Nonconforming values are skipped, with a comment added to the STS file that
|
128
|
+
documents this action.
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
require 'yaml'
|
7
|
+
|
8
|
+
# Load every component of the gem, in dependency order, from the
# 'stats_package_syntax_file_generator' directory that sits next to this file.
%w(controller variable value maker maker_sas maker_spss maker_stata maker_sts).each { |component|
  require File.expand_path(
    File.join('stats_package_syntax_file_generator', component),
    File.dirname(__FILE__)
  )
}
|
@@ -0,0 +1,323 @@
|
|
1
|
+
# This file is part of the Minnesota Population Center's stats_package_syntax_file_generator project.
|
2
|
+
# For copyright and licensing information, see the NOTICE and LICENSE files
|
3
|
+
# in this project's top-level directory, and also on-line at:
|
4
|
+
# https://github.com/mnpopcenter/stats_package_syntax_file_generator
|
5
|
+
|
6
|
+
module StatsPackageSyntaxFileGenerator
  # Holds the metadata (options, record types, variables) needed to generate
  # syntax files and drives the Maker classes that produce them.
  class Controller

    VERSION = "1.0.1"

    # Attribute table. For each attribute:
    #   :req  - when true, the constructor raises ArgumentError if it is missing
    #   :rw   - 'r' defines a reader, 'w' defines a writer
    #   :def  - value used when the caller does not supply one
    #   :yaml - whether load_yaml_md may set it from YAML metadata
    ATTR = {
      :project                    => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :caller                     => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :data_dir_name              => { :req => false, :rw => 'rw', :def => '.',         :yaml => true  },
      :data_file_name             => { :req => false, :rw => 'rw', :def => 'DATA_FILE', :yaml => true  },
      :output_formats             => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :output_dir_name            => { :req => false, :rw => 'rw', :def => '.',         :yaml => true  },
      :output_file_stem           => { :req => false, :rw => 'rw', :def => '%s',        :yaml => true  },
      :output_file_ext            => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :output_overwrite           => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :data_structure             => { :req => false, :rw => 'rw', :def => 'rect',      :yaml => true  },
      :record_types               => { :req => false, :rw => 'rw', :def => nil,         :yaml => true  },
      :record_type_var_name       => { :req => false, :rw => 'rw', :def => '',          :yaml => true  },
      :rectangularize             => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :all_vars_as_string         => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :select_vars_by_record_type => { :req => false, :rw => 'rw', :def => false,       :yaml => true  },
      :variables                  => { :req => false, :rw => 'r',  :def => nil,         :yaml => false },
      :yaml_files                 => { :req => false, :rw => 'r',  :def => nil,         :yaml => false },
    }

    ATTR.each_key do |k|
      attr_reader k if ATTR[k][:rw].include? 'r'
      attr_writer k if ATTR[k][:rw].include? 'w'
    end

    # Builds a controller from a hash keyed by the symbols in ATTR.
    # Attributes not supplied take their ATTR default; nil collections are
    # replaced by empty ones. If YAML files were given they are read at once.
    #
    # Raises ArgumentError when an attribute marked :req is missing.
    def initialize(args = {})
      ATTR.each_key do |k|
        if ATTR[k][:req] && !args.has_key?(k)
          raise(ArgumentError, "Missing required parameter: '#{k}'.")
        end
        v = args.has_key?(k) ? args[k] : ATTR[k][:def]
        instance_variable_set("@#{k}".to_sym, v)
      end

      @output_file_ext = {
        'sas'   => '.sas',
        'spss'  => '.sps',
        'stata' => '.do',
        'sts'   => '.sts'
      } if @output_file_ext.nil?
      @output_formats = [] if @output_formats.nil?
      @record_types   = [] if @record_types.nil?
      @variables      = [] if @variables.nil?
      # Accept nil, a single file name, or an array of file names. A bare
      # String must be wrapped because String no longer responds to #each.
      @yaml_files     = Array(@yaml_files)
      read_metadata_from_yaml
    end


    # Methods to import metadata from YAML files into the Controller object.

    # Replaces the list of YAML files and re-reads the metadata from them.
    # The caller can supply a file name or an array of file names.
    def yaml_files=(file_names)
      @yaml_files = Array(file_names)
      read_metadata_from_yaml
    end

    # Loads every file in @yaml_files, merges the resulting hashes (later
    # files win on duplicate keys) and applies them via load_yaml_md.
    # Does nothing when there are no YAML files.
    def read_metadata_from_yaml
      return if @yaml_files.empty?
      md = {}
      @yaml_files.each do |f|
        content = YAML.load_file(f)
        # An empty YAML document loads as false/nil; there is nothing to merge.
        md.merge!(content) if content
      end
      load_yaml_md(symbolize_keys(md))
    end

    # Uses metadata from YAML (a hash with symbol keys) to set the
    # metadata-related instance variables. Only attributes flagged :yaml in
    # ATTR are copied. When md has a :variables key, the current variables are
    # discarded and rebuilt from it.
    def load_yaml_md(md)
      ATTR.each_key do |k|
        next unless md.has_key?(k) && ATTR[k][:yaml]
        instance_variable_set("@#{k}".to_sym, md[k])
      end
      return unless md.has_key?(:variables)
      @variables = []
      # A "variables:" key with no entries loads as nil; treat it as empty.
      (md[:variables] || []).each do |md_var|
        vals = md_var.delete(:values)
        var  = add_variable(md_var)
        vals.each { |v| var.add_value(v) } unless vals.nil?
      end
    end

    # Recursively converts hash keys to symbols, descending into hashes and
    # arrays. Any other object is returned unchanged.
    def symbolize_keys(h)
      if h.instance_of? Hash
        h.inject({}) { |return_hash, (k, v)| return_hash[k.to_sym] = symbolize_keys(v); return_hash }
      elsif h.instance_of? Array
        h.map { |v| symbolize_keys(v) }
      else
        h
      end
    end


    # Methods to add or get variables.

    # Creates a Variable from args, appends it, and returns it.
    def add_variable(args)
      @variables.push Variable.new(args)
      @variables[-1]
    end

    # Discards all variables.
    def clear_variables
      @variables = []
    end

    # Returns the first variable whose name equals n, or nil.
    def get_var_by_name(n)
      @variables.find { |v| v.name == n }
    end

    # Returns the variables that belong to record type rt, plus every
    # variable flagged as common.
    def get_vars_by_record_type(rt)
      @variables.find_all { |v| v.record_type == rt || v.is_common_var }
    end

    # Returns the variables that have a non-empty label.
    def get_vars_with_var_labels
      @variables.find_all { |v| v.label.length > 0 }
    end

    # Returns the variables that have at least one value and whose labels
    # are not suppressed.
    def get_vars_with_values
      @variables.find_all { |var| var.values.size > 0 && !var.suppress_labels }
    end

    # Returns the non-string variables wider than 8 columns.
    def get_big_nums
      @variables.find_all { |var| var.width > 8 && !var.is_string_var }
    end

    # Returns the variable named by record_type_var_name, or nil.
    def record_type_var
      get_var_by_name(@record_type_var_name)
    end


    # Methods for adding values to variables.

    # Creates a Value from args, appends it to the most recently added
    # variable, and returns it.
    def add_value(args)
      @variables[-1].values.push Value.new(args)
      @variables[-1].values[-1]
    end

    # Builds Value objects from the given arguments (nested arrays are
    # flattened) without attaching them to any variable.
    def new_values(*vals)
      vals.flatten!
      vals.map { |v| Value.new(v) }
    end


    # Methods for record types.

    # True when rt is the last declared record type; false otherwise,
    # including when no record types are declared.
    def is_last_record_type(rt)
      @record_types.size > 0 && @record_types[-1] == rt
    end

    # Returns a new array holding every record type but the last.
    def rec_types_except_last
      @record_types[0...-1]
    end


    # Helper methods.

    # Length of the longest variable name (0 when there are no variables).
    def max_var_name_length
      return 0 if @variables.empty?
      @variables.map { |v| v.name.length }.max
    end

    # Number of characters needed to print the largest end column
    # (0 when there are no variables).
    def max_col_loc_width
      return 0 if @variables.empty?
      @variables.map { |v| v.end_column.to_s.length }.max
    end

    # The data file name without directory or extension.
    def data_file_name_stem
      File.basename(@data_file_name, '.*')
    end

    # Returns a hash with one key per declared record type, each mapped to 0.
    # Built pair by pair so that a record type is never flattened apart.
    def rec_type_lookup_hash
      @record_types.inject({}) { |lookup, rt| lookup[rt] = 0; lookup }
    end

    # Largest end column among the variables (0 when there are none).
    def last_column_used
      return 0 if @variables.empty?
      @variables.map { |v| v.end_column }.max
    end


    # Output methods.

    # YAML dump of the whole controller.
    def to_s
      YAML.dump(self)
    end

    # Writes one syntax file per entry in output_formats.
    # Reports bad metadata when no output formats are set.
    def generate_syntax_files
      bad_metadata('no output formats') if @output_formats.empty?
      @output_formats.each { |t| generate_syntax_file(t) }
    end

    # Writes the syntax file for syntax_type into the output directory.
    # The file name is output_file_stem (a format string) applied to the data
    # file stem, plus the extension registered for syntax_type.
    #
    # An existing file is skipped, with a note on $stderr, unless
    # output_overwrite is set. Reports bad metadata when the output directory
    # does not exist or no extension is registered for syntax_type.
    def generate_syntax_file(syntax_type)
      msg = "output directory does not exist => #{@output_dir_name}"
      bad_metadata(msg) unless File.directory?(@output_dir_name)

      ext = @output_file_ext[syntax_type]
      bad_metadata("no output file extension for syntax type => #{syntax_type}") if ext.nil?

      file_name = File.join(
        @output_dir_name,
        sprintf(@output_file_stem, data_file_name_stem) + ext
      )
      if File.file?(file_name) && !@output_overwrite
        $stderr.puts "Skipping file that already exists => #{file_name}."
        return
      end

      # Generate the syntax before opening the file, so that a metadata
      # failure cannot leave behind an empty or truncated output file.
      lines = syntax(syntax_type)

      if RUBY_VERSION.start_with? "1.8"
        File.open(file_name, 'w') { |f| f.puts lines }
      else
        File.open(file_name, 'w:iso-8859-1') do |f|
          lines.each do |line|
            begin
              # Options are passed as trailing hash arguments rather than a
              # positional Hash, so the call is accepted by old and new Rubies.
              f.puts line.rstrip.encode('iso-8859-1', line.encoding.to_s,
                                        :invalid => :replace, :undef => :replace, :replace => '?')
            rescue StandardError => e
              $stderr.puts "Failed encoding on line #{line} #{e}"
            end
          end
        end
      end
    end

    # Returns the syntax for syntax_type as a list of strings.
    # The metadata is validated, adjusted by modify_metadata, and validated
    # again before the Maker class named 'Maker' + upcased syntax_type is
    # asked for its syntax. Raises NameError when no such class exists.
    def syntax(syntax_type)
      validate_metadata(:minimal => true)
      modify_metadata
      validate_metadata

      # Look the class up by name instead of evaluating a string as code.
      maker_class = StatsPackageSyntaxFileGenerator.const_get("Maker#{syntax_type.to_s.upcase}")
      maker_class.new(self, syntax_type).syntax
    end


    # Before generating syntax, we need to handle some controller-level
    # options that require global modification of the metadata.

    def modify_metadata
      # Force all variables to be strings.
      if @all_vars_as_string
        @variables.each do |var|
          var.is_string_var    = true
          var.is_double_var    = false
          var.implied_decimals = 0
        end
      end

      # If the user wants to rectangularize hierarchical data, the
      # select_vars_by_record_type option is required.
      @select_vars_by_record_type = true if @rectangularize

      # Remove any variables not belonging to the declared record types.
      if @select_vars_by_record_type
        rt_lookup  = rec_type_lookup_hash
        @variables = @variables.find_all { |var| var.is_common_var || rt_lookup.has_key?(var.record_type) }
      end
    end


    # Before generating syntax, run a sanity check on the metadata.
    # With :minimal => true only the structural checks run; otherwise each
    # variable's start_column, width and implied_decimals are also converted
    # to integers and range-checked. Problems are reported via bad_metadata.

    def validate_metadata(check = {})
      bad_metadata('no variables') if @variables.empty?

      if @rectangularize
        msg = 'the rectangularize option requires data_structure=hier'
        bad_metadata(msg) unless @data_structure == 'hier'
      end

      if @data_structure == 'hier' || @select_vars_by_record_type
        bad_metadata('no record types') if @record_types.empty?

        msg = 'record types must be unique'
        bad_metadata(msg) unless rec_type_lookup_hash.keys.size == @record_types.size

        # to_s lets a nil or non-String record type be checked without raising.
        msg = 'all variables must have a record type'
        bad_metadata(msg) if @variables.any? { |var| var.record_type.to_s.empty? }

        unless @variables.any? { |var| var.is_common_var }
          @record_types.each do |rt|
            next if get_vars_by_record_type(rt).size > 0
            bad_metadata("with no common variables, every record type needs at least one variable (#{rt})")
          end
        end
      end

      if @data_structure == 'hier'
        bad_metadata('no record type variable') if record_type_var.nil?
      end

      return if check[:minimal]

      @variables.each do |v|
        v.start_column     = v.start_column.to_i
        v.width            = v.width.to_i
        v.implied_decimals = v.implied_decimals.to_i
        bad_metadata("#{v.name}, start_column"    ) unless v.start_column     >  0
        bad_metadata("#{v.name}, width"           ) unless v.width            >  0
        bad_metadata("#{v.name}, implied_decimals") unless v.implied_decimals >= 0
      end
    end

    # Reports invalid metadata: aborts the process when caller is 'vb' or
    # 'dcp', otherwise raises RuntimeError.
    def bad_metadata(msg)
      msg = 'Invalid metadata: ' + msg
      abort(msg) if @caller == 'vb' || @caller == 'dcp'
      raise(RuntimeError, msg)
    end

  end
end
|