stats_package_syntax_file_generator 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/syntax_file/controller.rb +30 -2
- data/lib/syntax_file/maker.rb +8 -0
- data/lib/syntax_file/maker_sas.rb +17 -9
- data/lib/syntax_file/maker_spss.rb +31 -1
- data/lib/syntax_file/maker_stata.rb +29 -6
- data/tests/setup.rb +10 -2
- data/tests/tc_controller.rb +7 -0
- data/tests/tc_maker_sas.rb +38 -0
- data/tests/tc_maker_spss.rb +11 -0
- data/tests/tc_maker_stata.rb +7 -0
- data/tests/tc_maker_sts.rb +7 -0
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e54f08bcc60310213618ae309728257a75fde34af6dd0c9958a04279d47e111
|
|
4
|
+
data.tar.gz: 2c09c27ee28137762017fe52845ae61496310a2175adba9b2656390576b66e14
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 853aa7c596cf1964227877e6f562ad1b7e274262bd161e947e9138afff0d3d570d831efaaa96ec4694c1b6d794ba9798762b9311a41a35063127e1cde2416fc0
|
|
7
|
+
data.tar.gz: c4cfec89e1b2cf998f5aaf9b9edecd9cb2aec820bb9b04701300b378a41ed9dc0738458526255b5bf446d8aa803fce345bbb0e27c6f25c4f05a75c002f7280da
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
module SyntaxFile
|
|
7
7
|
class Controller
|
|
8
8
|
|
|
9
|
-
VERSION = "1.1.8"
|
|
9
|
+
VERSION = "1.2.0"
|
|
10
10
|
|
|
11
11
|
ATTR = {
|
|
12
12
|
:project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
|
|
@@ -52,9 +52,17 @@ module SyntaxFile
|
|
|
52
52
|
@record_types = [] if @record_types.nil?
|
|
53
53
|
@variables = [] if @variables.nil?
|
|
54
54
|
@yaml_files = [] if @yaml_files.nil?
|
|
55
|
+
|
|
56
|
+
if @data_structure == 'hier' && is_csv?
|
|
57
|
+
raise(ArgumentError, 'Hierarchical data_structure is not supported for CSV data')
|
|
58
|
+
end
|
|
55
59
|
read_metadata_from_yaml
|
|
56
60
|
end
|
|
57
61
|
|
|
62
|
+
def is_csv?
|
|
63
|
+
@data_file_name.end_with?('.csv')
|
|
64
|
+
end
|
|
65
|
+
|
|
58
66
|
# Methods to import metadata from YAML files into the Controller object.
|
|
59
67
|
|
|
60
68
|
def yaml_files=(file_names)
|
|
@@ -66,11 +74,31 @@ module SyntaxFile
|
|
|
66
74
|
def read_metadata_from_yaml
|
|
67
75
|
return if @yaml_files.empty?
|
|
68
76
|
md = {}
|
|
69
|
-
@yaml_files.each { |f|
|
|
77
|
+
@yaml_files.each { |f|
|
|
78
|
+
md.merge! parse_yaml(f)
|
|
79
|
+
}
|
|
70
80
|
md = symbolize_keys(md)
|
|
71
81
|
load_yaml_md(md)
|
|
72
82
|
end
|
|
73
83
|
|
|
84
|
+
def parse_yaml(yaml_file)
|
|
85
|
+
if RUBY_ENGINE == 'jruby'
|
|
86
|
+
# This code exists to handle the new size limit in the SnakeYAML Java library
|
|
87
|
+
# of three million code points. The issue is discussed here:
|
|
88
|
+
# https://github.com/jruby/jruby/issues/7543
|
|
89
|
+
tree_builder = Psych::TreeBuilder.new
|
|
90
|
+
parser = Psych::Parser.new(tree_builder)
|
|
91
|
+
parser.code_point_limit = 20_000_000
|
|
92
|
+
|
|
93
|
+
yaml_data = File.read(yaml_file)
|
|
94
|
+
parser.parse(yaml_data)
|
|
95
|
+
# Convert to Ruby and get the hash out of the document array
|
|
96
|
+
tree_builder.root.to_ruby.first
|
|
97
|
+
else
|
|
98
|
+
YAML.load_file(yaml_file)
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
74
102
|
def load_yaml_md(md)
|
|
75
103
|
# Uses metadata from yaml to set metadata-related instance variables.
|
|
76
104
|
ATTR.each_key do |k|
|
data/lib/syntax_file/maker.rb
CHANGED
|
@@ -13,6 +13,14 @@ module SyntaxFile
|
|
|
13
13
|
@sfc = sfc
|
|
14
14
|
@syntax_type = syntax_type
|
|
15
15
|
@cmd_end = ''
|
|
16
|
+
|
|
17
|
+
if @sfc.is_csv? && !supports_csv?
|
|
18
|
+
raise "CSV data not supported for #{@syntax_type.upcase} syntax files"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def supports_csv?
|
|
23
|
+
false
|
|
16
24
|
end
|
|
17
25
|
|
|
18
26
|
# Syntax terminator.
|
|
@@ -18,11 +18,15 @@ module SyntaxFile
|
|
|
18
18
|
@label_max_leng = 256
|
|
19
19
|
@segment_max_leng = 100
|
|
20
20
|
@sas_library_handle = 'IPUMS'
|
|
21
|
-
@sas_file_handle = 'ASCIIDAT'
|
|
21
|
+
@sas_file_handle = @sfc.is_csv? ? 'CSV' : 'ASCIIDAT'
|
|
22
22
|
@sas_fmt_suffix = '_f'
|
|
23
23
|
@sas_data_file_name = @sas_library_handle + '.' + @sfc.data_file_name_stem
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
+
def supports_csv?
|
|
27
|
+
true
|
|
28
|
+
end
|
|
29
|
+
|
|
26
30
|
def syntax
|
|
27
31
|
r = [
|
|
28
32
|
comments_start,
|
|
@@ -138,10 +142,14 @@ module SyntaxFile
|
|
|
138
142
|
end
|
|
139
143
|
|
|
140
144
|
def syn_df_infile
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
+
if @sfc.is_csv?
|
|
146
|
+
'infile ' + @sas_file_handle + " missover dsd delimiter=" + q(',') + " firstobs=2" + @cmd_end
|
|
147
|
+
else
|
|
148
|
+
# The LRECL specification is needed because the default behavior on some
|
|
149
|
+
# operating systems is to truncate records to 256 columns.
|
|
150
|
+
c = @sfc.last_column_used
|
|
151
|
+
'infile ' + @sas_file_handle + ' pad missover lrecl=' + c.to_s + @cmd_end
|
|
152
|
+
end
|
|
145
153
|
end
|
|
146
154
|
|
|
147
155
|
def syn_dfr
|
|
@@ -163,7 +171,7 @@ module SyntaxFile
|
|
|
163
171
|
var_list.collect { |v|
|
|
164
172
|
sprintf(@var_loc_format, v.name) +
|
|
165
173
|
(v.is_string_var ? '$ ' : ' ') +
|
|
166
|
-
v.column_locations_as_s +
|
|
174
|
+
(@sfc.is_csv? ? '' : v.column_locations_as_s) +
|
|
167
175
|
implied_decimal_fmt(v)
|
|
168
176
|
}
|
|
169
177
|
end
|
|
@@ -249,7 +257,7 @@ module SyntaxFile
|
|
|
249
257
|
|
|
250
258
|
def syn_fmt_big_nums
|
|
251
259
|
big_num_vars = @sfc.get_big_nums
|
|
252
|
-
return [] if big_num_vars.empty?
|
|
260
|
+
return [] if big_num_vars.empty? || @sfc.is_csv?
|
|
253
261
|
r = [
|
|
254
262
|
'format',
|
|
255
263
|
syn_fmt_big_nums_for_var_list(big_num_vars),
|
|
@@ -270,7 +278,7 @@ module SyntaxFile
|
|
|
270
278
|
|
|
271
279
|
def syn_fmt_link
|
|
272
280
|
var_list = @sfc.get_vars_with_values
|
|
273
|
-
return [] if var_list.empty?
|
|
281
|
+
return [] if var_list.empty? || @sfc.is_csv?
|
|
274
282
|
r = [
|
|
275
283
|
'format',
|
|
276
284
|
syn_fmt_link_for_var_list(var_list),
|
|
@@ -286,7 +294,7 @@ module SyntaxFile
|
|
|
286
294
|
end
|
|
287
295
|
|
|
288
296
|
def implied_decimal_fmt(var)
|
|
289
|
-
return '' if var.is_string_var or var.implied_decimals == 0
|
|
297
|
+
return '' if var.is_string_var or var.implied_decimals == 0 or @sfc.is_csv?
|
|
290
298
|
return ' .' + var.implied_decimals.to_s
|
|
291
299
|
end
|
|
292
300
|
|
|
@@ -17,6 +17,10 @@ module SyntaxFile
|
|
|
17
17
|
@segment_max_leng = 100
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
+
def supports_csv?
|
|
21
|
+
true
|
|
22
|
+
end
|
|
23
|
+
|
|
20
24
|
def syntax
|
|
21
25
|
r = [
|
|
22
26
|
comments_start,
|
|
@@ -54,7 +58,33 @@ module SyntaxFile
|
|
|
54
58
|
end
|
|
55
59
|
|
|
56
60
|
def syn_df
|
|
57
|
-
@sfc.
|
|
61
|
+
if @sfc.is_csv?
|
|
62
|
+
syn_dfr_csv
|
|
63
|
+
else
|
|
64
|
+
@sfc.data_structure == 'hier' ? syn_dfh : syn_dfr
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def syn_dfr_csv
|
|
69
|
+
r = [
|
|
70
|
+
'GET DATA /TYPE=TXT',
|
|
71
|
+
' /FILE=' + q(@sfc.data_file_name),
|
|
72
|
+
' /ENCODING=\'UTF8\'',
|
|
73
|
+
' /DELIMITERS=","',
|
|
74
|
+
' /QUALIFIER=\'"\'',
|
|
75
|
+
' /ARRANGEMENT=DELIMITED',
|
|
76
|
+
' /FIRSTCASE=2',
|
|
77
|
+
' /DATATYPEMIN PERCENTAGE=100.0',
|
|
78
|
+
' /VARIABLES=',
|
|
79
|
+
syn_vars_csv(@sfc.variables),
|
|
80
|
+
' /MAP.',
|
|
81
|
+
'execute.'
|
|
82
|
+
]
|
|
83
|
+
r.flatten
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def syn_vars_csv(var_list)
|
|
87
|
+
var_list.map { |v| sprintf @var_loc_format, v.name, v.is_string_var ? 'A' : 'AUTO' }
|
|
58
88
|
end
|
|
59
89
|
|
|
60
90
|
def syn_dfr
|
|
@@ -26,6 +26,10 @@ module SyntaxFile
|
|
|
26
26
|
@sort_var_stem = '_line_num'
|
|
27
27
|
end
|
|
28
28
|
|
|
29
|
+
def supports_csv?
|
|
30
|
+
true
|
|
31
|
+
end
|
|
32
|
+
|
|
29
33
|
def syntax
|
|
30
34
|
r = [
|
|
31
35
|
comments_start,
|
|
@@ -80,17 +84,24 @@ module SyntaxFile
|
|
|
80
84
|
|
|
81
85
|
def syn_infix(var_list)
|
|
82
86
|
r = [
|
|
83
|
-
syn_infix_start
|
|
84
|
-
syn_infix_var_locs(var_list),
|
|
85
|
-
syn_infix_end,
|
|
87
|
+
syn_infix_start
|
|
86
88
|
]
|
|
89
|
+
if !@sfc.is_csv?
|
|
90
|
+
r.push syn_infix_var_locs(var_list)
|
|
91
|
+
r.push syn_infix_end
|
|
92
|
+
end
|
|
87
93
|
r.flatten
|
|
88
94
|
end
|
|
89
95
|
|
|
90
96
|
def syn_infix_start
|
|
97
|
+
if @sfc.is_csv?
|
|
98
|
+
infix_cmd = "quietly import delimited #{q(@sfc.data_file_name)}, stringcols(#{list_stringcols(@sfc.variables)})"
|
|
99
|
+
else
|
|
100
|
+
infix_cmd = 'quietly infix'
|
|
101
|
+
end
|
|
91
102
|
[
|
|
92
103
|
'clear',
|
|
93
|
-
|
|
104
|
+
infix_cmd + sprintf(@infix_format, @cmd_continue),
|
|
94
105
|
]
|
|
95
106
|
end
|
|
96
107
|
|
|
@@ -190,7 +201,7 @@ module SyntaxFile
|
|
|
190
201
|
|
|
191
202
|
def syn_convert_implied_decim
|
|
192
203
|
var_list = @sfc.variables.find_all { |var| var.implied_decimals > 0 }
|
|
193
|
-
return [] if var_list.empty?
|
|
204
|
+
return [] if var_list.empty? || @sfc.is_csv?
|
|
194
205
|
var_list.map { |var|
|
|
195
206
|
v = var.name.downcase
|
|
196
207
|
sprintf @replace_format, v, v, 10 ** var.implied_decimals
|
|
@@ -202,7 +213,7 @@ module SyntaxFile
|
|
|
202
213
|
vf = var_fmt(var)
|
|
203
214
|
vf == 'double' or vf == 'float'
|
|
204
215
|
}
|
|
205
|
-
return [] if var_list.empty?
|
|
216
|
+
return [] if var_list.empty? || @sfc.is_csv?
|
|
206
217
|
var_list.map { |var|
|
|
207
218
|
v = var.name.downcase
|
|
208
219
|
|
|
@@ -323,5 +334,17 @@ module SyntaxFile
|
|
|
323
334
|
rt_var.name.downcase + ' != ' + val_q(rt_var, val_as_s(rt_var, rt))
|
|
324
335
|
end
|
|
325
336
|
|
|
337
|
+
def list_stringcols(vars)
|
|
338
|
+
positions = []
|
|
339
|
+
index = 1
|
|
340
|
+
vars.each do |v|
|
|
341
|
+
if v.is_string_var
|
|
342
|
+
positions << index
|
|
343
|
+
end
|
|
344
|
+
index += 1
|
|
345
|
+
end
|
|
346
|
+
positions.join(' ')
|
|
347
|
+
end
|
|
348
|
+
|
|
326
349
|
end
|
|
327
350
|
end
|
data/tests/setup.rb
CHANGED
|
@@ -23,6 +23,13 @@ def new_controller
|
|
|
23
23
|
SyntaxFile::Controller.new(:yaml_files => YAML_FILES)
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
+
def new_controller_csv
|
|
27
|
+
controller = new_controller
|
|
28
|
+
controller.data_file_name = 'data.csv'
|
|
29
|
+
controller.data_structure = 'rect'
|
|
30
|
+
controller
|
|
31
|
+
end
|
|
32
|
+
|
|
26
33
|
def new_variable
|
|
27
34
|
SyntaxFile::Variable.new params_variable()
|
|
28
35
|
end
|
|
@@ -31,9 +38,10 @@ def new_value
|
|
|
31
38
|
SyntaxFile::Value.new params_value()
|
|
32
39
|
end
|
|
33
40
|
|
|
34
|
-
def new_maker(syntax_type = '')
|
|
41
|
+
def new_maker(syntax_type = '', csv: false)
|
|
35
42
|
maker_class = 'SyntaxFile::Maker' + syntax_type.upcase
|
|
36
|
-
|
|
43
|
+
controller = csv ? new_controller_csv : new_controller
|
|
44
|
+
eval(maker_class).new(controller, syntax_type)
|
|
37
45
|
end
|
|
38
46
|
|
|
39
47
|
# Parameters used when creating objects with known values.
|
data/tests/tc_controller.rb
CHANGED
|
@@ -374,5 +374,12 @@ def test_rec_type_lookup_hash
|
|
|
374
374
|
assert_equal( {}, sfc.rec_type_lookup_hash, msg )
|
|
375
375
|
end
|
|
376
376
|
|
|
377
|
+
def test_hier_csv
|
|
378
|
+
error = assert_raises ArgumentError do
|
|
379
|
+
SyntaxFile::Controller.new(data_structure: 'hier', data_file_name: 'data.csv')
|
|
380
|
+
end
|
|
381
|
+
assert_equal('Hierarchical data_structure is not supported for CSV data', error.message)
|
|
382
|
+
end
|
|
383
|
+
|
|
377
384
|
end
|
|
378
385
|
end
|
data/tests/tc_maker_sas.rb
CHANGED
|
@@ -247,5 +247,43 @@ def test_non_last_non_common_vars
|
|
|
247
247
|
assert_equal [], vars_to_names(var_list), msg
|
|
248
248
|
end
|
|
249
249
|
|
|
250
|
+
def test_csv_import
|
|
251
|
+
msg = 'Compare against hardcoded result.'
|
|
252
|
+
mk = new_maker('sas', csv: true)
|
|
253
|
+
expected = [
|
|
254
|
+
'data IPUMS.data;',
|
|
255
|
+
'infile CSV missover dsd delimiter="," firstobs=2;',
|
|
256
|
+
'',
|
|
257
|
+
'input',
|
|
258
|
+
" RECTYPE $ ",
|
|
259
|
+
" DWNUM ",
|
|
260
|
+
" HHNUM ",
|
|
261
|
+
" HDFIRSTD ",
|
|
262
|
+
" FBIG_ND ",
|
|
263
|
+
" BADDW ",
|
|
264
|
+
" CANTON ",
|
|
265
|
+
" URBAN ",
|
|
266
|
+
" DWTYPE ",
|
|
267
|
+
" OWNERSHP ",
|
|
268
|
+
" RENT $ ",
|
|
269
|
+
" RELATE ",
|
|
270
|
+
" SEX ",
|
|
271
|
+
" AGE ",
|
|
272
|
+
" RESPREV2 ",
|
|
273
|
+
" SOCSEC ",
|
|
274
|
+
" EDLEVEL ",
|
|
275
|
+
" LIT ",
|
|
276
|
+
" BIGDEC ",
|
|
277
|
+
" BIGINT ",
|
|
278
|
+
" BIGSTR $ ",
|
|
279
|
+
";",
|
|
280
|
+
''
|
|
281
|
+
]
|
|
282
|
+
assert_equal expected, mk.syn_df, msg
|
|
283
|
+
|
|
284
|
+
expected2 = []
|
|
285
|
+
assert_equal expected2, mk.syn_fmt_link, msg
|
|
286
|
+
end
|
|
287
|
+
|
|
250
288
|
end
|
|
251
289
|
end
|
data/tests/tc_maker_spss.rb
CHANGED
|
@@ -116,6 +116,17 @@ def test_var_fmt
|
|
|
116
116
|
assert_equal expected, actual, msg
|
|
117
117
|
end
|
|
118
118
|
|
|
119
|
+
def test_csv_import
|
|
120
|
+
msg = 'Compare against hardcoded result.'
|
|
121
|
+
mk = new_maker('spss', csv: true)
|
|
122
|
+
syn_df = mk.syn_df
|
|
123
|
+
assert_equal 'GET DATA /TYPE=TXT', syn_df[0], msg
|
|
124
|
+
assert_equal ' /FILE="data.csv"', syn_df[1], msg
|
|
125
|
+
assert_equal ' RECTYPE A', syn_df[9], msg
|
|
126
|
+
assert_equal ' /MAP.', syn_df[-2], msg
|
|
127
|
+
assert_equal 'execute.', syn_df[-1], msg
|
|
128
|
+
end
|
|
129
|
+
|
|
119
130
|
|
|
120
131
|
end
|
|
121
132
|
end
|
data/tests/tc_maker_stata.rb
CHANGED
|
@@ -220,5 +220,12 @@ def test_rt_ne_statement
|
|
|
220
220
|
assert_equal expected, mk.rt_ne_statement('H'), msg
|
|
221
221
|
end
|
|
222
222
|
|
|
223
|
+
def test_csv_import
|
|
224
|
+
msg = 'Compare against hardcoded result.'
|
|
225
|
+
mk = new_maker('stata', csv: true)
|
|
226
|
+
expected = ['clear', 'quietly import delimited `"data.csv"\', stringcols(1 11 21) ///']
|
|
227
|
+
assert_equal expected, mk.syn_df, msg
|
|
228
|
+
end
|
|
229
|
+
|
|
223
230
|
end
|
|
224
231
|
end
|
data/tests/tc_maker_sts.rb
CHANGED
|
@@ -185,5 +185,12 @@ def test_syn_var_locations
|
|
|
185
185
|
assert_equal expected, actual, msg
|
|
186
186
|
end
|
|
187
187
|
|
|
188
|
+
def test_csv_import
|
|
189
|
+
error = assert_raises RuntimeError do
|
|
190
|
+
new_maker('sts', csv: true)
|
|
191
|
+
end
|
|
192
|
+
assert_equal('CSV data not supported for STS syntax files', error.message)
|
|
193
|
+
end
|
|
194
|
+
|
|
188
195
|
end
|
|
189
196
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: stats_package_syntax_file_generator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.8
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Monty Hindman, Marcus Peterson, Colin Davis, Dan Elbert, Jayandra Pokharel
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-02-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -103,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
103
103
|
- !ruby/object:Gem::Version
|
|
104
104
|
version: '0'
|
|
105
105
|
requirements: []
|
|
106
|
-
rubygems_version: 3.
|
|
106
|
+
rubygems_version: 3.5.14
|
|
107
107
|
signing_key:
|
|
108
108
|
specification_version: 4
|
|
109
109
|
summary: Produces statistical package syntax files for fixed-column data.
|