stats_package_syntax_file_generator 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 805a94fbae05131beedbccf6c5f152bf9c591bdc4e187a8ff9f522787596d055
4
- data.tar.gz: 1f6471c80ee20fefb38f5533c88460e819a1eb21dfdbc27f9b656b39616023df
3
+ metadata.gz: 8e54f08bcc60310213618ae309728257a75fde34af6dd0c9958a04279d47e111
4
+ data.tar.gz: 2c09c27ee28137762017fe52845ae61496310a2175adba9b2656390576b66e14
5
5
  SHA512:
6
- metadata.gz: 2d62e7924e7b628d9e12789faf7430750691114eea125802c27c1a0af002cde7b58fbc2860b41805d2b9f8621e0b0ad032663dc4a3ddca60c2ed0ba288a0340b
7
- data.tar.gz: 508457b7708fbc5a5780141d1196d8034fbaa2d9485640d243c2c3fbf11c9623875cc090a33d3a8fc3c6f0c795adfc07f87a5c981d493074f148fb2d156f7efe
6
+ metadata.gz: 853aa7c596cf1964227877e6f562ad1b7e274262bd161e947e9138afff0d3d570d831efaaa96ec4694c1b6d794ba9798762b9311a41a35063127e1cde2416fc0
7
+ data.tar.gz: c4cfec89e1b2cf998f5aaf9b9edecd9cb2aec820bb9b04701300b378a41ed9dc0738458526255b5bf446d8aa803fce345bbb0e27c6f25c4f05a75c002f7280da
@@ -6,7 +6,7 @@
6
6
  module SyntaxFile
7
7
  class Controller
8
8
 
9
- VERSION = "1.1.8"
9
+ VERSION = "1.2.0"
10
10
 
11
11
  ATTR = {
12
12
  :project => { :req => false, :rw => 'rw', :def => '', :yaml => true },
@@ -52,9 +52,17 @@ module SyntaxFile
52
52
  @record_types = [] if @record_types.nil?
53
53
  @variables = [] if @variables.nil?
54
54
  @yaml_files = [] if @yaml_files.nil?
55
+
56
+ if @data_structure == 'hier' && is_csv?
57
+ raise(ArgumentError, 'Hierarchical data_structure is not supported for CSV data')
58
+ end
55
59
  read_metadata_from_yaml
56
60
  end
57
61
 
62
+ def is_csv?
63
+ @data_file_name.end_with?('.csv')
64
+ end
65
+
58
66
  # Methods to import metadata from YAML files into the Controller object.
59
67
 
60
68
  def yaml_files=(file_names)
@@ -66,11 +74,31 @@ module SyntaxFile
66
74
  def read_metadata_from_yaml
67
75
  return if @yaml_files.empty?
68
76
  md = {}
69
- @yaml_files.each { |f| md.merge! YAML.load_file(f) }
77
+ @yaml_files.each { |f|
78
+ md.merge! parse_yaml(f)
79
+ }
70
80
  md = symbolize_keys(md)
71
81
  load_yaml_md(md)
72
82
  end
73
83
 
84
+ def parse_yaml(yaml_file)
85
+ if RUBY_ENGINE == 'jruby'
86
+ # This code exists to handle the new size limit in the SnakeYAML Java library
87
+ # of three million code points. The issue is discussed here:
88
+ # https://github.com/jruby/jruby/issues/7543
89
+ tree_builder = Psych::TreeBuilder.new
90
+ parser = Psych::Parser.new(tree_builder)
91
+ parser.code_point_limit = 20_000_000
92
+
93
+ yaml_data = File.read(yaml_file)
94
+ parser.parse(yaml_data)
95
+ # Convert to Ruby and get the hash out of the document array
96
+ tree_builder.root.to_ruby.first
97
+ else
98
+ YAML.load_file(yaml_file)
99
+ end
100
+ end
101
+
74
102
  def load_yaml_md(md)
75
103
  # Uses metadata from yaml to set metadata-related instance variables.
76
104
  ATTR.each_key do |k|
@@ -13,6 +13,14 @@ module SyntaxFile
13
13
  @sfc = sfc
14
14
  @syntax_type = syntax_type
15
15
  @cmd_end = ''
16
+
17
+ if @sfc.is_csv? && !supports_csv?
18
+ raise "CSV data not supported for #{@syntax_type.upcase} syntax files"
19
+ end
20
+ end
21
+
22
+ def supports_csv?
23
+ false
16
24
  end
17
25
 
18
26
  # Syntax terminator.
@@ -18,11 +18,15 @@ module SyntaxFile
18
18
  @label_max_leng = 256
19
19
  @segment_max_leng = 100
20
20
  @sas_library_handle = 'IPUMS'
21
- @sas_file_handle = 'ASCIIDAT'
21
+ @sas_file_handle = @sfc.is_csv? ? 'CSV' : 'ASCIIDAT'
22
22
  @sas_fmt_suffix = '_f'
23
23
  @sas_data_file_name = @sas_library_handle + '.' + @sfc.data_file_name_stem
24
24
  end
25
25
 
26
+ def supports_csv?
27
+ true
28
+ end
29
+
26
30
  def syntax
27
31
  r = [
28
32
  comments_start,
@@ -138,10 +142,14 @@ module SyntaxFile
138
142
  end
139
143
 
140
144
  def syn_df_infile
141
- # The LRECL specification is needed because the default behavior on some
142
- # operating systems is to truncate records to 256 columns.
143
- c = @sfc.last_column_used
144
- 'infile ' + @sas_file_handle + ' pad missover lrecl=' + c.to_s + @cmd_end
145
+ if @sfc.is_csv?
146
+ 'infile ' + @sas_file_handle + " missover dsd delimiter=" + q(',') + " firstobs=2" + @cmd_end
147
+ else
148
+ # The LRECL specification is needed because the default behavior on some
149
+ # operating systems is to truncate records to 256 columns.
150
+ c = @sfc.last_column_used
151
+ 'infile ' + @sas_file_handle + ' pad missover lrecl=' + c.to_s + @cmd_end
152
+ end
145
153
  end
146
154
 
147
155
  def syn_dfr
@@ -163,7 +171,7 @@ module SyntaxFile
163
171
  var_list.collect { |v|
164
172
  sprintf(@var_loc_format, v.name) +
165
173
  (v.is_string_var ? '$ ' : ' ') +
166
- v.column_locations_as_s +
174
+ (@sfc.is_csv? ? '' : v.column_locations_as_s) +
167
175
  implied_decimal_fmt(v)
168
176
  }
169
177
  end
@@ -249,7 +257,7 @@ module SyntaxFile
249
257
 
250
258
  def syn_fmt_big_nums
251
259
  big_num_vars = @sfc.get_big_nums
252
- return [] if big_num_vars.empty?
260
+ return [] if big_num_vars.empty? || @sfc.is_csv?
253
261
  r = [
254
262
  'format',
255
263
  syn_fmt_big_nums_for_var_list(big_num_vars),
@@ -270,7 +278,7 @@ module SyntaxFile
270
278
 
271
279
  def syn_fmt_link
272
280
  var_list = @sfc.get_vars_with_values
273
- return [] if var_list.empty?
281
+ return [] if var_list.empty? || @sfc.is_csv?
274
282
  r = [
275
283
  'format',
276
284
  syn_fmt_link_for_var_list(var_list),
@@ -286,7 +294,7 @@ module SyntaxFile
286
294
  end
287
295
 
288
296
  def implied_decimal_fmt(var)
289
- return '' if var.is_string_var or var.implied_decimals == 0
297
+ return '' if var.is_string_var or var.implied_decimals == 0 or @sfc.is_csv?
290
298
  return ' .' + var.implied_decimals.to_s
291
299
  end
292
300
 
@@ -17,6 +17,10 @@ module SyntaxFile
17
17
  @segment_max_leng = 100
18
18
  end
19
19
 
20
+ def supports_csv?
21
+ true
22
+ end
23
+
20
24
  def syntax
21
25
  r = [
22
26
  comments_start,
@@ -54,7 +58,33 @@ module SyntaxFile
54
58
  end
55
59
 
56
60
  def syn_df
57
- @sfc.data_structure == 'hier' ? syn_dfh : syn_dfr
61
+ if @sfc.is_csv?
62
+ syn_dfr_csv
63
+ else
64
+ @sfc.data_structure == 'hier' ? syn_dfh : syn_dfr
65
+ end
66
+ end
67
+
68
+ def syn_dfr_csv
69
+ r = [
70
+ 'GET DATA /TYPE=TXT',
71
+ ' /FILE=' + q(@sfc.data_file_name),
72
+ ' /ENCODING=\'UTF8\'',
73
+ ' /DELIMITERS=","',
74
+ ' /QUALIFIER=\'"\'',
75
+ ' /ARRANGEMENT=DELIMITED',
76
+ ' /FIRSTCASE=2',
77
+ ' /DATATYPEMIN PERCENTAGE=100.0',
78
+ ' /VARIABLES=',
79
+ syn_vars_csv(@sfc.variables),
80
+ ' /MAP.',
81
+ 'execute.'
82
+ ]
83
+ r.flatten
84
+ end
85
+
86
+ def syn_vars_csv(var_list)
87
+ var_list.map { |v| sprintf @var_loc_format, v.name, v.is_string_var ? 'A' : 'AUTO' }
58
88
  end
59
89
 
60
90
  def syn_dfr
@@ -26,6 +26,10 @@ module SyntaxFile
26
26
  @sort_var_stem = '_line_num'
27
27
  end
28
28
 
29
+ def supports_csv?
30
+ true
31
+ end
32
+
29
33
  def syntax
30
34
  r = [
31
35
  comments_start,
@@ -80,17 +84,24 @@ module SyntaxFile
80
84
 
81
85
  def syn_infix(var_list)
82
86
  r = [
83
- syn_infix_start,
84
- syn_infix_var_locs(var_list),
85
- syn_infix_end,
87
+ syn_infix_start
86
88
  ]
89
+ if !@sfc.is_csv?
90
+ r.push syn_infix_var_locs(var_list)
91
+ r.push syn_infix_end
92
+ end
87
93
  r.flatten
88
94
  end
89
95
 
90
96
  def syn_infix_start
97
+ if @sfc.is_csv?
98
+ infix_cmd = "quietly import delimited #{q(@sfc.data_file_name)}, stringcols(#{list_stringcols(@sfc.variables)})"
99
+ else
100
+ infix_cmd = 'quietly infix'
101
+ end
91
102
  [
92
103
  'clear',
93
- 'quietly infix' + sprintf(@infix_format, @cmd_continue),
104
+ infix_cmd + sprintf(@infix_format, @cmd_continue),
94
105
  ]
95
106
  end
96
107
 
@@ -190,7 +201,7 @@ module SyntaxFile
190
201
 
191
202
  def syn_convert_implied_decim
192
203
  var_list = @sfc.variables.find_all { |var| var.implied_decimals > 0 }
193
- return [] if var_list.empty?
204
+ return [] if var_list.empty? || @sfc.is_csv?
194
205
  var_list.map { |var|
195
206
  v = var.name.downcase
196
207
  sprintf @replace_format, v, v, 10 ** var.implied_decimals
@@ -202,7 +213,7 @@ module SyntaxFile
202
213
  vf = var_fmt(var)
203
214
  vf == 'double' or vf == 'float'
204
215
  }
205
- return [] if var_list.empty?
216
+ return [] if var_list.empty? || @sfc.is_csv?
206
217
  var_list.map { |var|
207
218
  v = var.name.downcase
208
219
 
@@ -323,5 +334,17 @@ module SyntaxFile
323
334
  rt_var.name.downcase + ' != ' + val_q(rt_var, val_as_s(rt_var, rt))
324
335
  end
325
336
 
337
+ def list_stringcols(vars)
338
+ positions = []
339
+ index = 1
340
+ vars.each do |v|
341
+ if v.is_string_var
342
+ positions << index
343
+ end
344
+ index += 1
345
+ end
346
+ positions.join(' ')
347
+ end
348
+
326
349
  end
327
350
  end
data/tests/setup.rb CHANGED
@@ -23,6 +23,13 @@ def new_controller
23
23
  SyntaxFile::Controller.new(:yaml_files => YAML_FILES)
24
24
  end
25
25
 
26
+ def new_controller_csv
27
+ controller = new_controller
28
+ controller.data_file_name = 'data.csv'
29
+ controller.data_structure = 'rect'
30
+ controller
31
+ end
32
+
26
33
  def new_variable
27
34
  SyntaxFile::Variable.new params_variable()
28
35
  end
@@ -31,9 +38,10 @@ def new_value
31
38
  SyntaxFile::Value.new params_value()
32
39
  end
33
40
 
34
- def new_maker(syntax_type = '')
41
+ def new_maker(syntax_type = '', csv: false)
35
42
  maker_class = 'SyntaxFile::Maker' + syntax_type.upcase
36
- eval(maker_class).new(new_controller, syntax_type)
43
+ controller = csv ? new_controller_csv : new_controller
44
+ eval(maker_class).new(controller, syntax_type)
37
45
  end
38
46
 
39
47
  # Parameters used when creating objects with known values.
@@ -374,5 +374,12 @@ def test_rec_type_lookup_hash
374
374
  assert_equal( {}, sfc.rec_type_lookup_hash, msg )
375
375
  end
376
376
 
377
+ def test_hier_csv
378
+ error = assert_raises ArgumentError do
379
+ SyntaxFile::Controller.new(data_structure: 'hier', data_file_name: 'data.csv')
380
+ end
381
+ assert_equal('Hierarchical data_structure is not supported for CSV data', error.message)
382
+ end
383
+
377
384
  end
378
385
  end
@@ -247,5 +247,43 @@ def test_non_last_non_common_vars
247
247
  assert_equal [], vars_to_names(var_list), msg
248
248
  end
249
249
 
250
+ def test_csv_import
251
+ msg = 'Compare against hardcoded result.'
252
+ mk = new_maker('sas', csv: true)
253
+ expected = [
254
+ 'data IPUMS.data;',
255
+ 'infile CSV missover dsd delimiter="," firstobs=2;',
256
+ '',
257
+ 'input',
258
+ " RECTYPE $ ",
259
+ " DWNUM ",
260
+ " HHNUM ",
261
+ " HDFIRSTD ",
262
+ " FBIG_ND ",
263
+ " BADDW ",
264
+ " CANTON ",
265
+ " URBAN ",
266
+ " DWTYPE ",
267
+ " OWNERSHP ",
268
+ " RENT $ ",
269
+ " RELATE ",
270
+ " SEX ",
271
+ " AGE ",
272
+ " RESPREV2 ",
273
+ " SOCSEC ",
274
+ " EDLEVEL ",
275
+ " LIT ",
276
+ " BIGDEC ",
277
+ " BIGINT ",
278
+ " BIGSTR $ ",
279
+ ";",
280
+ ''
281
+ ]
282
+ assert_equal expected, mk.syn_df, msg
283
+
284
+ expected2 = []
285
+ assert_equal expected2, mk.syn_fmt_link, msg
286
+ end
287
+
250
288
  end
251
289
  end
@@ -116,6 +116,17 @@ def test_var_fmt
116
116
  assert_equal expected, actual, msg
117
117
  end
118
118
 
119
+ def test_csv_import
120
+ msg = 'Compare against hardcoded result.'
121
+ mk = new_maker('spss', csv: true)
122
+ syn_df = mk.syn_df
123
+ assert_equal 'GET DATA /TYPE=TXT', syn_df[0], msg
124
+ assert_equal ' /FILE="data.csv"', syn_df[1], msg
125
+ assert_equal ' RECTYPE A', syn_df[9], msg
126
+ assert_equal ' /MAP.', syn_df[-2], msg
127
+ assert_equal 'execute.', syn_df[-1], msg
128
+ end
129
+
119
130
 
120
131
  end
121
132
  end
@@ -220,5 +220,12 @@ def test_rt_ne_statement
220
220
  assert_equal expected, mk.rt_ne_statement('H'), msg
221
221
  end
222
222
 
223
+ def test_csv_import
224
+ msg = 'Compare against hardcoded result.'
225
+ mk = new_maker('stata', csv: true)
226
+ expected = ['clear', 'quietly import delimited `"data.csv"\', stringcols(1 11 21) ///']
227
+ assert_equal expected, mk.syn_df, msg
228
+ end
229
+
223
230
  end
224
231
  end
@@ -185,5 +185,12 @@ def test_syn_var_locations
185
185
  assert_equal expected, actual, msg
186
186
  end
187
187
 
188
+ def test_csv_import
189
+ error = assert_raises RuntimeError do
190
+ new_maker('sts', csv: true)
191
+ end
192
+ assert_equal('CSV data not supported for STS syntax files', error.message)
193
+ end
194
+
188
195
  end
189
196
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stats_package_syntax_file_generator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.8
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Monty Hindman, Marcus Peterson, Colin Davis, Dan Elbert, Jayandra Pokharel
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-14 00:00:00.000000000 Z
11
+ date: 2026-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -103,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubygems_version: 3.1.4
106
+ rubygems_version: 3.5.14
107
107
  signing_key:
108
108
  specification_version: 4
109
109
  summary: Produces statistical package syntax files for fixed-column data.