fechell 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/Manifest +75 -0
  2. data/README.rdoc +130 -0
  3. data/Rakefile +16 -0
  4. data/fechell.gemspec +33 -0
  5. data/lib/defs/3.00.csv +1 -0
  6. data/lib/defs/5.00.csv +1 -0
  7. data/lib/defs/5.1.csv +1 -0
  8. data/lib/defs/5.2.csv +1 -0
  9. data/lib/defs/5.3.csv +1 -0
  10. data/lib/defs/6.1.csv +1 -0
  11. data/lib/defs/6.2.csv +1 -0
  12. data/lib/defs/6.3.csv +1 -0
  13. data/lib/defs/6.4.csv +1 -0
  14. data/lib/fechell/forms.rb +1285 -0
  15. data/lib/fechell.rb +353 -0
  16. data/lib/tests/f3test.rb +1035 -0
  17. data/lib/tests/satest.rb +346 -0
  18. data/lib/tests/sbtest.rb +390 -0
  19. data/lib/tests/sc1test.rb +616 -0
  20. data/lib/tests/sctest.rb +387 -0
  21. data/lib/tests/testdata/F3-3.00-32564-SC1.fec +3 -0
  22. data/lib/tests/testdata/F3-3.00-32777.fec +17 -0
  23. data/lib/tests/testdata/F3-3.00-32909-SC.fec +3 -0
  24. data/lib/tests/testdata/F3-3.00-32933-SB.fec +3 -0
  25. data/lib/tests/testdata/F3-3.00-32933.fec +658 -0
  26. data/lib/tests/testdata/F3-5.00-97348.fec +410 -0
  27. data/lib/tests/testdata/F3-5.00-97424-SB.fec +3 -0
  28. data/lib/tests/testdata/F3-5.00-97424-SC.fec +3 -0
  29. data/lib/tests/testdata/F3-5.00-97424.fec +12 -0
  30. data/lib/tests/testdata/F3-5.00-97986-SC1.fec +3 -0
  31. data/lib/tests/testdata/F3-5.1-116177-SC1.fec +3 -0
  32. data/lib/tests/testdata/F3-5.1-116437-SC2.fec +370 -0
  33. data/lib/tests/testdata/F3-5.1-126642.fec +448 -0
  34. data/lib/tests/testdata/F3-5.1-126655-SB.fec +3 -0
  35. data/lib/tests/testdata/F3-5.1-126655-SC.fec +3 -0
  36. data/lib/tests/testdata/F3-5.1-126655.fec +70 -0
  37. data/lib/tests/testdata/F3-5.2-170434.fec +298 -0
  38. data/lib/tests/testdata/F3-5.2-170443.fec +5 -0
  39. data/lib/tests/testdata/F3-5.2-170775-SC.fec +3 -0
  40. data/lib/tests/testdata/F3-5.2-170890-SC.fec +3 -0
  41. data/lib/tests/testdata/F3-5.2-170890.fec +21 -0
  42. data/lib/tests/testdata/F3-5.2-171146-SC1.fec +3 -0
  43. data/lib/tests/testdata/F3-5.2-171146.fec +490 -0
  44. data/lib/tests/testdata/F3-5.3-210119-SB.fec +8 -0
  45. data/lib/tests/testdata/F3-5.3-210119.fec +86 -0
  46. data/lib/tests/testdata/F3-5.3-210142-SC1.fec +3 -0
  47. data/lib/tests/testdata/F3-5.3-210142.fec +414 -0
  48. data/lib/tests/testdata/F3-5.3-210250.fec +584 -0
  49. data/lib/tests/testdata/F3-5.3-212438-SC.fec +3 -0
  50. data/lib/tests/testdata/F3-6.1-331453-SB.fec +3 -0
  51. data/lib/tests/testdata/F3-6.1-331453.fec +414 -0
  52. data/lib/tests/testdata/F3-6.1-332530-SC.fec +4 -0
  53. data/lib/tests/testdata/F3-6.1-332675.fec +1140 -0
  54. data/lib/tests/testdata/F3-6.1-333405-SC1.fec +3 -0
  55. data/lib/tests/testdata/F3-6.1-333405.fec +199 -0
  56. data/lib/tests/testdata/F3-6.2-350353-SC.fec +4 -0
  57. data/lib/tests/testdata/F3-6.2-350353.fec +882 -0
  58. data/lib/tests/testdata/F3-6.2-350775-SB.fec +3 -0
  59. data/lib/tests/testdata/F3-6.2-350775.fec +406 -0
  60. data/lib/tests/testdata/F3-6.2-350844-SC1.fec +3 -0
  61. data/lib/tests/testdata/F3-6.2-350844.fec +139 -0
  62. data/lib/tests/testdata/F3-6.3-413014.fec +624 -0
  63. data/lib/tests/testdata/F3-6.3-413060-SC.fec +3 -0
  64. data/lib/tests/testdata/F3-6.3-413226-SB.fec +3 -0
  65. data/lib/tests/testdata/F3-6.3-413226.fec +219 -0
  66. data/lib/tests/testdata/F3-6.3-413284-SC1.fec +3 -0
  67. data/lib/tests/testdata/F3-6.4-420048-SC.fec +3 -0
  68. data/lib/tests/testdata/F3-6.4-420048.fec +325 -0
  69. data/lib/tests/testdata/F3-6.4-420106.fec +3 -0
  70. data/lib/tests/testdata/F3-6.4-423088-SC1.fec +3 -0
  71. data/lib/tests/testdata/F3-6.4-423088.fec +59 -0
  72. data/lib/tests/testdata/F3-6.4-424094.fec +969 -0
  73. data/lib/tests/testdata/F3-6.4-424586-SB.fec +4 -0
  74. data/lib/tests/testdata/F3-6.4-424586.fec +131 -0
  75. data/lib/tests.rb +6 -0
  76. metadata +153 -0
data/lib/fechell.rb ADDED
@@ -0,0 +1,353 @@
1
+ require "rubygems"
2
+ require "fastercsv"
3
+
4
# Reader for FEC electronic filing (.fec) files.
#
# Field layouts are loaded from the semicolon-separated definition files in
# defs/<version>.csv next to this file: the first column of each row names a
# record type and the remaining columns name its fields, in order.
#
# Typical use:
#
#   FECHell.new.process("filing.fec") { |schedule, values| ... }
class FECHell

  # Field definitions keyed by format version, then by record type.
  # Stays nil until load_modules has run.
  @@modules = nil

  # Record types that may appear inside a filing of each top-level form type.
  @@valid_lines = {
    "F1" => ["HDR", "TEXT"],
    "F1S" => ["HDR", "TEXT"],
    "F1M" => ["HDR", "TEXT"],
    "F2" => ["HDR", "TEXT"],
    "F2S" => ["HDR", "TEXT"],
    "F24" => ["HDR", "SE", "SF", "TEXT"],
    "F3" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3S" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3X" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3Z" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3P" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3PS" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3P31" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD","TEXT"],
    "F3L" => ["HDR", "SA3L", "SB3L","TEXT"],
    "F4" => ["HDR", "SA", "SB", "SC", "SD","TEXT"],
    "F5" => ["HDR", "F56", "F57"],
    "F6" => ["HDR", "F65"],
    "F7" => ["HDR", "F76"],
    "F8" => ["HDR", "F82", "F83"],
    "F9" => ["HDR", "F91","F92","F93","F94" ],
    "F10" => ["HDR", "F10.5"],
    "F13" => ["HDR", "F132", "F133"],
    "F99" => ["HDR","[BEGINTEXT]"]
  }

  # Field separator per format version.
  # NOTE(review): nothing in this class reads this table, and it lists "\x1e"
  # for 6.x while guess_seperator detects "\x1c" - confirm which is correct
  # before relying on it.
  SEPERATORS = {"3.00" => ",",
    "5.00" => ",",
    "5.1" => ",",
    "5.2" => ",",
    "5.3" => ",",
    "6.1" => "\x1e",
    "6.2" => "\x1e",
    "6.3" => "\x1e",
    "6.4" => "\x1e"
  }

  # Loads every defs/*.csv definition file into the class-wide table.
  #
  # The table is only published once all files have been read, so a failure
  # part-way through does not leave a half-populated table behind that would
  # prevent a later retry. Rows without a record type are skipped.
  #
  # Returns the loaded table (version => { record type => [field names] }).
  def self.load_modules
    loaded = {}

    Dir["#{File.dirname(__FILE__)}/defs/*.csv"].each do |filename|
      formats = {}

      # foreach closes the definition file once iteration is done.
      FasterCSV.foreach(filename, :col_sep => ';') do |line|
        next if line.nil? || line[0].nil?

        schedule = line[0].gsub(' ', '').gsub('Sch', 'S')
        formats[schedule] = line[1..-1]
      end

      loaded[File.basename(filename, ".csv")] = formats
    end

    @@modules = loaded
  end

  # Loads the field definitions on first use and prepares the matcher that
  # maps a full form type (e.g. "F3XN") to its base form (e.g. "F3X").
  def initialize
    FECHell.load_modules if @@modules.nil?

    # Longest names first so "F3P31" wins over "F3P", which wins over "F3".
    forms = @@valid_lines.keys.sort { |l, r| r.size <=> l.size }
    alternatives = forms.map { |form| Regexp.escape(form) }.join('|')
    @form_matcher = Regexp.new("^(#{alternatives})[ANT]?")
  end

  # Normalises version strings written by older filing software
  # (Public Affairs Support Services Inc.): "5.30" -> "5.3", "3.0" -> "3.00".
  #
  # version - version string from the file, or nil.
  #
  # Returns the cleaned, whitespace-stripped version; nil stays nil.
  def clean_fec_version(version)
    return version if version.nil?

    cleaned = version.strip
    return "5.3" if cleaned == "5.30"
    return "3.00" if cleaned == "3.0"

    cleaned
  end

  # Reads just enough of a filing to learn its separator, header and form type.
  #
  # data - a path (String), an Array of lines, or an object responding to
  #        #gets (e.g. StringIO). A stream passed in is closed before
  #        returning, as in earlier versions.
  #
  # Two header styles are recognised: a "/* Header" ... "/* End Header" block
  # of key=value lines (comma separated files), or a single separated HDR line.
  # Blank lines before the first form record are skipped, and input that ends
  # early yields an empty form type instead of raising EOFError.
  #
  # Returns [seperator, header, form_type, remaining_fields]. On malformed CSV
  # the header is {}, form_type is '' and remaining_fields is [].
  def peek_format(data)
    seperator = ","
    header = {}
    form_type = ''
    parts_raw = []
    file = nil

    begin
      if data.is_a?(Array)
        pending = data.dup # never consume the caller's array
        next_line = lambda { pending.shift }
      else
        file = data.respond_to?(:gets) ? data : File.open(data, "r")
        next_line = lambda { file.gets } # gets returns nil at EOF, readline raises
      end

      first_line = next_line.call.to_s

      if first_line.start_with?("/* Header")
        header[:record_type] = "HDR"
        line = next_line.call
        until line.nil? || line.start_with?("/* End Header")
          key, value = line.split("=")
          if key.to_s.start_with?("FEC_Ver")
            # strip, not strip!: the bang form returns nil when nothing changes
            header[:fec_version] = clean_fec_version(value.to_s.strip)
          end
          line = next_line.call
        end
      else
        seperator = guess_seperator(first_line)
        fields = FasterCSV.parse(first_line.gsub(/\n/, "").strip,
                                 :col_sep => seperator,
                                 :skip_blanks => true)[0] || []

        header[:record_type] = fields[0]
        header[:ef_type] = fields[1]
        header[:fec_version] = clean_fec_version(fields[2])
        header[:software_name] = fields[3]
        header[:software_version] = fields[4]
        header[:report_id] = fields[5]
        header[:report_number] = fields[6]
      end

      # sometimes the line after the header is blank, so skip ahead
      record = next_content_line(next_line)
      row = FasterCSV.parse(record.to_s.strip, :col_sep => seperator)[0]
      unless row.nil? || row.empty?
        form_type = row[0]
        parts_raw = row[1..-1]
      end
    rescue FasterCSV::MalformedCSVError
      header = {}
      parts_raw = []
      form_type = ''
    ensure
      # close on every path so a parse error cannot leak the handle
      file.close unless file.nil? || file.closed?
    end

    return seperator, header, form_type, parts_raw
  end

  # Picks the field separator from the first line of a filing: the ASCII 28
  # character ("\x1c") when the line contains one, a comma otherwise.
  def guess_seperator(first_line)
    # String#index(28) only worked on Ruby 1.8; compare against the character.
    first_line.to_s.include?("\x1c") ? "\x1c" : ","
  end

  # Returns the record-type => field-names table for a version (nil if unknown).
  def schedules_for(version)
    @@modules[version]
  end

  # Returns the field names of one record type in one version.
  # Raises NoMethodError when the version has no definitions.
  def fields_for(version,schedule)
    @@modules[version][schedule.upcase]
  end

  # Finds the defined record types that could describe a record.
  #
  # version       - format version; surrounding whitespace is ignored.
  # full_schedule - record type as written in the file (e.g. "SA11AI").
  #
  # An exact (case-insensitive) match wins. Otherwise every defined record
  # type that is a prefix of full_schedule is returned, longest first. The
  # optional A/N/T suffix on form types needs no special handling because the
  # match is not anchored at the end. Neither argument is modified.
  #
  # Returns an Array of record type names; empty when nothing matches or the
  # version has no definitions.
  def guess_schedule(version,full_schedule)
    wanted = full_schedule.to_s.upcase
    formats = @@modules[version.to_s.strip]
    return [] if formats.nil?
    return [wanted] if formats.key?(wanted)

    candidates = formats.keys.select do |key|
      wanted =~ Regexp.new("^#{Regexp.escape(key)}")
    end

    # return the longest match first
    candidates.sort { |l, r| r.size <=> l.size }
  end

  # Reads the header and the first form record of a filing.
  #
  # filename - path to the filing (must be a path: the file is reopened as CSV).
  #
  # Returns [fec_version, upper-cased form type, record type, values]. values
  # is nil for format versions below 3, and [] when the form type is not
  # recognised or the file holds no form record.
  def header_lines(filename)
    seperator, header, form_type, _elements = peek_format(filename)

    schedule = guess_form(form_type)
    # Earlier versions upper-cased the form type in place; keep returning it so.
    form_type = form_type.upcase unless form_type.nil?
    version = header[:fec_version]

    if version.to_i < 3
      return version, form_type, schedule, nil
    end

    record = nil
    # Block form closes the file even if reading raises.
    FasterCSV.open(filename, "r", :col_sep => seperator, :skip_blanks => true) do |csv|
      row = csv.readline
      # Rows are arrays, so the header markers are looked for in the first field.
      if !row.nil? && row[0].to_s.start_with?("/* Header")
        until row.nil? || row[0].to_s.start_with?("/* End Header")
          row = csv.readline
        end
      end
      record = csv.readline
    end

    if schedule.nil? || record.nil?
      return version, form_type, nil, []
    end

    schedules, values = process_line(version, schedule, record)
    return version, form_type, schedules[0], values
  end

  # Maps the fields of one record onto the field names defined for it.
  #
  # fec_version - format version of the filing.
  # schedule    - record type as written in the file.
  # line        - Array of field values.
  #
  # Returns [guesses, values]: guesses is the list from guess_schedule and
  # values maps field name => field value for the best guess. When no
  # definition is found the result is [["UNKNOWN"], {"line" => line}].
  def process_line(fec_version,schedule,line)
    guesses = guess_schedule(fec_version, schedule)
    formats = @@modules[fec_version.to_s.strip]
    offsets = formats.nil? ? nil : formats[guesses[0]]

    if offsets.nil?
      return ["UNKNOWN"], {"line" => line}
    end

    values = {}
    offsets.each_with_index { |offset, index| values[offset] = line[index] }

    return guesses, values
  end

  # Reduces a full form type such as "F3XN" to its base form ("F3X").
  # Matching is case-insensitive and the argument is left untouched.
  #
  # Returns the base form name, or nil when the form type is not recognised.
  def guess_form(full_form)
    md = @form_matcher.match(full_form.to_s.upcase)
    md.nil? ? nil : md[1]
  end

  # Yields every recognised record of a filing as [record_type, values].
  #
  # filename - path to the filing.
  # options  - accepted for compatibility; currently unused.
  #
  # values maps field name => field value. When a record type is valid for the
  # form but has no field definition, values is {"line" => raw fields}.
  # Records whose type is not valid for the form are skipped. If the form type
  # or format version is not recognised an error is printed and nothing is
  # yielded. Without a block an Enumerator is returned.
  def process(filename,options = {})
    return enum_for(:process, filename, options) unless block_given?

    seperator, header, form_type, _elements = peek_format(filename)

    main_form = guess_form(form_type)

    # guess_form signals "no match" with nil
    if main_form.nil?
      puts "ERROR: #{filename} - type was #{form_type} we found nothing"
      return
    end

    formats = @@modules[header[:fec_version]]
    if formats.nil?
      puts "ERROR: #{filename} - unsupported FEC version #{header[:fec_version]}"
      return
    end

    top_level_type = form_type.upcase
    valid_schedules = @@valid_lines[main_form].sort { |l, r| r.size <=> l.size }
    # Escape the names: "[BEGINTEXT]" and "F10.5" contain regex metacharacters.
    matcher = Regexp.new("^(#{valid_schedules.map { |s| Regexp.escape(s) }.join('|')})")

    begin
      FasterCSV.foreach(filename, :col_sep => seperator, :skip_blanks => true) do |line|
        next if line.nil? || line.size == 0

        sch = line[0]
        next if sch.nil?

        # first two lines are [FORM_TYPE] or HDR
        if sch.upcase == top_level_type
          matched_schedule = main_form
        elsif sch == "HDR"
          matched_schedule = sch
        else
          md = matcher.match(sch)
          matched_schedule = md.nil? ? nil : md[1]
        end
        next if matched_schedule.nil?

        offsets = formats[matched_schedule]
        if offsets.nil?
          values = {"line" => line}
        else
          values = {}
          offsets.each_with_index { |offset, index| values[offset] = line[index] }
        end

        yield [matched_schedule, values]
      end
    rescue FasterCSV::MalformedCSVError
      puts "malformed content in file #{filename}"
    end
  end

  # Older record walker: guesses the record type of every line from the field
  # definitions instead of the per-form list of valid record types.
  #
  # filename - path to the filing.
  # options  - :ignore_schedules => Array of record type prefixes (regex
  #            fragments) to skip. An empty list skips nothing.
  #
  # Yields [record_type, values] per record; record_type is "UNKNOWN" with
  # values {"line" => raw fields} when no definition matches, and "TEXT" for
  # every record of an F99 filing. Without a block an Enumerator is returned.
  def process_oldmatcher(filename,options = {})
    return enum_for(:process_oldmatcher, filename, options) unless block_given?

    seperator, header, form_type, _elements = peek_format(filename)

    schedules = guess_schedule(header[:fec_version], form_type)
    if schedules.size == 0
      puts "ERROR: #{filename} - type was #{form_type} we found nothing"
    end

    free_text_form = form_type.to_s.upcase == 'F99'
    matcher = Regexp.new('^[STFH]')

    # Group the alternatives so the anchor applies to each of them, and build
    # the pattern once rather than per record.
    ignored = options[:ignore_schedules]
    ignore_matcher = nil
    unless ignored.nil? || ignored.empty?
      ignore_matcher = Regexp.new("^(?:#{ignored.join('|')})")
    end

    begin
      # foreach closes the file when iteration ends
      FasterCSV.foreach(filename, :col_sep => seperator, :skip_blanks => true) do |line|
        next if line.nil? || line.size == 0

        sch = line[0]
        next if sch.nil?
        next unless matcher.match(sch)
        next if ignore_matcher && ignore_matcher.match(sch.upcase)

        if free_text_form
          guesses = ["TEXT"]
          values = { "line" => line }
        else
          guesses, values = process_line(header[:fec_version], sch, line)
        end

        next if guesses.nil? || guesses.size == 0
        next if values.nil?

        yield [guesses[0], values]
      end
    rescue FasterCSV::MalformedCSVError
      puts "malformed content in file #{filename}"
    end
  end

  private

  # Returns the next line from next_line (a callable returning a String, or
  # nil at the end of input) that is not blank, or nil when input runs out.
  def next_content_line(next_line)
    line = next_line.call
    line = next_line.call while !line.nil? && line.strip.empty?
    line
  end

end