fechell 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +75 -0
- data/README.rdoc +130 -0
- data/Rakefile +16 -0
- data/fechell.gemspec +33 -0
- data/lib/defs/3.00.csv +1 -0
- data/lib/defs/5.00.csv +1 -0
- data/lib/defs/5.1.csv +1 -0
- data/lib/defs/5.2.csv +1 -0
- data/lib/defs/5.3.csv +1 -0
- data/lib/defs/6.1.csv +1 -0
- data/lib/defs/6.2.csv +1 -0
- data/lib/defs/6.3.csv +1 -0
- data/lib/defs/6.4.csv +1 -0
- data/lib/fechell/forms.rb +1285 -0
- data/lib/fechell.rb +353 -0
- data/lib/tests/f3test.rb +1035 -0
- data/lib/tests/satest.rb +346 -0
- data/lib/tests/sbtest.rb +390 -0
- data/lib/tests/sc1test.rb +616 -0
- data/lib/tests/sctest.rb +387 -0
- data/lib/tests/testdata/F3-3.00-32564-SC1.fec +3 -0
- data/lib/tests/testdata/F3-3.00-32777.fec +17 -0
- data/lib/tests/testdata/F3-3.00-32909-SC.fec +3 -0
- data/lib/tests/testdata/F3-3.00-32933-SB.fec +3 -0
- data/lib/tests/testdata/F3-3.00-32933.fec +658 -0
- data/lib/tests/testdata/F3-5.00-97348.fec +410 -0
- data/lib/tests/testdata/F3-5.00-97424-SB.fec +3 -0
- data/lib/tests/testdata/F3-5.00-97424-SC.fec +3 -0
- data/lib/tests/testdata/F3-5.00-97424.fec +12 -0
- data/lib/tests/testdata/F3-5.00-97986-SC1.fec +3 -0
- data/lib/tests/testdata/F3-5.1-116177-SC1.fec +3 -0
- data/lib/tests/testdata/F3-5.1-116437-SC2.fec +370 -0
- data/lib/tests/testdata/F3-5.1-126642.fec +448 -0
- data/lib/tests/testdata/F3-5.1-126655-SB.fec +3 -0
- data/lib/tests/testdata/F3-5.1-126655-SC.fec +3 -0
- data/lib/tests/testdata/F3-5.1-126655.fec +70 -0
- data/lib/tests/testdata/F3-5.2-170434.fec +298 -0
- data/lib/tests/testdata/F3-5.2-170443.fec +5 -0
- data/lib/tests/testdata/F3-5.2-170775-SC.fec +3 -0
- data/lib/tests/testdata/F3-5.2-170890-SC.fec +3 -0
- data/lib/tests/testdata/F3-5.2-170890.fec +21 -0
- data/lib/tests/testdata/F3-5.2-171146-SC1.fec +3 -0
- data/lib/tests/testdata/F3-5.2-171146.fec +490 -0
- data/lib/tests/testdata/F3-5.3-210119-SB.fec +8 -0
- data/lib/tests/testdata/F3-5.3-210119.fec +86 -0
- data/lib/tests/testdata/F3-5.3-210142-SC1.fec +3 -0
- data/lib/tests/testdata/F3-5.3-210142.fec +414 -0
- data/lib/tests/testdata/F3-5.3-210250.fec +584 -0
- data/lib/tests/testdata/F3-5.3-212438-SC.fec +3 -0
- data/lib/tests/testdata/F3-6.1-331453-SB.fec +3 -0
- data/lib/tests/testdata/F3-6.1-331453.fec +414 -0
- data/lib/tests/testdata/F3-6.1-332530-SC.fec +4 -0
- data/lib/tests/testdata/F3-6.1-332675.fec +1140 -0
- data/lib/tests/testdata/F3-6.1-333405-SC1.fec +3 -0
- data/lib/tests/testdata/F3-6.1-333405.fec +199 -0
- data/lib/tests/testdata/F3-6.2-350353-SC.fec +4 -0
- data/lib/tests/testdata/F3-6.2-350353.fec +882 -0
- data/lib/tests/testdata/F3-6.2-350775-SB.fec +3 -0
- data/lib/tests/testdata/F3-6.2-350775.fec +406 -0
- data/lib/tests/testdata/F3-6.2-350844-SC1.fec +3 -0
- data/lib/tests/testdata/F3-6.2-350844.fec +139 -0
- data/lib/tests/testdata/F3-6.3-413014.fec +624 -0
- data/lib/tests/testdata/F3-6.3-413060-SC.fec +3 -0
- data/lib/tests/testdata/F3-6.3-413226-SB.fec +3 -0
- data/lib/tests/testdata/F3-6.3-413226.fec +219 -0
- data/lib/tests/testdata/F3-6.3-413284-SC1.fec +3 -0
- data/lib/tests/testdata/F3-6.4-420048-SC.fec +3 -0
- data/lib/tests/testdata/F3-6.4-420048.fec +325 -0
- data/lib/tests/testdata/F3-6.4-420106.fec +3 -0
- data/lib/tests/testdata/F3-6.4-423088-SC1.fec +3 -0
- data/lib/tests/testdata/F3-6.4-423088.fec +59 -0
- data/lib/tests/testdata/F3-6.4-424094.fec +969 -0
- data/lib/tests/testdata/F3-6.4-424586-SB.fec +4 -0
- data/lib/tests/testdata/F3-6.4-424586.fec +131 -0
- data/lib/tests.rb +6 -0
- metadata +153 -0
data/lib/fechell.rb
ADDED
@@ -0,0 +1,353 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "fastercsv"
|
3
|
+
|
4
|
+
# Reader for .fec filing files.
#
# The field layout of every record type is taken from the semicolon separated
# definition files in <tt>defs/<version>.csv</tt> next to this file. Each row
# of such a file is a schedule name followed by the names of its fields.
#
# Typical use:
#
#   FECHell.new.process("12345.fec") do |schedule, values|
#     # schedule is e.g. "SA", values maps field name => field content
#   end
class FECHell

  # Field definitions: version => { schedule name => [field names] }.
  # nil until FECHell.load_modules has run.
  @@modules = nil

  # Record types that may appear in a filing, keyed by the filing's main form.
  @@valid_lines = {
    "F1"    => ["HDR", "TEXT"],
    "F1S"   => ["HDR", "TEXT"],
    "F1M"   => ["HDR", "TEXT"],
    "F2"    => ["HDR", "TEXT"],
    "F2S"   => ["HDR", "TEXT"],
    "F24"   => ["HDR", "SE", "SF", "TEXT"],
    "F3"    => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3S"   => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3X"   => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3Z"   => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3P"   => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3PS"  => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3P31" => ["HDR", "SA", "SB", "SC", "SC1", "SC2", "SD", "TEXT"],
    "F3L"   => ["HDR", "SA3L", "SB3L", "TEXT"],
    "F4"    => ["HDR", "SA", "SB", "SC", "SD", "TEXT"],
    "F5"    => ["HDR", "F56", "F57"],
    "F6"    => ["HDR", "F65"],
    "F7"    => ["HDR", "F76"],
    "F8"    => ["HDR", "F82", "F83"],
    "F9"    => ["HDR", "F91", "F92", "F93", "F94"],
    "F10"   => ["HDR", "F10.5"],
    "F13"   => ["HDR", "F132", "F133"],
    "F99"   => ["HDR", "[BEGINTEXT]"]
  }

  # Field separator per format version. The 6.x entries use character 28
  # ("\x1c"), the same character guess_seperator detects; they previously
  # listed "\x1e", which never matched what the parser actually looks for.
  SEPERATORS = {
    "3.00" => ",",
    "5.00" => ",",
    "5.1"  => ",",
    "5.2"  => ",",
    "5.3"  => ",",
    "6.1"  => "\x1c",
    "6.2"  => "\x1c",
    "6.3"  => "\x1c",
    "6.4"  => "\x1c"
  }.freeze

  # Reads every defs/*.csv file next to this source file into the shared
  # definition table. The table is swapped in only after all files were read,
  # so a failure half way through does not leave a partial table behind.
  #
  # Returns the list of definition files that were read.
  def self.load_modules
    definition_files = Dir["#{File.dirname(__FILE__)}/defs/*.csv"]
    loaded = {}

    definition_files.each do |filename|
      version = File.basename(filename, ".csv")
      formats = {}

      # foreach closes the file when it is done; open(...).each never did.
      FasterCSV.foreach(filename, :col_sep => ';') do |row|
        next if row.nil? || row[0].nil? # a row without a schedule name defines nothing
        schedule = row[0].gsub(' ', '').gsub('Sch', 'S')
        formats[schedule] = row[1..-1]
      end

      loaded[version] = formats
    end

    @@modules = loaded
    definition_files
  end

  # Loads the definitions on first use and prepares the matcher that maps a
  # form type such as "F3XN" to its base form "F3X".
  def initialize
    FECHell.load_modules if @@modules.nil?

    # Longest names first so that "F3X" wins over "F3".
    forms = @@valid_lines.keys.sort_by { |name| -name.size }
    @form_matcher = Regexp.new("^(#{forms.map { |name| Regexp.escape(name) }.join('|')})[ANT]?")
  end

  # Normalises a version string: surrounding whitespace is removed and the
  # misspelt versions written by some older software ("5.30", "3.0") are mapped
  # to the names used by the definition files. nil is passed through.
  def clean_fec_version(version)
    return version if version.nil?

    cleaned = version.to_s.strip
    case cleaned
    when "5.30" then "5.3"
    when "3.0"  then "3.00"
    else cleaned
    end
  end

  # Looks at the first records of a filing without parsing all of it.
  #
  # data:: a file name (String), an object that responds to +gets+ (for
  #        example a StringIO) or an Array of lines. An Array is not modified.
  #
  # Returns [seperator, header, form_type, fields]:
  # seperator:: field separator in use
  # header::    Hash with :record_type, :fec_version and, for single line
  #             headers, :ef_type, :software_name, :software_version,
  #             :report_id and :report_number
  # form_type:: first field of the first record after the header ('' if none)
  # fields::    remaining fields of that record
  #
  # Malformed CSV yields an empty header, form type and field list. The stream
  # that was read from is always closed, also when an error is raised.
  # Raises ArgumentError when +data+ is none of the supported kinds.
  def peek_format(data)
    seperator = nil
    header = {}
    form_type = ''
    parts_raw = []
    file = nil

    begin
      if data.is_a?(Array)
        pending = data.dup # work on a copy so the caller's array stays intact
        next_line = lambda { pending.shift }
      else
        file = data.is_a?(String) ? File.open(data, "r") : data
        unless file.respond_to?(:gets)
          file = nil
          raise ArgumentError, "peek_format expects a file name, an IO-like object or an Array of lines"
        end
        # gets returns nil at end of input where readline would raise EOFError
        next_line = lambda { file.gets }
      end

      first_line = next_line.call.to_s
      # sometimes the 2nd line is blank, so skip ahead to the first real one
      line = next_content_line(next_line)

      if first_line =~ /^\/\* Header/
        # multi line "key = value" header, closed by an "End Header" marker
        seperator = ","
        until line.nil? || line =~ /^\/\* End Header/
          key, value = line.split("=")
          if key.to_s =~ /^FEC_Ver_*/
            header[:fec_version] = clean_fec_version(value.to_s)
          end
          line = next_line.call
        end
        header[:record_type] = "HDR"
        line = next_content_line(next_line)
      else
        seperator = guess_seperator(first_line)
        cleaned = first_line.gsub(/\n/, "").strip
        fields = FasterCSV.parse(cleaned, :col_sep => seperator, :skip_blanks => true)[0] || []

        header[:record_type]      = fields[0]
        header[:ef_type]          = fields[1]
        header[:fec_version]      = clean_fec_version(fields[2])
        header[:software_name]    = fields[3]
        header[:software_version] = fields[4]
        header[:report_id]        = fields[5]
        header[:report_number]    = fields[6]
      end

      parts_raw = FasterCSV.parse(line.to_s.strip, :col_sep => seperator)[0] || []
      form_type = parts_raw.shift.to_s
    rescue FasterCSV::MalformedCSVError
      header = {}
      parts_raw = []
      form_type = ''
    ensure
      if file.respond_to?(:close) && !(file.respond_to?(:closed?) && file.closed?)
        file.close
      end
    end

    return seperator, header, form_type, parts_raw
  end

  # Returns "\x1c" when the line contains that character (code 28) and ","
  # otherwise. The check is done with a one character string because
  # String#index no longer accepts a character code.
  def guess_seperator(first_line)
    first_line.to_s.include?("\x1c") ? "\x1c" : ","
  end

  # Returns the schedule definitions ({ schedule => [field names] }) of
  # +version+, or nil when that version is unknown.
  def schedules_for(version)
    @@modules[version]
  end

  # Returns the field names of +schedule+ (case insensitive) in +version+, or
  # nil when the version or the schedule is unknown.
  def fields_for(version, schedule)
    definitions = @@modules[version]
    definitions.nil? ? nil : definitions[schedule.upcase]
  end

  # Finds the defined schedules that a record type such as "SA11AI" may belong
  # to. An exact match is returned alone; otherwise every schedule name that
  # is a prefix of the record type is returned, longest first.
  #
  # Neither argument is modified. Returns [] when the version is unknown.
  def guess_schedule(version, full_schedule)
    definitions = @@modules[version.to_s.strip]
    return [] if definitions.nil?

    wanted = full_schedule.to_s.upcase
    return [wanted] if definitions.key?(wanted)

    # The former "^key[ANT]?" / "^key" patterns were plain prefix tests (the
    # suffix was optional and the end not anchored); start_with? does the same
    # without treating characters such as "." or "[" in a name as regex syntax.
    definitions.keys.select { |key| wanted.start_with?(key) }.sort_by { |key| -key.size }
  end

  # Reads the header and the first form record of a version 3+ filing.
  #
  # Returns [fec_version, form_type, schedule, values]. form_type is upper
  # cased. For versions below 3 the file is not parsed any further and values
  # is nil; schedule is then the guessed base form.
  def header_lines(filename)
    seperator, header, form_type = peek_format(filename)

    form_type = form_type.to_s.upcase
    schedule = guess_form(form_type)

    if header[:fec_version].to_i < 3
      return header[:fec_version], form_type, schedule, nil
    end

    schedules1 = []
    values1 = []

    csv = FasterCSV.open(filename, "r", :col_sep => seperator, :skip_blanks => true)
    begin
      line = csv.readline

      # Rows are arrays, so the marker has to be looked for in the first cell.
      if line && line[0].to_s =~ /^\/\* Header/
        loop do
          line = csv.readline
          break if line.nil? || line[0].to_s =~ /^\/\* End Header/
        end
      end

      line = csv.readline
      unless schedule.nil? || line.nil?
        schedules1, values1 = process_line(header[:fec_version], schedule, line)
      end
    ensure
      csv.close
    end

    return header[:fec_version], form_type, schedules1[0], values1
  end

  # Maps the cells of one parsed row onto the field names of its schedule.
  #
  # Returns [guesses, values] where guesses are the candidate schedules (best
  # first) and values maps field name => cell. When no definition is found the
  # result is [["UNKNOWN"], {"line" => line}].
  def process_line(fec_version, schedule, line)
    guesses = guess_schedule(fec_version, schedule)
    definitions = @@modules[fec_version.to_s.strip] || {}
    offsets = definitions[guesses[0]]

    if offsets.nil?
      return ["UNKNOWN"], { "line" => line }
    end

    values = {}
    offsets.each_with_index { |offset, index| values[offset] = line[index] }

    return guesses, values
  end

  # Returns the base form of a form type ("F3XN" => "F3X"), or nil when the
  # type is unknown. The argument is not modified, so frozen strings are fine.
  def guess_form(full_form)
    md = @form_matcher.match(full_form.to_s.upcase)
    md.nil? ? nil : md[1]
  end

  # Parses +filename+ and yields [schedule, values] for every record whose
  # type is valid for the filing's main form. Records of other types, and
  # records whose schedule has no field definition, are skipped.
  #
  # +options+ is accepted for symmetry with process_oldmatcher and is
  # currently not used. An unknown form type or format version is reported on
  # standard output and nothing is yielded.
  def process(filename, options = {})
    seperator, header, form_type = peek_format(filename)

    main_form = guess_form(form_type)

    # guess_form answers nil for an unknown form, never "UNKNOWN"
    if main_form.nil?
      puts "ERROR: #{filename} - type was #{form_type} we found nothing"
      return
    end

    definitions = @@modules[header[:fec_version]]
    if definitions.nil?
      puts "ERROR: #{filename} - no field definitions for version #{header[:fec_version]}"
      return
    end

    # Names are escaped: "[BEGINTEXT]" and "F10.5" must match literally.
    valid_schedules_this_form = @@valid_lines[main_form].sort_by { |name| -name.size }
    matcher = Regexp.new("^(#{valid_schedules_this_form.map { |name| Regexp.escape(name) }.join('|')})")

    begin
      FasterCSV.foreach(filename, :col_sep => seperator, :skip_blanks => true) do |line|
        next if line.nil? || line.size == 0

        sch = line[0]
        matched_schedule = nil
        # first two lines are [FORM_TYPE] or HDR
        if sch == form_type
          matched_schedule = main_form
        elsif sch == "HDR"
          matched_schedule = sch
        else
          md = matcher.match(sch.to_s)
          matched_schedule = md[1] unless md.nil?
        end
        next if matched_schedule.nil?

        offsets = definitions[matched_schedule]
        next if offsets.nil?

        values = {}
        offsets.each_with_index { |offset, index| values[offset] = line[index] }
        yield [matched_schedule, values]
      end
    rescue FasterCSV::MalformedCSVError
      puts "malformed content in file #{filename}"
    end
  end

  # Older variant of process: every record whose type starts with S, T, F or H
  # is looked up with guess_schedule and yielded as [schedule, values]; records
  # without a definition are yielded as ["UNKNOWN", {"line" => row}]. In F99
  # filings every record is yielded as ["TEXT", {"line" => row}].
  #
  # options[:ignore_schedules]:: pattern fragment(s); records whose type starts
  #                              with any of them are skipped. An empty list
  #                              ignores nothing.
  def process_oldmatcher(filename, options = {})
    seperator, header, form_type = peek_format(filename)

    form_type = form_type.to_s.upcase
    schedules = guess_schedule(header[:fec_version], form_type)
    if schedules.size == 0
      puts "ERROR: #{filename} - type was #{form_type} we found nothing"
    end

    matcher = Regexp.new('^[STFH]')

    # Built once, and grouped so that "^" applies to every alternative
    # ("^SA|SB" would match an "SB" anywhere in the string).
    ignore_list = Array(options[:ignore_schedules])
    ignored = ignore_list.empty? ? nil : Regexp.new("^(?:#{ignore_list.join('|')})")

    begin
      FasterCSV.foreach(filename, :col_sep => seperator, :skip_blanks => true) do |line|
        next if line.nil? || line.size == 0

        sch = line[0].to_s
        next unless matcher.match(sch)
        next if ignored && ignored.match(sch)

        if form_type == 'F99'
          guesses = ["TEXT"]
          values = { "line" => line }
        else
          guesses, values = process_line(header[:fec_version], sch, line)
        end

        next if guesses.nil? || guesses.size == 0
        next if values.nil?

        yield [guesses[0], values]
      end
    rescue FasterCSV::MalformedCSVError
      puts "malformed content in file #{filename}"
    end
  end

  private

  # Returns the next line produced by +reader+ that still has content after
  # its carriage returns and newlines are removed, or nil at end of input.
  def next_content_line(reader)
    loop do
      raw = reader.call
      return nil if raw.nil?

      text = raw.to_s.delete("\r\n")
      return text unless text.empty?
    end
  end

end
|