myl-fech 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +48 -0
- data/LICENSE +13 -0
- data/README.rdoc +82 -0
- data/Rakefile +3 -0
- data/autotest/discover.rb +1 -0
- data/fech.gemspec +40 -0
- data/lib/fech/comparison.rb +36 -0
- data/lib/fech/csv.rb +70 -0
- data/lib/fech/default_translations.rb +133 -0
- data/lib/fech/fech_utils.rb +76 -0
- data/lib/fech/filing.rb +341 -0
- data/lib/fech/map_generator.rb +233 -0
- data/lib/fech/mapped.rb +38 -0
- data/lib/fech/mappings.rb +67 -0
- data/lib/fech/rendered_maps.rb +238 -0
- data/lib/fech/translator.rb +138 -0
- data/lib/fech/version.rb +3 -0
- data/lib/fech.rb +15 -0
- data/sources/F1.csv +106 -0
- data/sources/F1M.csv +78 -0
- data/sources/F2.csv +43 -0
- data/sources/F24.csv +18 -0
- data/sources/F3.csv +1 -0
- data/sources/F3L.csv +27 -0
- data/sources/F3P.csv +208 -0
- data/sources/F3P31.csv +39 -0
- data/sources/F3PS.csv +94 -0
- data/sources/F3S.csv +36 -0
- data/sources/F3X.csv +125 -0
- data/sources/F4.csv +86 -0
- data/sources/F5.csv +39 -0
- data/sources/F56.csv +33 -0
- data/sources/F57.csv +44 -0
- data/sources/F6.csv +1 -0
- data/sources/F65.csv +1 -0
- data/sources/F7.csv +1 -0
- data/sources/F76.csv +1 -0
- data/sources/F9.csv +46 -0
- data/sources/F91.csv +17 -0
- data/sources/F92.csv +23 -0
- data/sources/F93.csv +27 -0
- data/sources/F94.csv +18 -0
- data/sources/F99.csv +1 -0
- data/sources/H1.csv +1 -0
- data/sources/H2.csv +1 -0
- data/sources/H3.csv +1 -0
- data/sources/H4.csv +1 -0
- data/sources/H5.csv +1 -0
- data/sources/H6.csv +1 -0
- data/sources/HDR.csv +10 -0
- data/sources/SchA.csv +50 -0
- data/sources/SchB.csv +50 -0
- data/sources/SchC.csv +41 -0
- data/sources/SchC1.csv +52 -0
- data/sources/SchC2.csv +19 -0
- data/sources/SchD.csv +34 -0
- data/sources/SchE.csv +57 -0
- data/sources/SchF.csv +55 -0
- data/sources/SchL.csv +1 -0
- data/sources/TEXT.csv +1 -0
- data/sources/headers/3.csv +1 -0
- data/sources/headers/5.0.csv +1 -0
- data/sources/headers/5.1.csv +1 -0
- data/sources/headers/5.2.csv +1 -0
- data/sources/headers/5.3.csv +1 -0
- data/sources/headers/6.1.csv +1 -0
- data/sources/headers/6.2.csv +1 -0
- data/sources/headers/6.3.csv +1 -0
- data/sources/headers/6.4.csv +1 -0
- data/sources/headers/7.0.csv +49 -0
- data/sources/headers/8.0.csv +49 -0
- data/sources/headers/ignore.csv +5 -0
- data/spec/comparison_spec.rb +30 -0
- data/spec/data/467627.fec +608 -0
- data/spec/data/723604.fec +4 -0
- data/spec/data/730635.fec +2 -0
- data/spec/data/747058.fec +4 -0
- data/spec/data/748730.fec +1196 -0
- data/spec/data/752356.fec +5 -0
- data/spec/data/753533.fec +7 -0
- data/spec/data/764901.fec +7 -0
- data/spec/data/765310.fec +2 -0
- data/spec/data/767339.fec +648 -0
- data/spec/data/82094.fec +144 -0
- data/spec/data/97405.fec +10 -0
- data/spec/default_translations_spec.rb +104 -0
- data/spec/fech_utils_spec.rb +29 -0
- data/spec/filing_spec.rb +314 -0
- data/spec/map_generator_spec.rb +49 -0
- data/spec/mapped_spec.rb +44 -0
- data/spec/mappings_spec.rb +46 -0
- data/spec/sources/F24.csv +18 -0
- data/spec/sources/F3P.csv +1 -0
- data/spec/sources/F3P31.csv +39 -0
- data/spec/sources/SchA.csv +1 -0
- data/spec/sources/SchB.csv +1 -0
- data/spec/sources/SchC.csv +1 -0
- data/spec/sources/headers/3.csv +1 -0
- data/spec/sources/headers/5.0.csv +1 -0
- data/spec/sources/headers/5.1.csv +1 -0
- data/spec/sources/headers/5.2.csv +1 -0
- data/spec/sources/headers/5.3.csv +1 -0
- data/spec/sources/headers/6.1.csv +1 -0
- data/spec/sources/headers/6.2.csv +1 -0
- data/spec/sources/headers/6.3.csv +1 -0
- data/spec/sources/headers/6.4.csv +1 -0
- data/spec/sources/headers/7.0.csv +1 -0
- data/spec/sources/headers/8.0.csv +49 -0
- data/spec/sources/headers/ignore.csv +5 -0
- data/spec/sources/sa.csv +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/translator_spec.rb +195 -0
- data/tasks/fech.rake +41 -0
- metadata +342 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fech (0.9.10)
|
5
|
+
fastercsv
|
6
|
+
people
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
columnize (0.3.6)
|
12
|
+
diff-lcs (1.1.2)
|
13
|
+
fastercsv (1.5.4)
|
14
|
+
linecache (0.43)
|
15
|
+
mocha (0.9.12)
|
16
|
+
people (0.2.1)
|
17
|
+
rake (0.8.7)
|
18
|
+
rcov (0.9.9)
|
19
|
+
rdoc (3.9.2)
|
20
|
+
rspec (2.6.0)
|
21
|
+
rspec-core (~> 2.6.0)
|
22
|
+
rspec-expectations (~> 2.6.0)
|
23
|
+
rspec-mocks (~> 2.6.0)
|
24
|
+
rspec-core (2.6.4)
|
25
|
+
rspec-expectations (2.6.0)
|
26
|
+
diff-lcs (~> 1.1.2)
|
27
|
+
rspec-mocks (2.6.0)
|
28
|
+
ruby-debug (0.10.4)
|
29
|
+
columnize (>= 0.1)
|
30
|
+
ruby-debug-base (~> 0.10.4.0)
|
31
|
+
ruby-debug-base (0.10.4)
|
32
|
+
linecache (>= 0.3)
|
33
|
+
yard (0.7.2)
|
34
|
+
|
35
|
+
PLATFORMS
|
36
|
+
ruby
|
37
|
+
|
38
|
+
DEPENDENCIES
|
39
|
+
bundler
|
40
|
+
fech!
|
41
|
+
linecache (= 0.43)
|
42
|
+
mocha
|
43
|
+
rake (= 0.8.7)
|
44
|
+
rcov
|
45
|
+
rdoc
|
46
|
+
rspec (~> 2.6)
|
47
|
+
ruby-debug
|
48
|
+
yard
|
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2011 The New York Times Company
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this library except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.rdoc
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
______ ______ ______ __
|
2
|
+
/\ ___\ /\ ___\ /\ ___\ /\ \___
|
3
|
+
\ \ __\ \ \ __\ \ \ \____ \ \ __ \
|
4
|
+
\ \_\ \ \_____\ \ \_____\ \ \_\ \_\
|
5
|
+
\/_/ \/_____/ \/_____/ \/_/\/_/
|
6
|
+
|
7
|
+
Fech makes it easy to parse electronic campaign finance filings[http://www.fec.gov/finance/disclosure/efile_search.shtml] by candidates, parties and political action committees from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data. Fech is an open source project of The New York Times, but contributions from anyone interested in working with F.E.C. filings are greatly appreciated.
|
8
|
+
|
9
|
+
|
10
|
+
NOTE: You're looking at the Make Your Laws fork of New York Times' Fech gem. It has bugfixes and features that NYT's doesn't. Take a look at the commit history / network graph.
|
11
|
+
|
12
|
+
Latest (myl-fech) version: 1.0.3
|
13
|
+
|
14
|
+
|
15
|
+
Fech is tested under Ruby versions 1.8.7, 1.9.2 and 1.9.3.
|
16
|
+
|
17
|
+
== Documentation
|
18
|
+
|
19
|
+
Can be found at Fech's Github page[http://nytimes.github.com/Fech/].
|
20
|
+
|
21
|
+
== News
|
22
|
+
|
23
|
+
* June 16, 2012: Version 1.0.1 released. Bug-fix for older Form 2 support.
|
24
|
+
* April 11, 2012: Version 1.0.0 released! Support for Ruby 1.9.3 added, all form types supported.
|
25
|
+
* April 9, 2012: Version 1.0.0.rc1 released. Release candidate with backwards-incompatible change (renaming zip attribute to zip_code).
|
26
|
+
* March 29, 2012: Version 0.9.10 released. Bug-fix for Form 24 in versions 6.4 and 7.0.
|
27
|
+
* March 28, 2012: Version 0.9.9 released. Bug-fix to add support for F3XA form type, support for Schedule H and L.
|
28
|
+
* March 23, 2012: Version 0.9.6 and 0.9.5 released. Bug-fixes for F6 mappings.
|
29
|
+
* March 10, 2012: Version 0.9.4 released. Added support for F6 filings.
|
30
|
+
* March 8, 2012: Version 0.9.3 released. Bug-fix for F2 & F24 mappings.
|
31
|
+
* Feb. 29, 2012: Version 0.9.2 released. Bug-fix for F3 mappings, added filing comparison class.
|
32
|
+
* Feb. 21, 2012: Versions 0.9.0, 0.9.1 released. Added support for alternative CSV Parsers, F4 filings.
|
33
|
+
* Feb. 19, 2012: Version 0.8.2 released. Added layouts for F1M and F2 filings.
|
34
|
+
* Feb. 15, 2012: Version 0.8.1 released. Bug-fix to support F3 termination filings.
|
35
|
+
* Feb. 13, 2012: Version 0.8.0 released. Layouts for form 3 and form 1 added, for parsing House candidate committee filings.
|
36
|
+
* Feb. 11, 2012: Version 0.7.0 released. Layouts for form 9 added, for parsing electioneering communications filings.
|
37
|
+
* Jan. 28, 2012: Version 0.6.0 released. Added support for quoted fields.
|
38
|
+
* Nov. 22, 2011: Version 0.5.0 released. Layouts for form 3X added, for parsing filings from non-candidate committees.
|
39
|
+
* Nov. 13, 2011: Version 0.3.0 released. Layouts for forms 24, 5, 56 and 57 added, for parsing independent expenditure filings.
|
40
|
+
* Nov. 4, 2011: Version 0.2.1 released. Bug-fix release to address a problem with the :include option for selecting only certain columns. Thanks to Aaron Bycoffe for the report and patch.
|
41
|
+
|
42
|
+
== Installation
|
43
|
+
|
44
|
+
Install Fech as a gem:
|
45
|
+
|
46
|
+
gem install myl-fech
|
47
|
+
|
48
|
+
For use in a Rails 3 application, put the following in your Gemfile:
|
49
|
+
|
50
|
+
gem 'myl-fech', :require => 'fech'
|
51
|
+
|
52
|
+
then issue the 'bundle install' command. Fech has been tested under Ruby 1.8.7, 1.9.2 and JRuby 1.6.3.
|
53
|
+
|
54
|
+
== How to contribute
|
55
|
+
|
56
|
+
Fech's goal is to provide support for all electronically filed forms and their row types, so contributors can pick a form type that is currently unsupported and try to implement it, or improve an existing implementation. For entirely new form types, please include specs showing successful parsing of the summary. A good way to start is to look at the mappings for a similar form type. Please note that Fech currently commits to supporting the F.E.C.'s filing formats back to version 3.0, where applicable.
|
57
|
+
|
58
|
+
Bug reports and feature requests are welcomed by submitting an Issue. Please be advised that development is focused on parsing filings, which may contain errors or be improperly filed, and not on providing wrappers or helper methods for working with filings in another context.
|
59
|
+
|
60
|
+
To get started, fork the repo, make your additions or changes and send a pull request.
|
61
|
+
|
62
|
+
== Pronunciation guide
|
63
|
+
|
64
|
+
It's "fetch", with a soft "ch" sound. There are no other acceptable pronunciations.
|
65
|
+
|
66
|
+
== Authors
|
67
|
+
|
68
|
+
Michael Strickland, michael.strickland@nytimes.com
|
69
|
+
|
70
|
+
Evan Carmi, evan@ecarmi.org
|
71
|
+
|
72
|
+
Aaron Bycoffe, bycoffe@huffingtonpost.com
|
73
|
+
|
74
|
+
Derek Willis, dwillis@nytimes.com
|
75
|
+
|
76
|
+
Daniel Pritchett, daniel@sharingatwork.com
|
77
|
+
|
78
|
+
Sai, github@saizai.com
|
79
|
+
|
80
|
+
== Copyright
|
81
|
+
|
82
|
+
Copyright (c) 2012 The New York Times Company & Sai. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Registers an Autotest discovery block whose result is the "rspec2" style name.
Autotest.add_discovery { "rspec2" }
|
data/fech.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for myl-fech. The version number is read from
# lib/fech/version.rb, so lib/ is pushed onto the load path first.
$:.push File.expand_path("../lib", __FILE__)
require "fech/version"

Gem::Specification.new do |s|
  s.name        = "myl-fech"
  s.version     = Fech::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Michael Strickland", "Evan Carmi", "Aaron Bycoffe", "Derek Willis", "Sai"]
  s.email       = ["dwillis@gmail.com"]
  s.homepage    = "http://github.com/MakeYourLaws/fech"
  s.summary     = %q{Ruby library for parsing FEC filings.}
  s.description = %q{A Ruby library for interacting with electronic filings from the Federal Election Commission.}

  s.rubyforge_project = "fech"

  # The packaged file lists are taken from git, so the gem must be built
  # from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  # A single-item brace group such as "{spec}" is not expanded by the shell,
  # so the previous "{spec}/*" pathspec matched nothing and test_files was
  # always empty. Name the directory directly instead.
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies.
  s.add_dependency "fastercsv"
  s.add_dependency "people"

  # Debugger dependencies differ between Ruby 1.8 and 1.9+. These branches
  # are evaluated by whichever Ruby builds the gem.
  if RUBY_VERSION < "1.9"
    s.add_development_dependency "linecache", "0.43"
    s.add_development_dependency "ruby-debug"
    s.add_development_dependency "iconv"
  end
  if RUBY_VERSION >= "1.9"
    s.add_development_dependency "ruby-debug19"
    s.add_development_dependency "linecache19"
  end

  # Development and test tooling.
  s.add_development_dependency "rake", "0.8.7"
  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "mocha"
  s.add_development_dependency "bundler"
  s.add_development_dependency "rcov"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "yard"
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Fech
  # Fech::Comparison takes two Filing objects and does comparisons on them,
  # checking for differences between an original and amended filing, or
  # two filings covering the same period in different years.
  class Comparison
    attr_accessor :filing_1, :filing_2

    # Create a new Comparison object by passing in two Filing objects.
    # Filing objects need to be downloaded first:
    #
    #   f1 = Fech::Filing.new(767437)
    #   f1.download
    #   f2 = Fech::Filing.new(751798)
    #   f2.download
    #   comparison = Fech::Comparison.new(f1, f2)
    #   comparison.summary
    #
    # @param filing_1 the filing whose values are reported for changed fields
    # @param filing_2 the filing to compare against
    # @param [Hash] opts accepted for backward compatibility; currently unused
    def initialize(filing_1, filing_2, opts={})
      @filing_1 = filing_1
      @filing_2 = filing_2
    end

    # Compares the summary of filing_1 with the summary of filing_2 and
    # returns the fields whose values differ. Based on Rails' Hash#diff:
    # https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/diff.rb
    #
    # The result contains filing_1's value for every key whose value differs
    # (or that filing_2 lacks), plus filing_2's value for every key that
    # filing_1 lacks.
    #
    # @return a hash of the same class as filing_1.summary; neither filing's
    #   own summary object is modified
    def summary
      # Read each summary exactly once and only ever modify copies. Running a
      # destructive delete_if on filing_1's summary would alter the filing if
      # it caches that object, and would then make unchanged keys look
      # "missing" from filing_1 in the second pass.
      first  = @filing_1.summary
      second = @filing_2.summary

      changed = first.dup.delete_if { |key, value| second[key] == value }
      added   = second.dup.delete_if { |key, _value| first.has_key?(key) }
      changed.merge!(added)
    end

    # Compares a schedule of itemized records from one filing to another.
    #
    # @param schedule the row type to compare; converted with #to_sym
    # @return the rows_like results of filing_1 minus those of filing_2
    def schedule(schedule)
      row_type = schedule.to_sym
      @filing_1.rows_like(row_type) - @filing_2.rows_like(row_type)
    end

  end
end
|
data/lib/fech/csv.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'csv' if RUBY_VERSION > '1.9'
|
2
|
+
require 'fastercsv' unless RUBY_VERSION > '1.9'
|
3
|
+
|
4
|
+
# Fech::Csv is a wrapper that provides simple CSV handling consistency
|
5
|
+
# between Ruby 1.8 and Ruby 1.9.
|
6
|
+
#
|
7
|
+
# 1.8's "FasterCSV" library has been brought into the Ruby core as of 1.9
|
8
|
+
# as "CSV". The methods are the same so no overloading should be needed.
|
9
|
+
module Fech
  # Pick the parent class by availability: referencing CSV raises NameError
  # when it has not been loaded, which the bare rescue turns into the
  # FasterCSV fallback.
  begin
    # Ruby 1.9 and up compatibility
    class Csv < CSV
    end
  rescue
    # 1.8 compatibility
    class Csv < FasterCSV
    end
  end

  class Csv

    # Loads a given file and parses it, yielding one parsed row at a time.
    # Basic wrapper around the parent library's foreach.
    #
    # NOTE(review): CSV on Ruby 3+ takes its options as keyword arguments, so
    # passing opts positionally (here and in CsvDoctor) would need `**opts`
    # there - confirm before running on a modern Ruby.
    #
    # @param [String] file_path location of the filing on the file system
    # @options opts passed through to the underlying CSV library
    # @yield each parsed row
    def self.parse_row(file_path, opts)
      foreach(file_path, opts) { |row| yield row }
    end

  end

  class CsvDoctor < Fech::Csv

    # Option keys that are forwarded to the per-line parser; everything else
    # in opts is ignored by CsvDoctor.
    LINE_OPTIONS = [:col_sep, :quote_char]

    # Skips the CSV library's whole-file wrapper, and passes each line in
    # the file to a function that will parse it individually. Lines that are
    # empty after stripping whitespace are skipped.
    #
    # @param [String] file_path location of the filing on the file system
    # @options opts only :col_sep and :quote_char are used
    # @yield each parsed row
    def self.parse_row(file_path, opts)
      # Filter into a copy so the caller's options hash is left untouched.
      line_opts = opts.reject { |key, _value| !LINE_OPTIONS.include?(key) }

      # Block form guarantees the file handle is closed, even if the
      # caller's block raises.
      File.open(file_path, 'r') do |file|
        file.each do |line|
          # Skip empty lines
          next if line.strip.empty?
          yield safe_line(line, line_opts)
        end
      end
    end

    # Tries to parse the line with parse_line and the given quote_char.
    # If this fails, tries again with the "\0" quote_char and then cleans
    # each value with safe_value.
    #
    # @param [String] line the file's row to parse
    # @options opts :quote_char the quote_char to try initially
    # @return the parsed row
    def self.safe_line(line, opts)
      parse_line(line, opts)
    rescue Fech::Csv::MalformedCSVError
      row = parse_line(line, opts.merge(:quote_char => "\0"))
      row.map! { |val| safe_value(val) }
    end

    # Removes extraneous quotes from values by parsing the value as a
    # one-field line. Non-String values, and values that cannot be parsed,
    # are returned unchanged.
    def self.safe_value(val)
      return val unless val.is_a?(String)
      parse_line(val).first
    rescue Fech::Csv::MalformedCSVError
      val
    end

  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Fech

  # Stores sets of built-in translations that can be mixed in to a Fech::Filing.
  # Contains functions that accept a Translator, and add arbitrary translations
  # to it. The public function names should correspond to the key used to mix it in.
  #
  #   filing = Fech::Filing.new(XXXXXX, :translate => [:names, :dates])
  class DefaultTranslations

    # The five bits that make up a name, and their labels in the People gem.
    # The two lists are paired by position.
    NAME_BITS = [:prefix, :first_name, :middle_name, :last_name, :suffix].freeze
    PEOPLE_BITS = [:title, :first, :middle, :last, :suffix].freeze

    attr_reader :t

    # @param translator the object that receives the #combine and #convert
    #   calls made by the translation sets below
    def initialize(translator)
      @t = translator
    end

    # Splits composite names into its component parts, and combines those parts
    # into composites where appropriate. Assumes that the canonical names of the
    # fields follow the pattern:
    #   * FIELD_name - "Mr. John Charles Smith Sr."
    #   * FIELD_prefix - "Mr."
    #   * FIELD_first_name - "John"
    #   * FIELD_middle_name - "Charles"
    #   * FIELD_last_name - "Smith"
    #   * FIELD_suffix - "Sr."
    def names

      # COMBINE split names into composite names for these rows
      composites = [
        {:row => :sa, :version => /^[6-7]/, :field => [:contributor, :donor_candidate]},
        {:row => :sb, :version => /^[6-7]/, :field => [:payee, :beneficiary_candidate]},
        {:row => :sc, :version => /^[6-7]/, :field => [:lender, :lender_candidate]},
        {:row => :sc1, :version => /^[6-7]/, :field => [:treasurer, :authorized]},
        {:row => :sc2, :version => /^[6-7]/, :field => :guarantor},
        {:row => :sd, :version => /^[6-7]/, :field => :creditor},
        {:row => :se, :version => /^[6-7]/, :field => [:payee, :candidate]},
        {:row => :sf, :version => /^[6-7]/, :field => [:payee, :payee_candidate]},
        {:row => :f3p, :version => /^[6-7]/, :field => :treasurer},
        {:row => :f3p31, :version => /^[6-7]/, :field => :contributor},
      ]
      # SPLIT composite names into component parts for these rows
      components = [
        {:row => :sa, :version => /^3|(5.0)/, :field => :contributor},
        {:row => :sa, :version => /^[3-5]/, :field => :donor_candidate},
        {:row => :sb, :version => /^3|(5.0)/, :field => :payee},
        {:row => :sb, :version => /^[3-5]/, :field => :beneficiary_candidate},
        {:row => :sc, :version => /^[3-5]/, :field => [:lender, :lender_candidate]},
        {:row => :sc1, :version => /^[3-5]/, :field => [:treasurer, :authorized]},
        {:row => :sc2, :version => /^[3-5]/, :field => :guarantor},
        {:row => :sd, :version => /^[3-5]/, :field => :creditor},
        # Was misspelled :cadidate, which produced translations for
        # non-existent "cadidate_*" fields instead of the "candidate_*"
        # fields that the matching composite entry above uses.
        {:row => :se, :version => /^[3-5]/, :field => [:payee, :candidate]},
        {:row => :sf, :version => /^[3-5]/, :field => [:payee, :payee_candidate]},
        {:row => :f3p, :version => /^[3-5]/, :field => :treasurer},
        {:row => :f3p31, :version => /^[3-5]/, :field => :contributor},
      ]

      composites.each { |c| combine_components_into_name(c) }
      components.each { |c| split_name_into_components(c) }

    end

    # Converts everything that looks like an FEC-formatted date to a
    # native Ruby Date object.
    def dates
      # only convert fields whose name is date* or *_date*
      # lots of other things might be 8 digits, and we have to exclude eg 'candidate'
      t.convert(:field => /(^|_)date/) do |value|
        # Best effort: any value that cannot be parsed is passed through as-is.
        begin
          Date.parse(value)
        rescue StandardError
          value
        end
      end
    end

    # Turns "Allred^Ann^Mrs.^III" into "Mrs. Ann Allred III".
    #
    # This is a class method, so the `private` keyword below never applied to
    # it; it is defined above that keyword to make its public visibility clear.
    #
    # @param [String] name a "^"-delimited name
    # @return [String] the name parts re-ordered and joined with spaces
    def self.fix_carrot_names(name)
      parts = name.split("^").reverse
      # With more than three parts the reversed list starts with the suffix;
      # move it to the end.
      parts.push(parts.shift) if parts.size > 3
      parts.join(" ")
    end

    private

    # Create a Translation for the given row, version named as :field.
    # The composite "FIELD_name" value is the five name bits of the row,
    # nils removed, joined with spaces.
    #
    # @param [Hash] composite :row, :version and :field (one field or an Array)
    # @raise [ArgumentError] if composite is nil
    def combine_components_into_name(composite)
      raise ArgumentError, "Must pass a :row, :version AND :field" if composite.nil?
      # Accept a single field or a list without modifying the caller's hash.
      fields = [composite[:field]].flatten

      fields.each do |field|
        t.combine(:row => composite[:row], :version => composite[:version],
                  :field => "#{field}_name") do |row|

          # Gather each name_bit from the parsed row, and join it into one value
          bits = NAME_BITS.map do |field_name|
            row.send("#{field}_#{field_name}".to_sym)
          end
          bits.compact.join(" ")
        end
      end
    end

    # Create a Translation for all five name bits, that will strip
    # out its respective bit from an already-populated composite name field.
    #
    # @param [Hash] component :row, :version and :field (one field or an Array)
    # @raise [ArgumentError] if component is nil
    def split_name_into_components(component)
      raise ArgumentError, "Must pass a :row, :version AND :field" if component.nil?
      # Accept a single field or a list without modifying the caller's hash.
      fields = [component[:field]].flatten

      fields.each do |field|
        NAME_BITS.zip(PEOPLE_BITS).each do |field_name, people_name|
          t.combine(:row => component[:row], :version => component[:version],
                    :field => "#{field}_#{field_name}") do |row|

            # Grab the original, composite name
            name = row.send("#{field}_name")

            if name.nil?
              nil
            else
              # Fix various name formatting errors
              name = self.class.fix_carrot_names(name) unless name.index("^").nil?

              # Extract just the component you want
              (Fech::Translator::NAME_PARSER.parse(name)[people_name] || "").strip
            end
          end
        end
      end
    end

  end

end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Contains helper functions and static variables used by various
|
2
|
+
# Fech classes.
|
3
|
+
module FechUtils

  # All supported row types pointed to regular expressions that will correctly
  # match that row type in the wild. If multiple matches exist, Fech will match
  # the longest regex pattern found.
  #
  # NOTE(review): inside a character class "|" is a literal character, not
  # alternation, so e.g. [a|n] also accepts "|". Also, [^a|n] on :f1m rejects
  # "F1MA"/"F1MN", which then fall through to :f1 - confirm that is intended.
  ROW_TYPES = {
    :hdr   => /^hdr$/i,
    :f1    => /^f1/i,
    :f1m   => /(^f1m[^a|n])/i,
    :f2    => /(^f2$)|(^f2[^4])/i,
    :f24   => /(^f24$)|(^f24[an])/i,
    :f3    => /^f3[a|n|t]/i,
    :f3l   => /^f3l[a|n]/i,
    :f3p   => /(^f3p$)|(^f3p[^s|3])/i,
    :f3s   => /^f3s/i,
    :f3p31 => /^f3p31/i,
    :f3ps  => /^f3ps/i,
    :f3x   => /(^f3x$)|(^f3x[ant])/i,
    :f4    => /^f4[na]/i,
    :f5    => /^f5[na]/i,
    :f56   => /^f56/i,
    :f57   => /^f57/i,
    :f6    => /(^f6$)|(^f6[an])/i,
    :f65   => /^f65/i,
    :f7    => /^f7[na]/i,
    :f76   => /^f76/i,
    :f9    => /^f9/i,
    :f91   => /^f91/i,
    :f92   => /^f92/i,
    :f93   => /^f93/i,
    :f94   => /^f94/i,
    :f99   => /^f99/i,
    :h1    => /^h1/i,
    :h2    => /^h2/i,
    :h3    => /^h3/i,
    :h4    => /^h4/i,
    :h5    => /^h5/i,
    :h6    => /^h6/i,
    :sa    => /^sa/i,
    :sb    => /^sb/i,
    :sc    => /^sc[^1-2]/i,
    :sc1   => /^sc1/i,
    :sc2   => /^sc2/i,
    :sd    => /^sd/i,
    :se    => /^se/i,
    :sf    => /^sf/i,
    :sl    => /^sl/i,
    :text  => /^text/i,
  }

  # Converts symbols and strings to Regexp objects for use in regex-keyed maps.
  # Assumes that symbols should be matched literally, strings unanchored.
  #
  # * Regexp - rebuilt from its source as a case-insensitive pattern
  # * Symbol - the ROW_TYPES pattern when the symbol is a known row type,
  #   otherwise a case-insensitive pattern anchored as ^symbol$
  # * anything else - its to_s, escaped, as an unanchored case-insensitive pattern
  #
  # @param [String,Symbol,Regexp] label the object to convert to a Regexp
  # @return [Regexp]
  def regexify(label)
    case label
    when Regexp
      Regexp.new(label.source, Regexp::IGNORECASE)
    when Symbol
      ROW_TYPES.has_key?(label) ? ROW_TYPES[label] : Regexp.new("^#{label.to_s}$", Regexp::IGNORECASE)
    else
      Regexp.new(Regexp.escape(label.to_s), Regexp::IGNORECASE)
    end
  end

  # get symbol (eg :f1) based on the literal row type returned (eg 'f1nwhatever')
  # this is used to pick which table to store it in, since it's one table per base type (not per equivalent variant)
  #
  # @param row_type the literal row type to classify
  # @return [Symbol, nil] the ROW_TYPES key whose pattern matches row_type,
  #   preferring the pattern with the longest string form; nil when no
  #   pattern matches (this used to raise NoMethodError on nil)
  def base_type(row_type)
    # Longest pattern first, so that e.g. "F91" resolves to :f91 rather than :f9.
    by_specificity = ROW_TYPES.sort { |x, y| y[1].to_s.length <=> x[1].to_s.length }
    match = by_specificity.find { |_type, pattern| row_type =~ pattern }
    match && match.first
  end

end
|