fech 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/.gitignore +7 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +4 -0
  4. data/Gemfile.lock +49 -0
  5. data/LICENSE +13 -0
  6. data/README.rdoc +178 -0
  7. data/Rakefile +3 -0
  8. data/autotest/discover.rb +1 -0
  9. data/fech.gemspec +32 -0
  10. data/lib/fech.rb +13 -0
  11. data/lib/fech/default_translations.rb +135 -0
  12. data/lib/fech/fech_utils.rb +41 -0
  13. data/lib/fech/filing.rb +248 -0
  14. data/lib/fech/map_generator.rb +187 -0
  15. data/lib/fech/mapped.rb +38 -0
  16. data/lib/fech/mappings.rb +66 -0
  17. data/lib/fech/translator.rb +138 -0
  18. data/lib/fech/version.rb +3 -0
  19. data/sources/F3P.csv +1 -0
  20. data/sources/F3P31.csv +1 -0
  21. data/sources/F3PS.csv +1 -0
  22. data/sources/F3S.csv +1 -0
  23. data/sources/HDR.csv +1 -0
  24. data/sources/SchA.csv +1 -0
  25. data/sources/SchB.csv +1 -0
  26. data/sources/SchC.csv +1 -0
  27. data/sources/SchC1.csv +1 -0
  28. data/sources/SchC2.csv +1 -0
  29. data/sources/SchD.csv +1 -0
  30. data/sources/SchE.csv +1 -0
  31. data/sources/SchF.csv +1 -0
  32. data/sources/TEXT.csv +1 -0
  33. data/sources/headers/3.csv +1 -0
  34. data/sources/headers/5.0.csv +1 -0
  35. data/sources/headers/5.1.csv +1 -0
  36. data/sources/headers/5.2.csv +1 -0
  37. data/sources/headers/5.3.csv +1 -0
  38. data/sources/headers/6.1.csv +1 -0
  39. data/sources/headers/6.2.csv +1 -0
  40. data/sources/headers/6.3.csv +1 -0
  41. data/sources/headers/6.4.csv +1 -0
  42. data/sources/headers/7.0.csv +1 -0
  43. data/sources/headers/ignore.csv +5 -0
  44. data/spec/data/723604.fec +4 -0
  45. data/spec/data/97405.fec +10 -0
  46. data/spec/default_translations_spec.rb +104 -0
  47. data/spec/fech_utils_spec.rb +29 -0
  48. data/spec/filing_spec.rb +251 -0
  49. data/spec/map_generator_spec.rb +49 -0
  50. data/spec/mapped_spec.rb +44 -0
  51. data/spec/mappings_spec.rb +46 -0
  52. data/spec/sources/F3P.csv +1 -0
  53. data/spec/sources/SchA.csv +1 -0
  54. data/spec/sources/SchB.csv +1 -0
  55. data/spec/sources/SchC.csv +1 -0
  56. data/spec/sources/headers/3.csv +1 -0
  57. data/spec/sources/headers/5.0.csv +1 -0
  58. data/spec/sources/headers/5.1.csv +1 -0
  59. data/spec/sources/headers/5.2.csv +1 -0
  60. data/spec/sources/headers/5.3.csv +1 -0
  61. data/spec/sources/headers/6.1.csv +1 -0
  62. data/spec/sources/headers/6.2.csv +1 -0
  63. data/spec/sources/headers/6.3.csv +1 -0
  64. data/spec/sources/headers/6.4.csv +1 -0
  65. data/spec/sources/headers/7.0.csv +1 -0
  66. data/spec/sources/headers/ignore.csv +5 -0
  67. data/spec/sources/sa.csv +1 -0
  68. data/spec/spec_helper.rb +9 -0
  69. data/spec/translator_spec.rb +195 -0
  70. data/tasks/fech.rake +41 -0
  71. metadata +280 -0
@@ -0,0 +1,7 @@
1
+ sources/rows/*
2
+ pkg/*
3
+ .bundle
4
+ *.gem
5
+ .yardoc/*
6
+ doc/*
7
+ rendered_maps.rb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --profile
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in fech.gemspec
4
+ gemspec
@@ -0,0 +1,49 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fech (0.1.0)
5
+ fastercsv
6
+ people (~> 0.2.1)
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ ZenTest (4.5.0)
12
+ autotest (4.4.6)
13
+ ZenTest (>= 4.4.1)
14
+ columnize (0.3.2)
15
+ diff-lcs (1.1.2)
16
+ fastercsv (1.5.4)
17
+ linecache (0.43)
18
+ mocha (0.9.12)
19
+ people (0.2.1)
20
+ rcov (0.9.9)
21
+ rdoc (3.9.2)
22
+ rspec (2.6.0)
23
+ rspec-core (~> 2.6.0)
24
+ rspec-expectations (~> 2.6.0)
25
+ rspec-mocks (~> 2.6.0)
26
+ rspec-core (2.6.4)
27
+ rspec-expectations (2.6.0)
28
+ diff-lcs (~> 1.1.2)
29
+ rspec-mocks (2.6.0)
30
+ ruby-debug (0.10.4)
31
+ columnize (>= 0.1)
32
+ ruby-debug-base (~> 0.10.4.0)
33
+ ruby-debug-base (0.10.4)
34
+ linecache (>= 0.3)
35
+ yard (0.7.2)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ autotest
42
+ bundler
43
+ fech!
44
+ mocha
45
+ rcov
46
+ rdoc
47
+ rspec (~> 2.6)
48
+ ruby-debug
49
+ yard
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2011 The New York Times Company
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this library except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,178 @@
1
+
2
+ ______ ______ ______ __
3
+ /\ ___\ /\ ___\ /\ ___\ /\ \___
4
+ \ \ __\ \ \ __\ \ \ \____ \ \ __ \
5
+ \ \_\ \ \_____\ \ \_____\ \ \_\ \_\
6
+ \/_/ \/_____/ \/_____/ \/_/\/_/
7
+
8
+ Fech provides a layer of abstraction in parsing electronic presidential campaign filings from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data.
9
+
10
+ == Installation
11
+
12
+ Install Fech as a gem:
13
+
14
+ gem install fech
15
+
16
+ For use in a Rails 3 application, put the following in your Gemfile:
17
+
18
+ gem 'fech'
19
+
20
+ then issue the 'bundle install' command.
21
+
22
+ == Getting Started
23
+
24
+ Start by creating a Filing object that corresponds to any electronic filing from the FEC. You'll then have to download the file before parsing it:
25
+
26
+ filing = Fech::Filing.new(723604)
27
+ filing.download
28
+
29
+ * Pass in the FEC filing id
30
+ * Optional: specify the :download_dir on initialization to set where filings are stored. Otherwise, they'll go into a temp folder on your filesystem.
31
+
32
+ === Summary Data
33
+
34
+ To get summary data for the filing (various aggregate amounts, stats about the filing):
35
+
36
+ filing.summary
37
+ #=> {:coverage_from_date=>"20110101", :coverage_through_date=>"20110301", ... }
38
+
39
+ Returns a named hash of all attributes available for the filing. (Which fields are available can vary depending on the filing version number.)
40
+
41
+ === Accessing specific line items
42
+
43
+ To grab every row in the filing of a certain type (all Schedule A items, for example):
44
+
45
+ filing.rows_like(/^sa/)
46
+ #=> [{:transaction_id=>"SA17.XXXX", :contribution_date=>"20110101" ... } ... ]
47
+
48
+ This will return an array of hashes, one hash for each row found in the filing whose line number matches the regular expression you passed in. (SA17s and SA18s would both be returned in this example). You can also pass in strings for exact matches.
49
+
50
+ When given a block, .rows_like will yield a single hash at a time:
51
+
52
+ filing.rows_like(/^sa/) do |contribution|
53
+ contribution.transaction_id
54
+ #=> {:transaction_id=>"SA17.XXXX", :contribution_date=>"20110101" ... }
55
+ end
56
+
57
+ == Usage
58
+
59
+ === Accessing specific fields
60
+
61
+ By default, .rows_like will process every field in the matched rows (some rows have 200+ fields). You can speed up performance significantly by asking for just the subset of fields you need.
62
+
63
+ filing.rows_like(/^sa/, :include => [:transaction_id]) do |contribution|
64
+ contribution
65
+ #=> {:transaction_id=>"SA17.XXXX"}
66
+ end
67
+
68
+ === Raw data
69
+
70
+ If you want to access the raw arrays of row data, pass :raw => true to .rows_like or any of its shortcuts:
71
+
72
+ filing.contributions(:raw => true)
73
+ #=> [["SA17A", "C00XXXXXX", "SA17.XXXX", nil, nil, "IND" ... ], ... ]
74
+
75
+ filing.contributions(:raw => true) do |row|
76
+ #row => ["SA17A", "C00XXXXX", "SA17.XXXX", nil, nil, "IND" ... ]
77
+ end
78
+
79
+ The source maps for individual row types and versions may also be accessed directly:
80
+
81
+ Fech::Filing.map_for("sa")
82
+ Fech::Filing.map_for(/sa/, :version => 6.1)
83
+ #=> [:form_type, :filer_committee_id_number, :transaction_id ... ]
84
+
85
+ You can then bypass some of the overhead of Fech if you're building something more targeted.
86
+
87
+ === Converting / Preprocessing data
88
+
89
+ For performing bulk actions on specific types of fields, you can register "translations" which will manipulate specific data under certain conditions.
90
+
91
+ An example: dates within filings are formatted as YYYYMMDD. To automatically convert all Schedule B :expenditure_date values to native Ruby Dates:
92
+
93
+ filing.translate do |t|
94
+ t.convert(:row => /^sb/, :field => :expenditure_date) { |v| Date.parse(v) }
95
+ end
96
+
97
+ The block you give .convert will be given the original value of that field when it is run. After you run .convert, any time you parse a row beginning with "SB", the :expenditure_date value will be a Date object.
98
+
99
+ The :field parameter can also be a regular expression:
100
+
101
+ filing.translate do |t|
102
+ t.convert(:row => /^f3p/, :field => /^coverage_.*_date/) { |v| Date.parse(v) }
103
+ end
104
+
105
+ Now, both :coverage_from_date and :coverage_through_date will be automatically cast to dates.
106
+
107
+ You can leave off any or all of the parameters (row, field, version) for more broad adoption of the translation.
108
+
109
+ === Derived Fields
110
+
111
+ You may want to perform the same calculation on many rows of data (contributions minus refunds to create a net value, for example). This can be done without cluttering up your more app-specific parsing code by using a .combine translation. The translation blocks you pass .combine receive the entire row as their context, not just a single value. The :field parameter becomes what you want the new value to be named.
112
+
113
+ filing.translate do |t|
114
+ t.combine(:row => :f3pn, :field => :net_individual_contributions) do |row|
115
+ contribs = row.col_a_individual_contribution_total.to_f
116
+ refunds = row.col_a_total_contributions_refunds.to_f
117
+ contribs - refunds
118
+ end
119
+ end
120
+
121
+ In this example, every parsed F3PN row would contain an attribute not found in the original filing data - :net_individual_contributions - which contains the result of the calculation above. The values used to construct combinations will have already been run through any .convert translations you've specified.
122
+
123
+ === Built-in translations
124
+
125
+ There are two sets of translations that come with Fech for some of the more common needs:
126
+ * Breaking apart names into their component parts and joining them back together, depending on which the filing provides
127
+ * Converting date field strings to Ruby Date objects
128
+
129
+ You can mix these translations into your parser when you create it:
130
+
131
+ filing = Fech::Filing.new(723604, :translate => [:names, :dates])
132
+
133
+ Just be aware that as you add more translations, the parsing will start to take slightly longer (although having translations that aren't used will not slow it down).
134
+
135
+ === Aliasing fields
136
+
137
+ You can allow any field (converted, combined, or untranslated) to have an arbitrary number of aliases. For example, you could alias the F3P line's :col_a_cash_on_hand_beginning_period to the more manageable :cash_beginning:
138
+
139
+ filing.translate do |t|
140
+ t.alias :cash_beginning, :col_a_cash_on_hand_beginning_period, :f3p
141
+ end
142
+
143
+ filing.summary.cash_beginning == filing.summary.col_a_cash_on_hand_beginning_period
144
+ #=> true
145
+
146
+ We found it useful to be able to access attributes using the names of the database fields they correspond to.
147
+
148
+ == Warnings
149
+
150
+ Filings can contain data that is incomplete or wrong: contributions might be in excess of legal limits, or data may have just been entered incorrectly. While some of these mistakes are corrected in later filing amendments, you should be aware that the data is not perfect. Fech will only return data as accurate as the source.
151
+
152
+ When filings get very large, be careful not to perform operations that attempt to transform many rows in memory.
153
+
154
+ == Supported row types and versions
155
+
156
+ The following row types are currently supported from filing version 3 through 7.0:
157
+ * F3P (Summaries)
158
+ * F3PS
159
+ * F3S
160
+ * F3P31 (Items to be Liquidated)
161
+ * SA (Contributions)
162
+ * SB (Expenditures)
163
+ * SC (Loans)
164
+ * SC1
165
+ * SC2
166
+ * SD (Debts & Obligations)
167
+ * SE (Independent Expenditures)
168
+ * SF (Coordinated Expenditures)
169
+
170
+ == Authors
171
+
172
+ Michael Strickland, michael.strickland@nytimes.com
173
+
174
+ Evan Carmi
175
+
176
+ == Copyright
177
+
178
+ Copyright (c) 2011 The New York Times Company. See LICENSE for details.
@@ -0,0 +1,3 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+ Dir.glob('tasks/*.rake').each { |r| import r }
@@ -0,0 +1 @@
1
+ Autotest.add_discovery { "rspec2" }
@@ -0,0 +1,32 @@
# -*- encoding: utf-8 -*-
# Gem specification for fech. The file lists are built from `git ls-files`,
# so the gem must be built from inside a git checkout.
$:.push File.expand_path("../lib", __FILE__)
require "fech/version"

Gem::Specification.new do |s|
  s.name        = "fech"
  s.version     = Fech::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Michael Strickland", "Evan Carmi"]
  s.email       = ["michael.c.strickland@gmail.com"]
  s.homepage    = "http://github.com/nytimes/fech"
  s.summary     = %q{Ruby library for parsing FEC filings.}
  s.description = %q{A Ruby library for interacting with electronic filings from the Federal Election Commission.}

  s.rubyforge_project = "fech"

  s.files         = `git ls-files`.split("\n")
  # A single-item brace group ("{spec}") is never expanded by the shell, so
  # the previous pattern "{spec}/*" matched nothing and test_files was always
  # empty. Use the plain directory glob instead.
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "fastercsv"
  s.add_dependency "people", "~> 0.2.1"

  # Development-only dependencies
  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "mocha"
  s.add_development_dependency "autotest"
  s.add_development_dependency "ruby-debug"
  s.add_development_dependency "bundler"
  s.add_development_dependency "rcov"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "yard"
end
@@ -0,0 +1,13 @@
1
+ require 'fech/filing'
2
+ require 'fech/rendered_maps'
3
+ require 'fech/mappings'
4
+ require 'fech/default_translations'
5
+ require 'fech/translator'
6
+ require 'fech/mapped'
7
+ require 'fech/fech_utils'
8
+ require 'fech/map_generator'
9
+
10
+ module Fech
11
+ extend FechUtils
12
+ DEFAULT_VERSION = "7.0"
13
+ end
@@ -0,0 +1,135 @@
require 'date'

module Fech

  # Stores sets of built-in translations that can be mixed in to a Fech::Filing.
  # Contains functions that accept a Translator, and add arbitrary translations
  # to it. The public function names should correspond to the key used to mix it in.
  #
  #   filing = Fech::Filing.new(XXXXXX, :translate => [:names, :dates])
  class DefaultTranslations

    # The five bits that make up a name, and their labels in the People gem.
    # The two lists are parallel: NAME_BITS[i] corresponds to PEOPLE_BITS[i].
    NAME_BITS   = [:prefix, :first_name, :middle_name, :last_name, :suffix].freeze
    PEOPLE_BITS = [:title, :first, :middle, :last, :suffix].freeze

    # The translator that translations are registered on.
    attr_reader :t

    # @param translator an object responding to #convert and #combine, each
    #   taking an options hash and a block
    def initialize(translator)
      @t = translator
    end

    # Turns "Allred^Ann^Mrs.^III" into "Mrs. Ann Allred III".
    #
    # Defined above the +private+ marker on purpose: +private+ has no effect
    # on singleton methods, and this method is called with an explicit
    # receiver from the split translations, so it has to stay public.
    #
    # @param name [String] a caret-delimited name
    # @return [String] the name parts joined by single spaces
    def self.fix_carrot_names(name)
      bits = name.split("^").reverse
      # After reversing, a fourth part (the suffix) sits at the front;
      # move it to the end of the name.
      bits.push(bits.shift) if bits.size > 3
      bits.join(" ")
    end

    # Splits composite names into its component parts, and combines those parts
    # into composites where appropriate. Assumes that the canonical names of the
    # fields follow the pattern:
    #   * FIELD_name - "Mr. John Charles Smith Sr."
    #   * FIELD_prefix - "Mr."
    #   * FIELD_first_name - "John"
    #   * FIELD_middle_name - "Charles"
    #   * FIELD_last_name - "Smith"
    #   * FIELD_suffix - "Sr."
    def names

      # COMBINE split names into composite names for these rows
      composites = [
        {:row => :sa,    :version => /^[6-7]/, :field => [:contributor, :donor_candidate]},
        {:row => :sb,    :version => /^[6-7]/, :field => [:payee, :beneficiary_candidate]},
        {:row => :sc,    :version => /^[6-7]/, :field => [:lender, :lender_candidate]},
        {:row => :sc1,   :version => /^[6-7]/, :field => [:treasurer, :authorized]},
        {:row => :sc2,   :version => /^[6-7]/, :field => :guarantor},
        {:row => :sd,    :version => /^[6-7]/, :field => :creditor},
        {:row => :se,    :version => /^[6-7]/, :field => [:payee, :candidate]},
        {:row => :sf,    :version => /^[6-7]/, :field => [:payee, :payee_candidate]},
        {:row => :f3p,   :version => /^[6-7]/, :field => :treasurer},
        {:row => :f3p31, :version => /^[6-7]/, :field => :contributor},
      ]
      # SPLIT composite names into component parts for these rows.
      # The version pattern for :contributor / :payee is anchored and has its
      # dot escaped so it matches only versions starting with "3" or "5.0".
      components = [
        {:row => :sa,    :version => /^(3|5\.0)/, :field => :contributor},
        {:row => :sa,    :version => /^[3-5]/,    :field => :donor_candidate},
        {:row => :sb,    :version => /^(3|5\.0)/, :field => :payee},
        {:row => :sb,    :version => /^[3-5]/,    :field => :beneficiary_candidate},
        {:row => :sc,    :version => /^[3-5]/,    :field => [:lender, :lender_candidate]},
        {:row => :sc1,   :version => /^[3-5]/,    :field => [:treasurer, :authorized]},
        {:row => :sc2,   :version => /^[3-5]/,    :field => :guarantor},
        {:row => :sd,    :version => /^[3-5]/,    :field => :creditor},
        {:row => :se,    :version => /^[3-5]/,    :field => [:payee, :candidate]},
        {:row => :sf,    :version => /^[3-5]/,    :field => [:payee, :payee_candidate]},
        {:row => :f3p,   :version => /^[3-5]/,    :field => :treasurer},
        {:row => :f3p31, :version => /^[3-5]/,    :field => :contributor},
      ]

      composites.each { |c| combine_components_into_name(c) }
      components.each { |c| split_name_into_components(c) }

    end

    # Converts everything that looks like an FEC-formatted date (exactly
    # eight digits, YYYYMMDD) to a native Ruby Date object. Any other value
    # is passed through unchanged.
    def dates
      t.convert do |value|
        if /\A\d{8}\z/.match(value).nil?
          value
        else
          Date.parse(value)
        end
      end
    end

    private

    # Create a Translation for the given row, version named as :field.
    # Registers one combine translation per field, producing "FIELD_name"
    # from whichever of the five name bits are present on the row.
    #
    # @param composite [Hash] with :row, :version and :field (a single field
    #   or an Array of fields); the hash is not modified
    # @raise [ArgumentError] if composite is nil
    def combine_components_into_name(composite)
      raise ArgumentError, "Must pass a :row, :version AND :field" if composite.nil?

      Array(composite[:field]).each do |field|
        t.combine(:row => composite[:row], :version => composite[:version],
                  :field => "#{field}_name") do |row|

          # Gather each name_bit from the parsed row, and join it into one value
          bits = NAME_BITS.map do |field_name|
            row.send("#{field}_#{field_name}".to_sym)
          end
          bits.compact.join(" ")
        end
      end
    end

    # Create a Translation for all five name bits, that will strip
    # out its respective bit from an already-populated composite name field.
    #
    # @param component [Hash] with :row, :version and :field (a single field
    #   or an Array of fields); the hash is not modified
    # @raise [ArgumentError] if component is nil
    def split_name_into_components(component)
      raise ArgumentError, "Must pass a :row, :version AND :field" if component.nil?

      Array(component[:field]).each do |field|
        NAME_BITS.zip(PEOPLE_BITS).each do |field_name, people_name|
          t.combine(:row => component[:row], :version => component[:version],
                    :field => "#{field}_#{field_name}") do |row|

            # Grab the original, composite name
            name = row.send("#{field}_name")
            next nil if name.nil?

            # Fix various name formatting errors. The class is referenced by
            # constant so this works whatever +self+ is when the block runs.
            name = DefaultTranslations.fix_carrot_names(name) if name.include?("^")

            # Extract just the component you want
            (Fech::Translator::NAME_PARSER.parse(name)[people_name] || "").strip
          end
        end
      end
    end

  end

end