fech 1.0.1 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +4 -2
- data/README.rdoc +4 -1
- data/lib/fech.rb +1 -0
- data/lib/fech/csv.rb +12 -5
- data/lib/fech/default_translations.rb +4 -6
- data/lib/fech/filing.rb +9 -6
- data/lib/fech/mappings.rb +2 -1
- data/lib/fech/version.rb +1 -1
- metadata +4 -7
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fech (
|
4
|
+
fech (1.1)
|
5
5
|
fastercsv
|
6
6
|
people
|
7
7
|
|
@@ -10,7 +10,8 @@ GEM
|
|
10
10
|
specs:
|
11
11
|
columnize (0.3.6)
|
12
12
|
diff-lcs (1.1.2)
|
13
|
-
fastercsv (1.5.
|
13
|
+
fastercsv (1.5.5)
|
14
|
+
iconv (0.1)
|
14
15
|
linecache (0.43)
|
15
16
|
mocha (0.9.12)
|
16
17
|
people (0.2.1)
|
@@ -38,6 +39,7 @@ PLATFORMS
|
|
38
39
|
DEPENDENCIES
|
39
40
|
bundler
|
40
41
|
fech!
|
42
|
+
iconv
|
41
43
|
linecache (= 0.43)
|
42
44
|
mocha
|
43
45
|
rake (= 0.8.7)
|
data/README.rdoc
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Fech makes it easy to parse electronic campaign finance filings[http://www.fec.gov/finance/disclosure/efile_search.shtml] by candidates, parties and political action committees from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data. Fech is an open source project of The New York Times, but contributions from anyone interested in working with F.E.C. filings are greatly appreciated.
|
8
8
|
|
9
|
-
Latest version: 1.0.1
|
9
|
+
Latest version: 1.1
|
10
10
|
|
11
11
|
Fech is tested under Ruby versions 1.8.7, 1.9.2 and 1.9.3.
|
12
12
|
|
@@ -16,6 +16,7 @@ Can be found at Fech's Github page[http://nytimes.github.com/Fech/].
|
|
16
16
|
|
17
17
|
== News
|
18
18
|
|
19
|
+
* Nov. 13, 2012: Version 1.1 released. CSVDoctor skips rows that don't match row type being searched for, which provides a performance boost, and smaller bugfixes for Form 99 handling and date-field conversions. Thanks to Sai for several patches.
|
19
20
|
* June 16, 2012: Version 1.0.1 released. Bug-fix for older Form 2 support.
|
20
21
|
* April 11, 2012: Version 1.0.0 released! Support for Ruby 1.9.3 added, all form types supported.
|
21
22
|
* April 9, 2012: Version 1.0.0.rc1 released. Release candidate with backwards-incompatible change (renaming zip attribute to zip_code).
|
@@ -71,6 +72,8 @@ Derek Willis, dwillis@nytimes.com
|
|
71
72
|
|
72
73
|
Daniel Pritchett, daniel@sharingatwork.com
|
73
74
|
|
75
|
+
Sai, home@saizai.com
|
76
|
+
|
74
77
|
== Copyright
|
75
78
|
|
76
79
|
Copyright (c) 2012 The New York Times Company. See LICENSE for details.
|
data/lib/fech.rb
CHANGED
data/lib/fech/csv.rb
CHANGED
@@ -24,7 +24,11 @@ module Fech
|
|
24
24
|
# @param [String] file_path location of the filing on the file system
|
25
25
|
# @options opts passed through to FasterCSV
|
26
26
|
def self.parse_row(file_path, opts)
|
27
|
-
foreach(file_path, opts) { |row| yield row }
|
27
|
+
foreach(file_path, clean_opts(opts)) { |row| yield row }
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.clean_opts(opts)
|
31
|
+
opts.reject {|k,v| ![:col_sep, :quote_char].include?(k)}
|
28
32
|
end
|
29
33
|
|
30
34
|
end
|
@@ -33,13 +37,16 @@ module Fech
|
|
33
37
|
|
34
38
|
# Skips FasterCSV's whole-file wrapper, and passes each line in
|
35
39
|
# the file to a function that will parse it individually.
|
40
|
+
# @option opts [Boolean] :row_type yield only rows that match this type
|
36
41
|
def self.parse_row(file_path, opts)
|
37
|
-
opts.reject! {|k,v| ![:col_sep, :quote_char].include?(k)}
|
38
|
-
|
39
42
|
File.open(file_path, 'r').each do |line|
|
40
43
|
# Skip empty lines
|
41
44
|
next if line.strip.empty?
|
42
|
-
|
45
|
+
|
46
|
+
# Skip non-matching row-types
|
47
|
+
next if opts.key?(:row_type) && !Fech.regexify(opts[:row_type]).match(line)
|
48
|
+
|
49
|
+
yield safe_line(line, clean_opts(opts))
|
43
50
|
end
|
44
51
|
end
|
45
52
|
|
@@ -51,7 +58,7 @@ module Fech
|
|
51
58
|
begin
|
52
59
|
parse_line(line, opts)
|
53
60
|
rescue Fech::Csv::MalformedCSVError
|
54
|
-
row = parse_line(line, opts.merge(:quote_char => "\0"))
|
61
|
+
row = parse_line(line, clean_opts(opts).merge(:quote_char => "\0"))
|
55
62
|
row.map! { |val| safe_value(val) }
|
56
63
|
end
|
57
64
|
end
|
@@ -65,12 +65,10 @@ module Fech
|
|
65
65
|
# Converts everything that looks like an FEC-formatted date to a
|
66
66
|
# native Ruby Date object.
|
67
67
|
def dates
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
Date.parse(value)
|
73
|
-
end
|
68
|
+
# only convert fields whose name is date* or *_date*
|
69
|
+
# lots of other things might be 8 digits, and we have to exclude eg 'candidate'
|
70
|
+
t.convert :field => /(^|_)date/ do |value|
|
71
|
+
Date.parse(value) rescue value
|
74
72
|
end
|
75
73
|
end
|
76
74
|
|
data/lib/fech/filing.rb
CHANGED
@@ -66,7 +66,7 @@ module Fech
|
|
66
66
|
# @return [Array] the complete set of mapped hashes for matched lines
|
67
67
|
def rows_like(row_type, opts={}, &block)
|
68
68
|
data = []
|
69
|
-
each_row do |row|
|
69
|
+
each_row(:row_type => row_type) do |row|
|
70
70
|
value = parse_row?(row, opts.merge(:parse_if => row_type))
|
71
71
|
next if value == false
|
72
72
|
if block_given?
|
@@ -86,6 +86,8 @@ module Fech
|
|
86
86
|
# @option opts [Array] :include list of field names that should be included
|
87
87
|
# in the returned hash
|
88
88
|
def parse_row?(row, opts={})
|
89
|
+
return false if row.nil? || row.empty?
|
90
|
+
|
89
91
|
# Always parse, unless :parse_if is given and does not match row
|
90
92
|
if opts[:parse_if].nil? || \
|
91
93
|
Fech.regexify(opts[:parse_if]).match(row.first.downcase)
|
@@ -242,19 +244,19 @@ module Fech
|
|
242
244
|
def fix_f99_contents
|
243
245
|
@customized = true
|
244
246
|
content = file_contents.read
|
245
|
-
regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/
|
247
|
+
regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/mi # some use eg [EndText]
|
246
248
|
match = content.match(regex)
|
247
249
|
if match
|
248
250
|
repl = match[1].gsub(/"/, '""')
|
249
251
|
content.gsub(regex, "#{delimiter}\"#{repl}\"")
|
250
252
|
else
|
251
|
-
|
253
|
+
content
|
252
254
|
end
|
253
255
|
end
|
254
256
|
|
255
257
|
# Resave the "fixed" version of an F99
|
256
258
|
def resave_f99_contents
|
257
|
-
return if @resaved
|
259
|
+
return true if @resaved
|
258
260
|
File.open(custom_file_path, 'w') { |f| f.write(fix_f99_contents) }
|
259
261
|
@resaved = true
|
260
262
|
end
|
@@ -269,6 +271,7 @@ module Fech
|
|
269
271
|
|
270
272
|
# Iterates over and yields the Filing's lines
|
271
273
|
# @option opts [Boolean] :with_index yield both the item and its index
|
274
|
+
# @option opts [Boolean] :row_type yield only rows that match this type
|
272
275
|
# @yield [Array] a row of the filing, split by the delimiter from #delimiter
|
273
276
|
def each_row(opts={}, &block)
|
274
277
|
unless File.exists?(file_path)
|
@@ -276,10 +279,10 @@ module Fech
|
|
276
279
|
end
|
277
280
|
|
278
281
|
# If this is an F99, we need to parse it differently.
|
279
|
-
resave_f99_contents if
|
282
|
+
resave_f99_contents if ['F99', '"F99"'].include? form_type
|
280
283
|
|
281
284
|
c = 0
|
282
|
-
@csv_parser.parse_row(@customized ? custom_file_path : file_path, :col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true) do |row|
|
285
|
+
@csv_parser.parse_row(@customized ? custom_file_path : file_path, opts.merge(:col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true)) do |row|
|
283
286
|
if opts[:with_index]
|
284
287
|
yield [row, c]
|
285
288
|
c += 1
|
data/lib/fech/mappings.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Fech
|
2
|
+
class VersionError < RuntimeError; end
|
2
3
|
|
3
4
|
# Fech::Mappings loads a set of master mappings between labels and where
|
4
5
|
# their values can be found in Electronic Filings for various row types
|
@@ -58,7 +59,7 @@ module Fech
|
|
58
59
|
return hash[key] if Regexp.new(key, Regexp::IGNORECASE).match(label.to_s)
|
59
60
|
end
|
60
61
|
|
61
|
-
raise "Attempted to access mapping that has not been generated (#{label}). " +
|
62
|
+
raise VersionError, "Attempted to access mapping that has not been generated (#{label}). " +
|
62
63
|
"Supported keys match the format: #{hash.keys.join(', ')}"
|
63
64
|
end
|
64
65
|
|
data/lib/fech/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
version: "1.1"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Michael Strickland
|
@@ -18,8 +17,7 @@ autorequire:
|
|
18
17
|
bindir: bin
|
19
18
|
cert_chain: []
|
20
19
|
|
21
|
-
date: 2012-
|
22
|
-
default_executable:
|
20
|
+
date: 2012-11-13 00:00:00 Z
|
23
21
|
dependencies:
|
24
22
|
- !ruby/object:Gem::Dependency
|
25
23
|
name: fastercsv
|
@@ -319,7 +317,6 @@ files:
|
|
319
317
|
- spec/spec_helper.rb
|
320
318
|
- spec/translator_spec.rb
|
321
319
|
- tasks/fech.rake
|
322
|
-
has_rdoc: true
|
323
320
|
homepage: http://github.com/nytimes/fech
|
324
321
|
licenses: []
|
325
322
|
|
@@ -349,7 +346,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
349
346
|
requirements: []
|
350
347
|
|
351
348
|
rubyforge_project: fech
|
352
|
-
rubygems_version: 1.
|
349
|
+
rubygems_version: 1.8.17
|
353
350
|
signing_key:
|
354
351
|
specification_version: 3
|
355
352
|
summary: Ruby library for parsing FEC filings.
|