fech 1.0.1 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +4 -2
- data/README.rdoc +4 -1
- data/lib/fech.rb +1 -0
- data/lib/fech/csv.rb +12 -5
- data/lib/fech/default_translations.rb +4 -6
- data/lib/fech/filing.rb +9 -6
- data/lib/fech/mappings.rb +2 -1
- data/lib/fech/version.rb +1 -1
- metadata +4 -7
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fech (
|
4
|
+
fech (1.1)
|
5
5
|
fastercsv
|
6
6
|
people
|
7
7
|
|
@@ -10,7 +10,8 @@ GEM
|
|
10
10
|
specs:
|
11
11
|
columnize (0.3.6)
|
12
12
|
diff-lcs (1.1.2)
|
13
|
-
fastercsv (1.5.
|
13
|
+
fastercsv (1.5.5)
|
14
|
+
iconv (0.1)
|
14
15
|
linecache (0.43)
|
15
16
|
mocha (0.9.12)
|
16
17
|
people (0.2.1)
|
@@ -38,6 +39,7 @@ PLATFORMS
|
|
38
39
|
DEPENDENCIES
|
39
40
|
bundler
|
40
41
|
fech!
|
42
|
+
iconv
|
41
43
|
linecache (= 0.43)
|
42
44
|
mocha
|
43
45
|
rake (= 0.8.7)
|
data/README.rdoc
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
Fech makes it easy to parse electronic campaign finance filings[http://www.fec.gov/finance/disclosure/efile_search.shtml] by candidates, parties and political action committees from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data. Fech is an open source project of The New York Times, but contributions from anyone interested in working with F.E.C. filings are greatly appreciated.
|
8
8
|
|
9
|
-
Latest version: 1.
|
9
|
+
Latest version: 1.1
|
10
10
|
|
11
11
|
Fech is tested under Ruby versions 1.8.7, 1.9.2 and 1.9.3.
|
12
12
|
|
@@ -16,6 +16,7 @@ Can be found at Fech's Github page[http://nytimes.github.com/Fech/].
|
|
16
16
|
|
17
17
|
== News
|
18
18
|
|
19
|
+
* Nov. 13, 2012: Version 1.1 released. CSVDoctor skips rows that don't match row type being searched for, which provides a performance boost, and smaller bugfixes for Form 99 handling and date-field conversions. Thanks to Sai for several patches.
|
19
20
|
* June 16, 2012: Version 1.0.1 released. Bug-fix for older Form 2 support.
|
20
21
|
* April 11, 2012: Version 1.0.0 released! Support for Ruby 1.9.3 added, all form types supported.
|
21
22
|
* April 9, 2012: Version 1.0.0.rc1 released. Release candidate with backwards-incompatible change (renaming zip attribute to zip_code).
|
@@ -71,6 +72,8 @@ Derek Willis, dwillis@nytimes.com
|
|
71
72
|
|
72
73
|
Daniel Pritchett, daniel@sharingatwork.com
|
73
74
|
|
75
|
+
Sai, home@saizai.com
|
76
|
+
|
74
77
|
== Copyright
|
75
78
|
|
76
79
|
Copyright (c) 2012 The New York Times Company. See LICENSE for details.
|
data/lib/fech.rb
CHANGED
data/lib/fech/csv.rb
CHANGED
@@ -24,7 +24,11 @@ module Fech
|
|
24
24
|
# @param [String] file_path location of the filing on the file system
|
25
25
|
# @options opts passed through to FasterCSV
|
26
26
|
def self.parse_row(file_path, opts)
|
27
|
-
foreach(file_path, opts) { |row| yield row }
|
27
|
+
foreach(file_path, clean_opts(opts)) { |row| yield row }
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.clean_opts(opts)
|
31
|
+
opts.reject {|k,v| ![:col_sep, :quote_char].include?(k)}
|
28
32
|
end
|
29
33
|
|
30
34
|
end
|
@@ -33,13 +37,16 @@ module Fech
|
|
33
37
|
|
34
38
|
# Skips FasterCSV's whole-file wrapper, and passes each line in
|
35
39
|
# the file to a function that will parse it individually.
|
40
|
+
# @option opts [Boolean] :row_type yield only rows that match this type
|
36
41
|
def self.parse_row(file_path, opts)
|
37
|
-
opts.reject! {|k,v| ![:col_sep, :quote_char].include?(k)}
|
38
|
-
|
39
42
|
File.open(file_path, 'r').each do |line|
|
40
43
|
# Skip empty lines
|
41
44
|
next if line.strip.empty?
|
42
|
-
|
45
|
+
|
46
|
+
# Skip non-matching row-types
|
47
|
+
next if opts.key?(:row_type) && !Fech.regexify(opts[:row_type]).match(line)
|
48
|
+
|
49
|
+
yield safe_line(line, clean_opts(opts))
|
43
50
|
end
|
44
51
|
end
|
45
52
|
|
@@ -51,7 +58,7 @@ module Fech
|
|
51
58
|
begin
|
52
59
|
parse_line(line, opts)
|
53
60
|
rescue Fech::Csv::MalformedCSVError
|
54
|
-
row = parse_line(line, opts.merge(:quote_char => "\0"))
|
61
|
+
row = parse_line(line, clean_opts(opts).merge(:quote_char => "\0"))
|
55
62
|
row.map! { |val| safe_value(val) }
|
56
63
|
end
|
57
64
|
end
|
data/lib/fech/default_translations.rb
CHANGED
@@ -65,12 +65,10 @@ module Fech
|
|
65
65
|
# Converts everything that looks like an FEC-formatted date to a
|
66
66
|
# native Ruby Date object.
|
67
67
|
def dates
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
Date.parse(value)
|
73
|
-
end
|
68
|
+
# only convert fields whose name is date* or *_date*
|
69
|
+
# lots of other things might be 8 digits, and we have to exclude eg 'candidate'
|
70
|
+
t.convert :field => /(^|_)date/ do |value|
|
71
|
+
Date.parse(value) rescue value
|
74
72
|
end
|
75
73
|
end
|
76
74
|
|
data/lib/fech/filing.rb
CHANGED
@@ -66,7 +66,7 @@ module Fech
|
|
66
66
|
# @return [Array] the complete set of mapped hashes for matched lines
|
67
67
|
def rows_like(row_type, opts={}, &block)
|
68
68
|
data = []
|
69
|
-
each_row do |row|
|
69
|
+
each_row(:row_type => row_type) do |row|
|
70
70
|
value = parse_row?(row, opts.merge(:parse_if => row_type))
|
71
71
|
next if value == false
|
72
72
|
if block_given?
|
@@ -86,6 +86,8 @@ module Fech
|
|
86
86
|
# @option opts [Array] :include list of field names that should be included
|
87
87
|
# in the returned hash
|
88
88
|
def parse_row?(row, opts={})
|
89
|
+
return false if row.nil? || row.empty?
|
90
|
+
|
89
91
|
# Always parse, unless :parse_if is given and does not match row
|
90
92
|
if opts[:parse_if].nil? || \
|
91
93
|
Fech.regexify(opts[:parse_if]).match(row.first.downcase)
|
@@ -242,19 +244,19 @@ module Fech
|
|
242
244
|
def fix_f99_contents
|
243
245
|
@customized = true
|
244
246
|
content = file_contents.read
|
245
|
-
regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/
|
247
|
+
regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/mi # some use eg [EndText]
|
246
248
|
match = content.match(regex)
|
247
249
|
if match
|
248
250
|
repl = match[1].gsub(/"/, '""')
|
249
251
|
content.gsub(regex, "#{delimiter}\"#{repl}\"")
|
250
252
|
else
|
251
|
-
|
253
|
+
content
|
252
254
|
end
|
253
255
|
end
|
254
256
|
|
255
257
|
# Resave the "fixed" version of an F99
|
256
258
|
def resave_f99_contents
|
257
|
-
return if @resaved
|
259
|
+
return true if @resaved
|
258
260
|
File.open(custom_file_path, 'w') { |f| f.write(fix_f99_contents) }
|
259
261
|
@resaved = true
|
260
262
|
end
|
@@ -269,6 +271,7 @@ module Fech
|
|
269
271
|
|
270
272
|
# Iterates over and yields the Filing's lines
|
271
273
|
# @option opts [Boolean] :with_index yield both the item and its index
|
274
|
+
# @option opts [Boolean] :row_type yield only rows that match this type
|
272
275
|
# @yield [Array] a row of the filing, split by the delimiter from #delimiter
|
273
276
|
def each_row(opts={}, &block)
|
274
277
|
unless File.exists?(file_path)
|
@@ -276,10 +279,10 @@ module Fech
|
|
276
279
|
end
|
277
280
|
|
278
281
|
# If this is an F99, we need to parse it differently.
|
279
|
-
resave_f99_contents if
|
282
|
+
resave_f99_contents if ['F99', '"F99"'].include? form_type
|
280
283
|
|
281
284
|
c = 0
|
282
|
-
@csv_parser.parse_row(@customized ? custom_file_path : file_path, :col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true) do |row|
|
285
|
+
@csv_parser.parse_row(@customized ? custom_file_path : file_path, opts.merge(:col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true)) do |row|
|
283
286
|
if opts[:with_index]
|
284
287
|
yield [row, c]
|
285
288
|
c += 1
|
data/lib/fech/mappings.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Fech
|
2
|
+
class VersionError < RuntimeError; end
|
2
3
|
|
3
4
|
# Fech::Mappings loads a set of master mappings between labels and where
|
4
5
|
# their values can be found in Electronic Filings for various row types
|
@@ -58,7 +59,7 @@ module Fech
|
|
58
59
|
return hash[key] if Regexp.new(key, Regexp::IGNORECASE).match(label.to_s)
|
59
60
|
end
|
60
61
|
|
61
|
-
raise "Attempted to access mapping that has not been generated (#{label}). " +
|
62
|
+
raise VersionError, "Attempted to access mapping that has not been generated (#{label}). " +
|
62
63
|
"Supported keys match the format: #{hash.keys.join(', ')}"
|
63
64
|
end
|
64
65
|
|
data/lib/fech/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fech
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
version: "1.1"
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Michael Strickland
|
@@ -18,8 +17,7 @@ autorequire:
|
|
18
17
|
bindir: bin
|
19
18
|
cert_chain: []
|
20
19
|
|
21
|
-
date: 2012-
|
22
|
-
default_executable:
|
20
|
+
date: 2012-11-13 00:00:00 Z
|
23
21
|
dependencies:
|
24
22
|
- !ruby/object:Gem::Dependency
|
25
23
|
name: fastercsv
|
@@ -319,7 +317,6 @@ files:
|
|
319
317
|
- spec/spec_helper.rb
|
320
318
|
- spec/translator_spec.rb
|
321
319
|
- tasks/fech.rake
|
322
|
-
has_rdoc: true
|
323
320
|
homepage: http://github.com/nytimes/fech
|
324
321
|
licenses: []
|
325
322
|
|
@@ -349,7 +346,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
349
346
|
requirements: []
|
350
347
|
|
351
348
|
rubyforge_project: fech
|
352
|
-
rubygems_version: 1.
|
349
|
+
rubygems_version: 1.8.17
|
353
350
|
signing_key:
|
354
351
|
specification_version: 3
|
355
352
|
summary: Ruby library for parsing FEC filings.
|