RubyGems - fech - Versions diffs - 1.0.1 → 1.1 - Mend

fech 1.0.1 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

data/Gemfile.lock +4 -2
data/README.rdoc +4 -1
data/lib/fech.rb +1 -0
data/lib/fech/csv.rb +12 -5
data/lib/fech/default_translations.rb +4 -6
data/lib/fech/filing.rb +9 -6
data/lib/fech/mappings.rb +2 -1
data/lib/fech/version.rb +1 -1
metadata +4 -7

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    fech (0.9.10)
+    fech (1.1)
       fastercsv
       people
@@ -10,7 +10,8 @@ GEM
   specs:
     columnize (0.3.6)
     diff-lcs (1.1.2)
-    fastercsv (1.5.4)
+    fastercsv (1.5.5)
+    iconv (0.1)
     linecache (0.43)
     mocha (0.9.12)
     people (0.2.1)
@@ -38,6 +39,7 @@ PLATFORMS
 DEPENDENCIES
   bundler
   fech!
+  iconv
   linecache (= 0.43)
   mocha
   rake (= 0.8.7)

data/README.rdoc CHANGED Viewed

@@ -6,7 +6,7 @@
 Fech makes it easy to parse electronic campaign finance filings[http://www.fec.gov/finance/disclosure/efile_search.shtml] by candidates, parties and political action committees from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data. Fech is an open source project of The New York Times, but contributions from anyone interested in working with F.E.C. filings are greatly appreciated.
-Latest version: 1.0.1
+Latest version: 1.1
 Fech is tested under Ruby versions 1.8.7, 1.9.2 and 1.9.3.
@@ -16,6 +16,7 @@ Can be found at Fech's Github page[http://nytimes.github.com/Fech/].
 == News
+* Nov. 13, 2012: Version 1.1 released. CSVDoctor skips rows that don't match row type being searched for, which provides a performance boost, and smaller bugfixes for Form 99 handling and date-field conversions. Thanks to Sai for several patches.
 * June 16, 2012: Version 1.0.1 released. Bug-fix for older Form 2 support.
 * April 11, 2012: Version 1.0.0 released! Support for Ruby 1.9.3 added, all form types supported.
 * April 9, 2012: Version 1.0.0.rc1 released. Release candidate with backwards-incompatible change (renaming zip attribute to zip_code).
@@ -71,6 +72,8 @@ Derek Willis, dwillis@nytimes.com
 Daniel Pritchett, daniel@sharingatwork.com
+Sai, home@saizai.com
 == Copyright
 Copyright (c) 2012 The New York Times Company. See LICENSE for details.

data/lib/fech.rb CHANGED Viewed

@@ -8,6 +8,7 @@ require 'fech/fech_utils'
 require 'fech/map_generator'
 require 'fech/csv'
 require 'fech/comparison'
+require 'fech/version'
 module Fech
   extend FechUtils

data/lib/fech/csv.rb CHANGED Viewed

@@ -24,7 +24,11 @@ module Fech
     # @param [String] file_path location of the filing on the file system
     # @options opts passed through to FasterCSV
     def self.parse_row(file_path, opts)
-      foreach(file_path, opts) { |row| yield row }
+      foreach(file_path, clean_opts(opts)) { |row| yield row }
+    end
+    def self.clean_opts(opts)
+      opts.reject {|k,v| ![:col_sep, :quote_char].include?(k)}
     end
   end
@@ -33,13 +37,16 @@ module Fech
     # Skips FasterCSV's whole-file wrapper, and passes each line in
     # the file to a function that will parse it individually.
+    # @option opts [Boolean] :row_type yield only rows that match this type
     def self.parse_row(file_path, opts)
-      opts.reject! {|k,v| ![:col_sep, :quote_char].include?(k)}
       File.open(file_path, 'r').each do |line|
         # Skip empty lines
         next if line.strip.empty?
-        yield safe_line(line, opts)
+        # Skip non-matching row-types
+        next if opts.key?(:row_type) && !Fech.regexify(opts[:row_type]).match(line)
+        yield safe_line(line, clean_opts(opts))
       end
     end
@@ -51,7 +58,7 @@ module Fech
       begin
         parse_line(line, opts)
       rescue Fech::Csv::MalformedCSVError
-        row = parse_line(line, opts.merge(:quote_char => "\0"))
+        row = parse_line(line, clean_opts(opts).merge(:quote_char => "\0"))
         row.map! { |val| safe_value(val) }
       end
     end

data/lib/fech/default_translations.rb CHANGED Viewed

@@ -65,12 +65,10 @@ module Fech
     # Converts everything that looks like an FEC-formatted date to a
     # native Ruby Date object.
     def dates
-      t.convert do |value|
-        if /^\d{8}$/.match(value).nil?
-          value
-        else
-          Date.parse(value)
-        end
+      # only convert fields whose name is date* or *_date*
+      # lots of other things might be 8 digits, and we have to exclude eg 'candidate'
+      t.convert :field => /(^|_)date/ do |value|
+        Date.parse(value) rescue value
       end
     end

data/lib/fech/filing.rb CHANGED Viewed

@@ -66,7 +66,7 @@ module Fech
     # @return [Array] the complete set of mapped hashes for matched lines
     def rows_like(row_type, opts={}, &block)
       data = []
-      each_row do |row|
+      each_row(:row_type => row_type) do |row|
         value = parse_row?(row, opts.merge(:parse_if => row_type))
         next if value == false
         if block_given?
@@ -86,6 +86,8 @@ module Fech
     # @option opts [Array] :include list of field names that should be included
     #   in the returned hash
     def parse_row?(row, opts={})
+      return false if row.nil? || row.empty?
       # Always parse, unless :parse_if is given and does not match row
       if opts[:parse_if].nil? || \
           Fech.regexify(opts[:parse_if]).match(row.first.downcase)
@@ -242,19 +244,19 @@ module Fech
     def fix_f99_contents
       @customized = true
       content = file_contents.read
-      regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/m
+      regex = /\n\[BEGINTEXT\]\n(.*?)\[ENDTEXT\]\n/mi # some use eg [EndText]
       match = content.match(regex)
       if match
         repl = match[1].gsub(/"/, '""')
         content.gsub(regex, "#{delimiter}\"#{repl}\"")
       else
-        file_contents
+        content
       end
     end
     # Resave the "fixed" version of an F99
     def resave_f99_contents
-      return if @resaved
+      return true if @resaved
       File.open(custom_file_path, 'w') { |f| f.write(fix_f99_contents) }
       @resaved = true
     end
@@ -269,6 +271,7 @@ module Fech
     # Iterates over and yields the Filing's lines
     # @option opts [Boolean] :with_index yield both the item and its index
+    # @option opts [Boolean] :row_type yield only rows that match this type
     # @yield [Array] a row of the filing, split by the delimiter from #delimiter
     def each_row(opts={}, &block)
       unless File.exists?(file_path)
@@ -276,10 +279,10 @@ module Fech
       end
       # If this is an F99, we need to parse it differently.
-      resave_f99_contents if form_type == 'F99'
+      resave_f99_contents if ['F99', '"F99"'].include? form_type
       c = 0
-      @csv_parser.parse_row(@customized ? custom_file_path : file_path, :col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true) do |row|
+      @csv_parser.parse_row(@customized ? custom_file_path : file_path, opts.merge(:col_sep => delimiter, :quote_char => @quote_char, :skip_blanks => true)) do |row|
         if opts[:with_index]
           yield [row, c]
           c += 1

data/lib/fech/mappings.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 module Fech
+  class VersionError < RuntimeError; end
   # Fech::Mappings loads a set of master mappings between labels and where
   # their values can be found in Electronic Filings for various row types
@@ -58,7 +59,7 @@ module Fech
         return hash[key] if Regexp.new(key, Regexp::IGNORECASE).match(label.to_s)
       end
-      raise "Attempted to access mapping that has not been generated (#{label}). " +
+      raise VersionError, "Attempted to access mapping that has not been generated (#{label}). " +
             "Supported keys match the format: #{hash.keys.join(', ')}"
     end

data/lib/fech/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Fech
-  VERSION = "1.0.1"
+  VERSION = "1.1"
 end

metadata CHANGED Viewed

@@ -1,13 +1,12 @@
 --- !ruby/object:Gem::Specification
 name: fech
 version: !ruby/object:Gem::Version
-  hash: 21
+  hash: 13
   prerelease:
   segments:
   - 1
-  - 0
   - 1
-  version: 1.0.1
+  version: "1.1"
 platform: ruby
 authors:
 - Michael Strickland
@@ -18,8 +17,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-16 00:00:00 -04:00
-default_executable:
+date: 2012-11-13 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fastercsv
@@ -319,7 +317,6 @@ files:
 - spec/spec_helper.rb
 - spec/translator_spec.rb
 - tasks/fech.rake
-has_rdoc: true
 homepage: http://github.com/nytimes/fech
 licenses: []
@@ -349,7 +346,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: fech
-rubygems_version: 1.4.2
+rubygems_version: 1.8.17
 signing_key:
 specification_version: 3
 summary: Ruby library for parsing FEC filings.