myl-fech 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. data/.gitignore +7 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +4 -0
  4. data/Gemfile.lock +48 -0
  5. data/LICENSE +13 -0
  6. data/README.rdoc +82 -0
  7. data/Rakefile +3 -0
  8. data/autotest/discover.rb +1 -0
  9. data/fech.gemspec +40 -0
  10. data/lib/fech/comparison.rb +36 -0
  11. data/lib/fech/csv.rb +70 -0
  12. data/lib/fech/default_translations.rb +133 -0
  13. data/lib/fech/fech_utils.rb +76 -0
  14. data/lib/fech/filing.rb +341 -0
  15. data/lib/fech/map_generator.rb +233 -0
  16. data/lib/fech/mapped.rb +38 -0
  17. data/lib/fech/mappings.rb +67 -0
  18. data/lib/fech/rendered_maps.rb +238 -0
  19. data/lib/fech/translator.rb +138 -0
  20. data/lib/fech/version.rb +3 -0
  21. data/lib/fech.rb +15 -0
  22. data/sources/F1.csv +106 -0
  23. data/sources/F1M.csv +78 -0
  24. data/sources/F2.csv +43 -0
  25. data/sources/F24.csv +18 -0
  26. data/sources/F3.csv +1 -0
  27. data/sources/F3L.csv +27 -0
  28. data/sources/F3P.csv +208 -0
  29. data/sources/F3P31.csv +39 -0
  30. data/sources/F3PS.csv +94 -0
  31. data/sources/F3S.csv +36 -0
  32. data/sources/F3X.csv +125 -0
  33. data/sources/F4.csv +86 -0
  34. data/sources/F5.csv +39 -0
  35. data/sources/F56.csv +33 -0
  36. data/sources/F57.csv +44 -0
  37. data/sources/F6.csv +1 -0
  38. data/sources/F65.csv +1 -0
  39. data/sources/F7.csv +1 -0
  40. data/sources/F76.csv +1 -0
  41. data/sources/F9.csv +46 -0
  42. data/sources/F91.csv +17 -0
  43. data/sources/F92.csv +23 -0
  44. data/sources/F93.csv +27 -0
  45. data/sources/F94.csv +18 -0
  46. data/sources/F99.csv +1 -0
  47. data/sources/H1.csv +1 -0
  48. data/sources/H2.csv +1 -0
  49. data/sources/H3.csv +1 -0
  50. data/sources/H4.csv +1 -0
  51. data/sources/H5.csv +1 -0
  52. data/sources/H6.csv +1 -0
  53. data/sources/HDR.csv +10 -0
  54. data/sources/SchA.csv +50 -0
  55. data/sources/SchB.csv +50 -0
  56. data/sources/SchC.csv +41 -0
  57. data/sources/SchC1.csv +52 -0
  58. data/sources/SchC2.csv +19 -0
  59. data/sources/SchD.csv +34 -0
  60. data/sources/SchE.csv +57 -0
  61. data/sources/SchF.csv +55 -0
  62. data/sources/SchL.csv +1 -0
  63. data/sources/TEXT.csv +1 -0
  64. data/sources/headers/3.csv +1 -0
  65. data/sources/headers/5.0.csv +1 -0
  66. data/sources/headers/5.1.csv +1 -0
  67. data/sources/headers/5.2.csv +1 -0
  68. data/sources/headers/5.3.csv +1 -0
  69. data/sources/headers/6.1.csv +1 -0
  70. data/sources/headers/6.2.csv +1 -0
  71. data/sources/headers/6.3.csv +1 -0
  72. data/sources/headers/6.4.csv +1 -0
  73. data/sources/headers/7.0.csv +49 -0
  74. data/sources/headers/8.0.csv +49 -0
  75. data/sources/headers/ignore.csv +5 -0
  76. data/spec/comparison_spec.rb +30 -0
  77. data/spec/data/467627.fec +608 -0
  78. data/spec/data/723604.fec +4 -0
  79. data/spec/data/730635.fec +2 -0
  80. data/spec/data/747058.fec +4 -0
  81. data/spec/data/748730.fec +1196 -0
  82. data/spec/data/752356.fec +5 -0
  83. data/spec/data/753533.fec +7 -0
  84. data/spec/data/764901.fec +7 -0
  85. data/spec/data/765310.fec +2 -0
  86. data/spec/data/767339.fec +648 -0
  87. data/spec/data/82094.fec +144 -0
  88. data/spec/data/97405.fec +10 -0
  89. data/spec/default_translations_spec.rb +104 -0
  90. data/spec/fech_utils_spec.rb +29 -0
  91. data/spec/filing_spec.rb +314 -0
  92. data/spec/map_generator_spec.rb +49 -0
  93. data/spec/mapped_spec.rb +44 -0
  94. data/spec/mappings_spec.rb +46 -0
  95. data/spec/sources/F24.csv +18 -0
  96. data/spec/sources/F3P.csv +1 -0
  97. data/spec/sources/F3P31.csv +39 -0
  98. data/spec/sources/SchA.csv +1 -0
  99. data/spec/sources/SchB.csv +1 -0
  100. data/spec/sources/SchC.csv +1 -0
  101. data/spec/sources/headers/3.csv +1 -0
  102. data/spec/sources/headers/5.0.csv +1 -0
  103. data/spec/sources/headers/5.1.csv +1 -0
  104. data/spec/sources/headers/5.2.csv +1 -0
  105. data/spec/sources/headers/5.3.csv +1 -0
  106. data/spec/sources/headers/6.1.csv +1 -0
  107. data/spec/sources/headers/6.2.csv +1 -0
  108. data/spec/sources/headers/6.3.csv +1 -0
  109. data/spec/sources/headers/6.4.csv +1 -0
  110. data/spec/sources/headers/7.0.csv +1 -0
  111. data/spec/sources/headers/8.0.csv +49 -0
  112. data/spec/sources/headers/ignore.csv +5 -0
  113. data/spec/sources/sa.csv +1 -0
  114. data/spec/spec_helper.rb +9 -0
  115. data/spec/translator_spec.rb +195 -0
  116. data/tasks/fech.rake +41 -0
  117. metadata +342 -0
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ sources/rows/*
2
+ pkg/*
3
+ .bundle
4
+ *.gem
5
+ .yardoc/*
6
+ doc/*
7
+
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --profile
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in fech.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,48 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fech (0.9.10)
5
+ fastercsv
6
+ people
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ columnize (0.3.6)
12
+ diff-lcs (1.1.2)
13
+ fastercsv (1.5.4)
14
+ linecache (0.43)
15
+ mocha (0.9.12)
16
+ people (0.2.1)
17
+ rake (0.8.7)
18
+ rcov (0.9.9)
19
+ rdoc (3.9.2)
20
+ rspec (2.6.0)
21
+ rspec-core (~> 2.6.0)
22
+ rspec-expectations (~> 2.6.0)
23
+ rspec-mocks (~> 2.6.0)
24
+ rspec-core (2.6.4)
25
+ rspec-expectations (2.6.0)
26
+ diff-lcs (~> 1.1.2)
27
+ rspec-mocks (2.6.0)
28
+ ruby-debug (0.10.4)
29
+ columnize (>= 0.1)
30
+ ruby-debug-base (~> 0.10.4.0)
31
+ ruby-debug-base (0.10.4)
32
+ linecache (>= 0.3)
33
+ yard (0.7.2)
34
+
35
+ PLATFORMS
36
+ ruby
37
+
38
+ DEPENDENCIES
39
+ bundler
40
+ fech!
41
+ linecache (= 0.43)
42
+ mocha
43
+ rake (= 0.8.7)
44
+ rcov
45
+ rdoc
46
+ rspec (~> 2.6)
47
+ ruby-debug
48
+ yard
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2011 The New York Times Company
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this library except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.rdoc ADDED
@@ -0,0 +1,82 @@
1
+ ______ ______ ______ __
2
+ /\ ___\ /\ ___\ /\ ___\ /\ \___
3
+ \ \ __\ \ \ __\ \ \ \____ \ \ __ \
4
+ \ \_\ \ \_____\ \ \_____\ \ \_\ \_\
5
+ \/_/ \/_____/ \/_____/ \/_/\/_/
6
+
7
+ Fech makes it easy to parse electronic campaign finance filings[http://www.fec.gov/finance/disclosure/efile_search.shtml] by candidates, parties and political action committees from the Federal Election Commission. It lets you access filing attributes the same way regardless of filing version, and works as a framework for cleaning and filing data. Fech is an open source project of The New York Times, but contributions from anyone interested in working with F.E.C. filings are greatly appreciated.
8
+
9
+
10
+ NOTE: You're looking at the Make Your Laws fork of New York Times' Fech gem. It has bugfixes and features that NYT's doesn't. Take a look at the commit history / network graph.
11
+
12
+ Latest (myl-fech) version: 1.0.3
13
+
14
+
15
+ Fech is tested under Ruby versions 1.8.7, 1.9.2 and 1.9.3.
16
+
17
+ == Documentation
18
+
19
+ Can be found at Fech's Github page[http://nytimes.github.com/Fech/].
20
+
21
+ == News
22
+
23
+ * June 16, 2012: Version 1.0.1 released. Bug-fix for older Form 2 support.
24
+ * April 11, 2012: Version 1.0.0 released! Support for Ruby 1.9.3 added, all form types supported.
25
+ * April 9, 2012: Version 1.0.0.rc1 released. Release candidate with backwards-incompatible change (renaming zip attribute to zip_code).
26
+ * March 29, 2012: Version 0.9.10 released. Bug-fix for Form 24 in versions 6.4 and 7.0.
27
+ * March 28, 2012: Version 0.9.9 released. Bug-fix to add support for F3XA form type, support for Schedule H and L.
28
+ * March 23, 2012: Version 0.9.6 and 0.9.5 released. Bug-fixes for F6 mappings.
29
+ * March 10, 2012: Version 0.9.4 released. Added support for F6 filings.
30
+ * March 8, 2012: Version 0.9.3 released. Bug-fix for F2 & F24 mappings.
31
+ * Feb. 29, 2012: Version 0.9.2 released. Bug-fix for F3 mappings, added filing comparison class.
32
+ * Feb. 21, 2012: Versions 0.9.0, 0.9.1 released. Added support for alternative CSV Parsers, F4 filings.
33
+ * Feb. 19, 2012: Version 0.8.2 released. Added layouts for F1M and F2 filings.
34
+ * Feb. 15, 2012: Version 0.8.1 released. Bug-fix to support F3 termination filings.
35
+ * Feb. 13, 2012: Version 0.8.0 released. Layouts for form 3 and form 1 added, for parsing House candidate committee filings.
36
+ * Feb. 11, 2012: Version 0.7.0 released. Layouts for form 9 added, for parsing electioneering communications filings.
37
+ * Jan. 28, 2012: Version 0.6.0 released. Added support for quoted fields.
38
+ * Nov. 22, 2011: Version 0.5.0 released. Layouts for form 3X added, for parsing filings from non-candidate committees.
39
+ * Nov. 13, 2011: Version 0.3.0 released. Layouts for forms 24, 5, 56 and 57 added, for parsing independent expenditure filings.
40
+ * Nov. 4, 2011: Version 0.2.1 released. Bug-fix release to address a problem with the :include option for selecting only certain columns. Thanks to Aaron Bycoffe for the report and patch.
41
+
42
+ == Installation
43
+
44
+ Install Fech as a gem:
45
+
46
+ gem install myl-fech
47
+
48
+ For use in a Rails 3 application, put the following in your Gemfile:
49
+
50
+ gem 'myl-fech', :require => 'fech'
51
+
52
+ then issue the 'bundle install' command. Fech has been tested under Ruby 1.8.7, 1.9.2 and JRuby 1.6.3.
53
+
54
+ == How to contribute
55
+
56
+ Fech's goal is to provide support for all electronically filed forms and their row types, so contributors can pick a form type that is currently unsupported and try to implement it, or improve an existing implementation. For entirely new form types, please include specs showing successful parsing of the summary. A good way to start is to look at the mappings for a similar form type. Please note that Fech currently commits to supporting the F.E.C.'s filing formats back to version 3.0, where applicable.
57
+
58
+ Bug reports and feature requests are welcomed by submitting an Issue. Please be advised that development is focused on parsing filings, which may contain errors or be improperly filed, and not on providing wrappers or helper methods for working with filings in another context.
59
+
60
+ To get started, fork the repo, make your additions or changes and send a pull request.
61
+
62
+ == Pronunciation guide
63
+
64
+ It's "fetch", with a soft "ch" sound. There are no other acceptable pronunciations.
65
+
66
+ == Authors
67
+
68
+ Michael Strickland, michael.strickland@nytimes.com
69
+
70
+ Evan Carmi, evan@ecarmi.org
71
+
72
+ Aaron Bycoffe, bycoffe@huffingtonpost.com
73
+
74
+ Derek Willis, dwillis@nytimes.com
75
+
76
+ Daniel Pritchett, daniel@sharingatwork.com
77
+
78
+ Sai, github@saizai.com
79
+
80
+ == Copyright
81
+
82
+ Copyright (c) 2012 The New York Times Company & Sai. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+ Dir.glob('tasks/*.rake').each { |r| import r }
@@ -0,0 +1 @@
1
+ Autotest.add_discovery { "rspec2" }
data/fech.gemspec ADDED
@@ -0,0 +1,40 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for the Make Your Laws fork of Fech ("myl-fech").
$:.push File.expand_path("../lib", __FILE__)
require "fech/version"

Gem::Specification.new do |s|
  s.name        = "myl-fech"
  s.version     = Fech::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Michael Strickland", "Evan Carmi", "Aaron Bycoffe", "Derek Willis", "Sai"]
  s.email       = ["dwillis@gmail.com"]
  s.homepage    = "http://github.com/MakeYourLaws/fech"
  s.summary     = %q{Ruby library for parsing FEC filings.}
  s.description = %q{A Ruby library for interacting with electronic filings from the Federal Election Commission.}

  s.rubyforge_project = "fech"

  # The packaged file lists are whatever git currently tracks.
  s.files         = `git ls-files`.split("\n")
  # A single-element brace group ("{spec}") is not expanded by the shell, so
  # the pathspec would be taken literally and match nothing; name the spec
  # directory directly instead.
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "fastercsv"
  s.add_dependency "people"

  # Debugger (and iconv) gems differ between the 1.8 and 1.9 interpreters.
  if RUBY_VERSION < "1.9"
    s.add_development_dependency "linecache", "0.43"
    s.add_development_dependency "ruby-debug"
    s.add_development_dependency "iconv"
  end
  if RUBY_VERSION >= "1.9"
    s.add_development_dependency "ruby-debug19"
    s.add_development_dependency "linecache19"
  end

  # Development dependencies shared by every interpreter
  s.add_development_dependency "rake", "0.8.7"
  s.add_development_dependency "rspec", "~> 2.6"
  s.add_development_dependency "mocha"
  s.add_development_dependency "bundler"
  s.add_development_dependency "rcov"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "yard"
end
@@ -0,0 +1,36 @@
1
module Fech
  # Fech::Comparison takes two Filing objects and does comparisons on them,
  # checking for differences between an original and amended filing, or
  # two filings covering the same period in different years.
  class Comparison
    attr_accessor :filing_1, :filing_2

    # Create a new Comparison object by passing in two Filing objects.
    # Filing objects need to be downloaded first:
    #
    #   f1 = Fech::Filing.new(767437)
    #   f1.download
    #   f2 = Fech::Filing.new(751798)
    #   f2.download
    #   comparison = Fech::Comparison.new(f1, f2)
    #   comparison.summary
    #
    # @param filing_1 the first filing (must respond to #summary and #rows_like)
    # @param filing_2 the filing to compare against (same interface)
    # @param [Hash] opts accepted for interface compatibility; currently unused
    def initialize(filing_1, filing_2, opts={})
      @filing_1 = filing_1
      @filing_2 = filing_2
    end

    # Compares the summary of filing_1 with the summary of filing_2 (an
    # earlier or later version of the filing) and returns a hash of the
    # mapped fields whose values differ. Modelled on Rails' Hash#diff:
    # https://github.com/rails/rails/blob/master/activesupport/lib/active_support/core_ext/hash/diff.rb
    #
    # The result holds filing_1's value for every key whose value differs in
    # filing_2, plus every key/value that exists only in filing_2.
    #
    # Both summaries are duplicated before filtering so that neither filing's
    # own summary hash is modified. Filtering filing_1's hash in place would
    # also corrupt the second half of the diff whenever #summary hands back
    # the same object on each call.
    #
    # @return a copy of filing_1's summary (same class) reduced to the diff
    def summary
      first  = @filing_1.summary
      second = @filing_2.summary

      changed      = first.dup.delete_if { |key, value| second[key] == value }
      only_in_second = second.dup.delete_if { |key, _value| first.has_key?(key) }

      changed.merge!(only_in_second)
    end

    # Compares a schedule of itemized records from one filing to another.
    #
    # @param [String,Symbol] schedule the row type to compare (converted with to_sym)
    # @return the rows of filing_1 for that schedule that are not present in
    #   filing_2, i.e. records that are new or have changed
    def schedule(schedule)
      row_type = schedule.to_sym
      @filing_1.rows_like(row_type) - @filing_2.rows_like(row_type)
    end

  end
end
data/lib/fech/csv.rb ADDED
@@ -0,0 +1,70 @@
1
+ require 'csv' if RUBY_VERSION > '1.9'
2
+ require 'fastercsv' unless RUBY_VERSION > '1.9'
3
+
4
+ # Fech::Csv is a wrapper that provides simple CSV handling consistency
5
+ # between Ruby 1.8 and Ruby 1.9.
6
+ #
7
+ # 1.8's "FasterCSV" library has been brought into the Ruby core as of 1.9
8
+ # as "CSV". The methods are the same so no overloading should be needed.
9
module Fech
  # Pick whichever CSV implementation is loaded as the superclass. Referencing
  # an undefined CSV constant raises NameError, which the bare rescue catches,
  # falling back to FasterCSV.
  begin
    # Ruby 1.9 and up compatibility
    class Csv < CSV
    end
  rescue
    # 1.8 compatibility
    class Csv < FasterCSV
    end
  end

  class Csv

    # Loads a given file and parses it, yielding each parsed row to the block.
    # Basic wrapper around the underlying library's foreach.
    # @param [String] file_path location of the filing on the file system
    # @param [Hash] opts passed through to the underlying CSV library
    # @yield [row] each parsed row
    def self.parse_row(file_path, opts)
      foreach(file_path, opts) { |row| yield row }
    end

  end

  class CsvDoctor < Fech::Csv

    # Skips the CSV library's whole-file wrapper, and passes each line in
    # the file to a function that will parse it individually.
    # @param [String] file_path location of the filing on the file system
    # @param [Hash] opts parsing options; only :col_sep and :quote_char are
    #   used. The caller's hash is left untouched.
    # @yield [row] the parsed fields of each non-blank line
    def self.parse_row(file_path, opts)
      # Filter into a new hash rather than stripping keys from the caller's.
      line_opts = opts.reject { |key, _value| ![:col_sep, :quote_char].include?(key) }

      # Block form so the file handle is closed even if parsing or the
      # caller's block raises.
      File.open(file_path, 'r') do |file|
        file.each do |line|
          # Skip empty lines
          next if line.strip.empty?
          yield safe_line(line, line_opts)
        end
      end
    end

    # Tries to parse the line with parse_line and the given quote_char.
    # If this fails, try again with the "\0" quote_char and clean up each
    # resulting value individually.
    # @param [String] line the file's row to parse
    # @param [Hash] opts :quote_char is the quote_char to try initially
    # @return [Array] the parsed fields
    def self.safe_line(line, opts)
      begin
        parse_line(line, opts)
      rescue Fech::Csv::MalformedCSVError
        row = parse_line(line, opts.merge(:quote_char => "\0"))
        row.map! { |val| safe_value(val) }
      end
    end

    # Removes extraneous quotes from values.
    # @param val a single field; anything that is not a String is returned as-is
    # @return the unquoted value, or the original value when it cannot be
    #   parsed as a single CSV field
    def self.safe_value(val)
      return val unless val.is_a?(String)
      begin
        parsed = parse_line(val)
        # parse_line yields nil for an empty string; keep the original value
        # instead of calling #first on nil.
        parsed.nil? ? val : parsed.first
      rescue Fech::Csv::MalformedCSVError
        val
      end
    end

  end
end
@@ -0,0 +1,133 @@
1
module Fech

  # Stores sets of built-in translations that can be mixed in to a Fech::Filing.
  # Contains functions that accept a Translator, and add arbitrary translations
  # to it. The public function names should correspond to the key used to mix it in.
  #
  #   filing = Fech::Filing.new(XXXXXX, :translate => [:names, :dates])
  class DefaultTranslations

    # The five bits that make up a name, and their labels in the People gem.
    # The two lists are positionally paired (see split_name_into_components).
    NAME_BITS = [:prefix, :first_name, :middle_name, :last_name, :suffix]
    PEOPLE_BITS = [:title, :first, :middle, :last, :suffix]

    attr_reader :t

    # @param translator the translator that translations are registered on;
    #   it must respond to #combine and #convert
    def initialize(translator)
      @t = translator
    end

    # Splits composite names into its component parts, and combines those parts
    # into composites where appropriate. Assumes that the canonical names of the
    # fields follow the pattern:
    #   * FIELD_name - "Mr. John Charles Smith Sr."
    #   * FIELD_prefix - "Mr."
    #   * FIELD_first_name - "John"
    #   * FIELD_middle_name - "Charles"
    #   * FIELD_last_name - "Smith"
    #   * FIELD_suffix - "Sr."
    def names

      # COMBINE split names into composite names for these rows
      composites = [
        {:row => :sa, :version => /^[6-7]/, :field => [:contributor, :donor_candidate]},
        {:row => :sb, :version => /^[6-7]/, :field => [:payee, :beneficiary_candidate]},
        {:row => :sc, :version => /^[6-7]/, :field => [:lender, :lender_candidate]},
        {:row => :sc1, :version => /^[6-7]/, :field => [:treasurer, :authorized]},
        {:row => :sc2, :version => /^[6-7]/, :field => :guarantor},
        {:row => :sd, :version => /^[6-7]/, :field => :creditor},
        {:row => :se, :version => /^[6-7]/, :field => [:payee, :candidate]},
        {:row => :sf, :version => /^[6-7]/, :field => [:payee, :payee_candidate]},
        {:row => :f3p, :version => /^[6-7]/, :field => :treasurer},
        {:row => :f3p31, :version => /^[6-7]/, :field => :contributor},
      ]
      # SPLIT composite names into component parts for these rows
      components = [
        {:row => :sa, :version => /^3|(5.0)/, :field => :contributor},
        {:row => :sa, :version => /^[3-5]/, :field => :donor_candidate},
        {:row => :sb, :version => /^3|(5.0)/, :field => :payee},
        {:row => :sb, :version => /^[3-5]/, :field => :beneficiary_candidate},
        {:row => :sc, :version => /^[3-5]/, :field => [:lender, :lender_candidate]},
        {:row => :sc1, :version => /^[3-5]/, :field => [:treasurer, :authorized]},
        {:row => :sc2, :version => /^[3-5]/, :field => :guarantor},
        {:row => :sd, :version => /^[3-5]/, :field => :creditor},
        # :candidate here mirrors the :se entry in the composites list above
        {:row => :se, :version => /^[3-5]/, :field => [:payee, :candidate]},
        {:row => :sf, :version => /^[3-5]/, :field => [:payee, :payee_candidate]},
        {:row => :f3p, :version => /^[3-5]/, :field => :treasurer},
        {:row => :f3p31, :version => /^[3-5]/, :field => :contributor},
      ]

      composites.each { |c| combine_components_into_name(c) }
      components.each { |c| split_name_into_components(c) }

    end

    # Converts everything that looks like an FEC-formatted date to a
    # native Ruby Date object. Values that Date.parse rejects are left as-is.
    def dates
      # only convert fields whose name is date* or *_date*
      # lots of other things might be 8 digits, and we have to exclude eg 'candidate'
      t.convert :field => /(^|_)date/ do |value|
        Date.parse(value) rescue value
      end
    end

    private

    # Turns "Allred^Ann^Mrs.^III" into "Mrs. Ann Allred III".
    #
    # Note: `private` does not apply to methods defined with `def self.`, so
    # this stays publicly callable; it is invoked through self.class below.
    #
    # @param [String] name caret-delimited name (last^first^prefix^suffix order,
    #   per the example above)
    # @return [String] the parts in reading order, space separated
    def self.fix_carrot_names(name)
      name = name.split("^").reverse
      # after reversing, the suffix leads the list: move it to the end
      name.push name.shift if name.size > 3
      name.join(" ")
    end

    # Create a Translation for the given row, version named as :field
    # that joins the row's five name bits into one "FIELD_name" value.
    # @param [Hash] composite :row, :version and :field (a Symbol or an Array
    #   of Symbols; a lone Symbol is wrapped into an Array in place)
    # @raise [ArgumentError] if composite is nil
    def combine_components_into_name(composite)
      raise ArgumentError, "Must pass a :row, :version AND :field" if composite.nil?
      composite[:field] = [composite[:field]] unless composite[:field].is_a?(Array)

      composite[:field].each do |field|
        t.combine(:row => composite[:row], :version => composite[:version],
                  :field => "#{field}_name") do |row|

          # Gather each name_bit from the parsed row, and join it into one value
          bits = NAME_BITS.collect do |field_name|
            row.send("#{field}_#{field_name}".to_sym)
          end
          bits.compact.join(" ")
        end
      end
    end

    # Create a Translation for all five name bits, that will strip
    # out its respective bit from an already-populated composite name field.
    # @param [Hash] component :row, :version and :field (a Symbol or an Array
    #   of Symbols; a lone Symbol is wrapped into an Array in place)
    # @raise [ArgumentError] if component is nil
    def split_name_into_components(component)
      raise ArgumentError, "Must pass a :row, :version AND :field" if component.nil?
      component[:field] = [component[:field]] unless component[:field].is_a?(Array)

      component[:field].each do |field|
        NAME_BITS.zip(PEOPLE_BITS).each do |field_name, people_name|
          t.combine(:row => component[:row], :version => component[:version],
                    :field => "#{field}_#{field_name}") do |row|

            # Grab the original, composite name
            name = row.send("#{field}_name")

            if name.nil?
              nil
            else
              # Fix various name formatting errors
              name = self.class.fix_carrot_names(name) unless name.index("^").nil?

              # Extract just the component you want
              (Fech::Translator::NAME_PARSER.parse(name)[people_name] || "").strip
            end
          end
        end
      end
    end

  end

end
@@ -0,0 +1,76 @@
1
+ # Contains helper functions and static variables used by various
2
+ # Fech classes.
3
module FechUtils

  # All supported row types pointed to regular expressions that will correctly
  # match that row type in the wild. If multiple matches exist, Fech will match
  # the longest regex pattern found.
  #
  # NOTE(review): inside a character class "|" is a literal pipe, so classes
  # such as [a|n] also accept "|"; and :f1m uses a negated class ([^a|n]),
  # which rejects "F1MA"/"F1MN" - confirm both are intended.
  ROW_TYPES = {
    :hdr   => /^hdr$/i,
    :f1    => /^f1/i,
    :f1m   => /(^f1m[^a|n])/i,
    :f2    => /(^f2$)|(^f2[^4])/i,
    :f24   => /(^f24$)|(^f24[an])/i,
    :f3    => /^f3[a|n|t]/i,
    :f3l   => /^f3l[a|n]/i,
    :f3p   => /(^f3p$)|(^f3p[^s|3])/i,
    :f3s   => /^f3s/i,
    :f3p31 => /^f3p31/i,
    :f3ps  => /^f3ps/i,
    :f3x   => /(^f3x$)|(^f3x[ant])/i,
    :f4    => /^f4[na]/i,
    :f5    => /^f5[na]/i,
    :f56   => /^f56/i,
    :f57   => /^f57/i,
    :f6    => /(^f6$)|(^f6[an])/i,
    :f65   => /^f65/i,
    :f7    => /^f7[na]/i,
    :f76   => /^f76/i,
    :f9    => /^f9/i,
    :f91   => /^f91/i,
    :f92   => /^f92/i,
    :f93   => /^f93/i,
    :f94   => /^f94/i,
    :f99   => /^f99/i,
    :h1    => /^h1/i,
    :h2    => /^h2/i,
    :h3    => /^h3/i,
    :h4    => /^h4/i,
    :h5    => /^h5/i,
    :h6    => /^h6/i,
    :sa    => /^sa/i,
    :sb    => /^sb/i,
    :sc    => /^sc[^1-2]/i,
    :sc1   => /^sc1/i,
    :sc2   => /^sc2/i,
    :sd    => /^sd/i,
    :se    => /^se/i,
    :sf    => /^sf/i,
    :sl    => /^sl/i,
    :text  => /^text/i,
  }

  # Converts symbols and strings to Regexp objects for use in regex-keyed maps.
  # Assumes that symbols should be matched literally, strings unanchored.
  #
  # * Regexp - rebuilt from its source as a case-insensitive pattern
  # * Symbol - the ROW_TYPES pattern when the symbol is a known row type,
  #   otherwise a case-insensitive pattern anchored with ^ and $
  # * anything else - its to_s, escaped, as an unanchored case-insensitive pattern
  #
  # @param [String,Symbol,Regexp] label the object to convert to a Regexp
  # @return [Regexp]
  def regexify(label)
    if label.is_a?(Regexp)
      Regexp.new(label.source, Regexp::IGNORECASE)
    elsif label.is_a?(Symbol)
      if ROW_TYPES.key?(label)
        ROW_TYPES[label]
      else
        Regexp.new("^#{label.to_s}$", Regexp::IGNORECASE)
      end
    else
      Regexp.new(Regexp.escape(label.to_s), Regexp::IGNORECASE)
    end
  end

  # get symbol (eg :f1) based on the literal row type returned (eg 'f1nwhatever')
  # this is used to pick which table to store it in, since it's one table per base type (not per equivalent variant)
  #
  # Patterns are tried longest first (by the length of Regexp#to_s), so the
  # most specific row type wins when several match.
  #
  # @param [String] row_type the literal row type from a filing
  # @return [Symbol, nil] the matching ROW_TYPES key, or nil when no pattern
  #   matches
  def base_type(row_type)
    longest_first = ROW_TYPES.sort { |x, y| y[1].to_s.length <=> x[1].to_s.length }
    match = longest_first.find { |_type, pattern| row_type =~ pattern }
    # find yields nil when nothing matched; avoid calling #first on it
    match && match.first
  end

end