irs_pub78 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ .idea
2
+ .DS_Store
3
+ data
4
+ Gemfile.lock
5
+ irs*.gem
6
+ coverage
data/Changelog ADDED
File without changes
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in irs.gemspec.
gemspec
5
+
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ This project is subject to the Apache License 2.0, dated 2004.
2
+
3
+ The full license is referenced here: http://www.apache.org/licenses/LICENSE-2.0.html
data/README.rdoc ADDED
@@ -0,0 +1,62 @@
1
+ = Overview
2
+
3
+ == About this IRS.gem
4
+
5
+ This IRS .gem is designed to improve the accessibility of public nonprofit information
6
+ provided by the IRS via Publication 78 online.
7
+
8
+ == NOTICE
9
+
10
+ This .gem is not provided or maintained by the IRS.
11
+
12
+ == Usage
13
+
14
+ irb
15
+ load 'irs.rb'
16
+ i = Irs::Client.new
17
+ i.download
18
+ i.unzip
19
+ i.parse_txt_to_json => /data/irs.json
20
+
21
+ == Notes
22
+
23
+ IRS Pub 78
24
+ The source is a .zip file.
25
+ There are 800,000+ records in the file.
26
+
27
+ This .gem's city/state search returns at most 500 nonprofits per query, and most large cities have more than that.
28
+ Support for larger result sets may be added in the future if a use case requires it.
29
+
30
+ == Compatibility
31
+
32
+ This wrapper is developed against Ruby 1.9.2, using RSpec and SimpleCov for testing.
33
+
34
+ = License
35
+
36
+ Copyright (c) 2011 Ryan Wold
37
+
38
+ Permission is hereby granted, free of charge, to any person obtaining
39
+ a copy of this software and associated documentation files (the
40
+ "Software"), to deal in the Software without restriction, including
41
+ without limitation the rights to use, copy, modify, merge, publish,
42
+ distribute, sublicense, and/or sell copies of the Software, and to
43
+ permit persons to whom the Software is furnished to do so, subject to
44
+ the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be
47
+ included in all copies or substantial portions of the Software.
48
+
49
+ You will notify the author and give permission to have your software
50
+ listed on this and other websites of the author as using the Software.
51
+
52
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
53
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
54
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
55
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
56
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
57
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
58
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
59
+
60
+ = Credits
61
+
62
+ Ryan Wold
data/irs.gemspec ADDED
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for irs_pub78, a wrapper for the IRS's Publication 78 data.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "irs_pub78"
  # Versions are strings; a Float literal would silently collapse 0.10 into "0.1".
  s.version     = "0.1"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ryan Wold"]
  s.email       = "rwold@morequality.org"
  s.summary     = "Wrapper for the IRS.gov's Publication 78"
  s.homepage    = "http://github.com/morequality/IRS_Pub78"
  s.description = "Wrapper for the IRS.gov's Publication 78"

  # rubyforge_project is no longer set: RubyForge has shut down and current
  # RubyGems releases deprecate the attribute.

  # Package what git tracks. When git is not installed, or the gem is built
  # outside a checkout (git prints nothing), fall back to globbing the
  # conventional gem layout so the package is not silently empty.
  tracked_files = begin
    `git ls-files`.split("\n")
  rescue Errno::ENOENT
    []
  end
  if tracked_files.empty?
    tracked_files = Dir.glob("{bin,lib,spec,test,features}/**/*").select { |path| File.file?(path) }.sort +
                    Dir.glob("{Changelog,Gemfile,LICENSE,README*,*.gemspec}").sort
  end

  s.files         = tracked_files
  # Derived from the file list instead of a second shell-out: the previous
  # `{test,spec,features}/*` pattern depended on shell brace expansion, which
  # a plain POSIX sh does not perform.
  s.test_files    = tracked_files.grep(%r{\A(?:test|spec|features)/})
  s.executables   = tracked_files.grep(%r{\Abin/}) { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Development-only tools.
  s.add_development_dependency('rspec', '~> 2.6')
  s.add_development_dependency('simplecov', '~> 0.4.0')

  # Runtime dependencies. They are declared once only: listing the same gem as
  # both a development and a runtime dependency is reported by RubyGems as a
  # duplicate dependency when the gem is built.
  s.add_dependency('nokogiri', '~> 1.5.0')
  s.add_dependency('json', '~> 1.5.0')
  s.add_dependency('rubyzip', '~> 0.9.4')
end
data/lib/irs.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'irs/client'
2
+
3
+ module Irs
4
+ end
data/lib/irs/client.rb ADDED
@@ -0,0 +1,83 @@
1
+ module Irs
2
+ class Client
3
+
4
+ require 'open-uri'
5
+ require 'zip/zip'
6
+ require 'json'
7
+ require 'nokogiri'
8
+
9
+ $debug = false # set to true to debug
10
+
11
# Search the IRS Publication 78 web application for nonprofits in a city.
#
# city     - city name (String); lower-cased before it is sent.
# state_id - state abbreviation (String); upper-cased before it is sent.
#
# The request asks for up to 500 results (resultsPerPage=500), starting at the
# first row and sorted by name. The request URL is stored in @url.
#
# Returns the value of process_nodes for the parsed response.
# Raises ArgumentError when city or state_id is nil or empty.
def find(city = "", state_id = "")
  raise ArgumentError, "city and state_id are required" if city.to_s.empty? || state_id.to_s.empty?

  # URI.escape was removed in Ruby 3.0; form-encode both caller-supplied values
  # so neither of them can break the query string.
  city_param  = URI.encode_www_form_component(city.downcase)
  state_param = URI.encode_www_form_component(state_id.upcase)
  @url = "http://www.irs.gov/app/pub-78/search.do?resultsPerPage=500&nameSearchTypeStarts=false&names=&nameSearchTypeAll=false&city=#{city_param}&state=#{state_param}&country=USA&deductibility=all&sortColumn=name&indexOfFirstRow=0&isDescending=false&dispatchMethod=search"

  # Kernel#open no longer opens URLs on Ruby 3.0+; open-uri's URI#read fetches
  # the response body and closes the underlying stream itself.
  stream = URI.parse(@url).read
  nodes = Nokogiri::HTML(stream)
  process_nodes(nodes)
end
18
+
19
# Turn the results table of a parsed Pub 78 search page into plain hashes.
#
# nodes - a parsed Nokogiri document (any object whose #css returns rows that
#         themselves respond to #css works); nil is treated as "no results".
#
# Every row matched by ".epi-dataTable tr" that contains at least one <td>
# becomes a Hash with the keys "name", "city", "state_id" and "code", taken
# from the first four cells in that order and stripped of surrounding
# whitespace. Rows without any <td> (for example a header row made only of
# <th> cells) are skipped instead of being emitted as all-'error' records.
# A cell that is missing from a data row still yields the placeholder 'error'.
#
# Returns an Array of Hashes (empty when nodes is nil or nothing matches).
def process_nodes(nodes = nil)
  return [] if nodes.nil?

  columns = %w[name city state_id code]
  nodes.css(".epi-dataTable tr").each_with_object([]) do |row, nonprofits|
    cells = row.css("td") # query the cells once per row, not once per column
    next if cells.empty?

    attrs = {}
    columns.each_with_index do |column, index|
      cell = cells[index]
      attrs[column] = cell ? cell.text.strip : 'error'
    end
    nonprofits << attrs
  end
end
34
+
35
+ # ----------------------------------------------------------------------------------------------------------
36
+
37
# Fetch the Publication 78 archive from IRS.gov and save it as
# data/eopub78.zip, relative to the current working directory.
#
# When data/eopub78.zip already exists nothing is fetched: a notice is printed
# and nil is returned. Otherwise the data directory is created if it is
# missing and the archive is written in binary mode. The source URL is stored
# in @url.
#
# Returns the (already closed) File that was written, or nil when skipped.
def download
  if File.exist? "data/eopub78.zip"
    puts "The file has already been downloaded. Delete and try again if you really want the latest."
    return
  end

  @url = "http://www.irs.gov/pub/irs-utl/eopub78.zip"
  # Kernel#open no longer opens URLs on Ruby 3.0+; open-uri's URI#read fetches
  # the body and closes the response stream itself.
  stream = URI.parse(@url).read

  directory_name = File.join(Dir.pwd, "data")
  Dir.mkdir(directory_name) unless File.directory?(directory_name)

  File.open("data/eopub78.zip", "wb") { |f| f << stream }
end
54
+
55
# Extract the Publication 78 text file from data/eopub78.zip to data/irs.txt.
#
# entry_name - name of the archive member to extract. Defaults to
#              "eopub780611.txt", the member this method has always
#              extracted; pass another name when the archive holds a
#              differently named file.
#
# The archive is opened with a block so that it is closed again as soon as
# the extraction has finished.
def unzip(entry_name = "eopub780611.txt")
  Zip::ZipFile.open("data/eopub78.zip") do |archive|
    archive.extract(entry_name, "data/irs.txt")
  end
end
59
+
60
# Convert the fixed-width data/irs.txt into data/irs.json.
#
# Every line of the text file becomes one record with these columns
# (0-based character offsets, surrounding whitespace stripped):
#   name     0..105
#   city     106..136
#   state_id 137..138
#   code     141
# A line that is too short for a column yields "" for that column.
#
# NOTE(review): the previous comment said the first line was a header to be
# ignored, but the code has always converted every line, including the first.
# That behaviour is kept; confirm against the data file whether a header
# actually exists.
#
# When the global $debug is truthy the running record count is printed.
#
# Returns the (already closed) File the JSON was written to.
def parse_txt_to_json
  # Cut one fixed-width column out of a line. A slice past the end of the line
  # is nil, which becomes "". Best-effort as before: text that cannot be
  # stripped because of its encoding also becomes "".
  field = lambda do |line, range|
    begin
      line[range].to_s.strip
    rescue ArgumentError, EncodingError
      ""
    end
  end

  entities = []
  # File.foreach streams the file line by line and closes it when done, rather
  # than leaving an open handle behind and holding every raw line in memory.
  File.foreach("data/irs.txt").each_with_index do |line, index|
    entities << {
      :name     => field.call(line, 0..105),
      :city     => field.call(line, 106..136),
      :state_id => field.call(line, 137..138),
      :code     => field.call(line, 141..141)
    }
    puts index + 1 if $debug
  end

  File.open("data/irs.json", "w") { |f| f << entities.to_json }
end
79
+
80
+
81
+ end
82
+
83
+ end
File without changes
data/spec/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ # enable SimpleCov
2
+ require 'simplecov'
3
+ SimpleCov.start
4
+
5
+ require 'rubygems'
6
+ require 'bundler/setup'
7
+ require 'irs'
8
+
9
+ RSpec.configure do |config|
10
+ end
11
+
12
# Absolute path of the "fixtures" directory that sits next to this file.
def fixture_path
  File.expand_path("../fixtures", __FILE__)
end

# Open a fixture file.
#
# file - file name relative to fixture_path.
#
# Returns an open File; the caller is responsible for closing it.
# Raises Errno::ENOENT when the fixture does not exist.
def fixture(file)
  File.new(File.join(fixture_path, file))
end
data/spec/irs_spec.rb ADDED
@@ -0,0 +1,31 @@
1
require 'helper'

# fixtures = YAML.load(ERB.new(File.new(File.dirname(__FILE__) + '/fixtures/fixtures.yml').read).result)

# Specs for the public methods of Irs::Client.
#
# The examples are declared without a body so that RSpec reports them as
# pending rather than letting empty bodies pass vacuously.
describe Irs, "::Client.new" do
  # Client instance for the examples in this group (built lazily per example).
  let(:client) { Irs::Client.new }

  # The previous body asserted on a local named `file` that was never assigned
  # (the download call was commented out), so it always failed with NameError.
  # TODO: stub the HTTP request, then assert that
  #   - client.download returns a File,
  #   - a file exists on disk,
  #   - it is over 10mb, arbitrarily.
  it "should download"

  it "should unzip"

  it "should parse the unzipped txt file to json"
end

describe Irs, "::Other Methods" do
  it "should do something"
end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: irs_pub78
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Wold
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-28 00:00:00.000000000 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &70137131188020 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '2.6'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *70137131188020
26
+ - !ruby/object:Gem::Dependency
27
+ name: simplecov
28
+ requirement: &70137131187380 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.4.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *70137131187380
37
+ - !ruby/object:Gem::Dependency
38
+ name: nokogiri
39
+ requirement: &70137131167680 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 1.5.0
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *70137131167680
48
+ - !ruby/object:Gem::Dependency
49
+ name: json
50
+ requirement: &70137131167080 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: 1.5.0
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *70137131167080
59
+ - !ruby/object:Gem::Dependency
60
+ name: rubyzip
61
+ requirement: &70137131166620 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: 0.9.4
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *70137131166620
70
+ - !ruby/object:Gem::Dependency
71
+ name: nokogiri
72
+ requirement: &70137131166060 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.5.0
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: *70137131166060
81
+ - !ruby/object:Gem::Dependency
82
+ name: json
83
+ requirement: &70137131165540 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ~>
87
+ - !ruby/object:Gem::Version
88
+ version: 1.5.0
89
+ type: :runtime
90
+ prerelease: false
91
+ version_requirements: *70137131165540
92
+ - !ruby/object:Gem::Dependency
93
+ name: rubyzip
94
+ requirement: &70137131164980 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: 0.9.4
100
+ type: :runtime
101
+ prerelease: false
102
+ version_requirements: *70137131164980
103
+ description: Wrapper for the IRS.gov's Publication 78
104
+ email: rwold@morequality.org
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - .gitignore
110
+ - Changelog
111
+ - Gemfile
112
+ - LICENSE
113
+ - README.rdoc
114
+ - irs.gemspec
115
+ - lib/irs.rb
116
+ - lib/irs/client.rb
117
+ - spec/fixtures/fixtures.yml
118
+ - spec/helper.rb
119
+ - spec/irs_spec.rb
120
+ has_rdoc: true
121
+ homepage: http://github.com/morequality/IRS_Pub78
122
+ licenses: []
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project: irs_pub78
141
+ rubygems_version: 1.6.2
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Wrapper for the IRS.gov's Publication 78
145
+ test_files:
146
+ - spec/fixtures/fixtures.yml
147
+ - spec/helper.rb
148
+ - spec/irs_spec.rb