irs_pub78 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ .idea
2
+ .DS_Store
3
+ data
4
+ Gemfile.lock
5
+ irs*.gem
6
+ coverage
data/Changelog ADDED
File without changes
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Dependencies are declared in irs.gemspec and pulled in from there.
gemspec
5
+
data/LICENSE ADDED
@@ -0,0 +1,3 @@
1
+ This project is subject to the Apache License 2.0, dated 2004.
2
+
3
+ The full license is referenced here: http://www.apache.org/licenses/LICENSE-2.0.html
data/README.rdoc ADDED
@@ -0,0 +1,62 @@
1
+ = Overview
2
+
3
+ == About this IRS.gem
4
+
5
+ This IRS .gem is designed to improve the accessibility of public nonprofit information
6
+ provided by the IRS via Publication 78 online.
7
+
8
+ == NOTICE
9
+
10
+ This .gem is not provided or maintained by the IRS.
11
+
12
+ == Usage
13
+
14
+ irb
15
+ require 'irs'
16
+ i = Irs::Client.new
17
+ i.download
18
+ i.unzip
19
+ i.parse_txt_to_json # writes data/irs.json
20
+
21
+ == Notes
22
+
23
+ IRS Pub 78
24
+ The source is a .zip file.
25
+ There are 800,000+ records in the file.
26
+
27
+ This .gem does not return more than 500 nonprofits per city search, a limit that most large cities exceed.
28
+ Support for larger result sets may be added in the future if a use case requires it.
29
+
30
+ == Compatibility
31
+
32
+ This wrapper is developed against Ruby 1.9.2, using RSpec and SimpleCov for testing.
33
+
34
+ = License
35
+
36
+ Copyright (c) 2011 Ryan Wold
37
+
38
+ Permission is hereby granted, free of charge, to any person obtaining
39
+ a copy of this software and associated documentation files (the
40
+ "Software"), to deal in the Software without restriction, including
41
+ without limitation the rights to use, copy, modify, merge, publish,
42
+ distribute, sublicense, and/or sell copies of the Software, and to
43
+ permit persons to whom the Software is furnished to do so, subject to
44
+ the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be
47
+ included in all copies or substantial portions of the Software.
48
+
49
+ You will notify the author and give permission to have your software
50
+ listed on this and other websites of the author as using the Software.
51
+
52
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
53
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
54
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
55
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
56
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
57
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
58
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
59
+
60
+ = Credits
61
+
62
+ Ryan Wold
data/irs.gemspec ADDED
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for irs_pub78: metadata, packaged files and dependencies.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "irs_pub78"
  # Versions are strings; a Float literal such as 0.1 cannot express "0.10" or "0.1.1".
  s.version     = "0.1"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Ryan Wold"]
  s.email       = "rwold@morequality.org"
  s.summary     = "Wrapper for the IRS.gov's Publication 78"
  s.homepage    = "http://github.com/morequality/IRS_Pub78"
  s.description = "Wrapper for the IRS.gov's Publication 78"

  s.rubyforge_project = "irs_pub78"

  # The packaged file lists come from git, so the gem must be built inside a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Needed only to run the test suite.
  s.add_development_dependency('rspec', '~> 2.6')
  s.add_development_dependency('simplecov', '~> 0.4.0')

  # Runtime dependencies are installed in development as well, so they are
  # declared once here; listing the same gem as both a development and a
  # runtime dependency is reported as a duplicate by RubyGems validation.
  s.add_dependency('nokogiri', '~> 1.5.0')
  s.add_dependency('json', '~> 1.5.0')
  s.add_dependency('rubyzip', '~> 0.9.4')
end
data/lib/irs.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'irs/client'
2
+
3
+ module Irs
4
+ end
data/lib/irs/client.rb ADDED
@@ -0,0 +1,83 @@
1
+ module Irs
2
+ class Client
3
+
4
+ require 'open-uri'
5
+ require 'zip/zip'
6
+ require 'json'
7
+ require 'nokogiri'
8
+
9
+ $debug = false # set to true to debug
10
+
11
# Searches the IRS Publication 78 online listing for nonprofits in one city.
#
# The request asks for up to 500 results per page sorted by name, and the
# returned HTML is parsed with Nokogiri and handed to process_nodes.
# The request URL is kept in @url.
#
# @param city [String] city name; surrounding whitespace is dropped and the
#   value is downcased and URL-encoded before it goes into the query string
# @param state_id [String] state abbreviation; stripped, upcased and URL-encoded
# @return [Array<Hash>] whatever process_nodes builds from the result table
# @raise [ArgumentError] when either argument is nil, empty or only whitespace
def find(city = "", state_id = "")
  city = city.to_s.strip
  state_id = state_id.to_s.strip
  raise ArgumentError, "city and state_id are both required" if city.empty? || state_id.empty?

  # URI.escape was removed in Ruby 3.0 and left "&" and "=" untouched, so a
  # value containing them could add or override query parameters.
  city_param = URI.encode_www_form_component(city.downcase)
  state_param = URI.encode_www_form_component(state_id.upcase)
  @url = "http://www.irs.gov/app/pub-78/search.do?resultsPerPage=500&nameSearchTypeStarts=false&names=&nameSearchTypeAll=false&city=#{city_param}&state=#{state_param}&country=USA&deductibility=all&sortColumn=name&indexOfFirstRow=0&isDescending=false&dispatchMethod=search"

  # Kernel#open no longer accepts URLs on Ruby 3.0+. Reading through the parsed
  # URI (an open-uri extension) works on old and new Rubies alike.
  html = URI.parse(@url).read
  process_nodes(Nokogiri::HTML(html))
end
18
+
19
# Converts the search-results table of a parsed page into plain hashes.
#
# Every row matched by ".epi-dataTable tr" yields one hash with the string
# keys "name", "city", "state_id" and "code", taken from the row's first four
# td cells. A cell that is absent is recorded as the string 'error', so a row
# without any td cells produces a hash whose values are all 'error'.
#
# @param nodes [#css, nil] parsed document (a Nokogiri document when called
#   from find); nil is treated as "no results"
# @return [Array<Hash>] one hash per table row, in document order
def process_nodes(nodes = nil)
  # The signature advertises nil as the default, so honour it instead of
  # failing with NoMethodError on nil.css.
  return [] if nodes.nil?

  columns = %w[name city state_id code]
  nodes.css(".epi-dataTable tr").map do |row|
    cells = row.css("td") # look the cells up once per row, not once per column
    attrs = {}
    columns.each_with_index do |key, index|
      cell = cells[index]
      # An explicit presence check replaces the blanket `rescue` modifier, so
      # only a missing cell maps to 'error' and real failures stay visible.
      attrs[key] = cell ? cell.text.strip : 'error'
    end
    attrs
  end
end
34
+
35
+ # ----------------------------------------------------------------------------------------------------------
36
+
37
# Downloads the Publication 78 archive from IRS.gov to data/eopub78.zip,
# relative to the current working directory.
#
# An archive that is already present is left untouched and a notice is printed
# instead. The download URL is kept in @url.
#
# @return [File, nil] the File that was written (closed once the block ends),
#   or nil when the download was skipped
def download
  if File.exist?("data/eopub78.zip")
    puts "The file has already been downloaded. Delete and try again if you really want the latest."
    return
  end

  @url = "http://www.irs.gov/pub/irs-utl/eopub78.zip"

  # Create the target directory before starting the transfer, so a directory
  # problem surfaces before the whole archive has been fetched.
  Dir.mkdir("data") unless File.directory?("data")

  # Kernel#open no longer accepts URLs on Ruby 3.0+. Reading through the parsed
  # URI (an open-uri extension) works on old and new Rubies alike.
  archive = URI.parse(@url).read
  File.open("data/eopub78.zip", "wb") { |f| f << archive }
end
54
+
55
# Extracts one entry of data/eopub78.zip to data/irs.txt.
#
# @param entry_name [String] name of the entry inside the archive. The default
#   is the dated file name the original code was written against; pass another
#   name if the archive's entry is named differently.
# @return whatever the archive's extract call returns
def unzip(entry_name = "eopub780611.txt")
  # The block form closes the archive when the block exits, even if extraction
  # raises; the previous bare open left the zip handle open.
  Zip::ZipFile.open("data/eopub78.zip") do |archive|
    archive.extract(entry_name, "data/irs.txt")
  end
end
59
+
60
# Converts the fixed-width text file data/irs.txt into data/irs.json.
#
# Every line of the input becomes one record with the keys :name, :city,
# :state_id and :code, each cut from a fixed character range and stripped of
# surrounding whitespace. A line too short to contain a field yields "" for it.
# No line is skipped: the first line is converted like any other.
# When the global $debug is truthy, the 1-based line number is printed per line.
#
# @return [File] the output File the JSON was written to (closed on return)
def parse_txt_to_json
  # Inclusive character ranges of each fixed-width column.
  columns = {
    :name     => 0..105,
    :city     => 106..136,
    :state_id => 137..138,
    :code     => 141..141
  }

  # File.readlines closes the file itself; File.open(...).readlines left the
  # handle open until garbage collection.
  entities = File.readlines("data/irs.txt").each_with_index.map do |line, index|
    puts index + 1 if $debug
    columns.each_with_object({}) do |(field, range), record|
      # Slicing past the end of a short line gives nil; to_s turns that into ""
      # without the blanket `rescue` modifier that hid every other error too.
      record[field] = line[range].to_s.strip
    end
  end

  File.open("data/irs.json", "w") { |f| f << entities.to_json }
end
79
+
80
+
81
+ end
82
+
83
+ end
File without changes
data/spec/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ # enable SimpleCov
2
+ require 'simplecov'
3
+ SimpleCov.start
4
+
5
+ require 'rubygems'
6
+ require 'bundler/setup'
7
+ require 'irs'
8
+
9
+ RSpec.configure do |config|
10
+ end
11
+
12
# Absolute path of the "fixtures" directory located beside this file.
def fixture_path
  containing_dir = File.dirname(File.expand_path(__FILE__))
  File.join(containing_dir, "fixtures")
end
15
+
16
# Opens the named fixture file from the fixtures directory.
#
# @param file [String] file name relative to fixture_path
# @return [File] an open File handle for the fixture
def fixture(file)
  full_path = fixture_path + '/' + file
  File.new(full_path)
end
data/spec/irs_spec.rb ADDED
@@ -0,0 +1,31 @@
1
require 'helper'

# fixtures = YAML.load(ERB.new(File.new(File.dirname(__FILE__) + '/fixtures/fixtures.yml').read).result)

# Client instance intended to be shared by the examples below.
client = Irs::Client.new

# Download / unzip / parse workflow of Irs::Client.
describe Irs, "::Client.new" do
  # Declared without a body so RSpec reports it as pending. The previous body
  # asserted on a local `file` whose assignment was commented out, so the
  # example always failed with NameError. Intended checks once the HTTP
  # request can be stubbed:
  #   file = client.download
  #   file.should be_kind_of(File)
  #   a file should exist
  #   it should be over 10mb, arbitrarily
  it "should download"

  it "should unzip" do
  end

  it "should parse the unzipped txt file to json" do
  end

end

describe Irs, "::Other Methods" do
  before do
  end

  it "should do something" do
  end
end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: irs_pub78
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Wold
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-28 00:00:00.000000000 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &70137131188020 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '2.6'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *70137131188020
26
+ - !ruby/object:Gem::Dependency
27
+ name: simplecov
28
+ requirement: &70137131187380 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.4.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *70137131187380
37
+ - !ruby/object:Gem::Dependency
38
+ name: nokogiri
39
+ requirement: &70137131167680 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ~>
43
+ - !ruby/object:Gem::Version
44
+ version: 1.5.0
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *70137131167680
48
+ - !ruby/object:Gem::Dependency
49
+ name: json
50
+ requirement: &70137131167080 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: 1.5.0
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *70137131167080
59
+ - !ruby/object:Gem::Dependency
60
+ name: rubyzip
61
+ requirement: &70137131166620 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: 0.9.4
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *70137131166620
70
+ - !ruby/object:Gem::Dependency
71
+ name: nokogiri
72
+ requirement: &70137131166060 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.5.0
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: *70137131166060
81
+ - !ruby/object:Gem::Dependency
82
+ name: json
83
+ requirement: &70137131165540 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ~>
87
+ - !ruby/object:Gem::Version
88
+ version: 1.5.0
89
+ type: :runtime
90
+ prerelease: false
91
+ version_requirements: *70137131165540
92
+ - !ruby/object:Gem::Dependency
93
+ name: rubyzip
94
+ requirement: &70137131164980 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: 0.9.4
100
+ type: :runtime
101
+ prerelease: false
102
+ version_requirements: *70137131164980
103
+ description: Wrapper for the IRS.gov's Publication 78
104
+ email: rwold@morequality.org
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - .gitignore
110
+ - Changelog
111
+ - Gemfile
112
+ - LICENSE
113
+ - README.rdoc
114
+ - irs.gemspec
115
+ - lib/irs.rb
116
+ - lib/irs/client.rb
117
+ - spec/fixtures/fixtures.yml
118
+ - spec/helper.rb
119
+ - spec/irs_spec.rb
120
+ has_rdoc: true
121
+ homepage: http://github.com/morequality/IRS_Pub78
122
+ licenses: []
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project: irs_pub78
141
+ rubygems_version: 1.6.2
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Wrapper for the IRS.gov's Publication 78
145
+ test_files:
146
+ - spec/fixtures/fixtures.yml
147
+ - spec/helper.rb
148
+ - spec/irs_spec.rb