propublica-nonprofits 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/Gemfile.lock +34 -15
- data/README.md +4 -6
- data/bin/build-and-publish-gem +7 -0
- data/bin/console +0 -1
- data/lib/propublica/nonprofits.rb +78 -14
- data/lib/propublica/nonprofits/organization.rb +23 -9
- data/lib/propublica/nonprofits/organization/dynamic_parser.rb +7 -5
- data/lib/propublica/nonprofits/version.rb +1 -1
- data/propublica-nonprofits.gemspec +14 -14
- metadata +30 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c05c2a3977841806bac525c3c642db9a94bf0755c3d098c4852276b092b49168
|
4
|
+
data.tar.gz: ecbbd8799e9d43592ea52d8e2ab4754f74fef539dec2a3eab0d7a415513acf1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e30af0a7b67f51505022bf3a6e8b63f91fff5bb702c5a531dbed24585194d8e9d0f966b4097d128c67d1a98bc26e44ca533acd64e4681fa6d7ab2ef17da55c3
|
7
|
+
data.tar.gz: 9c34f58bc0c29ccabd62af0d62974f71f3557a907551e0f83521e657b20e5534f4afa9c78d1203dd83270be48cd3f7420059dba90069e263eb885c1e58628c30
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,48 +1,67 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
propublica-nonprofits (
|
4
|
+
propublica-nonprofits (1.0.0)
|
5
5
|
faraday
|
6
6
|
json
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
addressable (2.
|
12
|
-
public_suffix (>= 2.0.2, <
|
11
|
+
addressable (2.8.0)
|
12
|
+
public_suffix (>= 2.0.2, < 5.0)
|
13
13
|
coderay (1.1.2)
|
14
14
|
crack (0.4.3)
|
15
15
|
safe_yaml (~> 1.0.0)
|
16
|
-
faraday (
|
16
|
+
faraday (1.5.1)
|
17
|
+
faraday-em_http (~> 1.0)
|
18
|
+
faraday-em_synchrony (~> 1.0)
|
19
|
+
faraday-excon (~> 1.1)
|
20
|
+
faraday-httpclient (~> 1.0.1)
|
21
|
+
faraday-net_http (~> 1.0)
|
22
|
+
faraday-net_http_persistent (~> 1.1)
|
23
|
+
faraday-patron (~> 1.0)
|
17
24
|
multipart-post (>= 1.2, < 3)
|
18
|
-
|
19
|
-
|
25
|
+
ruby2_keywords (>= 0.0.4)
|
26
|
+
faraday-em_http (1.0.0)
|
27
|
+
faraday-em_synchrony (1.0.0)
|
28
|
+
faraday-excon (1.1.0)
|
29
|
+
faraday-httpclient (1.0.1)
|
30
|
+
faraday-net_http (1.0.1)
|
31
|
+
faraday-net_http_persistent (1.2.0)
|
32
|
+
faraday-patron (1.0.0)
|
33
|
+
hashdiff (1.0.0)
|
34
|
+
json (2.5.1)
|
20
35
|
method_source (0.9.0)
|
21
36
|
minitest (5.11.3)
|
22
|
-
|
37
|
+
minitest-line (0.6.5)
|
38
|
+
minitest (~> 5.0)
|
39
|
+
multipart-post (2.1.1)
|
23
40
|
pry (0.11.3)
|
24
41
|
coderay (~> 1.1.0)
|
25
42
|
method_source (~> 0.9.0)
|
26
|
-
public_suffix (
|
27
|
-
rake (
|
28
|
-
|
43
|
+
public_suffix (4.0.6)
|
44
|
+
rake (13.0.1)
|
45
|
+
ruby2_keywords (0.0.4)
|
46
|
+
safe_yaml (1.0.5)
|
29
47
|
vcr (4.0.0)
|
30
|
-
webmock (3.
|
48
|
+
webmock (3.7.6)
|
31
49
|
addressable (>= 2.3.6)
|
32
50
|
crack (>= 0.3.2)
|
33
|
-
hashdiff
|
51
|
+
hashdiff (>= 0.4.0, < 2.0.0)
|
34
52
|
|
35
53
|
PLATFORMS
|
36
54
|
ruby
|
37
55
|
|
38
56
|
DEPENDENCIES
|
39
|
-
bundler (
|
57
|
+
bundler (>= 2.2.10)
|
40
58
|
minitest (~> 5.0)
|
59
|
+
minitest-line
|
41
60
|
propublica-nonprofits!
|
42
61
|
pry
|
43
|
-
rake (~>
|
62
|
+
rake (~> 13.0)
|
44
63
|
vcr
|
45
64
|
webmock
|
46
65
|
|
47
66
|
BUNDLED WITH
|
48
|
-
|
67
|
+
2.2.23
|
data/README.md
CHANGED
@@ -40,8 +40,11 @@ To search the API and return an array of Propublica::Nonprofits::Objects, use th
|
|
40
40
|
result.details.deductibility_code
|
41
41
|
result.filings_with_data.first.totassetsend
|
42
42
|
|
43
|
+
Want to be as memory and API efficient as possible? Be lazy!
|
44
|
+
`Propublica::Nonprofits.lazy_search("rural").first(10)` will only fetch first 10 items
|
45
|
+
|
43
46
|
*Note: This will currently only return the first 100 organizations*
|
44
|
-
*Note: This is returned as
|
47
|
+
*Note: This is returned as an Array*
|
45
48
|
|
46
49
|
|
47
50
|
### EIN Lookup - As Organization
|
@@ -80,11 +83,6 @@ consider using the `find_attributes` method.
|
|
80
83
|
attributes.dig("organization", "name")
|
81
84
|
attributes.dig("filings_without_data").first.dig("pdf_url")
|
82
85
|
|
83
|
-
## TODO
|
84
|
-
|
85
|
-
- Add more robust searching parameters https://projects.propublica.org/nonprofits/api/#endpoint-search-example
|
86
|
-
- Handle paginated search results (with Enumerator::Lazy)
|
87
|
-
|
88
86
|
## Development
|
89
87
|
|
90
88
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/console
CHANGED
@@ -1,19 +1,78 @@
|
|
1
1
|
require "propublica/nonprofits/version"
|
2
2
|
require "propublica/nonprofits/organization"
|
3
3
|
|
4
|
-
require
|
5
|
-
require
|
4
|
+
require "faraday"
|
5
|
+
require "json"
|
6
6
|
|
7
|
-
API_BASE_URL = %(https://projects.propublica.org
|
7
|
+
API_BASE_URL = %(https://projects.propublica.org)
|
8
|
+
API_SEARCH_PATH = %(nonprofits/api/v2/search.json)
|
9
|
+
|
10
|
+
def self.results(column_names)
|
11
|
+
columns = column_names.each
|
12
|
+
|
13
|
+
Enumerator.new do |yielder|
|
14
|
+
loop do
|
15
|
+
yielder << Event.where(columns.next => query)
|
16
|
+
end
|
17
|
+
|
18
|
+
yielder << Event.fuzzy_search(query)
|
19
|
+
end
|
20
|
+
end
|
8
21
|
|
9
22
|
module Propublica
|
10
23
|
module Nonprofits
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
24
|
+
class DataNotFetched < StandardError
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
|
28
|
+
search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
|
29
|
+
.flat_map(&:itself)
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.lazy_search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
|
33
|
+
search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
|
34
|
+
.lazy
|
35
|
+
.flat_map(&:itself)
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.search_results(term, state: nil, ntee: nil, page: nil, fetch_all: false)
|
39
|
+
raise ArgumentError.new("`page` and `fetch_all` are both: choose one or the other") if fetch_all && page
|
40
|
+
page ||= 0
|
41
|
+
max_pages = nil
|
42
|
+
|
43
|
+
Enumerator.new do |yielder|
|
44
|
+
loop do
|
45
|
+
params = {}
|
46
|
+
params["q"] = term
|
47
|
+
params["state[id]"] = state if state
|
48
|
+
params["ntee[id]"] = ntee if ntee
|
49
|
+
params["page"] = page if page
|
50
|
+
|
51
|
+
response = Faraday.default_connection.get("#{API_BASE_URL}/#{API_SEARCH_PATH}", params)
|
52
|
+
parsed_response =
|
53
|
+
if response.body.is_a?(Hash)
|
54
|
+
response.body
|
55
|
+
else
|
56
|
+
begin
|
57
|
+
JSON.parse(response.body)
|
58
|
+
rescue JSON::ParserError => e
|
59
|
+
raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
max_pages = parsed_response.dig("num_pages") || max_pages
|
64
|
+
|
65
|
+
yielder <<
|
66
|
+
parsed_response
|
67
|
+
.fetch("organizations", [])
|
68
|
+
.map { |basic_attrs| Propublica::Nonprofits::Organization.new("basic" => basic_attrs) }
|
69
|
+
|
70
|
+
if fetch_all && page + 1 < max_pages
|
71
|
+
page += 1
|
72
|
+
else
|
73
|
+
raise(StopIteration)
|
74
|
+
end
|
75
|
+
end
|
17
76
|
end
|
18
77
|
end
|
19
78
|
|
@@ -23,11 +82,16 @@ module Propublica
|
|
23
82
|
end
|
24
83
|
|
25
84
|
def self.find_attributes(ein)
|
26
|
-
response = Faraday.get("#{API_BASE_URL}/organizations/#{ein}.json")
|
27
|
-
|
85
|
+
response = Faraday.get("#{API_BASE_URL}/nonprofits/api/v2/organizations/#{ein}.json")
|
86
|
+
if response.body.is_a?(Hash)
|
87
|
+
response.body
|
88
|
+
else
|
89
|
+
begin
|
90
|
+
JSON.parse(response.body)
|
91
|
+
rescue JSON::ParserError => e
|
92
|
+
raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
|
93
|
+
end
|
94
|
+
end
|
28
95
|
end
|
29
|
-
|
30
|
-
class Error < StandardError; end
|
31
|
-
class DataNotFetched < Error; end
|
32
96
|
end
|
33
97
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module Propublica
|
2
2
|
module Nonprofits
|
3
3
|
class Organization
|
4
|
-
attr_reader :basic
|
5
|
-
|
6
4
|
def initialize(attributes)
|
7
5
|
@attributes = attributes
|
8
6
|
end
|
@@ -38,28 +36,44 @@ module Propublica
|
|
38
36
|
|
39
37
|
def error
|
40
38
|
ensure_full_request!
|
41
|
-
@error ||= attributes["error"]
|
39
|
+
@error ||= attributes["error"] || ""
|
40
|
+
end
|
41
|
+
|
42
|
+
def error?
|
43
|
+
!error.empty?
|
42
44
|
end
|
43
45
|
|
44
46
|
private
|
45
47
|
|
46
48
|
attr_reader :attributes
|
49
|
+
attr_accessor :full_request_made
|
50
|
+
|
51
|
+
def full_request_made?
|
52
|
+
!!full_request_made
|
53
|
+
end
|
47
54
|
|
48
55
|
def ensure_full_request!
|
49
|
-
return if
|
56
|
+
return if full_request_made?
|
50
57
|
|
51
|
-
|
52
|
-
|
53
|
-
return if @full_request == true
|
58
|
+
self.full_request_made = (attributes.keys & required_keys) == required_keys
|
59
|
+
return if full_request_made?
|
54
60
|
|
55
61
|
fetch_full_request!
|
56
62
|
end
|
57
63
|
|
64
|
+
def required_keys
|
65
|
+
["organization", "filings_with_data", "filings_without_data", "data_source", "api_version", "error"].freeze
|
66
|
+
end
|
67
|
+
|
68
|
+
def ein
|
69
|
+
@attributes.dig("organization", "ein") || self.basic.ein
|
70
|
+
end
|
71
|
+
|
58
72
|
def fetch_full_request!
|
59
73
|
# Fetch all attributes and merge with what we have now
|
60
|
-
new_attrs = Propublica::Nonprofits.find_attributes(
|
74
|
+
new_attrs = Propublica::Nonprofits.find_attributes(ein)
|
61
75
|
attributes.merge!(new_attrs)
|
62
|
-
|
76
|
+
self.full_request_made = true
|
63
77
|
end
|
64
78
|
end
|
65
79
|
end
|
@@ -7,11 +7,7 @@ module Propublica
|
|
7
7
|
fields.each do |field|
|
8
8
|
define_method field do
|
9
9
|
vars = self.instance_variable_get("@attributes")
|
10
|
-
vars.fetch(field.to_s)
|
11
|
-
class_name = self.class.to_s.split("::").last
|
12
|
-
raise Propublica::Nonprofits::DataNotFetched,
|
13
|
-
"#{class_name}##{field} not fetched from API. This may be due to an API error or because you tried to access a Basic property on the full results"
|
14
|
-
end
|
10
|
+
vars.fetch(field.to_s) { raise_field_fetch_error(field) }
|
15
11
|
end
|
16
12
|
end
|
17
13
|
end
|
@@ -27,6 +23,12 @@ module Propublica
|
|
27
23
|
private
|
28
24
|
|
29
25
|
attr_reader :attributes
|
26
|
+
|
27
|
+
def raise_field_fetch_error(field)
|
28
|
+
class_name = self.class.to_s.split("::").last
|
29
|
+
raise Propublica::Nonprofits::DataNotFetched,
|
30
|
+
"#{class_name}##{field} not fetched from API. This may be due to an API error or because you tried to access a Basic property on the full results"
|
31
|
+
end
|
30
32
|
end
|
31
33
|
end
|
32
34
|
end
|
@@ -1,32 +1,32 @@
|
|
1
|
-
|
2
1
|
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "propublica/nonprofits/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
6
|
+
spec.name = "propublica-nonprofits"
|
7
|
+
spec.version = Propublica::Nonprofits::VERSION
|
8
|
+
spec.authors = ["Ricky Chilcott"]
|
9
|
+
spec.email = ["ricky@rakefire.io"]
|
11
10
|
|
12
|
-
spec.summary
|
13
|
-
spec.description
|
14
|
-
spec.homepage
|
15
|
-
spec.license
|
11
|
+
spec.summary = %q{Ruby wrapper for the Propublica Nonprofits API https://projects.propublica.org/nonprofits/api/v2}
|
12
|
+
spec.description = %q{Ruby wrapper for the Propublica Nonprofits API https://projects.propublica.org/nonprofits/api/v2}
|
13
|
+
spec.homepage = "https://github.com/Rakefire/propublica-nonprofits"
|
14
|
+
spec.license = "MIT"
|
16
15
|
|
17
16
|
# Specify which files should be added to the gem when it is released.
|
18
17
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
19
|
-
spec.files
|
18
|
+
spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
|
20
19
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
21
20
|
end
|
22
|
-
spec.bindir
|
23
|
-
spec.executables
|
21
|
+
spec.bindir = "exe"
|
22
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
24
23
|
spec.require_paths = ["lib"]
|
25
24
|
|
26
|
-
spec.add_development_dependency "bundler", "
|
27
|
-
spec.add_development_dependency "
|
25
|
+
spec.add_development_dependency "bundler", ">= 2.2.10"
|
26
|
+
spec.add_development_dependency "minitest-line"
|
28
27
|
spec.add_development_dependency "minitest", "~> 5.0"
|
29
28
|
spec.add_development_dependency "pry"
|
29
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
30
30
|
spec.add_development_dependency "vcr"
|
31
31
|
spec.add_development_dependency "webmock"
|
32
32
|
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: propublica-nonprofits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricky Chilcott
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 2.2.10
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 2.2.10
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: minitest-line
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '13.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '13.0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: vcr
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,6 +151,7 @@ files:
|
|
137
151
|
- LICENSE.txt
|
138
152
|
- README.md
|
139
153
|
- Rakefile
|
154
|
+
- bin/build-and-publish-gem
|
140
155
|
- bin/console
|
141
156
|
- bin/setup
|
142
157
|
- bin/test
|
@@ -153,7 +168,7 @@ homepage: https://github.com/Rakefire/propublica-nonprofits
|
|
153
168
|
licenses:
|
154
169
|
- MIT
|
155
170
|
metadata: {}
|
156
|
-
post_install_message:
|
171
|
+
post_install_message:
|
157
172
|
rdoc_options: []
|
158
173
|
require_paths:
|
159
174
|
- lib
|
@@ -168,9 +183,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
168
183
|
- !ruby/object:Gem::Version
|
169
184
|
version: '0'
|
170
185
|
requirements: []
|
171
|
-
|
172
|
-
|
173
|
-
signing_key:
|
186
|
+
rubygems_version: 3.1.2
|
187
|
+
signing_key:
|
174
188
|
specification_version: 4
|
175
189
|
summary: Ruby wrapper for the Propublica Nonprofits API https://projects.propublica.org/nonprofits/api/v2
|
176
190
|
test_files: []
|