propublica-nonprofits 0.4.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +4 -6
- data/lib/propublica/nonprofits.rb +70 -39
- data/lib/propublica/nonprofits/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c05c2a3977841806bac525c3c642db9a94bf0755c3d098c4852276b092b49168
|
4
|
+
data.tar.gz: ecbbd8799e9d43592ea52d8e2ab4754f74fef539dec2a3eab0d7a415513acf1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e30af0a7b67f51505022bf3a6e8b63f91fff5bb702c5a531dbed24585194d8e9d0f966b4097d128c67d1a98bc26e44ca533acd64e4681fa6d7ab2ef17da55c3
|
7
|
+
data.tar.gz: 9c34f58bc0c29ccabd62af0d62974f71f3557a907551e0f83521e657b20e5534f4afa9c78d1203dd83270be48cd3f7420059dba90069e263eb885c1e58628c30
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -40,8 +40,11 @@ To search the API and return an array of Propublica::Nonprofits::Objects, use th
|
|
40
40
|
result.details.deductibility_code
|
41
41
|
result.filings_with_data.first.totassetsend
|
42
42
|
|
43
|
+
Want to be as memory and API efficient as possible? Be lazy!
|
44
|
+
`Propublica::Nonprofits.lazy_search("rural").first(10)` will only fetch the first 10 items
|
45
|
+
|
43
46
|
*Note: This will currently only return the first 100 organizations*
|
44
|
-
*Note: This is returned as
|
47
|
+
*Note: This is returned as an Array*
|
45
48
|
|
46
49
|
|
47
50
|
### EIN Lookup - As Organization
|
@@ -80,11 +83,6 @@ consider using the `find_attributes` method.
|
|
80
83
|
attributes.dig("organization", "name")
|
81
84
|
attributes.dig("filings_without_data").first.dig("pdf_url")
|
82
85
|
|
83
|
-
## TODO
|
84
|
-
|
85
|
-
- Add more robust searching parameters https://projects.propublica.org/nonprofits/api/#endpoint-search-example
|
86
|
-
- Handle paginated search results (with Enumerator::Lazy)
|
87
|
-
|
88
86
|
## Development
|
89
87
|
|
90
88
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -5,44 +5,74 @@ require "faraday"
|
|
5
5
|
require "json"
|
6
6
|
|
7
7
|
# Scheme and host shared by every ProPublica API request built in this file.
# Frozen so the shared constant cannot be mutated in place.
API_BASE_URL = %(https://projects.propublica.org).freeze
|
8
|
-
API_SEARCH_PATH = %(
|
8
|
+
# Path (relative to the API base URL) of the organization search endpoint.
# Frozen so the shared constant cannot be mutated in place.
API_SEARCH_PATH = %(nonprofits/api/v2/search.json).freeze
|
9
|
+
|
10
|
+
# Builds an Enumerator that yields one `Event.where` result per column name,
# followed by a final `Event.fuzzy_search` result.
#
# The columns are walked inside the Enumerator block so that every
# enumeration starts from the first column again. Capturing a single
# external enumerator outside the block would leave it exhausted after the
# first pass, and later passes would yield only the fuzzy-search result.
#
# NOTE(review): `query` is not defined in this method; it presumably has to
# resolve to a method available on the receiver. `Event` is also defined
# elsewhere. Confirm both against the rest of the file.
#
# @param column_names [#each] column names to query, in order
# @return [Enumerator] one element per column, then the fuzzy-search result
def self.results(column_names)
  Enumerator.new do |yielder|
    column_names.each do |column|
      yielder << Event.where(column => query)
    end

    yielder << Event.fuzzy_search(query)
  end
end
|
9
21
|
|
10
22
|
module Propublica
|
11
23
|
module Nonprofits
|
12
|
-
|
13
|
-
|
14
|
-
more_pages = true
|
15
|
-
|
16
|
-
page =
|
17
|
-
case
|
18
|
-
when page
|
19
|
-
page
|
20
|
-
when fetch_all && page
|
21
|
-
raise "Page is set but we are fetching all, chose one or the other"
|
22
|
-
else
|
23
|
-
0
|
24
|
-
end
|
24
|
+
# Error class for this library, descending directly from StandardError.
# NOTE(review): presumably raised when data that has not been fetched yet is
# accessed; the raise sites are not visible here, so confirm against callers.
class DataNotFetched < StandardError; end
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
# Eagerly runs a search and returns every result in a single flat collection.
#
# All arguments are forwarded unchanged to `search_results`, which produces
# one collection per page; the pages are concatenated here.
#
# @param term [String] search term
# @param state [Object, nil] optional state filter
# @param ntee [Object, nil] optional NTEE filter
# @param page [Integer, nil] single page to fetch
# @param fetch_all [Boolean] whether to walk every page
# @return [Array] the per-page results flattened by one level
def self.search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  pages = search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
  pages.flat_map { |organizations| organizations }
end
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
# Lazy counterpart of `search`: returns a lazy enumerator over individual
# results, so pages are only requested from `search_results` as elements are
# actually consumed.
#
# @param term [String] search term
# @param state [Object, nil] optional state filter
# @param ntee [Object, nil] optional NTEE filter
# @param page [Integer, nil] single page to fetch
# @param fetch_all [Boolean] whether to walk every page
# @return [Enumerator::Lazy] the per-page results flattened by one level
def self.lazy_search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  lazy_pages = search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all).lazy
  lazy_pages.flat_map { |organizations| organizations }
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
-
|
38
|
+
# Builds an Enumerator over pages of search results from the search endpoint.
#
# Each element yielded is an Array of Propublica::Nonprofits::Organization
# objects built from one response's "organizations" list (an empty Array
# when that key is absent). No request is made until the Enumerator is
# consumed.
#
# Paging state lives inside the Enumerator block, so every enumeration
# starts again from the requested page instead of resuming wherever a
# previous enumeration stopped.
#
# @param term [String] search term, sent as the "q" parameter
# @param state [Object, nil] sent as "state[id]" when given
# @param ntee [Object, nil] sent as "ntee[id]" when given
# @param page [Integer, nil] page to request; defaults to 0
# @param fetch_all [Boolean] keep requesting pages until "num_pages" is reached
# @return [Enumerator] one Array of organizations per fetched page
# @raise [ArgumentError] when both `page` and `fetch_all` are given
# @raise [JSON::ParserError] when a response body is not valid JSON
def self.search_results(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  raise ArgumentError, "`page` and `fetch_all` are both: choose one or the other" if fetch_all && page

  first_page = page || 0

  Enumerator.new do |yielder|
    current_page = first_page
    max_pages = nil

    loop do
      params = {}
      params["q"] = term
      params["state[id]"] = state if state
      params["ntee[id]"] = ntee if ntee
      params["page"] = current_page

      response = Faraday.default_connection.get("#{API_BASE_URL}/#{API_SEARCH_PATH}", params)
      body = response.body
      parsed_response =
        if body.is_a?(Hash)
          # The body is already a Hash (nothing left to parse).
          body
        else
          begin
            JSON.parse(body)
          rescue JSON::ParserError => e
            raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
          end
        end

      # Keep the last known page count if this response does not carry one.
      max_pages = parsed_response["num_pages"] || max_pages

      yielder <<
        parsed_response
          .fetch("organizations", [])
          .map { |basic_attrs| Propublica::Nonprofits::Organization.new("basic" => basic_attrs) }

      # Stop after one page unless fetching everything; also stop when the
      # page count is unknown rather than comparing an Integer with nil.
      break unless fetch_all && max_pages && current_page + 1 < max_pages

      current_page += 1
    end
  end
end
|
48
78
|
|
@@ -52,15 +82,16 @@ module Propublica
|
|
52
82
|
end
|
53
83
|
|
54
84
|
# Requests the organization endpoint for the given EIN and returns the
# response attributes.
#
# A body that is already a Hash is returned untouched; any other body is
# parsed as JSON.
#
# @param ein [#to_s] EIN interpolated into the request URL
# @return [Object] the Hash body, or whatever JSON.parse produces from the body
# @raise [JSON::ParserError] when the body is not valid JSON; the message is
#   prefixed with "Propublica API Parsing Error: "
def self.find_attributes(ein)
  body = Faraday.get("#{API_BASE_URL}/nonprofits/api/v2/organizations/#{ein}.json").body
  return body if body.is_a?(Hash)

  begin
    JSON.parse(body)
  rescue JSON::ParserError => parse_error
    raise JSON::ParserError.new("Propublica API Parsing Error: #{parse_error.message}")
  end
end
|
62
|
-
|
63
|
-
class Error < StandardError; end
|
64
|
-
class DataNotFetched < Error; end
|
65
96
|
end
|
66
97
|
end
|