propublica-nonprofits 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +4 -6
- data/lib/propublica/nonprofits.rb +70 -39
- data/lib/propublica/nonprofits/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c05c2a3977841806bac525c3c642db9a94bf0755c3d098c4852276b092b49168
|
|
4
|
+
data.tar.gz: ecbbd8799e9d43592ea52d8e2ab4754f74fef539dec2a3eab0d7a415513acf1b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1e30af0a7b67f51505022bf3a6e8b63f91fff5bb702c5a531dbed24585194d8e9d0f966b4097d128c67d1a98bc26e44ca533acd64e4681fa6d7ab2ef17da55c3
|
|
7
|
+
data.tar.gz: 9c34f58bc0c29ccabd62af0d62974f71f3557a907551e0f83521e657b20e5534f4afa9c78d1203dd83270be48cd3f7420059dba90069e263eb885c1e58628c30
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -40,8 +40,11 @@ To search the API and return an array of Propublica::Nonprofits::Objects, use th
|
|
|
40
40
|
result.details.deductibility_code
|
|
41
41
|
result.filings_with_data.first.totassetsend
|
|
42
42
|
|
|
43
|
+
Want to be as memory and API efficient as possible? Be lazy!
|
|
44
|
+
`Propublica::Nonprofits.lazy_search("rural").first(10)` will only fetch the first 10 items
|
|
45
|
+
|
|
43
46
|
*Note: This will currently only return the first 100 organizations*
|
|
44
|
-
*Note: This is returned as
|
|
47
|
+
*Note: This is returned as an Array*
|
|
45
48
|
|
|
46
49
|
|
|
47
50
|
### EIN Lookup - As Organization
|
|
@@ -80,11 +83,6 @@ consider using the `find_attributes` method.
|
|
|
80
83
|
attributes.dig("organization", "name")
|
|
81
84
|
attributes.dig("filings_without_data").first.dig("pdf_url")
|
|
82
85
|
|
|
83
|
-
## TODO
|
|
84
|
-
|
|
85
|
-
- Add more robust searching parameters https://projects.propublica.org/nonprofits/api/#endpoint-search-example
|
|
86
|
-
- Handle paginated search results (with Enumerator::Lazy)
|
|
87
|
-
|
|
88
86
|
## Development
|
|
89
87
|
|
|
90
88
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
@@ -5,44 +5,74 @@ require "faraday"
|
|
|
5
5
|
require "json"
|
|
6
6
|
|
|
7
7
|
API_BASE_URL = %(https://projects.propublica.org)
|
|
8
|
-
API_SEARCH_PATH = %(
|
|
8
|
+
API_SEARCH_PATH = %(nonprofits/api/v2/search.json)
|
|
9
|
+
|
|
10
|
+
# Builds an Enumerator that emits one `Event.where` lookup per entry of
# +column_names+, followed by a single `Event.fuzzy_search` lookup.
#
# NOTE(review): neither `Event` nor `query` is defined anywhere in the code
# visible here; both are only resolved once the enumerator is consumed.
# Confirm where they are expected to come from.
#
# The column cursor is created once, outside the Enumerator block, so every
# enumeration of the returned object shares that one cursor.
def self.results(column_names)
  column_cursor = column_names.each

  Enumerator.new do |out|
    # Kernel#loop ends quietly once the cursor raises StopIteration.
    loop { out << Event.where(column_cursor.next => query) }

    out << Event.fuzzy_search(query)
  end
end
|
|
9
21
|
|
|
10
22
|
module Propublica
|
|
11
23
|
module Nonprofits
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
more_pages = true
|
|
15
|
-
|
|
16
|
-
page =
|
|
17
|
-
case
|
|
18
|
-
when page
|
|
19
|
-
page
|
|
20
|
-
when fetch_all && page
|
|
21
|
-
raise "Page is set but we are fetching all, chose one or the other"
|
|
22
|
-
else
|
|
23
|
-
0
|
|
24
|
-
end
|
|
24
|
+
# Library-specific error type, a direct subclass of StandardError.
# NOTE(review): nothing in the visible code raises it; presumably it signals
# that an attribute was read before its data had been fetched — confirm at
# the raise sites.
class DataNotFetched < StandardError; end
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
# Eagerly searches the API and returns every matching organization as one
# flat Array.
#
# All arguments are forwarded unchanged to `search_results`, which yields one
# array of organizations per fetched page; the pages are concatenated here.
def self.search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  pages = search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
  pages.flat_map { |organizations| organizations }
end
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
# Lazy counterpart of `search`: returns an Enumerator::Lazy over individual
# organizations, so pages are only requested as results are consumed.
#
# All arguments are forwarded unchanged to `search_results`.
def self.lazy_search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  pages = search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
  pages.lazy.flat_map { |organizations| organizations }
end
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
# Builds an Enumerator that yields one Array of
# Propublica::Nonprofits::Organization objects per fetched page of search
# results.
#
# @param term [String] search text, sent as the `q` parameter
# @param state [String, nil] optional filter, sent as `state[id]`
# @param ntee [String, Integer, nil] optional filter, sent as `ntee[id]`
# @param page [Integer, nil] single page to fetch; defaults to page 0
# @param fetch_all [Boolean] when true, keep requesting pages until the
#   `num_pages` value reported by the API is reached
# @return [Enumerator] each element is the array of organizations of one page
# @raise [ArgumentError] if both `page` and `fetch_all` are given
# @raise [JSON::ParserError] if a response body is not valid JSON
def self.search_results(term, state: nil, ntee: nil, page: nil, fetch_all: false)
  raise ArgumentError, "`page` and `fetch_all` are both set: choose one or the other" if fetch_all && page

  first_page = page || 0

  Enumerator.new do |yielder|
    # The cursor lives inside the block so that every enumeration of the
    # returned Enumerator starts again from the first requested page.
    current_page = first_page
    max_pages = nil

    loop do
      params = {}
      params["q"] = term
      params["state[id]"] = state if state
      params["ntee[id]"] = ntee if ntee
      params["page"] = current_page

      response = Faraday.default_connection.get("#{API_BASE_URL}/#{API_SEARCH_PATH}", params)
      body = response.body
      parsed_response =
        if body.is_a?(Hash)
          # A JSON middleware may already have decoded the body.
          body
        else
          begin
            JSON.parse(body)
          rescue JSON::ParserError => e
            raise JSON::ParserError, "Propublica API Parsing Error: #{e.message}"
          end
        end

      # Keep the last known page count if a later response omits it.
      max_pages = parsed_response["num_pages"] || max_pages

      organizations = parsed_response["organizations"] || []
      yielder << organizations.map do |basic_attrs|
        Propublica::Nonprofits::Organization.new("basic" => basic_attrs)
      end

      # Stop after one page unless fetching everything; also stop when the
      # API never reported a page count, since there is nothing to compare to.
      break unless fetch_all && max_pages && current_page + 1 < max_pages

      current_page += 1
    end
  end
end
|
|
48
78
|
|
|
@@ -52,15 +82,16 @@ module Propublica
|
|
|
52
82
|
end
|
|
53
83
|
|
|
54
84
|
# Fetches the raw attributes for the organization identified by +ein+ from
# the `organizations/<ein>.json` endpoint.
#
# Returns the response body unchanged when it is already a Hash; otherwise
# the body is parsed as JSON.
#
# @raise [JSON::ParserError] with a "Propublica API Parsing Error" prefix
#   when the body is not valid JSON
def self.find_attributes(ein)
  body = Faraday.get("#{API_BASE_URL}/nonprofits/api/v2/organizations/#{ein}.json").body
  return body if body.is_a?(Hash)

  begin
    JSON.parse(body)
  rescue JSON::ParserError => e
    raise JSON::ParserError, "Propublica API Parsing Error: #{e.message}"
  end
end
|
|
62
|
-
|
|
63
|
-
class Error < StandardError; end
|
|
64
|
-
class DataNotFetched < Error; end
|
|
65
96
|
end
|
|
66
97
|
end
|