propublica-nonprofits 0.4.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f22696c280743b8ad727b26a7e42d245a6860df751e93de02417ccec08f886d
4
- data.tar.gz: 633d7e23680142bb516fd2d5b5299fcd0c6db79a0162cfe5d4fb0d72f9961c77
3
+ metadata.gz: c05c2a3977841806bac525c3c642db9a94bf0755c3d098c4852276b092b49168
4
+ data.tar.gz: ecbbd8799e9d43592ea52d8e2ab4754f74fef539dec2a3eab0d7a415513acf1b
5
5
  SHA512:
6
- metadata.gz: c7f1e635ba864223d2a962745d0643d67a77bc722dc1c07e8bd217e24ff80875a06ab9f9dbb12c03b5ca8ba6a2930ff94cdbb71773bd63b43bdf6dae993060fb
7
- data.tar.gz: 9a94138157fb0f95608cc181e1cb5fe071e42c1df9cec12004b6c0740b6f09b1f900689cade98616c88f41244613b258d17e0e03b462b38e38c898c341063fdb
6
+ metadata.gz: 1e30af0a7b67f51505022bf3a6e8b63f91fff5bb702c5a531dbed24585194d8e9d0f966b4097d128c67d1a98bc26e44ca533acd64e4681fa6d7ab2ef17da55c3
7
+ data.tar.gz: 9c34f58bc0c29ccabd62af0d62974f71f3557a907551e0f83521e657b20e5534f4afa9c78d1203dd83270be48cd3f7420059dba90069e263eb885c1e58628c30
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- propublica-nonprofits (0.4.1)
4
+ propublica-nonprofits (1.0.0)
5
5
  faraday
6
6
  json
7
7
 
data/README.md CHANGED
@@ -40,8 +40,11 @@ To search the API and return an array of Propublica::Nonprofits::Objects, use th
40
40
  result.details.deductibility_code
41
41
  result.filings_with_data.first.totassetsend
42
42
 
43
+ Want to be as memory and API efficient as possible? Be lazy!
44
+ `Propublica::Nonprofits.lazy_search("rural").first(10)` will only fetch first 10 items
45
+
43
46
  *Note: This will currently only return the first 100 organizations*
44
- *Note: This is returned as a Enumerator::Lazy, to be as memory efficient as possible, if you do something like `Propublica::Nonprofits.search("rural").first(10)`*
47
+ *Note: This is returned as an Array*
45
48
 
46
49
 
47
50
  ### EIN Lookup - As Organization
@@ -80,11 +83,6 @@ consider using the `find_attributes` method.
80
83
  attributes.dig("organization", "name")
81
84
  attributes.dig("filings_without_data").first.dig("pdf_url")
82
85
 
83
- ## TODO
84
-
85
- - Add more robust searching parameters https://projects.propublica.org/nonprofits/api/#endpoint-search-example
86
- - Handle paginated search results (with Enumerator::Lazy)
87
-
88
86
  ## Development
89
87
 
90
88
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -5,44 +5,74 @@ require "faraday"
5
5
  require "json"
6
6
 
7
7
  API_BASE_URL = %(https://projects.propublica.org)
8
- API_SEARCH_PATH = %(/nonprofits/api/v2/search.json)
8
+ API_SEARCH_PATH = %(nonprofits/api/v2/search.json)
9
+
10
+ def self.results(column_names)
11
+ columns = column_names.each
12
+
13
+ Enumerator.new do |yielder|
14
+ loop do
15
+ yielder << Event.where(columns.next => query)
16
+ end
17
+
18
+ yielder << Event.fuzzy_search(query)
19
+ end
20
+ end
9
21
 
10
22
  module Propublica
11
23
  module Nonprofits
12
- def self.search(term, state: nil, page: nil, fetch_all: false)
13
- organizations = []
14
- more_pages = true
15
-
16
- page =
17
- case
18
- when page
19
- page
20
- when fetch_all && page
21
- raise "Page is set but we are fetching all, chose one or the other"
22
- else
23
- 0
24
- end
24
+ class DataNotFetched < StandardError
25
+ end
25
26
 
26
- while (more_pages)
27
- response = connection.get do |request|
28
- request.url(API_SEARCH_PATH)
27
+ def self.search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
28
+ search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
29
+ .flat_map(&:itself)
30
+ end
29
31
 
30
- request.params["q"] = term
31
- request.params["state[id]"] = state if state
32
- request.params["page"] = page if page
33
- end
34
-
32
+ def self.lazy_search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
33
+ search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
34
+ .lazy
35
+ .flat_map(&:itself)
36
+ end
35
37
 
36
- attributes = JSON.parse(response.body)
37
- new_organizations = attributes.fetch("organizations", [])
38
- organizations.push(*new_organizations)
38
+ def self.search_results(term, state: nil, ntee: nil, page: nil, fetch_all: false)
39
+ raise ArgumentError.new("`page` and `fetch_all` are both: choose one or the other") if fetch_all && page
40
+ page ||= 0
41
+ max_pages = nil
39
42
 
40
- more_pages = fetch_all && new_organizations.any?
41
- page += 1
42
- end
43
+ Enumerator.new do |yielder|
44
+ loop do
45
+ params = {}
46
+ params["q"] = term
47
+ params["state[id]"] = state if state
48
+ params["ntee[id]"] = ntee if ntee
49
+ params["page"] = page if page
50
+
51
+ response = Faraday.default_connection.get("#{API_BASE_URL}/#{API_SEARCH_PATH}", params)
52
+ parsed_response =
53
+ if response.body.is_a?(Hash)
54
+ response.body
55
+ else
56
+ begin
57
+ JSON.parse(response.body)
58
+ rescue JSON::ParserError => e
59
+ raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
60
+ end
61
+ end
62
+
63
+ max_pages = parsed_response.dig("num_pages") || max_pages
43
64
 
44
- organizations.lazy.map do |basic_attrs|
45
- Propublica::Nonprofits::Organization.new("basic" => basic_attrs)
65
+ yielder <<
66
+ parsed_response
67
+ .fetch("organizations", [])
68
+ .map { |basic_attrs| Propublica::Nonprofits::Organization.new("basic" => basic_attrs) }
69
+
70
+ if fetch_all && page + 1 < max_pages
71
+ page += 1
72
+ else
73
+ raise(StopIteration)
74
+ end
75
+ end
46
76
  end
47
77
  end
48
78
 
@@ -52,15 +82,16 @@ module Propublica
52
82
  end
53
83
 
54
84
  def self.find_attributes(ein)
55
- response = connection.get("/nonprofits/api/v2/organizations/#{ein}.json")
56
- JSON.parse(response.body)
57
- end
58
-
59
- def self.connection
60
- @connection ||= Faraday.new(url: API_BASE_URL)
85
+ response = Faraday.get("#{API_BASE_URL}/nonprofits/api/v2/organizations/#{ein}.json")
86
+ if response.body.is_a?(Hash)
87
+ response.body
88
+ else
89
+ begin
90
+ JSON.parse(response.body)
91
+ rescue JSON::ParserError => e
92
+ raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
93
+ end
94
+ end
61
95
  end
62
-
63
- class Error < StandardError; end
64
- class DataNotFetched < Error; end
65
96
  end
66
97
  end
@@ -1,5 +1,5 @@
1
1
  module Propublica
2
2
  module Nonprofits
3
- VERSION = "0.4.1"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: propublica-nonprofits
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricky Chilcott