propublica-nonprofits 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9f22696c280743b8ad727b26a7e42d245a6860df751e93de02417ccec08f886d
4
- data.tar.gz: 633d7e23680142bb516fd2d5b5299fcd0c6db79a0162cfe5d4fb0d72f9961c77
3
+ metadata.gz: c05c2a3977841806bac525c3c642db9a94bf0755c3d098c4852276b092b49168
4
+ data.tar.gz: ecbbd8799e9d43592ea52d8e2ab4754f74fef539dec2a3eab0d7a415513acf1b
5
5
  SHA512:
6
- metadata.gz: c7f1e635ba864223d2a962745d0643d67a77bc722dc1c07e8bd217e24ff80875a06ab9f9dbb12c03b5ca8ba6a2930ff94cdbb71773bd63b43bdf6dae993060fb
7
- data.tar.gz: 9a94138157fb0f95608cc181e1cb5fe071e42c1df9cec12004b6c0740b6f09b1f900689cade98616c88f41244613b258d17e0e03b462b38e38c898c341063fdb
6
+ metadata.gz: 1e30af0a7b67f51505022bf3a6e8b63f91fff5bb702c5a531dbed24585194d8e9d0f966b4097d128c67d1a98bc26e44ca533acd64e4681fa6d7ab2ef17da55c3
7
+ data.tar.gz: 9c34f58bc0c29ccabd62af0d62974f71f3557a907551e0f83521e657b20e5534f4afa9c78d1203dd83270be48cd3f7420059dba90069e263eb885c1e58628c30
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- propublica-nonprofits (0.4.1)
4
+ propublica-nonprofits (1.0.0)
5
5
  faraday
6
6
  json
7
7
 
data/README.md CHANGED
@@ -40,8 +40,11 @@ To search the API and return an array of Propublica::Nonprofits::Objects, use th
40
40
  result.details.deductibility_code
41
41
  result.filings_with_data.first.totassetsend
42
42
 
43
+ Want to be as memory- and API-efficient as possible? Be lazy!
44
+ `Propublica::Nonprofits.lazy_search("rural").first(10)` will only fetch the first 10 items
45
+
43
46
  *Note: This will currently only return the first 100 organizations*
44
- *Note: This is returned as a Enumerator::Lazy, to be as memory efficient as possible, if you do something like `Propublica::Nonprofits.search("rural").first(10)`*
47
+ *Note: This is returned as an Array*
45
48
 
46
49
 
47
50
  ### EIN Lookup - As Organization
@@ -80,11 +83,6 @@ consider using the `find_attributes` method.
80
83
  attributes.dig("organization", "name")
81
84
  attributes.dig("filings_without_data").first.dig("pdf_url")
82
85
 
83
- ## TODO
84
-
85
- - Add more robust searching parameters https://projects.propublica.org/nonprofits/api/#endpoint-search-example
86
- - Handle paginated search results (with Enumerator::Lazy)
87
-
88
86
  ## Development
89
87
 
90
88
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -5,44 +5,74 @@ require "faraday"
5
5
  require "json"
6
6
 
7
7
  API_BASE_URL = %(https://projects.propublica.org)
8
- API_SEARCH_PATH = %(/nonprofits/api/v2/search.json)
8
+ API_SEARCH_PATH = %(nonprofits/api/v2/search.json)
9
+
10
+ def self.results(column_names)
11
+ columns = column_names.each
12
+
13
+ Enumerator.new do |yielder|
14
+ loop do
15
+ yielder << Event.where(columns.next => query)
16
+ end
17
+
18
+ yielder << Event.fuzzy_search(query)
19
+ end
20
+ end
9
21
 
10
22
  module Propublica
11
23
  module Nonprofits
12
- def self.search(term, state: nil, page: nil, fetch_all: false)
13
- organizations = []
14
- more_pages = true
15
-
16
- page =
17
- case
18
- when page
19
- page
20
- when fetch_all && page
21
- raise "Page is set but we are fetching all, chose one or the other"
22
- else
23
- 0
24
- end
24
+ class DataNotFetched < StandardError
25
+ end
25
26
 
26
- while (more_pages)
27
- response = connection.get do |request|
28
- request.url(API_SEARCH_PATH)
27
+ def self.search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
28
+ search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
29
+ .flat_map(&:itself)
30
+ end
29
31
 
30
- request.params["q"] = term
31
- request.params["state[id]"] = state if state
32
- request.params["page"] = page if page
33
- end
34
-
32
+ def self.lazy_search(term, state: nil, ntee: nil, page: nil, fetch_all: false)
33
+ search_results(term, state: state, ntee: ntee, page: page, fetch_all: fetch_all)
34
+ .lazy
35
+ .flat_map(&:itself)
36
+ end
35
37
 
36
- attributes = JSON.parse(response.body)
37
- new_organizations = attributes.fetch("organizations", [])
38
- organizations.push(*new_organizations)
38
+ def self.search_results(term, state: nil, ntee: nil, page: nil, fetch_all: false)
39
+ raise ArgumentError.new("`page` and `fetch_all` are both: choose one or the other") if fetch_all && page
40
+ page ||= 0
41
+ max_pages = nil
39
42
 
40
- more_pages = fetch_all && new_organizations.any?
41
- page += 1
42
- end
43
+ Enumerator.new do |yielder|
44
+ loop do
45
+ params = {}
46
+ params["q"] = term
47
+ params["state[id]"] = state if state
48
+ params["ntee[id]"] = ntee if ntee
49
+ params["page"] = page if page
50
+
51
+ response = Faraday.default_connection.get("#{API_BASE_URL}/#{API_SEARCH_PATH}", params)
52
+ parsed_response =
53
+ if response.body.is_a?(Hash)
54
+ response.body
55
+ else
56
+ begin
57
+ JSON.parse(response.body)
58
+ rescue JSON::ParserError => e
59
+ raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
60
+ end
61
+ end
62
+
63
+ max_pages = parsed_response.dig("num_pages") || max_pages
43
64
 
44
- organizations.lazy.map do |basic_attrs|
45
- Propublica::Nonprofits::Organization.new("basic" => basic_attrs)
65
+ yielder <<
66
+ parsed_response
67
+ .fetch("organizations", [])
68
+ .map { |basic_attrs| Propublica::Nonprofits::Organization.new("basic" => basic_attrs) }
69
+
70
+ if fetch_all && page + 1 < max_pages
71
+ page += 1
72
+ else
73
+ raise(StopIteration)
74
+ end
75
+ end
46
76
  end
47
77
  end
48
78
 
@@ -52,15 +82,16 @@ module Propublica
52
82
  end
53
83
 
54
84
  def self.find_attributes(ein)
55
- response = connection.get("/nonprofits/api/v2/organizations/#{ein}.json")
56
- JSON.parse(response.body)
57
- end
58
-
59
- def self.connection
60
- @connection ||= Faraday.new(url: API_BASE_URL)
85
+ response = Faraday.get("#{API_BASE_URL}/nonprofits/api/v2/organizations/#{ein}.json")
86
+ if response.body.is_a?(Hash)
87
+ response.body
88
+ else
89
+ begin
90
+ JSON.parse(response.body)
91
+ rescue JSON::ParserError => e
92
+ raise JSON::ParserError.new("Propublica API Parsing Error: #{e.message}")
93
+ end
94
+ end
61
95
  end
62
-
63
- class Error < StandardError; end
64
- class DataNotFetched < Error; end
65
96
  end
66
97
  end
@@ -1,5 +1,5 @@
1
1
  module Propublica
2
2
  module Nonprofits
3
- VERSION = "0.4.1"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: propublica-nonprofits
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricky Chilcott