eaternet 0.3.11 → 0.3.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/eaternet.gemspec +1 -0
- data/lib/eaternet/agencies/nyc.rb +8 -20
- data/lib/eaternet/loggable.rb +1 -0
- data/lib/eaternet/util.rb +38 -1
- data/lib/eaternet/version.rb +1 -1
- data/test/eaternet/loggable_test.rb +1 -1
- data/test/eaternet/util_test.rb +47 -0
- data/test/test_helper.rb +2 -1
- metadata +17 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfd0797bd4c249ea732ef25485f7310ee1d9ba74
|
4
|
+
data.tar.gz: 84e4914a0375a5c84b1d8a48196a19a4f06053ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12ba76b4684ae01e31e2b97a8e0b18c6cace0a7cebc381ef4550a8ad13d03784982f8f660064de0f849caac930db81365b65f6d096e77302471deff71d041611
|
7
|
+
data.tar.gz: 5b207a3fb1c0520bf9721a156a36dc008ac1abfa3c592ede5a57548a9f6e18ca5024f440fa6664ff86c23a93b3cd1e5d7735419bc6b728f4cf4c4e1a5b0679df
|
data/eaternet.gemspec
CHANGED
@@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_development_dependency 'minitest'
|
26
26
|
spec.add_development_dependency 'pry'
|
27
27
|
spec.add_development_dependency 'rake', '~> 10'
|
28
|
+
spec.add_development_dependency 'timecop'
|
28
29
|
spec.add_development_dependency 'vcr'
|
29
30
|
spec.add_development_dependency 'webmock'
|
30
31
|
|
data/lib/eaternet/agencies/nyc.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'json'
|
3
3
|
require 'logger'
|
4
|
-
require 'open-uri'
|
5
4
|
require 'set'
|
6
5
|
require 'tempfile'
|
7
6
|
|
@@ -14,8 +13,9 @@ require 'eaternet/loggable'
|
|
14
13
|
module Eaternet
|
15
14
|
module Agencies
|
16
15
|
# A data source for New York City food service health inspections. It
|
17
|
-
# retrieves the latest info from
|
18
|
-
#
|
16
|
+
# retrieves the latest info from
|
17
|
+
# [the official source](https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59)
|
18
|
+
# and makes it easy to work with.
|
19
19
|
#
|
20
20
|
# This library conforms to the
|
21
21
|
# [LIVES 1.0 standard](http://www.yelp.com/healthscores) developed by
|
@@ -66,7 +66,6 @@ module Eaternet
|
|
66
66
|
end
|
67
67
|
|
68
68
|
# @example Compute the average inspection score for NYC.
|
69
|
-
#
|
70
69
|
# # The library is optimized for memory use at the expense
|
71
70
|
# # of speed. E.g., each call to #inspections will iterate
|
72
71
|
# # through the raw CSV. So here, we first retrieve the
|
@@ -104,6 +103,7 @@ module Eaternet
|
|
104
103
|
#
|
105
104
|
# @return [FeedInfo]
|
106
105
|
def feed_info
|
106
|
+
# Anyone know a contact email?
|
107
107
|
FeedInfo.new do |fi|
|
108
108
|
fi.feed_date = Date.today
|
109
109
|
fi.feed_version = '1.0'
|
@@ -233,29 +233,17 @@ module Eaternet
|
|
233
233
|
.map { |row| block.call(row) }
|
234
234
|
end
|
235
235
|
|
236
|
-
def unique(objects)
|
237
|
-
Set.new(objects)
|
238
|
-
end
|
239
|
-
|
240
236
|
def table_file
|
241
|
-
|
242
|
-
@table_file = Tempfile.new('all.csv.')
|
243
|
-
Nyc.download_to(@table_file)
|
244
|
-
end
|
245
|
-
@table_file
|
246
|
-
end
|
247
|
-
|
248
|
-
def self.download_to(a_file)
|
249
|
-
download_via_url(a_file)
|
237
|
+
@table_file ||= Nyc.download_via_url
|
250
238
|
end
|
251
239
|
|
252
240
|
# Fastest method for downloading all data but may be non-standard
|
253
241
|
# for Socrata.
|
254
|
-
def self.download_via_url
|
255
|
-
|
242
|
+
def self.download_via_url
|
243
|
+
Eaternet::Util.download_and_cache(source: csv_url, dest: Tempfile.new('nyc'))
|
256
244
|
end
|
257
245
|
|
258
|
-
def csv_url
|
246
|
+
def self.csv_url
|
259
247
|
domain = 'data.cityofnewyork.us'
|
260
248
|
dataset = 'xx67-kt59'
|
261
249
|
"https://#{domain}/api/views/#{dataset}/rows.csv?accessType=DOWNLOAD"
|
data/lib/eaternet/loggable.rb
CHANGED
data/lib/eaternet/util.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'base64'
|
1
2
|
require 'open-uri'
|
2
3
|
require 'zip'
|
3
4
|
|
@@ -22,12 +23,24 @@ module Eaternet
|
|
22
23
|
dir
|
23
24
|
end
|
24
25
|
|
26
|
+
# Download a file from the network, caching it for 12 hours.
|
27
|
+
#
|
28
|
+
# @param [String] source the URL to retrieve
|
29
|
+
# @param [String] dest pathname in which to save the file
|
30
|
+
def self.download_and_cache(source:, dest:)
|
31
|
+
cache_path = generate_cache_path(source)
|
32
|
+
download(source: source, dest: cache_path) if expired? cache_path
|
33
|
+
FileUtils.cp cache_path, dest
|
34
|
+
File.open dest
|
35
|
+
end
|
36
|
+
|
25
37
|
# Download a file from the network.
|
26
38
|
#
|
27
39
|
# @param [String] source the URL to retrieve
|
28
40
|
# @param [String] dest pathname in which to save the file
|
41
|
+
# @return [File] the file
|
29
42
|
def self.download(source:, dest:)
|
30
|
-
open(dest, 'wb') { |file| file << open(source).read }
|
43
|
+
File.open(dest, 'wb') { |file| file << open(source).read }
|
31
44
|
end
|
32
45
|
|
33
46
|
# Extract a Zip archive.
|
@@ -56,5 +69,29 @@ module Eaternet
|
|
56
69
|
return nil if a_string.nil?
|
57
70
|
a_string.strip.gsub(/ +/, ' ')
|
58
71
|
end
|
72
|
+
|
73
|
+
def self.file_age_in_days(path)
|
74
|
+
(Time.now - File.mtime(path)).to_i / 86_400.0
|
75
|
+
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# private
|
79
|
+
#
|
80
|
+
|
81
|
+
def self.expired?(cache_path)
|
82
|
+
!File.exist?(cache_path) || file_age_in_days(cache_path) > 0.5
|
83
|
+
end
|
84
|
+
|
85
|
+
def self.generate_cache_path(url)
|
86
|
+
cache_dir = prepare_cache_dir
|
87
|
+
cache_key = Base64.strict_encode64(url)
|
88
|
+
File.join(cache_dir, cache_key)
|
89
|
+
end
|
90
|
+
|
91
|
+
def self.prepare_cache_dir
|
92
|
+
cache_dir = '/tmp/eaternet'
|
93
|
+
`mkdir -p #{cache_dir}`
|
94
|
+
cache_dir
|
95
|
+
end
|
59
96
|
end
|
60
97
|
end
|
data/lib/eaternet/version.rb
CHANGED
data/test/eaternet/loggable_test.rb
CHANGED
data/test/eaternet/util_test.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'test_helper'
|
3
|
+
require 'timecop'
|
4
|
+
|
5
|
+
require 'eaternet/util'
|
6
|
+
|
7
|
+
class UtilTest < Minitest::Test
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@file_contents = 'This is the file'
|
11
|
+
@url = "http://downloadtest.com/file-#{rand(1_000_000)}.txt"
|
12
|
+
@file = Tempfile.new('temp')
|
13
|
+
stub_request(:get, @url).to_return(body: @file_contents)
|
14
|
+
end
|
15
|
+
|
16
|
+
def download
|
17
|
+
Eaternet::Util.download(source: @url, dest: @file)
|
18
|
+
end
|
19
|
+
|
20
|
+
def download_and_cache
|
21
|
+
Eaternet::Util.download_and_cache(source: @url, dest: @file)
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_downloads_a_file
|
25
|
+
download
|
26
|
+
assert_equal @file_contents, `cat #{@file.path}`.strip
|
27
|
+
assert_requested :get, @url, times: 1
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_downloads_a_file_with_cache_option
|
31
|
+
download_and_cache
|
32
|
+
assert_equal @file_contents, `cat #{@file.path}`.strip
|
33
|
+
assert_requested :get, @url, times: 1
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_uses_a_file_cache
|
37
|
+
(1..3).each { download_and_cache }
|
38
|
+
assert_requested :get, @url, times: 1
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_expires_a_cache_file
|
42
|
+
thirteen_hours = 13 * 60 * 60
|
43
|
+
download_and_cache
|
44
|
+
Timecop.freeze(Time.now + thirteen_hours) { download_and_cache }
|
45
|
+
assert_requested :get, @url, times: 2
|
46
|
+
end
|
47
|
+
end
|
data/test/test_helper.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'pry'
|
3
|
+
require 'webmock/minitest'
|
3
4
|
require 'vcr'
|
4
5
|
|
5
6
|
VCR.configure do |c|
|
6
7
|
c.cassette_library_dir = 'test/vcr_cassettes'
|
7
8
|
c.hook_into :webmock
|
9
|
+
c.ignore_hosts 'downloadtest.com'
|
8
10
|
end
|
9
11
|
|
10
|
-
|
11
12
|
#
|
12
13
|
# Testing helpers
|
13
14
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: eaternet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.11
|
4
|
+
version: 0.3.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robb Shecter
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '10'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: timecop
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: vcr
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -194,6 +208,7 @@ files:
|
|
194
208
|
- test/eaternet/lives_1_0/inspection_test.rb
|
195
209
|
- test/eaternet/lives_1_0/legend_test.rb
|
196
210
|
- test/eaternet/loggable_test.rb
|
211
|
+
- test/eaternet/util_test.rb
|
197
212
|
- test/fixtures/morris-park-bake-shop.csv
|
198
213
|
- test/script.rb
|
199
214
|
- test/test_helper.rb
|
@@ -230,6 +245,7 @@ test_files:
|
|
230
245
|
- test/eaternet/lives_1_0/inspection_test.rb
|
231
246
|
- test/eaternet/lives_1_0/legend_test.rb
|
232
247
|
- test/eaternet/loggable_test.rb
|
248
|
+
- test/eaternet/util_test.rb
|
233
249
|
- test/fixtures/morris-park-bake-shop.csv
|
234
250
|
- test/script.rb
|
235
251
|
- test/test_helper.rb
|