open-uri-cached 0.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of open-uri-cached might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/README.md +32 -0
- data/lib/open-uri/cached.rb +12 -5
- metadata +53 -48
- data/README.markdown +0 -14
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 349b8c42b059c7fd2cf0284c5df387559ba97ddccea7e4c1b9fe0f93f030af1b
|
4
|
+
data.tar.gz: bf8e5c09b35ccd42f04a37d791340e9532892380eacea4e8d7657ab301a1894c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c66cd1f8fc5a320ece25968fb8651100dd2011b4eadb3ee5c99c24273e192f423a78b3f00019366ff8ca6e8b960ddd90024a92603bb942fea6289321290b6295
|
7
|
+
data.tar.gz: bb4bbcfdd49782212169c7372156455f725de04ae56c451eb20427fb3da4e82bf306b61b7b4347fabb945c64ece16e66b70bf426a3bc10feee96c688b22f3681
|
data/README.md
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# OpenURI with caching
|
2
|
+
|
3
|
+
Carelessly make OpenURI requests without getting hate mail.
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
Require the library
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'open-uri/cached'
|
11
|
+
open('http://www.someone-that-hates-being-scraped.com').read
|
12
|
+
```
|
13
|
+
|
14
|
+
## Configuring
|
15
|
+
|
16
|
+
If you're not super pumped about reading files from `/tmp`, you can configure the cache path:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
OpenURI::Cache.cache_path = '/tmp/open-uri'
|
20
|
+
```
|
21
|
+
|
22
|
+
## Invalidating
|
23
|
+
|
24
|
+
They say cache invalidation is hard, but not really:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
# Invalidate a single URL
|
28
|
+
OpenURI::Cache.invalidate('https://example.com/')
|
29
|
+
|
30
|
+
# Invalidate everything
|
31
|
+
OpenURI::Cache.invalidate_all!
|
32
|
+
```
|
data/lib/open-uri/cached.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'digest/sha1'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'securerandom'
|
3
5
|
require 'yaml'
|
4
6
|
|
5
7
|
module OpenURI
|
@@ -22,7 +24,7 @@ module OpenURI
|
|
22
24
|
end
|
23
25
|
|
24
26
|
class Cache
|
25
|
-
@cache_path = "/tmp/open-uri-#{
|
27
|
+
@cache_path = "/tmp/open-uri-#{SecureRandom.uuid}"
|
26
28
|
|
27
29
|
class << self
|
28
30
|
attr_accessor :cache_path
|
@@ -36,9 +38,9 @@ module OpenURI
|
|
36
38
|
# TODO: head request to determine last_modified vs file modtime
|
37
39
|
|
38
40
|
# Read metadata, if it exists
|
39
|
-
meta = YAML
|
41
|
+
meta = YAML.unsafe_load(File.read("#{filename}.meta")) if File.exist?("#{filename}.meta")
|
40
42
|
|
41
|
-
f = File.
|
43
|
+
f = File.exist?(filename) ? StringIO.new(File.open(filename, "rb") { |fd| fd.read }) : nil
|
42
44
|
|
43
45
|
# Add meta accessors
|
44
46
|
if meta && f
|
@@ -111,6 +113,11 @@ module OpenURI
|
|
111
113
|
false
|
112
114
|
end
|
113
115
|
|
116
|
+
# Invalidate all caches we know about
|
117
|
+
def invalidate_all!
|
118
|
+
FileUtils.rm_r(@cache_path, force: true, secure: true)
|
119
|
+
end
|
120
|
+
|
114
121
|
protected
|
115
122
|
def filename_from_url(url)
|
116
123
|
uri = URI.parse(url) # TODO: rescue here?
|
@@ -124,7 +131,7 @@ module OpenURI
|
|
124
131
|
full.push(dir)
|
125
132
|
dir = full.join('/')
|
126
133
|
next if dir.to_s == ''
|
127
|
-
Dir.mkdir(dir) unless File.
|
134
|
+
Dir.mkdir(dir) unless File.exist?(dir)
|
128
135
|
end
|
129
136
|
end
|
130
137
|
end
|
metadata
CHANGED
@@ -1,66 +1,71 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: open-uri-cached
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
segments:
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 5
|
9
|
-
version: 0.0.5
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
6
|
+
authors:
|
12
7
|
- Danial Pearce
|
13
|
-
autorequire:
|
14
8
|
bindir: bin
|
15
9
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: rspec
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '3.10'
|
19
|
+
type: :development
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '3.10'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: webmock
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.14'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '3.14'
|
21
40
|
description: OpenURI with transparent disk caching
|
22
|
-
email:
|
41
|
+
email: danial.pearce@gmail.com
|
23
42
|
executables: []
|
24
|
-
|
25
43
|
extensions: []
|
26
|
-
|
27
44
|
extra_rdoc_files: []
|
28
|
-
|
29
|
-
files:
|
30
|
-
- README.markdown
|
45
|
+
files:
|
31
46
|
- LICENSE
|
47
|
+
- README.md
|
32
48
|
- lib/open-uri/cached.rb
|
33
|
-
|
34
|
-
homepage: http://github.com/tigris/open-uri-cached
|
49
|
+
homepage: https://github.com/tigris/open-uri-cached
|
35
50
|
licenses: []
|
36
|
-
|
37
|
-
post_install_message:
|
51
|
+
metadata: {}
|
38
52
|
rdoc_options: []
|
39
|
-
|
40
|
-
require_paths:
|
53
|
+
require_paths:
|
41
54
|
- lib
|
42
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
-
|
44
|
-
requirements:
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
45
57
|
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
|
-
requirements:
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
53
62
|
- - ">="
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
|
56
|
-
- 0
|
57
|
-
version: "0"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
58
65
|
requirements: []
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
summary: Do a lot of site scraping but take lots of attempts at parsing the content before reaching your end result? This gem is for you. But wait, there's more... Ok, no there isn't.
|
66
|
+
rubygems_version: 3.6.9
|
67
|
+
specification_version: 4
|
68
|
+
summary: Do a lot of site scraping but take lots of attempts at parsing the content
|
69
|
+
before reaching your end result? This gem is for you. But wait, there's more...
|
70
|
+
Ok, no there isn't.
|
65
71
|
test_files: []
|
66
|
-
|
data/README.markdown
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
# OpenURI with caching
|
2
|
-
|
3
|
-
Carelessly make OpenURI requests without getting hate mail.
|
4
|
-
|
5
|
-
## Usage
|
6
|
-
|
7
|
-
Require the library
|
8
|
-
|
9
|
-
require 'open-uri/cached'
|
10
|
-
open('http://www.someone-that-hates-being-scraped.com').read
|
11
|
-
|
12
|
-
## Configuring
|
13
|
-
|
14
|
-
OpenURI::Cache.cache_path = '/tmp/open-uri'
|