open-uri-cached 0.0.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of open-uri-cached might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/{README.markdown → README.md} +7 -1
- data/lib/open-uri/cached.rb +94 -13
- metadata +56 -39
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1cf2ef137bca3a2593b5adc10d8baee89ec92bd28fec595f69dde388e6fbc0d5
|
4
|
+
data.tar.gz: c22f7c6d400194f783623f3b38cb38c45b10c3c88586b9fcf866339eca1d2b13
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5d6511c368e3d2be307f5aa9ee64b49750437e0771e5f7e0649b381d6022a20733b1e9f03bea86a388f08e3c5d243ccc2d0fd8fbb8c28a5e0b47cada6f15404a
|
7
|
+
data.tar.gz: b0848773a410b08fa05e6546b12b9316ef45658f71b0b0f4558dff734cc5bbb586a0bf80dd785571e0a6b70ffc14e8cea02b35329ece7f4d3c51f6d0715e4019
|
@@ -11,4 +11,10 @@ Require the library
|
|
11
11
|
|
12
12
|
## Configuring
|
13
13
|
|
14
|
-
OpenURI::Cache.cache_path = '/tmp/open-uri'
|
14
|
+
`OpenURI::Cache.cache_path = '/tmp/open-uri'`
|
15
|
+
|
16
|
+
## Invalidating
|
17
|
+
|
18
|
+
`OpenURI::Cache.invalidate('https://example.com/')`
|
19
|
+
|
20
|
+
`OpenURI::Cache.invalidate_all!`
|
data/lib/open-uri/cached.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'digest/sha1'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'yaml'
|
3
5
|
|
4
6
|
module OpenURI
|
5
7
|
class << self
|
6
8
|
alias original_open_uri open_uri #:nodoc:
|
7
9
|
def open_uri(uri, *rest, &block)
|
8
|
-
response = Cache.get(uri.to_s)
|
9
|
-
|
10
|
-
unless response
|
11
|
-
response = original_open_uri(uri, *rest).read
|
12
|
-
Cache.set(uri.to_s, response)
|
13
|
-
end
|
14
|
-
|
15
|
-
response = StringIO.new(response)
|
10
|
+
response = Cache.get(uri.to_s) ||
|
11
|
+
Cache.set(uri.to_s, original_open_uri(uri, *rest))
|
16
12
|
|
17
13
|
if block_given?
|
18
14
|
begin
|
@@ -27,19 +23,104 @@ module OpenURI
|
|
27
23
|
end
|
28
24
|
|
29
25
|
class Cache
|
30
|
-
@cache_path =
|
26
|
+
@cache_path = "/tmp/open-uri-#{Process.uid}"
|
31
27
|
|
32
28
|
class << self
|
29
|
+
attr_accessor :cache_path
|
30
|
+
|
31
|
+
##
|
32
|
+
# Retrieve file content and meta data from cache
|
33
|
+
# @param [String] key
|
34
|
+
# @return [StringIO]
|
33
35
|
def get(key)
|
34
36
|
filename = filename_from_url(key)
|
35
37
|
# TODO: head request to determine last_modified vs file modtime
|
36
|
-
|
38
|
+
|
39
|
+
# Read metadata, if it exists
|
40
|
+
if File.exist?("#{filename}.meta")
|
41
|
+
if YAML.respond_to?(:unsafe_load)
|
42
|
+
meta = YAML.unsafe_load(File.read("#{filename}.meta"))
|
43
|
+
else
|
44
|
+
meta = YAML.load(File.read("#{filename}.meta"))
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
f = File.exist?(filename) ? StringIO.new(File.open(filename, "rb") { |fd| fd.read }) : nil
|
49
|
+
|
50
|
+
# Add meta accessors
|
51
|
+
if meta && f
|
52
|
+
f.instance_variable_set(:"@meta", meta)
|
53
|
+
|
54
|
+
def f.meta
|
55
|
+
@meta
|
56
|
+
end
|
57
|
+
def f.base_uri
|
58
|
+
@meta[:base_uri]
|
59
|
+
end
|
60
|
+
def f.content_type
|
61
|
+
@meta[:content_type]
|
62
|
+
end
|
63
|
+
def f.charset
|
64
|
+
@meta[:charset]
|
65
|
+
end
|
66
|
+
def f.content_encoding
|
67
|
+
@meta[:content_encoding]
|
68
|
+
end
|
69
|
+
def f.last_modified
|
70
|
+
@meta[:last_modified]
|
71
|
+
end
|
72
|
+
def f.status
|
73
|
+
@meta[:status]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
f
|
37
78
|
end
|
38
79
|
|
80
|
+
# Cache file content and metadata
|
81
|
+
# @param [String] key
|
82
|
+
# URL of content to be cached
|
83
|
+
# @param [StringIO] value
|
84
|
+
# value to be cached, typically StringIO returned from `original_open_uri`
|
85
|
+
# @return [StringIO]
|
86
|
+
# Returns value
|
39
87
|
def set(key, value)
|
40
88
|
filename = filename_from_url(key)
|
41
89
|
mkpath(filename)
|
42
|
-
|
90
|
+
|
91
|
+
# Save metadata in a parallel file
|
92
|
+
if value.respond_to?(:meta)
|
93
|
+
filename_meta = "#{filename}.meta"
|
94
|
+
meta = value.meta
|
95
|
+
meta[:status] = value.status if value.respond_to?(:status)
|
96
|
+
meta[:content_type] = value.content_type if value.respond_to?(:content_type)
|
97
|
+
meta[:base_uri] = value.base_uri if value.respond_to?(:base_uri)
|
98
|
+
File.open(filename_meta, 'wb') {|f| YAML::dump(meta, f)}
|
99
|
+
end
|
100
|
+
|
101
|
+
# Save file contents
|
102
|
+
File.open(filename, 'wb'){|f| f.write value.read }
|
103
|
+
value.rewind
|
104
|
+
value
|
105
|
+
end
|
106
|
+
|
107
|
+
# Invalidate cache for a key, optionally only if it is older than the time given
|
108
|
+
# @param [String] key
|
109
|
+
# URL of content to be invalidated
|
110
|
+
# @param [Time] time
|
111
|
+
# (optional): the maximum age at which the cached value is still acceptable
|
112
|
+
# @return
|
113
|
+
# Returns 1 if a cached value was invalidated, nil if the cached file is not older than time, false if no cached file exists
|
114
|
+
def invalidate(key, time = Time.now)
|
115
|
+
filename = filename_from_url(key)
|
116
|
+
File.delete(filename) if File.stat(filename).mtime < time
|
117
|
+
rescue Errno::ENOENT
|
118
|
+
false
|
119
|
+
end
|
120
|
+
|
121
|
+
# Invalidate all caches we know about
|
122
|
+
def invalidate_all!
|
123
|
+
FileUtils.rm_r(@cache_path, force: true, secure: true)
|
43
124
|
end
|
44
125
|
|
45
126
|
protected
|
@@ -55,7 +136,7 @@ module OpenURI
|
|
55
136
|
full.push(dir)
|
56
137
|
dir = full.join('/')
|
57
138
|
next if dir.to_s == ''
|
58
|
-
Dir.mkdir(dir) unless File.
|
139
|
+
Dir.mkdir(dir) unless File.exist?(dir)
|
59
140
|
end
|
60
141
|
end
|
61
142
|
end
|
metadata
CHANGED
@@ -1,57 +1,74 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: open-uri-cached
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
|
-
authors:
|
6
|
+
authors:
|
7
7
|
- Danial Pearce
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
11
|
+
date: 2021-12-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.10.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.10.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: webmock
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.14.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.14.0
|
16
41
|
description: OpenURI with transparent disk caching
|
17
|
-
email:
|
42
|
+
email: danial.pearce@gmail.com
|
18
43
|
executables: []
|
19
|
-
|
20
44
|
extensions: []
|
21
|
-
|
22
45
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
25
|
-
- README.markdown
|
46
|
+
files:
|
26
47
|
- LICENSE
|
48
|
+
- README.md
|
27
49
|
- lib/open-uri/cached.rb
|
28
|
-
|
29
|
-
homepage: http://github.com/tigris/open-uri-cached
|
50
|
+
homepage: https://github.com/tigris/open-uri-cached
|
30
51
|
licenses: []
|
31
|
-
|
32
|
-
post_install_message:
|
52
|
+
metadata: {}
|
53
|
+
post_install_message:
|
33
54
|
rdoc_options: []
|
34
|
-
|
35
|
-
require_paths:
|
55
|
+
require_paths:
|
36
56
|
- lib
|
37
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
39
59
|
- - ">="
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version:
|
42
|
-
|
43
|
-
|
44
|
-
requirements:
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
45
64
|
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
version:
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
49
67
|
requirements: []
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
68
|
+
rubygems_version: 3.2.22
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: Do a lot of site scraping but take lots of attempts at parsing the content
|
72
|
+
before reaching your end result? This gem is for you. But wait, there's more...
|
73
|
+
Ok, no there isn't.
|
56
74
|
test_files: []
|
57
|
-
|