open-uri-cached 0.0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of open-uri-cached might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/{README.markdown → README.md} +7 -1
- data/lib/open-uri/cached.rb +94 -13
- metadata +56 -39
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1cf2ef137bca3a2593b5adc10d8baee89ec92bd28fec595f69dde388e6fbc0d5
|
4
|
+
data.tar.gz: c22f7c6d400194f783623f3b38cb38c45b10c3c88586b9fcf866339eca1d2b13
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5d6511c368e3d2be307f5aa9ee64b49750437e0771e5f7e0649b381d6022a20733b1e9f03bea86a388f08e3c5d243ccc2d0fd8fbb8c28a5e0b47cada6f15404a
|
7
|
+
data.tar.gz: b0848773a410b08fa05e6546b12b9316ef45658f71b0b0f4558dff734cc5bbb586a0bf80dd785571e0a6b70ffc14e8cea02b35329ece7f4d3c51f6d0715e4019
|
@@ -11,4 +11,10 @@ Require the library
|
|
11
11
|
|
12
12
|
## Configuring
|
13
13
|
|
14
|
-
OpenURI::Cache.cache_path = '/tmp/open-uri'
|
14
|
+
`OpenURI::Cache.cache_path = '/tmp/open-uri'`
|
15
|
+
|
16
|
+
## Invalidating
|
17
|
+
|
18
|
+
`OpenURI::Cache.invalidate('https://example.com/')`
|
19
|
+
|
20
|
+
`OpenURI::Cache.invalidate_all!`
|
data/lib/open-uri/cached.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'digest/sha1'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'yaml'
|
3
5
|
|
4
6
|
module OpenURI
|
5
7
|
class << self
|
6
8
|
alias original_open_uri open_uri #:nodoc:
|
7
9
|
def open_uri(uri, *rest, &block)
|
8
|
-
response = Cache.get(uri.to_s)
|
9
|
-
|
10
|
-
unless response
|
11
|
-
response = original_open_uri(uri, *rest).read
|
12
|
-
Cache.set(uri.to_s, response)
|
13
|
-
end
|
14
|
-
|
15
|
-
response = StringIO.new(response)
|
10
|
+
response = Cache.get(uri.to_s) ||
|
11
|
+
Cache.set(uri.to_s, original_open_uri(uri, *rest))
|
16
12
|
|
17
13
|
if block_given?
|
18
14
|
begin
|
@@ -27,19 +23,104 @@ module OpenURI
|
|
27
23
|
end
|
28
24
|
|
29
25
|
class Cache
|
30
|
-
@cache_path =
|
26
|
+
@cache_path = "/tmp/open-uri-#{Process.uid}"
|
31
27
|
|
32
28
|
class << self
|
29
|
+
attr_accessor :cache_path
|
30
|
+
|
31
|
+
##
|
32
|
+
# Retrieve file content and meta data from cache
|
33
|
+
# @param [String] key
|
34
|
+
# @return [StringIO]
|
33
35
|
def get(key)
|
34
36
|
filename = filename_from_url(key)
|
35
37
|
# TODO: head request to determine last_modified vs file modtime
|
36
|
-
|
38
|
+
|
39
|
+
# Read metadata, if it exists
|
40
|
+
if File.exist?("#{filename}.meta")
|
41
|
+
if YAML.respond_to?(:unsafe_load)
|
42
|
+
meta = YAML.unsafe_load(File.read("#{filename}.meta"))
|
43
|
+
else
|
44
|
+
meta = YAML.load(File.read("#{filename}.meta"))
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
f = File.exist?(filename) ? StringIO.new(File.open(filename, "rb") { |fd| fd.read }) : nil
|
49
|
+
|
50
|
+
# Add meta accessors
|
51
|
+
if meta && f
|
52
|
+
f.instance_variable_set(:"@meta", meta)
|
53
|
+
|
54
|
+
def f.meta
|
55
|
+
@meta
|
56
|
+
end
|
57
|
+
def f.base_uri
|
58
|
+
@meta[:base_uri]
|
59
|
+
end
|
60
|
+
def f.content_type
|
61
|
+
@meta[:content_type]
|
62
|
+
end
|
63
|
+
def f.charset
|
64
|
+
@meta[:charset]
|
65
|
+
end
|
66
|
+
def f.content_encoding
|
67
|
+
@meta[:content_encoding]
|
68
|
+
end
|
69
|
+
def f.last_modified
|
70
|
+
@meta[:last_modified]
|
71
|
+
end
|
72
|
+
def f.status
|
73
|
+
@meta[:status]
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
f
|
37
78
|
end
|
38
79
|
|
80
|
+
# Cache file content and metadata
|
81
|
+
# @param [String] key
|
82
|
+
# URL of content to be cached
|
83
|
+
# @param [StringIO] value
|
84
|
+
# value to be cached, typically StringIO returned from `original_open_uri`
|
85
|
+
# @return [StringIO]
|
86
|
+
# Returns value
|
39
87
|
def set(key, value)
|
40
88
|
filename = filename_from_url(key)
|
41
89
|
mkpath(filename)
|
42
|
-
|
90
|
+
|
91
|
+
# Save metadata in a parallel file
|
92
|
+
if value.respond_to?(:meta)
|
93
|
+
filename_meta = "#{filename}.meta"
|
94
|
+
meta = value.meta
|
95
|
+
meta[:status] = value.status if value.respond_to?(:status)
|
96
|
+
meta[:content_type] = value.content_type if value.respond_to?(:content_type)
|
97
|
+
meta[:base_uri] = value.base_uri if value.respond_to?(:base_uri)
|
98
|
+
File.open(filename_meta, 'wb') {|f| YAML::dump(meta, f)}
|
99
|
+
end
|
100
|
+
|
101
|
+
# Save file contents
|
102
|
+
File.open(filename, 'wb'){|f| f.write value.read }
|
103
|
+
value.rewind
|
104
|
+
value
|
105
|
+
end
|
106
|
+
|
107
|
+
# Invalidate cache for a key, optionally only if older than the time given
|
108
|
+
# @param [String] key
|
109
|
+
# URL of content to be invalidated
|
110
|
+
# @param [Time] time
|
111
|
+
# (optional): the maximum age at which the cached value is still acceptable
|
112
|
+
# @return
|
113
|
+
# Returns 1 if a cached value was invalidated, nil if the cached value is not older than time, false if no cached file exists
|
114
|
+
def invalidate(key, time = Time.now)
|
115
|
+
filename = filename_from_url(key)
|
116
|
+
File.delete(filename) if File.stat(filename).mtime < time
|
117
|
+
rescue Errno::ENOENT
|
118
|
+
false
|
119
|
+
end
|
120
|
+
|
121
|
+
# Invalidate all caches we know about
|
122
|
+
def invalidate_all!
|
123
|
+
FileUtils.rm_r(@cache_path, force: true, secure: true)
|
43
124
|
end
|
44
125
|
|
45
126
|
protected
|
@@ -55,7 +136,7 @@ module OpenURI
|
|
55
136
|
full.push(dir)
|
56
137
|
dir = full.join('/')
|
57
138
|
next if dir.to_s == ''
|
58
|
-
Dir.mkdir(dir) unless File.
|
139
|
+
Dir.mkdir(dir) unless File.exist?(dir)
|
59
140
|
end
|
60
141
|
end
|
61
142
|
end
|
metadata
CHANGED
@@ -1,57 +1,74 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: open-uri-cached
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
|
-
authors:
|
6
|
+
authors:
|
7
7
|
- Danial Pearce
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
11
|
+
date: 2021-12-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.10.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.10.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: webmock
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.14.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.14.0
|
16
41
|
description: OpenURI with transparent disk caching
|
17
|
-
email:
|
42
|
+
email: danial.pearce@gmail.com
|
18
43
|
executables: []
|
19
|
-
|
20
44
|
extensions: []
|
21
|
-
|
22
45
|
extra_rdoc_files: []
|
23
|
-
|
24
|
-
files:
|
25
|
-
- README.markdown
|
46
|
+
files:
|
26
47
|
- LICENSE
|
48
|
+
- README.md
|
27
49
|
- lib/open-uri/cached.rb
|
28
|
-
|
29
|
-
homepage: http://github.com/tigris/open-uri-cached
|
50
|
+
homepage: https://github.com/tigris/open-uri-cached
|
30
51
|
licenses: []
|
31
|
-
|
32
|
-
post_install_message:
|
52
|
+
metadata: {}
|
53
|
+
post_install_message:
|
33
54
|
rdoc_options: []
|
34
|
-
|
35
|
-
require_paths:
|
55
|
+
require_paths:
|
36
56
|
- lib
|
37
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
-
requirements:
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
39
59
|
- - ">="
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
version:
|
42
|
-
|
43
|
-
|
44
|
-
requirements:
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
45
64
|
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
version:
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
49
67
|
requirements: []
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
68
|
+
rubygems_version: 3.2.22
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: Do a lot of site scraping but take lots of attempts at parsing the content
|
72
|
+
before reaching your end result? This gem is for you. But wait, there's more...
|
73
|
+
Ok, no there isn't.
|
56
74
|
test_files: []
|
57
|
-
|