webget 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Rakefile +1 -1
- data/lib/webget/version.rb +1 -1
- data/lib/webget/webcache.rb +105 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6912004e235fc4a3aad299490d04d9ab9d44ea5f98005a348aa7a456349cfbcb
|
4
|
+
data.tar.gz: f2b8bcda71c738557c76957eb4e6e78ee4af52ec9311e5c18a47f27baf2b45df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40d7bf5f844e00b67554455f1c191a7f189169ad075bffd0b8f8888f856128ed1c1ece29bf1a5932c7d22dc841b9f3b2c2f8ce9640fbed70ba4952c0cd7aa2f7
|
7
|
+
data.tar.gz: 5e4d940c50db3f43bb10160a2019eaa7dfc8d56743bf8d5aa1ad8130db089a971d8eb18ab7913398a5606a9347288734d34b5e5ad19bce63b903de899bcd3f3a
|
data/CHANGELOG.md
CHANGED
data/Rakefile
CHANGED
data/lib/webget/version.rb
CHANGED
data/lib/webget/webcache.rb
CHANGED
@@ -2,6 +2,69 @@
|
|
2
2
|
|
3
3
|
module Webcache
|
4
4
|
|
5
|
+
|
6
|
+
class Headers   # nested class for convenience access to (meta) headers
  require 'time'    ## note - Time.httpdate is defined by the stdlib time extension

  ## Parse header text ("Key: value", one header per line) into a Headers object.
  ##
  ##  - blank lines and lines starting with # (comments) get skipped
  ##  - lines without any colon (not a "Key: value" pair) get skipped
  ##      instead of raising NoMethodError on the missing value
  ##  - keys always get downcased; keys and values get stripped
  ##      of leading and trailing spaces
  ##  - duplicate header keys do NOT replace the earlier value;
  ##      the values get joined comma-separated (", ")
  ##
  ##  todo/check: check if multi-line (folded) headers are possible!!!
  def self.parse( txt )
    data = {}
    txt.each_line do |line|
      line = line.strip
      next if line.empty? || line.start_with?( '#' )

      key, value = line.split( ':', 2 )   ## split on first colon
      next if value.nil?                  ## no colon found; not a header line

      key   = key.strip.downcase
      value = value.strip

      data[ key ] = data.key?( key ) ? "#{data[ key ]}, #{value}" : value
    end
    new( data )
  end


  ## data - hash of header name (string) => header value (string)
  def initialize( data )
    @data = data
  end

  ## returns the underlying hash (no copy)
  def to_h() @data; end

  ## Lookup a header value by name; returns nil if missing.
  ##   tries the key as passed in first and
  ##   falls back to the downcased name (parse stores downcased keys),
  ##   that is, headers['Date'] and headers['date'] both work
  def []( key )
    @data.fetch( key ) { @data[ key.to_s.downcase ] }
  end

  ## Yields every (key, value) pair; returns an enumerator if no block given.
  def each
    return to_enum( :each )  unless block_given?

    @data.each do |key, value|
      yield key, value
    end
  end


  ## Returns the date header as a Time or nil if there is no date header.
  ##
  ##  parses the time as RFC 1123 date of HTTP-date defined by RFC 2616:
  ##      day-of-week, DD month-name CCYY hh:mm:ss GMT
  ##  !!! Note that the result is always UTC (GMT). !!!
  ##  e.g.  Sun, 19 May 2024 15:15:34 GMT
  ##        Mon, 10 Jun 2024 15:58:16 GMT
  ##
  ##  raises ArgumentError (from Time.httpdate) if the value
  ##  is present but not a valid HTTP-date
  def date
    @date ||= begin
                value = @data['date']
                value ? Time.httpdate( value ) : nil
              end
  end

  ## Returns true if the date header is older than expires_in_date.
  ##   default to 12h ago (60secs*60min*12h)
  ##   note - no date header present; expired by default
  def expired?( expires_in_date=Time.now.utc-60*60*12 )
    stamp = date
    return true  if stamp.nil?

    expires_in_date > stamp
  end

  ## add convenience helpers - why? why not?
  def expired_in_12h?() expired?( Time.now.utc-60*60*12 ); end
  def expired_in_24h?() expired?( Time.now.utc-60*60*24 ); end
  alias_method :expired_in_1d?, :expired_in_24h?
end  # class Headers
|
65
|
+
|
66
|
+
|
67
|
+
|
5
68
|
#####
|
6
69
|
# copied from props gem, see Env.home
|
7
70
|
# - https://github.com/rubycoco/props/blob/master/props/lib/props/env.rb
|
@@ -73,6 +136,26 @@ module Webcache
|
|
73
136
|
## shortcut - forwards to read_json on the cache object returned by cache
def self.read_json( url )
  cache.read_json( url )
end
|
74
137
|
## shortcut - forwards to read_csv on the cache object returned by cache
def self.read_csv( url )
  cache.read_csv( url )
end
|
75
138
|
|
139
|
+
#### new - read (cached) meta data
|
140
|
+
## todo/check - find a better/different name - why? why not?
|
141
|
+
## e.g. read_headers or simply meta or headers or such
|
142
|
+
## shortcut - forwards to read_meta on the cache object returned by cache
def self.read_meta( url )
  cache.read_meta( url )
end
|
143
|
+
|
144
|
+
## add convenience expire (shortcut) helpers
|
145
|
+
## convenience expire (shortcut) helper
##   returns true if the url is not cached;
##   otherwise reads the cached meta data for the url and
##   returns the result of its expired?( expires_in ) check
##   expires_in defaults to 12h ago (60secs*60min*12h)
def self.expired?( url, expires_in: Time.now.utc-60*60*12 )
  return true  unless cached?( url )   # note - not in cache; expired by default

  read_meta( url ).expired?( expires_in )
end
|
153
|
+
## shortcut - expired? with a cut-off of 12h ago (60secs*60min*12h)
def self.expired_in_12h?( url )
  cutoff = Time.now.utc - 60*60*12
  expired?( url, expires_in: cutoff )
end
|
154
|
+
## shortcut - expired? with a cut-off of 24h ago (60secs*60min*24h)
def self.expired_in_24h?( url )
  cutoff = Time.now.utc - 60*60*24
  expired?( url, expires_in: cutoff )
end
|
155
|
+
## add expired_in_1d? as a second name for the (module-level) expired_in_24h?
singleton_class.alias_method :expired_in_1d?, :expired_in_24h?
|
158
|
+
|
76
159
|
|
77
160
|
|
78
161
|
class DiskCache
|
@@ -103,6 +186,15 @@ class DiskCache
|
|
103
186
|
end
|
104
187
|
|
105
188
|
|
189
|
+
## read the meta data (headers) recorded for url
##   the meta file path is the body path
##     (Webcache.root + "/" + url_to_path( url )) plus ".meta.txt"
##   the file gets read as utf-8 text and
##   handed to Headers.parse; returns the parsed result
def read_meta( url )
  meta_path = "#{Webcache.root}/#{url_to_path( url )}.meta.txt"
  txt       = File.read( meta_path, mode: 'r:utf-8' )
  Headers.parse( txt )
end
|
196
|
+
|
197
|
+
|
106
198
|
## add more save / put / etc. aliases - why? why not?
|
107
199
|
## rename to record_html - why? why not?
|
108
200
|
def record( url, response,
|
@@ -177,7 +269,19 @@ class DiskCache
|
|
177
269
|
|
178
270
|
### special "prettify" rule for weltfussball
|
179
271
|
## /eng-league-one-2019-2020/ => /eng-league-one-2019-2020.html
|
180
|
-
|
272
|
+
|
273
|
+
### todo/fix - move rules downstream to user - why? why not?
|
274
|
+
|
275
|
+
if host_dir.index( 'uefa.com' ) ||
|
276
|
+
host_dir.index( 'kicker.de' ) ||
|
277
|
+
host_dir.index( 'kicekr.at' )
|
278
|
+
if req_path.end_with?( '/' )
|
279
|
+
req_path = "#{req_path[0..-2]}.html"
|
280
|
+
else
|
281
|
+
puts "ERROR: expected request_uri for >#{host_dir}< ending with '/'; got: >#{req_path}<"
|
282
|
+
exit 1
|
283
|
+
end
|
284
|
+
elsif host_dir.index( 'weltfussball.de' ) ||
|
181
285
|
host_dir.index( 'worldfootball.net' )
|
182
286
|
if req_path.end_with?( '/' )
|
183
287
|
req_path = "#{req_path[0..-2]}.html"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webget
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: webclient
|
@@ -117,7 +117,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
117
|
requirements:
|
118
118
|
- - ">="
|
119
119
|
- !ruby/object:Gem::Version
|
120
|
-
version:
|
120
|
+
version: 3.1.0
|
121
121
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
122
|
requirements:
|
123
123
|
- - ">="
|