webget 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de0060acabe176d1a11a4f2e36e8bee5090083c4907b75d9b22e9cbb9c7b22e2
4
- data.tar.gz: fb64c2b1294932b00fa401af006cd27b263b255789bad75e4ef62bec5598fd74
3
+ metadata.gz: 6912004e235fc4a3aad299490d04d9ab9d44ea5f98005a348aa7a456349cfbcb
4
+ data.tar.gz: f2b8bcda71c738557c76957eb4e6e78ee4af52ec9311e5c18a47f27baf2b45df
5
5
  SHA512:
6
- metadata.gz: d1f1653c68729e7d609c3c848e0146de9668cd2932ddab2e794ab361d3f786603a10c7586bcf0029de4955255803987000b2fb35b0d1e14cfa35d0582f919be7
7
- data.tar.gz: c6cff08b2f683bb5b8e39607735250f954115244be3e19dfcdeb29fd2d0a0f0a27be1fed5a2add9a1b75a9cbe81dbafea1e80f06b0a1bdd93d4bd3bd12d5a9a1
6
+ metadata.gz: 40d7bf5f844e00b67554455f1c191a7f189169ad075bffd0b8f8888f856128ed1c1ece29bf1a5932c7d22dc841b9f3b2c2f8ce9640fbed70ba4952c0cd7aa2f7
7
+ data.tar.gz: 5e4d940c50db3f43bb10160a2019eaa7dfc8d56743bf8d5aa1ad8130db089a971d8eb18ab7913398a5606a9347288734d34b5e5ad19bce63b903de899bcd3f3a
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.1
1
+ ### 0.3.2
2
2
 
3
3
  ### 0.0.1 / 2020-10-04
4
4
 
data/Rakefile CHANGED
@@ -26,7 +26,7 @@ Hoe.spec 'webget' do
26
26
  self.licenses = ['Public Domain']
27
27
 
28
28
  self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
29
+ required_ruby_version: '>= 3.1.0'
30
30
  }
31
31
 
32
32
  end
@@ -2,7 +2,7 @@
2
2
  class Webget
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
4
  MINOR = 3
5
- PATCH = 1
5
+ PATCH = 2
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
@@ -2,6 +2,69 @@
2
2
 
3
3
  module Webcache
4
4
 
5
+
6
+ class Headers # nested class for convenience access to (meta) headers
7
+
8
+ def self.parse( txt )
9
+ data = {}
10
+ txt.each_line do |line|
11
+ line = line.strip
12
+ next if line.empty? || line.start_with?( '#' )
13
+
14
+ key, value = line.split( ':', 2 ) ## split on first colon
15
+ ## always downcase keys for now
16
+ ## and strip value from leading and trailing spaces
17
+ ##
18
+ ## todo/fix: deal with possible duplicate header keys!!
19
+ ## if duplicate do NOT replace, add with leading ", " comma-separated!!!
20
+ ##
21
+ ## check if multi-line headers are possible!!!
22
+ data[ key.strip.downcase ] = value.strip
23
+ end
24
+ new( data )
25
+ end
26
+
27
+
28
+
29
+ def initialize( data )
30
+ @data = data
31
+ end
32
+
33
+ def to_h() @data; end
34
+ def [](key) @data[key];; end
35
+
36
+ def each( &blk )
37
+ @data.each do |key, value|
38
+ blk.call( key, value )
39
+ end
40
+ end
41
+
42
+
43
+ def date
44
+ ## return date header
45
+ ## parses the time as RFC 1123 date of HTTP-date defined by RFC 2616:
46
+ ## day-of-week, DD month-name CCYY hh:mm:ss GMT
47
+ ## !!! Note that the result is always UTC (GMT). !!!
48
+ ## e.g. Sun, 19 May 2024 15:15:34 GMT
49
+ ## Mon, 10 Jun 2024 15:58:16 GMT
50
+ @date ||= Time.httpdate( @data['date'] )
51
+ @date
52
+ end
53
+
54
+ ## default to 12h (60secs*60min*12h)
55
+ def expired?( expires_in_date=Time.now.utc-60*60*12 )
56
+ ## pp expires_in_date
57
+ expires_in_date > date
58
+ end
59
+
60
+ ## add convenience helpers - why? why not?
61
+ def expired_in_12h?() expired?( Time.now.utc-60*60*12 ); end
62
+ def expired_in_24h?() expired?( Time.now.utc-60*60*24 ); end
63
+ alias_method :expired_in_1d?, :expired_in_24h?
64
+ end # class Headers
65
+
66
+
67
+
5
68
  #####
6
69
  # copied from props gem, see Env.home
7
70
  # - https://github.com/rubycoco/props/blob/master/props/lib/props/env.rb
@@ -73,6 +136,26 @@ module Webcache
73
136
  def self.read_json( url ) cache.read_json( url ); end
74
137
  def self.read_csv( url ) cache.read_csv( url ); end
75
138
 
139
+ #### new - read (cached) meta data
140
+ ## todo/check - find a better/different name - why? why not?
141
+ ## e.g. read_headers or simply meta or headers or such
142
+ def self.read_meta( url ) cache.read_meta( url ); end
143
+
144
+ ## add convenience expire (shortcut) helpers
145
+ def self.expired?( url, expires_in: Time.now.utc-60*60*12 )
146
+ if cached?( url )
147
+ meta = read_meta( url )
148
+ meta.expired?( expires_in )
149
+ else
150
+ true # note - not in cache; expired by default
151
+ end
152
+ end
153
+ def self.expired_in_12h?( url ) expired?( url, expires_in: Time.now.utc-60*60*12 ); end
154
+ def self.expired_in_24h?( url ) expired?( url, expires_in: Time.now.utc-60*60*24 ); end
155
+ class << self
156
+ alias_method :expired_in_1d?, :expired_in_24h?
157
+ end
158
+
76
159
 
77
160
 
78
161
  class DiskCache
@@ -103,6 +186,15 @@ class DiskCache
103
186
  end
104
187
 
105
188
 
189
+ def read_meta( url )
190
+ body_path = "#{Webcache.root}/#{url_to_path( url )}"
191
+ meta_path = "#{body_path}.meta.txt"
192
+ txt = File.open( meta_path, 'r:utf-8' ) {|f| f.read }
193
+ data = Headers.parse( txt )
194
+ data
195
+ end
196
+
197
+
106
198
  ## add more save / put / etc. aliases - why? why not?
107
199
  ## rename to record_html - why? why not?
108
200
  def record( url, response,
@@ -177,7 +269,19 @@ class DiskCache
177
269
 
178
270
  ### special "prettify" rule for weltfussball
179
271
  ## /eng-league-one-2019-2020/ => /eng-league-one-2019-2020.html
180
- if host_dir.index( 'weltfussball.de' ) ||
272
+
273
+ ### todo/fix - move rules downstream to user - why? why not?
274
+
275
+ if host_dir.index( 'uefa.com' ) ||
276
+ host_dir.index( 'kicker.de' ) ||
277
+ host_dir.index( 'kicekr.at' )
278
+ if req_path.end_with?( '/' )
279
+ req_path = "#{req_path[0..-2]}.html"
280
+ else
281
+ puts "ERROR: expected request_uri for >#{host_dir}< ending with '/'; got: >#{req_path}<"
282
+ exit 1
283
+ end
284
+ elsif host_dir.index( 'weltfussball.de' ) ||
181
285
  host_dir.index( 'worldfootball.net' )
182
286
  if req_path.end_with?( '/' )
183
287
  req_path = "#{req_path[0..-2]}.html"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-04 00:00:00.000000000 Z
11
+ date: 2024-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: webclient
@@ -117,7 +117,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
117
  requirements:
118
118
  - - ">="
119
119
  - !ruby/object:Gem::Version
120
- version: 2.2.2
120
+ version: 3.1.0
121
121
  required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  requirements:
123
123
  - - ">="