webget 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de0060acabe176d1a11a4f2e36e8bee5090083c4907b75d9b22e9cbb9c7b22e2
4
- data.tar.gz: fb64c2b1294932b00fa401af006cd27b263b255789bad75e4ef62bec5598fd74
3
+ metadata.gz: 6912004e235fc4a3aad299490d04d9ab9d44ea5f98005a348aa7a456349cfbcb
4
+ data.tar.gz: f2b8bcda71c738557c76957eb4e6e78ee4af52ec9311e5c18a47f27baf2b45df
5
5
  SHA512:
6
- metadata.gz: d1f1653c68729e7d609c3c848e0146de9668cd2932ddab2e794ab361d3f786603a10c7586bcf0029de4955255803987000b2fb35b0d1e14cfa35d0582f919be7
7
- data.tar.gz: c6cff08b2f683bb5b8e39607735250f954115244be3e19dfcdeb29fd2d0a0f0a27be1fed5a2add9a1b75a9cbe81dbafea1e80f06b0a1bdd93d4bd3bd12d5a9a1
6
+ metadata.gz: 40d7bf5f844e00b67554455f1c191a7f189169ad075bffd0b8f8888f856128ed1c1ece29bf1a5932c7d22dc841b9f3b2c2f8ce9640fbed70ba4952c0cd7aa2f7
7
+ data.tar.gz: 5e4d940c50db3f43bb10160a2019eaa7dfc8d56743bf8d5aa1ad8130db089a971d8eb18ab7913398a5606a9347288734d34b5e5ad19bce63b903de899bcd3f3a
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.1
1
+ ### 0.3.2
2
2
 
3
3
  ### 0.0.1 / 2020-10-04
4
4
 
data/Rakefile CHANGED
@@ -26,7 +26,7 @@ Hoe.spec 'webget' do
26
26
  self.licenses = ['Public Domain']
27
27
 
28
28
  self.spec_extras = {
29
- required_ruby_version: '>= 2.2.2'
29
+ required_ruby_version: '>= 3.1.0'
30
30
  }
31
31
 
32
32
  end
@@ -2,7 +2,7 @@
2
2
  class Webget
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
4
  MINOR = 3
5
- PATCH = 1
5
+ PATCH = 2
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
@@ -2,6 +2,69 @@
2
2
 
3
3
  module Webcache
4
4
 
5
+
6
+ class Headers # nested class for convenience access to (meta) headers
7
+
8
+ def self.parse( txt )
9
+ data = {}
10
+ txt.each_line do |line|
11
+ line = line.strip
12
+ next if line.empty? || line.start_with?( '#' )
13
+
14
+ key, value = line.split( ':', 2 ) ## split on first colon
15
+ ## always downcase keys for now
16
+ ## and strip value from leading and trailing spaces
17
+ ##
18
+ ## todo/fix: deal with possible duplicate header keys!!
19
+ ## if duplicate do NOT replace, add with leading ", " comma-separated!!!
20
+ ##
21
+ ## check if multi-line headers are possible!!!
22
+ data[ key.strip.downcase ] = value.strip
23
+ end
24
+ new( data )
25
+ end
26
+
27
+
28
+
29
+ def initialize( data )
30
+ @data = data
31
+ end
32
+
33
+ def to_h() @data; end
34
+ def [](key) @data[key];; end
35
+
36
+ def each( &blk )
37
+ @data.each do |key, value|
38
+ blk.call( key, value )
39
+ end
40
+ end
41
+
42
+
43
+ def date
44
+ ## return date header
45
+ ## parses the time as an RFC 1123 date, i.e. the HTTP-date format defined by RFC 2616:
46
+ ## day-of-week, DD month-name CCYY hh:mm:ss GMT
47
+ ## !!! Note that the result is always UTC (GMT). !!!
48
+ ## e.g. Sun, 19 May 2024 15:15:34 GMT
49
+ ## Mon, 10 Jun 2024 15:58:16 GMT
50
+ @date ||= Time.httpdate( @data['date'] )
51
+ @date
52
+ end
53
+
54
+ ## default to 12h (60secs*60min*12h)
55
+ def expired?( expires_in_date=Time.now.utc-60*60*12 )
56
+ ## pp expires_in_date
57
+ expires_in_date > date
58
+ end
59
+
60
+ ## add convenience helpers - why? why not?
61
+ def expired_in_12h?() expired?( Time.now.utc-60*60*12 ); end
62
+ def expired_in_24h?() expired?( Time.now.utc-60*60*24 ); end
63
+ alias_method :expired_in_1d?, :expired_in_24h?
64
+ end # class Headers
65
+
66
+
67
+
5
68
  #####
6
69
  # copied from props gem, see Env.home
7
70
  # - https://github.com/rubycoco/props/blob/master/props/lib/props/env.rb
@@ -73,6 +136,26 @@ module Webcache
73
136
  def self.read_json( url ) cache.read_json( url ); end
74
137
  def self.read_csv( url ) cache.read_csv( url ); end
75
138
 
139
+ #### new - read (cached) meta data
140
+ ## todo/check - find a better/different name - why? why not?
141
+ ## e.g. read_headers or simply meta or headers or such
142
+ def self.read_meta( url ) cache.read_meta( url ); end
143
+
144
+ ## add convenience expire (shortcut) helpers
145
+ def self.expired?( url, expires_in: Time.now.utc-60*60*12 )
146
+ if cached?( url )
147
+ meta = read_meta( url )
148
+ meta.expired?( expires_in )
149
+ else
150
+ true # note - not in cache; expired by default
151
+ end
152
+ end
153
+ def self.expired_in_12h?( url ) expired?( url, expires_in: Time.now.utc-60*60*12 ); end
154
+ def self.expired_in_24h?( url ) expired?( url, expires_in: Time.now.utc-60*60*24 ); end
155
+ class << self
156
+ alias_method :expired_in_1d?, :expired_in_24h?
157
+ end
158
+
76
159
 
77
160
 
78
161
  class DiskCache
@@ -103,6 +186,15 @@ class DiskCache
103
186
  end
104
187
 
105
188
 
189
+ def read_meta( url )
190
+ body_path = "#{Webcache.root}/#{url_to_path( url )}"
191
+ meta_path = "#{body_path}.meta.txt"
192
+ txt = File.open( meta_path, 'r:utf-8' ) {|f| f.read }
193
+ data = Headers.parse( txt )
194
+ data
195
+ end
196
+
197
+
106
198
  ## add more save / put / etc. aliases - why? why not?
107
199
  ## rename to record_html - why? why not?
108
200
  def record( url, response,
@@ -177,7 +269,19 @@ class DiskCache
177
269
 
178
270
  ### special "prettify" rule for weltfussball
179
271
  ## /eng-league-one-2019-2020/ => /eng-league-one-2019-2020.html
180
- if host_dir.index( 'weltfussball.de' ) ||
272
+
273
+ ### todo/fix - move rules downstream to user - why? why not?
274
+
275
+ if host_dir.index( 'uefa.com' ) ||
276
+ host_dir.index( 'kicker.de' ) ||
277
+ host_dir.index( 'kicekr.at' )
278
+ if req_path.end_with?( '/' )
279
+ req_path = "#{req_path[0..-2]}.html"
280
+ else
281
+ puts "ERROR: expected request_uri for >#{host_dir}< ending with '/'; got: >#{req_path}<"
282
+ exit 1
283
+ end
284
+ elsif host_dir.index( 'weltfussball.de' ) ||
181
285
  host_dir.index( 'worldfootball.net' )
182
286
  if req_path.end_with?( '/' )
183
287
  req_path = "#{req_path[0..-2]}.html"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-04 00:00:00.000000000 Z
11
+ date: 2024-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: webclient
@@ -117,7 +117,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
117
117
  requirements:
118
118
  - - ">="
119
119
  - !ruby/object:Gem::Version
120
- version: 2.2.2
120
+ version: 3.1.0
121
121
  required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  requirements:
123
123
  - - ">="