RubyGems - webget - Versions diffs - 0.2.5 → 0.3.1 - Mend

webget 0.2.5 → 0.3.1

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 33af918725af96b367234f9280264a7cbc0a5c5fd965f4f5cd6da07a26ac43f4
-  data.tar.gz: a53145c4aa919e3073408decd06ebaa8262f70f2077a85bcb8c5cad1a295ec25
+  metadata.gz: de0060acabe176d1a11a4f2e36e8bee5090083c4907b75d9b22e9cbb9c7b22e2
+  data.tar.gz: fb64c2b1294932b00fa401af006cd27b263b255789bad75e4ef62bec5598fd74
 SHA512:
-  metadata.gz: 7459a96f8235fd8a9cec9d0f12512a3d8b26a4ef6b9f6f87b768ef75e5b806f32986ba4d507a78957f068b181abab4ec4386fd059f2f7a2c6c1f1098c96f82c4
-  data.tar.gz: 5272e8b6ce21110745d41b1c137aad4b92f99d7b1e519a1f0aade275e22d9bbc5f14b15c306eb9d92c81373519e3ef48e95b2e0dcf22367fd49514cceee5f265
+  metadata.gz: d1f1653c68729e7d609c3c848e0146de9668cd2932ddab2e794ab361d3f786603a10c7586bcf0029de4955255803987000b2fb35b0d1e14cfa35d0582f919be7
+  data.tar.gz: c6cff08b2f683bb5b8e39607735250f954115244be3e19dfcdeb29fd2d0a0f0a27be1fed5a2add9a1b75a9cbe81dbafea1e80f06b0a1bdd93d4bd3bd12d5a9a1

data/CHANGELOG.md CHANGED Viewed

@@ -1,4 +1,6 @@
-### 0.0.1 / 2020-10-04
-* Everything is new. First release.
+### 0.3.1
+### 0.0.1 / 2020-10-04
+* Everything is new. First release.

data/README.md CHANGED Viewed

@@ -1,20 +1,19 @@
-# webget
-webget gem - a web (go get) crawler incl. web cache
-* home  :: [github.com/rubycoco/webclient](https://github.com/rubycoco/webclient)
-* bugs  :: [github.com/rubycoco/webclient/issues](https://github.com/rubycoco/webclient/issues)
-* gem   :: [rubygems.org/gems/webget](https://rubygems.org/gems/webget)
-* rdoc  :: [rubydoc.info/gems/webget](http://rubydoc.info/gems/webget)
-* forum :: [groups.google.com/group/wwwmake](https://groups.google.com/group/wwwmake)
-## Usage
-TBD
-## License
-The `webget` scripts are dedicated to the public domain.
-Use it as you please with no restrictions whatsoever.
+# webget
+webget gem - a web (go get) crawler incl. web cache
+* home  :: [github.com/rubycoco/webclient](https://github.com/rubycoco/webclient)
+* bugs  :: [github.com/rubycoco/webclient/issues](https://github.com/rubycoco/webclient/issues)
+* gem   :: [rubygems.org/gems/webget](https://rubygems.org/gems/webget)
+* rdoc  :: [rubydoc.info/gems/webget](http://rubydoc.info/gems/webget)
+## Usage
+TBD
+## License
+The `webget` scripts are dedicated to the public domain.
+Use it as you please with no restrictions whatsoever.

data/Rakefile CHANGED Viewed

@@ -1,31 +1,32 @@
-require 'hoe'
-require './lib/webget/version.rb'
-Hoe.spec 'webget' do
-  self.version = Webget::VERSION
-  self.summary = 'webget gem - a web (go get) crawler incl. web cache'
-  self.description = summary
-  self.urls    = { home: 'https://github.com/rubycoco/webclient' }
-  self.author  = 'Gerald Bauer'
-  self.email   = 'ruby-talk@ruby-lang.org'
-  # switch extension to .markdown for gihub formatting
-  self.readme_file  = 'README.md'
-  self.history_file = 'CHANGELOG.md'
-  self.extra_deps = [
-    ['webclient', '>= 0.2.0'],
-    ['csvreader', '>= 1.2.4'],
-  ]
-  self.licenses = ['Public Domain']
-  self.spec_extras = {
-   required_ruby_version: '>= 2.2.2'
-  }
-end
+require 'hoe'
+require './lib/webget/version.rb'
+Hoe.spec 'webget' do
+  self.version = Webget::VERSION
+  self.summary = 'webget gem - a web (go get) crawler incl. web cache'
+  self.description = summary
+  self.urls    = { home: 'https://github.com/rubycoco/webclient' }
+  self.author  = 'Gerald Bauer'
+  self.email   = 'gerald.bauer@gmail.com'
+  # switch extension to .markdown for gihub formatting
+  self.readme_file  = 'README.md'
+  self.history_file = 'CHANGELOG.md'
+  self.extra_deps = [
+    ['webclient', '>= 0.2.0'],
+    ['csvreader', '>= 1.2.4'],
+    ['cocos'],
+  ]
+  self.licenses = ['Public Domain']
+  self.spec_extras = {
+   required_ruby_version: '>= 2.2.2'
+  }
+end

data/lib/webget/version.rb CHANGED Viewed

@@ -1,23 +1,21 @@
-class Webget
-  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
-  MINOR = 2
-  PATCH = 5
-  VERSION = [MAJOR,MINOR,PATCH].join('.')
-  def self.version
-    VERSION
-  end
-  # version string for generator meta tag (includes ruby version)
-  def self.banner
-    "webget/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
-  end
-  def self.root
-    "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
-  end
-end  # module Webget
+class Webget
+  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
+  MINOR = 3
+  PATCH = 1
+  VERSION = [MAJOR,MINOR,PATCH].join('.')
+  def self.version
+    VERSION
+  end
+  # version string for generator meta tag (includes ruby version)
+  def self.banner
+    "webget/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
+  end
+  def self.root
+    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
+  end
+end  # module Webget

data/lib/webget/webcache.rb CHANGED Viewed

@@ -1,230 +1,234 @@
-module Webcache
-  #####
-  # copied from props gem, see Env.home
-  #    - https://github.com/rubycoco/props/blob/master/props/lib/props/env.rb
-  #   todo/fix: use original - and do NOT copy-n-paste!!! - why? why not?
-  def self.home
-    path = if( ENV['HOME'] || ENV['USERPROFILE'] )
-             ENV['HOME'] || ENV['USERPROFILE']
-           elsif( ENV['HOMEDRIVE'] && ENV['HOMEPATH'] )
-             "#{ENV['HOMEDRIVE']}#{ENV['HOMEPATH']}"
-           else
-             begin
-                File.expand_path('~')
-             rescue
-                if File::ALT_SEPARATOR
-                   'C:/'
-                else
-                   '/'
-                end
-             end
-           end
-    ## note: use File.expand_path to "unify" path e.g
-    ##  C:\Users\roman  becomes
-    ##  C:/Users/roman
-    File.expand_path( path )
- end
- class Configuration
-    ## root directory - todo/check: find/use a better name - why? why not?
-    def root()       @root || "#{Webcache.home}/.cache"; end
-    def root=(value) @root = value; end
- end # class Configuration
- ## lets you use
- ##   Webcache.configure do |config|
- ##      config.root = './cache'
- ##   end
- def self.configure() yield( config ); end
- def self.config()    @config ||= Configuration.new;  end
- ## add "high level" root convenience helpers
- def self.root()       config.root; end
- def self.root=(value) config.root = value; end
- ### "interface" for "generic" cache storage (might be sqlite database or filesystem)
- def self.cache() @cache ||= DiskCache.new; end
- def self.record( url, response,
-                   path: nil,
-                   encoding: 'UTF-8',
-                   format: 'html' )
-   cache.record( url, response,
-                   path: path,
-                   encoding: encoding,
-                   format: format );
- end
- def self.cached?( url ) cache.cached?( url ); end
- class << self
-   alias_method :exist?, :cached?
- end
- def self.url_to_id( url )  cache.url_to_id( url ); end  ## todo/check: rename to just id or something - why? why not?
- def self.read( url )       cache.read( url );      end
- def self.read_json( url )  cache.read_json( url ); end
- def self.read_csv( url )   cache.read_csv( url );  end
-class DiskCache
-  def cached?( url )
-    body_path = "#{Webcache.root}/#{url_to_path( url )}"
-    File.exist?( body_path )
-  end
-  alias_method :exist?, :cached?
-  def read( url )
-    body_path = "#{Webcache.root}/#{url_to_path( url )}"
-    File.open( body_path, 'r:utf-8' ) {|f| f.read }
-  end
-  def read_json( url )
-    body_path = "#{Webcache.root}/#{url_to_path( url )}"
-    txt = File.open( body_path, 'r:utf-8' ) {|f| f.read }
-    data = JSON.parse( txt )
-    data
-  end
-  def read_csv( url )
-    body_path = "#{Webcache.root}/#{url_to_path( url )}"
-    txt = File.open( body_path, 'r:utf-8' ) {|f| f.read }
-    data = CsvHash.parse( txt )
-    data
-  end
-  ## add more save / put / etc. aliases - why? why not?
-  ##  rename to record_html - why? why not?
-  def record( url, response,
-              path: nil,
-              encoding: 'UTF-8',
-              format: 'html' )
-    body_path = "#{Webcache.root}/#{url_to_path( url, path: path )}"
-    meta_path = "#{body_path}.meta.txt"
-    ## make sure path exits
-    FileUtils.mkdir_p( File.dirname( body_path ) )
-    puts "[cache] saving #{body_path}..."
-    ## todo/check: verify content-type - why? why not?
-    ## note - for now respone.text always assume (converted) to utf8!!!!!!!!!
-    if format == 'json'
-      File.open( body_path, 'w:utf-8' ) {|f| f.write( JSON.pretty_generate( response.json )) }
-    elsif format == 'csv'
-      ## fix: newlines - always use "unix" style" - why? why not?
-      ## fix:  use :newline => :universal option? translates to univeral "\n"
-      text = response.text( encoding: encoding ).gsub( "\r\n", "\n" )
-      File.open( body_path, 'w:utf-8' ) {|f| f.write( text ) }
-    else   ## html or txt
-      text = response.text( encoding: encoding )
-      File.open( body_path, 'w:utf-8' ) {|f| f.write( text ) }
-    end
-    File.open( meta_path, 'w:utf-8' ) do |f|
-      ## todo/check:
-      ##  do headers also need to converted (like text) if encoding is NOT utf-8 ???
-      response.headers.each do |key, value|  # iterate all response headers
-        f.write( "#{key}: #{value}" )
-        f.write( "\n" )
-      end
-    end
-  end
-  ### note: use file path as id for DiskCache  (is different for DbCache/SqlCache?)
-  ##    use file:// instead of disk:// - why? why not?
-  def url_to_id( str ) "disk://#{url_to_path( str )}"; end
-  ### helpers
-  def url_to_path( str, path: nil )
-    ## map url to file path
-    uri = URI.parse( str )
-    ## note: ignore scheme (e.g. http/https)
-    ##         and  post  (e.g. 80, 8080, etc.) for now
-    ##    always downcase for now (internet domain is case insensitive)
-    host_dir = uri.host.downcase
-    req_path = if path   ## use "custom" (file)path for cache storage if passed in
-                 path
-               else
-                ## "/this/is/everything?query=params"
-                ##   cut-off leading slash and
-                ##    convert query ? =
-                 uri.request_uri[1..-1]
-               end
-    ### special "prettify" rule for weltfussball
-    ##   /eng-league-one-2019-2020/  => /eng-league-one-2019-2020.html
-    if host_dir.index( 'weltfussball.de' ) ||
-       host_dir.index( 'worldfootball.net' )
-          if req_path.end_with?( '/' )
-             req_path = "#{req_path[0..-2]}.html"
-          else
-            puts "ERROR: expected request_uri for >#{host_dir}< ending with '/'; got: >#{req_path}<"
-            exit 1
-          end
-    elsif host_dir.index( 'tipp3.at' )
-      req_path = req_path.sub( '.jsp', '' )  # shorten - cut off .jsp extension
-      ##   change ? to -I-
-      ##   change = to ~
-      ##   Example:
-      ##   sportwetten/classicresults.jsp?oddsetProgramID=888
-      ##     =>
-      ##   sportwetten/classicresults-I-oddsetProgramID~888
-      req_path = req_path.gsub( '?', '-I-' )
-                         .gsub( '=', '~')
-      req_path = "#{req_path}.html"
-    elsif host_dir.index( 'fbref.com' )
-      req_path = req_path.sub( 'en/', '' )      # shorten - cut off en/
-      req_path = "#{req_path}.html"             # auto-add html extension
-    elsif host_dir.index( 'football-data.co.uk' )
-      req_path = req_path.sub( 'mmz4281/', '' )  # shorten - cut off mmz4281/
-      req_path = req_path.sub( 'new/', '' )      # shorten - cut off new/
-    elsif host_dir.index( 'football-data.org' )
-      req_path = req_path.sub( 'v2/', '' )  # shorten - cut off v2/
-      ## flattern - make a file path - for auto-save
-      ##   change ? to -I-
-      ##   change / to ~~
-      ##   change = to ~
-      req_path = req_path.gsub( '?', '-I-' )
-                         .gsub( '/', '~~' )
-                         .gsub( '=', '~')
-      req_path = "#{req_path}.json"
-    elsif host_dir.index( 'api.cryptokitties.co' )
-      ## for now always auto-add .json extensions e.g.
-      ##     kitties/1   => kitties/1.json
-      ##     cattributes => cattributes.json
-      req_path = "#{req_path}.json"
-    else
-      ## no special rule
-    end
-    page_path = "#{host_dir}/#{req_path}"
-    page_path
-  end
-end # class DiskCache
-end  # module Webcache
+module Webcache
+  #####
+  # copied from props gem, see Env.home
+  #    - https://github.com/rubycoco/props/blob/master/props/lib/props/env.rb
+  #   todo/fix: use original - and do NOT copy-n-paste!!! - why? why not?
+  def self.home
+    path = if( ENV['HOME'] || ENV['USERPROFILE'] )
+             ENV['HOME'] || ENV['USERPROFILE']
+           elsif( ENV['HOMEDRIVE'] && ENV['HOMEPATH'] )
+             "#{ENV['HOMEDRIVE']}#{ENV['HOMEPATH']}"
+           else
+             begin
+                File.expand_path('~')
+             rescue
+                if File::ALT_SEPARATOR
+                   'C:/'
+                else
+                   '/'
+                end
+             end
+           end
+    ## note: use File.expand_path to "unify" path e.g
+    ##  C:\Users\roman  becomes
+    ##  C:/Users/roman
+    File.expand_path( path )
+ end
+ class Configuration
+    ## root directory - todo/check: find/use a better name - why? why not?
+    def root()       @root || "#{Webcache.home}/.cache"; end
+    def root=(value) @root = value; end
+ end # class Configuration
+ ## lets you use
+ ##   Webcache.configure do |config|
+ ##      config.root = './cache'
+ ##   end
+ def self.configure() yield( config ); end
+ def self.config()    @config ||= Configuration.new;  end
+ ## add "high level" root convenience helpers
+ ##   use delegate helper - why? why not?
+ def self.root()       config.root; end
+ def self.root=(value) config.root = value; end
+ ### "interface" for "generic" cache storage (might be sqlite database or filesystem)
+ def self.cache() @cache ||= DiskCache.new; end
+ def self.record( url, response,
+                   path: nil,
+                   encoding: 'UTF-8',
+                   format: 'html' )
+   cache.record( url, response,
+                   path: path,
+                   encoding: encoding,
+                   format: format );
+ end
+ def self.cached?( url ) cache.cached?( url ); end
+ class << self
+   alias_method :exist?, :cached?
+ end
+ def self.url_to_id( url )  cache.url_to_id( url ); end  ## todo/check: rename to just id or something - why? why not?
+ def self.read( url )       cache.read( url );      end
+ def self.read_json( url )  cache.read_json( url ); end
+ def self.read_csv( url )   cache.read_csv( url );  end
+class DiskCache
+  def cached?( url )
+    body_path = "#{Webcache.root}/#{url_to_path( url )}"
+    File.exist?( body_path )
+  end
+  alias_method :exist?, :cached?
+  def read( url )
+    body_path = "#{Webcache.root}/#{url_to_path( url )}"
+    File.open( body_path, 'r:utf-8' ) {|f| f.read }
+  end
+  def read_json( url )
+    body_path = "#{Webcache.root}/#{url_to_path( url )}"
+    txt = File.open( body_path, 'r:utf-8' ) {|f| f.read }
+    data = JSON.parse( txt )
+    data
+  end
+  def read_csv( url )
+    body_path = "#{Webcache.root}/#{url_to_path( url )}"
+    txt = File.open( body_path, 'r:utf-8' ) {|f| f.read }
+    data = CsvHash.parse( txt )
+    data
+  end
+  ## add more save / put / etc. aliases - why? why not?
+  ##  rename to record_html - why? why not?
+  def record( url, response,
+              path: nil,
+              encoding: 'UTF-8',
+              format: 'html' )
+    body_path = "#{Webcache.root}/#{url_to_path( url, path: path )}"
+    meta_path = "#{body_path}.meta.txt"
+    ## make sure path exits
+    FileUtils.mkdir_p( File.dirname( body_path ) )
+    puts "[cache] saving #{body_path}..."
+    ## todo/check: verify content-type - why? why not?
+    ## note - for now respone.text always assume (converted) to utf8!!!!!!!!!
+    ##
+    ## fix: newlines - always use "unix" style" - why? why not?
+    ## fix:  use :newline => :universal option? translates to univeral "\n"
+    if format == 'json'
+      File.open( body_path, 'w:utf-8' ) {|f| f.write( JSON.pretty_generate( response.json )) }
+    elsif format == 'csv'
+      ## fix: newlines - always use "unix" style" - why? why not?
+      ## fix:  use :newline => :universal option? translates to univeral "\n"
+      text = response.text( encoding: encoding ).gsub( "\r\n", "\n" )
+      File.open( body_path, 'w:utf-8' ) {|f| f.write( text ) }
+    else   ## html or txt
+      text = response.text( encoding: encoding ).gsub( "\r\n", "\n" )
+      File.open( body_path, 'w:utf-8' ) {|f| f.write( text ) }
+    end
+    File.open( meta_path, 'w:utf-8' ) do |f|
+      ## todo/check:
+      ##  do headers also need to converted (like text) if encoding is NOT utf-8 ???
+      response.headers.each do |key, value|  # iterate all response headers
+        f.write( "#{key}: #{value}" )
+        f.write( "\n" )
+      end
+    end
+  end
+  ### note: use file path as id for DiskCache  (is different for DbCache/SqlCache?)
+  ##    use file:// instead of disk:// - why? why not?
+  def url_to_id( str ) "disk://#{url_to_path( str )}"; end
+  ### helpers
+  def url_to_path( str, path: nil )
+    ## map url to file path
+    uri = URI.parse( str )
+    ## note: ignore scheme (e.g. http/https)
+    ##         and  post  (e.g. 80, 8080, etc.) for now
+    ##    always downcase for now (internet domain is case insensitive)
+    host_dir = uri.host.downcase
+    req_path = if path   ## use "custom" (file)path for cache storage if passed in
+                 path
+               else
+                ## "/this/is/everything?query=params"
+                ##   cut-off leading slash and
+                ##    convert query ? =
+                 uri.request_uri[1..-1]
+               end
+    ### special "prettify" rule for weltfussball
+    ##   /eng-league-one-2019-2020/  => /eng-league-one-2019-2020.html
+    if host_dir.index( 'weltfussball.de' ) ||
+       host_dir.index( 'worldfootball.net' )
+          if req_path.end_with?( '/' )
+             req_path = "#{req_path[0..-2]}.html"
+          else
+            puts "ERROR: expected request_uri for >#{host_dir}< ending with '/'; got: >#{req_path}<"
+            exit 1
+          end
+    elsif host_dir.index( 'tipp3.at' )
+      req_path = req_path.sub( '.jsp', '' )  # shorten - cut off .jsp extension
+      ##   change ? to -I-
+      ##   change = to ~
+      ##   Example:
+      ##   sportwetten/classicresults.jsp?oddsetProgramID=888
+      ##     =>
+      ##   sportwetten/classicresults-I-oddsetProgramID~888
+      req_path = req_path.gsub( '?', '-I-' )
+                         .gsub( '=', '~')
+      req_path = "#{req_path}.html"
+    elsif host_dir.index( 'fbref.com' )
+      req_path = req_path.sub( 'en/', '' )      # shorten - cut off en/
+      req_path = "#{req_path}.html"             # auto-add html extension
+    elsif host_dir.index( 'football-data.co.uk' )
+      req_path = req_path.sub( 'mmz4281/', '' )  # shorten - cut off mmz4281/
+      req_path = req_path.sub( 'new/', '' )      # shorten - cut off new/
+    elsif host_dir.index( 'football-data.org' )
+      ##  req_path = req_path.sub( 'v2/', '' )  # shorten - cut off v2/
+      ## flattern - make a file path - for auto-save
+      ##   change ? to -I-
+      ##   change / to ~~
+      ##   change = to ~
+      req_path = req_path.gsub( '?', '-I-' )
+                         .gsub( '/', '~~' )
+                         .gsub( '=', '~')
+      req_path = "#{req_path}.json"
+    elsif host_dir.index( 'api.cryptokitties.co' )
+      ## for now always auto-add .json extensions e.g.
+      ##     kitties/1   => kitties/1.json
+      ##     cattributes => cattributes.json
+      req_path = "#{req_path}.json"
+    else
+      ## no special rule
+    end
+    page_path = "#{host_dir}/#{req_path}"
+    page_path
+  end
+end # class DiskCache
+end  # module Webcache

data/lib/webget/webget.rb CHANGED Viewed

@@ -1,114 +1,124 @@
-class Webget   # a web (go get) crawler
-  class Configuration  ## nested class
-    #######################
-    ## accessors
-    def sleep()       @sleep || 3; end     ### todo/check: use delay / wait or such?
-    def sleep=(value) @sleep = value; end
-  end # (nested) class Configuration
-  ## lets you use
-  ##   Webget.configure do |config|
-  ##      config.sleep = 10
-  ##   end
-  def self.configure() yield( config ); end
-  def self.config()    @config ||= Configuration.new;  end
-  def self.call( url, headers: {} )  ## assumes json format (note - encoding always utf-8 by definition! - double check?)
-    puts "  sleep #{config.sleep} sec(s)..."
-    sleep( config.sleep )   ## slow down - sleep 3secs before each http request
-    response = Webclient.get( url, headers: headers )
-    if response.status.ok?  ## must be HTTP 200
-      puts "#{response.status.code} #{response.status.message}"
-      ## note: use format json for pretty printing and parse check!!!!
-      Webcache.record( url, response,
-                       format: 'json' )
-    else
-      ## todo/check - log error
-      puts "!! ERROR - #{response.status.code} #{response.status.message}:"
-      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
-    end
-    ## to be done / continued
-    response
-  end  # method self.call
-  ## todo/check: rename encoding to html/http-like charset - why? why not?
-  def self.page( url, encoding: 'UTF-8', headers: {} )  ## assumes html format
-    puts "  sleep #{config.sleep} sec(s)..."
-    sleep( config.sleep )   ## slow down - sleep 3secs before each http request
-    response = Webclient.get( url, headers: headers )
-    if response.status.ok?  ## must be HTTP 200
-      puts "#{response.status.code} #{response.status.message}"
-      Webcache.record( url, response,
-                       encoding: encoding  )   ## assumes format: html (default)
-    else
-      ## todo/check - log error
-      puts "!! ERROR - #{response.status.code} #{response.status.message}:"
-      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
-    end
-    ## to be done / continued
-    response
-  end  # method self.page
-  def self.text( url, path: nil, headers: {} )  ## assumes txt format
-    puts "  sleep #{config.sleep} sec(s)..."
-    sleep( config.sleep )   ## slow down - sleep 3secs before each http request
-    response = Webclient.get( url, headers: headers )
-    if response.status.ok?  ## must be HTTP 200
-      puts "#{response.status.code} #{response.status.message}"
-      ## note: like json assumes always utf-8 encoding for now !!!
-      Webcache.record( url, response,
-                       path: path,   ## optional "custom" (file)path for saving in cache
-                       format: 'txt' )
-    else
-      ## todo/check - log error
-      puts "!! ERROR - #{response.status.code} #{response.status.message}:"
-      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
-    end
-    ## to be done / continued
-    response
-  end  # method self.text
-  ## todo/check: rename to csv or file or records or - why? why not?
-  ## todo/check: rename encoding to html/http-like charset - why? why not?
-  def self.dataset( url, encoding: 'UTF-8', headers: {} )  ## assumes csv format
-    puts "  sleep #{config.sleep} sec(s)..."
-    sleep( config.sleep )   ## slow down - sleep 3secs before each http request
-    response = Webclient.get( url, headers: headers )
-    if response.status.ok?  ## must be HTTP 200
-      puts "#{response.status.code} #{response.status.message}"
-      Webcache.record( url, response,
-                       encoding: encoding,
-                       format:   'csv' )    ## pass along csv format - why? why not?
-    else
-      ## todo/check - log error
-      puts "!! ERROR - #{response.status.code} #{response.status.message}:"
-      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
-    end
-    ## to be done / continued
-    response
-  end  # method self.dataset
-end  # class Webget
+class Webget   # a web (go get) crawler
+  class Configuration  ## nested class
+    #######################
+    ## accessors
+    def sleep()       @sleep || 3; end     ### todo/check: use delay / wait or such?
+    def sleep=(value) @sleep = value; end
+    ## add delay, delay_in_s alias - why? why not?
+    alias_method :delay,       :sleep
+    alias_method :delay_in_s,  :sleep
+    alias_method :delay=,      :sleep=
+    alias_method :delay_in_s=, :sleep=
+  end # (nested) class Configuration
+  ## lets you use
+  ##   Webget.configure do |config|
+  ##      config.sleep = 10
+  ##   end
+  def self.configure() yield( config ); end
+  def self.config()    @config ||= Configuration.new;  end
+  ## note - assumes json format
+  ##   encoding always utf-8 by definition! - double check?)
+  def self.call( url, headers: {} )
+    response = _get( url, headers: headers )
+    if response.status.ok?  ## must be HTTP 200
+      puts "#{response.status.code} #{response.status.message}"
+      ## note: use format json for pretty printing and parse check!!!!
+      Webcache.record( url, response,
+                       format: 'json' )
+    else
+      ## todo/check - log error
+      puts "!! HTTP ERROR - #{response.status.code} #{response.status.message}:"
+      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
+    end
+    ## to be done / continued
+    response
+  end  # method self.call
+  ## todo/check: rename encoding to html/http-like charset - why? why not?
+  ##   check encoding UTF-8 or utf-8  - makes a difference?
+  def self.page( url, encoding: 'UTF-8', headers: {} )  ## assumes html format
+    response = _get( url, headers: headers )
+    if response.status.ok?  ## must be HTTP 200
+      puts "#{response.status.code} #{response.status.message}"
+      Webcache.record( url, response,
+                       encoding: encoding  )   ## assumes format: html (default)
+    else
+      ## todo/check - log error
+      puts "!! HTTP ERROR - #{response.status.code} #{response.status.message}:"
+      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
+    end
+    ## to be done / continued
+    response
+  end  # method self.page
+  ## assumes txt format
+  def self.text( url, path: nil, headers: {} )
+    response = _get( url, headers: headers )
+    if response.status.ok?  ## must be HTTP 200
+      puts "#{response.status.code} #{response.status.message}"
+      ## note: like json assumes always utf-8 encoding for now !!!
+      Webcache.record( url, response,
+                       path: path,   ## optional "custom" (file)path for saving in cache
+                       format: 'txt' )
+    else
+      ## todo/check - log error
+      puts "!! HTTP ERROR - #{response.status.code} #{response.status.message}:"
+      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
+    end
+    ## to be done / continued
+    response
+  end  # method self.text
+  ## todo/check: rename to csv or file or records or - why? why not?
+  ## todo/check: rename encoding to html/http-like charset - why? why not?
+  def self.dataset( url, encoding: 'UTF-8', headers: {} )  ## assumes csv format
+    response = _get( url, headers: headers )
+    if response.status.ok?  ## must be HTTP 200
+      puts "#{response.status.code} #{response.status.message}"
+      Webcache.record( url, response,
+                       encoding: encoding,
+                       format:   'csv' )    ## pass along csv format - why? why not?
+    else
+      ## todo/check - log error
+      puts "!! HTTP ERROR - #{response.status.code} #{response.status.message}:"
+      pp response.raw  ## note: dump inner (raw) response (NOT the wrapped)
+    end
+    ## to be done / continued
+    response
+  end  # method self.dataset
+  ####
+  ##  private helpers
+  ##   make private - why? why not?
+  def self._get( url, headers: {} )
+     @@requests ||= 0     ## track number of requests
+     if @@requests > 0    ## note - do NOT sleep on very first request!!!
+       puts "  sleep #{config.sleep} sec(s)..."
+       sleep( config.sleep )   ## slow down - sleep x secs before each http request
+     end
+     @@requests += 1
+     Webclient.get( url, headers: headers )  ## returns respone
+  end
+end  # class Webget

data/lib/webget.rb CHANGED Viewed

@@ -1,25 +1,29 @@
-require 'webclient'
-## more (our own) 3rd party libs
-require 'csvreader'
-## our own code
-require 'webget/version'   # let version go first
-require 'webget/webcache'
-require 'webget/webget'
-############
-## add convenience alias for camel case / alternate different spelling
-WebCache  = Webcache
-WebGet    = Webget
-## use Webgo as (alias) name (keep reserver for now) - why? why not?
-WebGo    = Webget
-Webgo    = Webget
-puts Webget.banner   # say hello
+require 'webclient'
+## more (our own) 3rd party libs
+require 'csvreader'
+# NEW!! - require/add cocos
+require 'cocos'   # - note - cococs incl. webclient & cvsreader  !!!!
+## our own code
+require_relative 'webget/version'   # let version go first
+require_relative 'webget/webcache'
+require_relative 'webget/webget'
+############
+## add convenience alias for camel case / alternate different spelling
+WebCache  = Webcache
+WebGet    = Webget
+## use Webgo as (alias) name (keep reserver for now) - why? why not?
+WebGo    = Webget
+Webgo    = Webget
+puts Webget.banner   # say hello

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: webget
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.3.1
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-02-21 00:00:00.000000000 Z
+date: 2024-07-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: webclient
@@ -38,6 +38,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.2.4
+- !ruby/object:Gem::Dependency
+  name: cocos
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rdoc
   requirement: !ruby/object:Gem::Requirement
@@ -64,16 +78,16 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.22'
+        version: '4.1'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.22'
+        version: '4.1'
 description: webget gem - a web (go get) crawler incl. web cache
-email: ruby-talk@ruby-lang.org
+email: gerald.bauer@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files:
@@ -110,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.4
+rubygems_version: 3.4.10
 signing_key:
 specification_version: 4
 summary: webget gem - a web (go get) crawler incl. web cache