spidr 0.7.0 → 0.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46a2f2ad2ca789b83fac0e2519294403734e2ad6d647fbc3a612d429e57c1b43
4
- data.tar.gz: b72f561e337c6a0fcdbca9f59562e06f0b5854b15d321f90be1a4168b352faca
3
+ metadata.gz: 471764341b98b0cfeb57db24ac34a849dcfdcf43a751b648451a20c29c1ec051
4
+ data.tar.gz: '009c903cf30a13e55bbb8029fe2fdbfa4f8a8af32126b74aeb558f1afd3d3d88'
5
5
  SHA512:
6
- metadata.gz: ced221d8cdbeaf95df12d6c038de6539a5148657209137433cc82c5abc69779a13376a7e6becdf423d2f2bdd9ebfaf8c7b94a51dda70ffcbab932da4fc5260b3
7
- data.tar.gz: f54bedf3648dd033b8a37388413ae4ab71b4b09f16cc508b8e43e72f2ef870c59fe325e3f36a841791d9d843acb08bb02009469168e9b231a9835a0249b55b6c
6
+ metadata.gz: bddb65750dce8f6193764ac9d372adfa1893dc8743c24c383c359069043b51cd94e09ecd8bffad16bb8b4d92f99324c98ca95f8f59a9c9655a3f2fb7c42b9f57
7
+ data.tar.gz: c02f98806d9297ee22c6552eaaf6bb82f619001af25b0d8eeaabf91d0e32ab7154b5436de71ed4773b15353ba5556b52ece92a6035a891eb001c27b90e5cdda5
@@ -9,18 +9,18 @@ jobs:
9
9
  fail-fast: false
10
10
  matrix:
11
11
  ruby:
12
- - 2.7
13
12
  - '3.0'
14
13
  - '3.1'
14
+ - '3.2'
15
+ - '3.3'
15
16
  - jruby
16
17
  name: Ruby ${{ matrix.ruby }}
17
18
  steps:
18
- - uses: actions/checkout@v2
19
+ - uses: actions/checkout@v4
19
20
  - name: Set up Ruby
20
21
  uses: ruby/setup-ruby@v1
21
22
  with:
22
23
  ruby-version: ${{ matrix.ruby }}
23
- - name: Install dependencies
24
- run: bundle install --jobs 4 --retry 3
24
+ bundler-cache: true
25
25
  - name: Run tests
26
26
  run: bundle exec rake test
data/ChangeLog.md CHANGED
@@ -1,3 +1,11 @@
1
+ ### 0.7.1 / 2024-01-25
2
+
3
+ * Switched to using `require_relative` to improve load-times.
4
+ * Added `# frozen_string_literal: true` to all files.
5
+ * Use keyword arguments for {Spidr.domain}.
6
+ * Rescue `URI::Error` instead of `Exception` when calling `URI::HTTP#merge` in
7
+ {Spidr::Page#to_absolute}.
8
+
1
9
  ### 0.7.0 / 2022-12-31
2
10
 
3
11
  * Added {Spidr.domain} and {Spidr::Agent.domain}.
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2008-2022 Hal Brodigan
1
+ Copyright (c) 2008-2024 Hal Brodigan
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -251,8 +251,6 @@ $ gem install spidr
251
251
 
252
252
  ## License
253
253
 
254
- Copyright (c) 2008-2022 Hal Brodigan
255
-
256
254
  See {file:LICENSE.txt} for license information.
257
255
 
258
256
  [ruby]: https://www.ruby-lang.org/
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Agent
3
5
  module Actions
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Agent
3
5
  #
@@ -1,4 +1,6 @@
1
- require 'spidr/rules'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../rules'
2
4
 
3
5
  module Spidr
4
6
  class Agent
@@ -170,7 +172,7 @@ module Spidr
170
172
  #
171
173
  # @yieldparam [String] link
172
174
  # A link to accept or reject.
173
- #
175
+ #
174
176
  # @since 0.2.4
175
177
  #
176
178
  def visit_links_like(pattern=nil,&block)
@@ -238,7 +240,7 @@ module Spidr
238
240
  #
239
241
  # @yieldparam [URI::HTTP, URI::HTTPS] url
240
242
  # A URL to accept or reject.
241
- #
243
+ #
242
244
  # @since 0.2.4
243
245
  #
244
246
  def visit_urls_like(pattern=nil,&block)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require 'robots'
3
5
  rescue LoadError
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
 
3
5
  module Spidr
data/lib/spidr/agent.rb CHANGED
@@ -1,14 +1,16 @@
1
- require 'spidr/settings/user_agent'
2
- require 'spidr/agent/sanitizers'
3
- require 'spidr/agent/filters'
4
- require 'spidr/agent/events'
5
- require 'spidr/agent/actions'
6
- require 'spidr/agent/robots'
7
- require 'spidr/page'
8
- require 'spidr/session_cache'
9
- require 'spidr/cookie_jar'
10
- require 'spidr/auth_store'
11
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/user_agent'
4
+ require_relative 'agent/sanitizers'
5
+ require_relative 'agent/filters'
6
+ require_relative 'agent/events'
7
+ require_relative 'agent/actions'
8
+ require_relative 'agent/robots'
9
+ require_relative 'page'
10
+ require_relative 'session_cache'
11
+ require_relative 'cookie_jar'
12
+ require_relative 'auth_store'
13
+ require_relative 'spidr'
12
14
 
13
15
  require 'openssl'
14
16
  require 'net/http'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents HTTP Authentication credentials for a website.
@@ -1,6 +1,8 @@
1
- require 'spidr/extensions/uri'
2
- require 'spidr/auth_credential'
3
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
4
+ require_relative 'auth_credential'
5
+ require_relative 'page'
4
6
 
5
7
  require 'base64'
6
8
 
@@ -20,7 +22,7 @@ module Spidr
20
22
  @credentials = {}
21
23
  end
22
24
 
23
- #
25
+ #
24
26
  # Given a URL, return the most specific matching auth credential.
25
27
  #
26
28
  # @param [URI] url
@@ -54,7 +56,7 @@ module Spidr
54
56
  return nil
55
57
  end
56
58
 
57
- #
59
+ #
58
60
  # Add an auth credential to the store for supplied base URL.
59
61
  #
60
62
  # @param [URI] url
@@ -122,7 +124,7 @@ module Spidr
122
124
  end
123
125
  end
124
126
 
125
- #
127
+ #
126
128
  # Clear the contents of the auth store.
127
129
  #
128
130
  # @return [AuthStore]
@@ -1,4 +1,6 @@
1
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'page'
2
4
 
3
5
  require 'set'
4
6
 
@@ -42,8 +44,8 @@ module Spidr
42
44
  @params.each(&block)
43
45
  end
44
46
 
45
- #
46
- # Return all relevant cookies in a single string for the
47
+ #
48
+ # Return all relevant cookies in a single string for the
47
49
  # named host or domain (in browser request format).
48
50
  #
49
51
  # @param [String] host
@@ -59,7 +61,7 @@ module Spidr
59
61
  @params[host] ||= {}
60
62
  end
61
63
 
62
- #
64
+ #
63
65
  # Add a cookie to the jar for a particular domain.
64
66
  #
65
67
  # @param [String] host
@@ -166,7 +168,7 @@ module Spidr
166
168
  return host_cookies
167
169
  end
168
170
 
169
- #
171
+ #
170
172
  # Clear out the jar, removing all stored cookies.
171
173
  #
172
174
  # @since 0.2.2
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
  require 'strscan'
3
5
 
@@ -58,7 +60,7 @@ module URI
58
60
  unless stack.empty?
59
61
  "#{leading_slash}#{stack.join('/')}#{trailing_slash}"
60
62
  else
61
- '/'
63
+ String.new('/')
62
64
  end
63
65
  end
64
66
  end
@@ -1 +1,3 @@
1
- require 'spidr/extensions/uri'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  module Spidr
@@ -1,5 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../extensions/uri'
4
+
1
5
  require 'nokogiri'
2
- require 'spidr/extensions/uri'
3
6
 
4
7
  module Spidr
5
8
  class Page
@@ -265,7 +268,7 @@ module Spidr
265
268
  link = link.to_s
266
269
  new_url = begin
267
270
  url.merge(link)
268
- rescue Exception
271
+ rescue URI::Error
269
272
  return
270
273
  end
271
274
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -92,7 +94,7 @@ module Spidr
92
94
 
93
95
  #
94
96
  # Determines if the response code is `300`, `301`, `302`, `303`
95
- # or `307`. Also checks for "soft" redirects added at the page
97
+ # or `307`. Also checks for "soft" redirects added at the page
96
98
  # level by a meta refresh tag.
97
99
  #
98
100
  # @return [Boolean]
data/lib/spidr/page.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents a requested page from a website.
@@ -142,7 +144,7 @@ module Spidr
142
144
 
143
145
  return super(name,*arguments,&block)
144
146
  end
145
-
147
+
146
148
  end
147
149
  end
148
150
 
data/lib/spidr/proxy.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # @since 0.6.0
data/lib/spidr/rules.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # The {Rules} class represents collections of acceptance and rejection
@@ -1,6 +1,8 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'spidr'
4
6
 
5
7
  require 'net/http'
6
8
  require 'openssl'
@@ -135,7 +137,7 @@ module Spidr
135
137
  key = key_for(url)
136
138
 
137
139
  if (sess = @sessions[key])
138
- begin
140
+ begin
139
141
  sess.finish
140
142
  rescue IOError
141
143
  end
@@ -1,4 +1,6 @@
1
- require 'spidr/proxy'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../proxy'
2
4
 
3
5
  require 'uri/http'
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  module Settings
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  module Settings
3
5
  #
@@ -1,3 +1,5 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/settings/user_agent'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'settings/user_agent'
data/lib/spidr/spidr.rb CHANGED
@@ -1,7 +1,9 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/settings/user_agent'
4
- require 'spidr/agent'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'settings/user_agent'
6
+ require_relative 'agent'
5
7
 
6
8
  module Spidr
7
9
  extend Settings::Proxy
@@ -52,8 +54,8 @@ module Spidr
52
54
  #
53
55
  # @since 0.7.0
54
56
  #
55
- def self.domain(name,options={},&block)
56
- Agent.domain(name,options,&block)
57
+ def self.domain(name,**kwargs,&block)
58
+ Agent.domain(name,**kwargs,&block)
57
59
  end
58
60
 
59
61
  #
@@ -63,7 +65,7 @@ module Spidr
63
65
  Agent.site(url,**kwargs,&block)
64
66
  end
65
67
 
66
- #
68
+ #
67
69
  # @abstract
68
70
  #
69
71
  def self.robots
data/lib/spidr/version.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  # Spidr version
3
- VERSION = '0.7.0'
5
+ VERSION = '0.7.1'
4
6
  end
data/lib/spidr.rb CHANGED
@@ -1,3 +1,5 @@
1
- require 'spidr/agent'
2
- require 'spidr/spidr'
3
- require 'spidr/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'spidr/agent'
4
+ require_relative 'spidr/spidr'
5
+ require_relative 'spidr/version'
data/spidr.gemspec CHANGED
@@ -7,10 +7,7 @@ Gem::Specification.new do |gem|
7
7
 
8
8
  gem.name = gemspec.fetch('name')
9
9
  gem.version = gemspec.fetch('version') do
10
- lib_dir = File.join(File.dirname(__FILE__),'lib')
11
- $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)
12
-
13
- require 'spidr/version'
10
+ require_relative 'lib/spidr/version'
14
11
  Spidr::VERSION
15
12
  end
16
13
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-01 00:00:00.000000000 Z
11
+ date: 2024-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
- rubygems_version: 3.3.26
131
+ rubygems_version: 3.4.10
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: A versatile Ruby web spidering library