spidr 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46a2f2ad2ca789b83fac0e2519294403734e2ad6d647fbc3a612d429e57c1b43
4
- data.tar.gz: b72f561e337c6a0fcdbca9f59562e06f0b5854b15d321f90be1a4168b352faca
3
+ metadata.gz: 471764341b98b0cfeb57db24ac34a849dcfdcf43a751b648451a20c29c1ec051
4
+ data.tar.gz: '009c903cf30a13e55bbb8029fe2fdbfa4f8a8af32126b74aeb558f1afd3d3d88'
5
5
  SHA512:
6
- metadata.gz: ced221d8cdbeaf95df12d6c038de6539a5148657209137433cc82c5abc69779a13376a7e6becdf423d2f2bdd9ebfaf8c7b94a51dda70ffcbab932da4fc5260b3
7
- data.tar.gz: f54bedf3648dd033b8a37388413ae4ab71b4b09f16cc508b8e43e72f2ef870c59fe325e3f36a841791d9d843acb08bb02009469168e9b231a9835a0249b55b6c
6
+ metadata.gz: bddb65750dce8f6193764ac9d372adfa1893dc8743c24c383c359069043b51cd94e09ecd8bffad16bb8b4d92f99324c98ca95f8f59a9c9655a3f2fb7c42b9f57
7
+ data.tar.gz: c02f98806d9297ee22c6552eaaf6bb82f619001af25b0d8eeaabf91d0e32ab7154b5436de71ed4773b15353ba5556b52ece92a6035a891eb001c27b90e5cdda5
@@ -9,18 +9,18 @@ jobs:
9
9
  fail-fast: false
10
10
  matrix:
11
11
  ruby:
12
- - 2.7
13
12
  - '3.0'
14
13
  - '3.1'
14
+ - '3.2'
15
+ - '3.3'
15
16
  - jruby
16
17
  name: Ruby ${{ matrix.ruby }}
17
18
  steps:
18
- - uses: actions/checkout@v2
19
+ - uses: actions/checkout@v4
19
20
  - name: Set up Ruby
20
21
  uses: ruby/setup-ruby@v1
21
22
  with:
22
23
  ruby-version: ${{ matrix.ruby }}
23
- - name: Install dependencies
24
- run: bundle install --jobs 4 --retry 3
24
+ bundler-cache: true
25
25
  - name: Run tests
26
26
  run: bundle exec rake test
data/ChangeLog.md CHANGED
@@ -1,3 +1,11 @@
1
+ ### 0.7.1 / 2024-01-25
2
+
3
+ * Switched to using `require_relative` to improve load-times.
4
+ * Added `# frozen_string_literal: true` to all files.
5
+ * Use keyword arguments for {Spidr.domain}.
6
+ * Rescue `URI::Error` instead of `Exception` when calling `URI::HTTP#merge` in
7
+ {Spidr::Page#to_absolute}.
8
+
1
9
  ### 0.7.0 / 2022-12-31
2
10
 
3
11
  * Added {Spidr.domain} and {Spidr::Agent.domain}.
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2008-2022 Hal Brodigan
1
+ Copyright (c) 2008-2024 Hal Brodigan
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -251,8 +251,6 @@ $ gem install spidr
251
251
 
252
252
  ## License
253
253
 
254
- Copyright (c) 2008-2022 Hal Brodigan
255
-
256
254
  See {file:LICENSE.txt} for license information.
257
255
 
258
256
  [ruby]: https://www.ruby-lang.org/
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Agent
3
5
  module Actions
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Agent
3
5
  #
@@ -1,4 +1,6 @@
1
- require 'spidr/rules'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../rules'
2
4
 
3
5
  module Spidr
4
6
  class Agent
@@ -170,7 +172,7 @@ module Spidr
170
172
  #
171
173
  # @yieldparam [String] link
172
174
  # A link to accept or reject.
173
- #
175
+ #
174
176
  # @since 0.2.4
175
177
  #
176
178
  def visit_links_like(pattern=nil,&block)
@@ -238,7 +240,7 @@ module Spidr
238
240
  #
239
241
  # @yieldparam [URI::HTTP, URI::HTTPS] url
240
242
  # A URL to accept or reject.
241
- #
243
+ #
242
244
  # @since 0.2.4
243
245
  #
244
246
  def visit_urls_like(pattern=nil,&block)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  begin
2
4
  require 'robots'
3
5
  rescue LoadError
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
 
3
5
  module Spidr
data/lib/spidr/agent.rb CHANGED
@@ -1,14 +1,16 @@
1
- require 'spidr/settings/user_agent'
2
- require 'spidr/agent/sanitizers'
3
- require 'spidr/agent/filters'
4
- require 'spidr/agent/events'
5
- require 'spidr/agent/actions'
6
- require 'spidr/agent/robots'
7
- require 'spidr/page'
8
- require 'spidr/session_cache'
9
- require 'spidr/cookie_jar'
10
- require 'spidr/auth_store'
11
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/user_agent'
4
+ require_relative 'agent/sanitizers'
5
+ require_relative 'agent/filters'
6
+ require_relative 'agent/events'
7
+ require_relative 'agent/actions'
8
+ require_relative 'agent/robots'
9
+ require_relative 'page'
10
+ require_relative 'session_cache'
11
+ require_relative 'cookie_jar'
12
+ require_relative 'auth_store'
13
+ require_relative 'spidr'
12
14
 
13
15
  require 'openssl'
14
16
  require 'net/http'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents HTTP Authentication credentials for a website.
@@ -1,6 +1,8 @@
1
- require 'spidr/extensions/uri'
2
- require 'spidr/auth_credential'
3
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
4
+ require_relative 'auth_credential'
5
+ require_relative 'page'
4
6
 
5
7
  require 'base64'
6
8
 
@@ -20,7 +22,7 @@ module Spidr
20
22
  @credentials = {}
21
23
  end
22
24
 
23
- #
25
+ #
24
26
  # Given a URL, return the most specific matching auth credential.
25
27
  #
26
28
  # @param [URI] url
@@ -54,7 +56,7 @@ module Spidr
54
56
  return nil
55
57
  end
56
58
 
57
- #
59
+ #
58
60
  # Add an auth credential to the store for supplied base URL.
59
61
  #
60
62
  # @param [URI] url
@@ -122,7 +124,7 @@ module Spidr
122
124
  end
123
125
  end
124
126
 
125
- #
127
+ #
126
128
  # Clear the contents of the auth store.
127
129
  #
128
130
  # @return [AuthStore]
@@ -1,4 +1,6 @@
1
- require 'spidr/page'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'page'
2
4
 
3
5
  require 'set'
4
6
 
@@ -42,8 +44,8 @@ module Spidr
42
44
  @params.each(&block)
43
45
  end
44
46
 
45
- #
46
- # Return all relevant cookies in a single string for the
47
+ #
48
+ # Return all relevant cookies in a single string for the
47
49
  # named host or domain (in browser request format).
48
50
  #
49
51
  # @param [String] host
@@ -59,7 +61,7 @@ module Spidr
59
61
  @params[host] ||= {}
60
62
  end
61
63
 
62
- #
64
+ #
63
65
  # Add a cookie to the jar for a particular domain.
64
66
  #
65
67
  # @param [String] host
@@ -166,7 +168,7 @@ module Spidr
166
168
  return host_cookies
167
169
  end
168
170
 
169
- #
171
+ #
170
172
  # Clear out the jar, removing all stored cookies.
171
173
  #
172
174
  # @since 0.2.2
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
  require 'strscan'
3
5
 
@@ -58,7 +60,7 @@ module URI
58
60
  unless stack.empty?
59
61
  "#{leading_slash}#{stack.join('/')}#{trailing_slash}"
60
62
  else
61
- '/'
63
+ String.new('/')
62
64
  end
63
65
  end
64
66
  end
@@ -1 +1,3 @@
1
- require 'spidr/extensions/uri'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'extensions/uri'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  module Spidr
@@ -1,5 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../extensions/uri'
4
+
1
5
  require 'nokogiri'
2
- require 'spidr/extensions/uri'
3
6
 
4
7
  module Spidr
5
8
  class Page
@@ -265,7 +268,7 @@ module Spidr
265
268
  link = link.to_s
266
269
  new_url = begin
267
270
  url.merge(link)
268
- rescue Exception
271
+ rescue URI::Error
269
272
  return
270
273
  end
271
274
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  class Page
3
5
  #
@@ -92,7 +94,7 @@ module Spidr
92
94
 
93
95
  #
94
96
  # Determines if the response code is `300`, `301`, `302`, `303`
95
- # or `307`. Also checks for "soft" redirects added at the page
97
+ # or `307`. Also checks for "soft" redirects added at the page
96
98
  # level by a meta refresh tag.
97
99
  #
98
100
  # @return [Boolean]
data/lib/spidr/page.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # Represents a requested page from a website.
@@ -142,7 +144,7 @@ module Spidr
142
144
 
143
145
  return super(name,*arguments,&block)
144
146
  end
145
-
147
+
146
148
  end
147
149
  end
148
150
 
data/lib/spidr/proxy.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # @since 0.6.0
data/lib/spidr/rules.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  #
3
5
  # The {Rules} class represents collections of acceptance and rejection
@@ -1,6 +1,8 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/spidr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'spidr'
4
6
 
5
7
  require 'net/http'
6
8
  require 'openssl'
@@ -135,7 +137,7 @@ module Spidr
135
137
  key = key_for(url)
136
138
 
137
139
  if (sess = @sessions[key])
138
- begin
140
+ begin
139
141
  sess.finish
140
142
  rescue IOError
141
143
  end
@@ -1,4 +1,6 @@
1
- require 'spidr/proxy'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../proxy'
2
4
 
3
5
  require 'uri/http'
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  module Settings
3
5
  #
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  module Settings
3
5
  #
@@ -1,3 +1,5 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/settings/user_agent'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'settings/user_agent'
data/lib/spidr/spidr.rb CHANGED
@@ -1,7 +1,9 @@
1
- require 'spidr/settings/proxy'
2
- require 'spidr/settings/timeouts'
3
- require 'spidr/settings/user_agent'
4
- require 'spidr/agent'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'settings/proxy'
4
+ require_relative 'settings/timeouts'
5
+ require_relative 'settings/user_agent'
6
+ require_relative 'agent'
5
7
 
6
8
  module Spidr
7
9
  extend Settings::Proxy
@@ -52,8 +54,8 @@ module Spidr
52
54
  #
53
55
  # @since 0.7.0
54
56
  #
55
- def self.domain(name,options={},&block)
56
- Agent.domain(name,options,&block)
57
+ def self.domain(name,**kwargs,&block)
58
+ Agent.domain(name,**kwargs,&block)
57
59
  end
58
60
 
59
61
  #
@@ -63,7 +65,7 @@ module Spidr
63
65
  Agent.site(url,**kwargs,&block)
64
66
  end
65
67
 
66
- #
68
+ #
67
69
  # @abstract
68
70
  #
69
71
  def self.robots
data/lib/spidr/version.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Spidr
2
4
  # Spidr version
3
- VERSION = '0.7.0'
5
+ VERSION = '0.7.1'
4
6
  end
data/lib/spidr.rb CHANGED
@@ -1,3 +1,5 @@
1
- require 'spidr/agent'
2
- require 'spidr/spidr'
3
- require 'spidr/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'spidr/agent'
4
+ require_relative 'spidr/spidr'
5
+ require_relative 'spidr/version'
data/spidr.gemspec CHANGED
@@ -7,10 +7,7 @@ Gem::Specification.new do |gem|
7
7
 
8
8
  gem.name = gemspec.fetch('name')
9
9
  gem.version = gemspec.fetch('version') do
10
- lib_dir = File.join(File.dirname(__FILE__),'lib')
11
- $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)
12
-
13
- require 'spidr/version'
10
+ require_relative 'lib/spidr/version'
14
11
  Spidr::VERSION
15
12
  end
16
13
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-01 00:00:00.000000000 Z
11
+ date: 2024-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
- rubygems_version: 3.3.26
131
+ rubygems_version: 3.4.10
132
132
  signing_key:
133
133
  specification_version: 4
134
134
  summary: A versatile Ruby web spidering library