scraper_utils 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -1
- data/docs/example_bin_console +14 -0
- data/lib/scraper_utils/debug_utils.rb +3 -3
- data/lib/scraper_utils/spec_support.rb +2 -2
- data/lib/scraper_utils/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e2d77d081c52024dbba8afdfd03f939af5d3d44d82e5491a109e4dd3e853f225
|
|
4
|
+
data.tar.gz: 67df28089a8bf743ef895c3dafdbdf9461d2a15b68273e6b0bc0cfef0f9288d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1fd29f183695351bbfa82c22fa938ac00c6278a13baf8a5dcdde89f469b8d266edcd5473393082c1972e6db70df27dbe0831fc39de078c12acb62c04787c5d8e
|
|
7
|
+
data.tar.gz: e1fa93082d439e16d27884d83da6520be20756745ec843a1eb6aa6a2b747254bdcf2089462c4164d313a67c9bc052f295da335e9fa78e6c6d71f322610d8ad2e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 0.9.0 - 2025-07-11
|
|
3
|
+
## 0.9.2 - 2026-01-27
|
|
4
|
+
|
|
5
|
+
* Removed Emoticons as they are four byte UTF-8 and some databases are configured to only store 3 byte UTF-8
|
|
6
|
+
|
|
7
|
+
## 0.9.1 - 2025-07-11
|
|
8
|
+
|
|
9
|
+
* Revert VCR to using `<authority>_info_urls.yml` for VCR cassette cache of `info_urls` check
|
|
10
|
+
|
|
11
|
+
## 0.9.0 - 2025-07-11
|
|
4
12
|
|
|
5
13
|
**Significant cleanup - removed code we ended up not using as none of the councils are actually concerned about server load**
|
|
6
14
|
|
data/docs/example_bin_console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require_relative "../scraper"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start(__FILE__)
|
data/lib/scraper_utils/debug_utils.rb
CHANGED
|
@@ -61,7 +61,7 @@ module ScraperUtils
|
|
|
61
61
|
return unless basic?
|
|
62
62
|
|
|
63
63
|
puts
|
|
64
|
-
LogUtils.log "
|
|
64
|
+
LogUtils.log "DEBUG REQUEST: #{http_method.upcase} #{url}"
|
|
65
65
|
puts "Parameters:", JSON.pretty_generate(parameters) if parameters
|
|
66
66
|
puts "Headers:", JSON.pretty_generate(headers) if headers
|
|
67
67
|
puts "Body:", JSON.pretty_generate(body) if body
|
|
@@ -77,7 +77,7 @@ module ScraperUtils
|
|
|
77
77
|
return unless trace?
|
|
78
78
|
|
|
79
79
|
puts
|
|
80
|
-
LogUtils.log "
|
|
80
|
+
LogUtils.log "DEBUG PAGE: #{message}"
|
|
81
81
|
puts "Current URL: #{page.uri}"
|
|
82
82
|
puts "Page title: #{page.at('title').text.strip}" if page.at("title")
|
|
83
83
|
puts "",
|
|
@@ -98,7 +98,7 @@ module ScraperUtils
|
|
|
98
98
|
return unless trace?
|
|
99
99
|
|
|
100
100
|
puts
|
|
101
|
-
LogUtils.log "
|
|
101
|
+
LogUtils.log "DEBUG SELECTION: #{message}"
|
|
102
102
|
puts "Looking for selector: #{selector}"
|
|
103
103
|
element = page.at(selector)
|
|
104
104
|
if element
|
data/lib/scraper_utils/spec_support.rb
CHANGED
|
@@ -193,7 +193,7 @@ module ScraperUtils
|
|
|
193
193
|
puts "Checking the one expected info_url returns 200: #{expected_url}"
|
|
194
194
|
|
|
195
195
|
if defined?(VCR)
|
|
196
|
-
VCR.use_cassette("#{authority_label(results, suffix: '_')}
|
|
196
|
+
VCR.use_cassette("#{authority_label(results, suffix: '_')}info_url") do
|
|
197
197
|
page = block_given? ? block.call(expected_url) : fetch_url_with_redirects(expected_url)
|
|
198
198
|
validate_page_response(page, bot_check_expected)
|
|
199
199
|
end
|
|
@@ -212,7 +212,7 @@ module ScraperUtils
|
|
|
212
212
|
# @raise RuntimeError if insufficient detail checks pass
|
|
213
213
|
def self.validate_info_urls_have_expected_details!(results, percentage: 75, variation: 3, bot_check_expected: false, &block)
|
|
214
214
|
if defined?(VCR)
|
|
215
|
-
VCR.use_cassette("#{authority_label(results, suffix: '_')}
|
|
215
|
+
VCR.use_cassette("#{authority_label(results, suffix: '_')}info_urls") do
|
|
216
216
|
check_info_url_details(results, percentage, variation, bot_check_expected, &block)
|
|
217
217
|
end
|
|
218
218
|
else
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: scraper_utils
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.0
|
|
4
|
+
version: 0.9.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ian Heggie
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mechanize
|
|
@@ -81,6 +81,7 @@ files:
|
|
|
81
81
|
- bin/setup
|
|
82
82
|
- docs/debugging.md
|
|
83
83
|
- docs/enhancing_specs.md
|
|
84
|
+
- docs/example_bin_console
|
|
84
85
|
- docs/example_custom_Rakefile
|
|
85
86
|
- docs/example_dot_scraper_validation.yml
|
|
86
87
|
- docs/example_parallel_scraper.rb
|
|
@@ -109,7 +110,7 @@ metadata:
|
|
|
109
110
|
allowed_push_host: https://rubygems.org
|
|
110
111
|
homepage_uri: https://github.com/ianheggie-oaf/scraper_utils
|
|
111
112
|
source_code_uri: https://github.com/ianheggie-oaf/scraper_utils
|
|
112
|
-
documentation_uri: https://rubydoc.info/gems/scraper_utils/0.9.0
|
|
113
|
+
documentation_uri: https://rubydoc.info/gems/scraper_utils/0.9.2
|
|
113
114
|
changelog_uri: https://github.com/ianheggie-oaf/scraper_utils/blob/main/CHANGELOG.md
|
|
114
115
|
rubygems_mfa_required: 'true'
|
|
115
116
|
post_install_message:
|