scraper_utils 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/docs/example_bin_console +14 -0
- data/lib/scraper_utils/debug_utils.rb +3 -3
- data/lib/scraper_utils/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e2d77d081c52024dbba8afdfd03f939af5d3d44d82e5491a109e4dd3e853f225
|
|
4
|
+
data.tar.gz: 67df28089a8bf743ef895c3dafdbdf9461d2a15b68273e6b0bc0cfef0f9288d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1fd29f183695351bbfa82c22fa938ac00c6278a13baf8a5dcdde89f469b8d266edcd5473393082c1972e6db70df27dbe0831fc39de078c12acb62c04787c5d8e
|
|
7
|
+
data.tar.gz: e1fa93082d439e16d27884d83da6520be20756745ec843a1eb6aa6a2b747254bdcf2089462c4164d313a67c9bc052f295da335e9fa78e6c6d71f322610d8ad2e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.2 - 2026-01-27
|
|
4
|
+
|
|
5
|
+
* Removed Emoticons as they are four byte UTF-8 and some databases are configured to only store 3 byte UTF-8
|
|
6
|
+
|
|
3
7
|
## 0.9.1 - 2025-07-11
|
|
4
8
|
|
|
5
9
|
* Revert VCR to using `<authority>_info_urls.yml` for VCR cassette cache of `info_urls` check
|
data/docs/example_bin_console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require_relative "../scraper"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start(__FILE__)
|
data/lib/scraper_utils/debug_utils.rb
CHANGED
|
@@ -61,7 +61,7 @@ module ScraperUtils
|
|
|
61
61
|
return unless basic?
|
|
62
62
|
|
|
63
63
|
puts
|
|
64
|
-
LogUtils.log "
|
|
64
|
+
LogUtils.log "DEBUG REQUEST: #{http_method.upcase} #{url}"
|
|
65
65
|
puts "Parameters:", JSON.pretty_generate(parameters) if parameters
|
|
66
66
|
puts "Headers:", JSON.pretty_generate(headers) if headers
|
|
67
67
|
puts "Body:", JSON.pretty_generate(body) if body
|
|
@@ -77,7 +77,7 @@ module ScraperUtils
|
|
|
77
77
|
return unless trace?
|
|
78
78
|
|
|
79
79
|
puts
|
|
80
|
-
LogUtils.log "
|
|
80
|
+
LogUtils.log "DEBUG PAGE: #{message}"
|
|
81
81
|
puts "Current URL: #{page.uri}"
|
|
82
82
|
puts "Page title: #{page.at('title').text.strip}" if page.at("title")
|
|
83
83
|
puts "",
|
|
@@ -98,7 +98,7 @@ module ScraperUtils
|
|
|
98
98
|
return unless trace?
|
|
99
99
|
|
|
100
100
|
puts
|
|
101
|
-
LogUtils.log "
|
|
101
|
+
LogUtils.log "DEBUG SELECTION: #{message}"
|
|
102
102
|
puts "Looking for selector: #{selector}"
|
|
103
103
|
element = page.at(selector)
|
|
104
104
|
if element
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: scraper_utils
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.1
|
|
4
|
+
version: 0.9.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ian Heggie
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mechanize
|
|
@@ -81,6 +81,7 @@ files:
|
|
|
81
81
|
- bin/setup
|
|
82
82
|
- docs/debugging.md
|
|
83
83
|
- docs/enhancing_specs.md
|
|
84
|
+
- docs/example_bin_console
|
|
84
85
|
- docs/example_custom_Rakefile
|
|
85
86
|
- docs/example_dot_scraper_validation.yml
|
|
86
87
|
- docs/example_parallel_scraper.rb
|
|
@@ -109,7 +110,7 @@ metadata:
|
|
|
109
110
|
allowed_push_host: https://rubygems.org
|
|
110
111
|
homepage_uri: https://github.com/ianheggie-oaf/scraper_utils
|
|
111
112
|
source_code_uri: https://github.com/ianheggie-oaf/scraper_utils
|
|
112
|
-
documentation_uri: https://rubydoc.info/gems/scraper_utils/0.9.1
|
|
113
|
+
documentation_uri: https://rubydoc.info/gems/scraper_utils/0.9.2
|
|
113
114
|
changelog_uri: https://github.com/ianheggie-oaf/scraper_utils/blob/main/CHANGELOG.md
|
|
114
115
|
rubygems_mfa_required: 'true'
|
|
115
116
|
post_install_message:
|