snapcrawl 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/snapcrawl/crawler.rb +6 -5
- data/lib/snapcrawl/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64c0d3c62258c70417520749bdd71b7d33c8d920ec94f2dfb0aaed961310563b
|
4
|
+
data.tar.gz: 72ece0ddb0f5f9f2b578d4c369814507bb1b2343170d799eb27497a3c937475a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b60088e4dab7944269f76344094a6f80e6b304c0591fff2c3e9aa0e18ffac89b2a9927bf2759e7a17f9944dd925b01b342f46ec6587754ac206bf614741be0c3
|
7
|
+
data.tar.gz: ae1b3dda9b0339440b9f37727503ad87f1f5a71d2eadc3283250f481b9391f1d530213a05a0f5ce0adc7a1f9bd183c049c5cfc7a7fcd1b06d5286c312d8d0747
|
data/lib/snapcrawl/crawler.rb
CHANGED
@@ -6,6 +6,7 @@ require 'nokogiri'
|
|
6
6
|
require 'ostruct'
|
7
7
|
require 'pstore'
|
8
8
|
require 'uri'
|
9
|
+
require 'addressable/uri'
|
9
10
|
require 'webshot'
|
10
11
|
|
11
12
|
module Snapcrawl
|
@@ -41,7 +42,7 @@ module Snapcrawl
|
|
41
42
|
FileUtils.rm @storefile if File.exist? @storefile
|
42
43
|
end
|
43
44
|
|
44
|
-
|
45
|
+
private
|
45
46
|
|
46
47
|
def crawl(url, opts={})
|
47
48
|
url = protocolize url
|
@@ -190,7 +191,7 @@ module Snapcrawl
|
|
190
191
|
warnings = []
|
191
192
|
|
192
193
|
links.each do |link|
|
193
|
-
link = link.attribute('href').to_s
|
194
|
+
link = link.attribute('href').to_s.dup
|
194
195
|
|
195
196
|
# Remove #hash
|
196
197
|
link.gsub!(/#.+$/, '')
|
@@ -205,11 +206,11 @@ module Snapcrawl
|
|
205
206
|
|
206
207
|
# Convert relative links to absolute
|
207
208
|
begin
|
208
|
-
link = URI.join( @opts.base, link ).to_s
|
209
|
+
link = URI.join( @opts.base, link ).to_s.dup
|
209
210
|
rescue URI::InvalidURIError
|
210
|
-
escaped_link = URI.
|
211
|
+
escaped_link = Addressable::URI.encode link
|
211
212
|
warnings << { link: link, message: "Using escaped link: #{escaped_link}" }
|
212
|
-
link = URI.join( @opts.base, escaped_link ).to_s
|
213
|
+
link = URI.join( @opts.base, escaped_link ).to_s.dup
|
213
214
|
rescue => e
|
214
215
|
warnings << { link: link, message: "#{e.class} #{e.message}" }
|
215
216
|
next
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snapcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01-
|
11
|
+
date: 2020-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colsole
|
@@ -86,6 +86,20 @@ dependencies:
|
|
86
86
|
- - "~>"
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: '0.17'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: addressable
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '2.7'
|
96
|
+
type: :runtime
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '2.7'
|
89
103
|
description: Snapcrawl is a command line utility for crawling a website and saving
|
90
104
|
screenshots.
|
91
105
|
email: db@dannyben.com
|
@@ -112,7 +126,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
112
126
|
requirements:
|
113
127
|
- - ">="
|
114
128
|
- !ruby/object:Gem::Version
|
115
|
-
version: '2.
|
129
|
+
version: '2.3'
|
116
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
131
|
requirements:
|
118
132
|
- - ">="
|