snapcrawl 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -2
- data/lib/snapcrawl/crawler.rb +3 -3
- data/lib/snapcrawl/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7c2f0d180ff2e9b9cdeed900ceb8b3ccb7bbdbe2ca4f6312b22e913189e73902
|
|
4
|
+
data.tar.gz: fd1ff5e8abadde31e0bebf702e3ac589a88e5e79e54857a10078cc1e793d80bf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9c9462ca2fe28a1fa4b93462104f609dbc9608f5dec191767f81401a00283bdf030026e5e94c6394594beafbf35800fe68ec137fa6131fe09cd59de07ae0e4ee
|
|
7
|
+
data.tar.gz: ade326042ed804293b56451c6f7cd204ba0b8d4c86d15d5d1b51323ef7401c4529b7e4d67b25e1ca6c09cf6b1ca1bfd2d667f4a3d2647813f4b9f98ba9d393df
|
data/README.md
CHANGED
|
@@ -27,6 +27,21 @@ Prerequisites
|
|
|
27
27
|
Snapcrawl requires [PhantomJS][1] and [ImageMagick][2].
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
Docker Image
|
|
31
|
+
--------------------------------------------------
|
|
32
|
+
|
|
33
|
+
You can run Snapcrawl by using this docker image (which contains all the
|
|
34
|
+
necessary prerequisites):
|
|
35
|
+
|
|
36
|
+
$ docker pull dannyben/snapcrawl
|
|
37
|
+
|
|
38
|
+
Then you can use it like this:
|
|
39
|
+
|
|
40
|
+
$ docker run --rm -it dannyben/snapcrawl --help
|
|
41
|
+
|
|
42
|
+
For more information refer to the [docker-snapcrawl][3] repository.
|
|
43
|
+
|
|
44
|
+
|
|
30
45
|
Install
|
|
31
46
|
--------------------------------------------------
|
|
32
47
|
|
|
@@ -71,8 +86,10 @@ Usage
|
|
|
71
86
|
Notes
|
|
72
87
|
--------------------------------------------------
|
|
73
88
|
|
|
74
|
-
|
|
75
|
-
|
|
89
|
+
If a URL cannot be found, Snapcrawl will report to stderr.
|
|
90
|
+
You can create a report by running
|
|
91
|
+
|
|
92
|
+
$ snapcrawl go example.com 2> err.txt
|
|
76
93
|
|
|
77
94
|
|
|
78
95
|
|
|
@@ -80,3 +97,4 @@ Notes
|
|
|
80
97
|
|
|
81
98
|
[1]: http://phantomjs.org/download.html
|
|
82
99
|
[2]: https://imagemagick.org/script/download.php
|
|
100
|
+
[3]: https://github.com/DannyBen/docker-snapcrawl
|
data/lib/snapcrawl/crawler.rb
CHANGED
|
@@ -50,14 +50,14 @@ module Snapcrawl
|
|
|
50
50
|
height: 0,
|
|
51
51
|
depth: 1,
|
|
52
52
|
age: 86400,
|
|
53
|
-
|
|
53
|
+
folder: 'snaps',
|
|
54
54
|
base: url,
|
|
55
55
|
}
|
|
56
56
|
urls = [protocolize(url)]
|
|
57
57
|
|
|
58
58
|
@opts = OpenStruct.new defaults.merge(opts)
|
|
59
59
|
|
|
60
|
-
make_screenshot_dir @opts.
|
|
60
|
+
make_screenshot_dir @opts.folder
|
|
61
61
|
|
|
62
62
|
@opts.depth.times do
|
|
63
63
|
urls = crawl_and_snap urls
|
|
@@ -155,7 +155,7 @@ module Snapcrawl
|
|
|
155
155
|
|
|
156
156
|
# Return proper image path for a URL
|
|
157
157
|
def image_path_for(url)
|
|
158
|
-
"#{@opts.
|
|
158
|
+
"#{@opts.folder}/#{handelize(url)}.png"
|
|
159
159
|
end
|
|
160
160
|
|
|
161
161
|
# Add protocol to a URL if needed
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: snapcrawl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Danny Ben Shitrit
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-
|
|
11
|
+
date: 2019-06-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: colsole
|