snapcrawl 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +64 -54
- data/lib/snapcrawl/crawler.rb +4 -6
- data/lib/snapcrawl/templates/docopt.txt +9 -9
- data/lib/snapcrawl/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a98c3d8f09eb8572a01eefb9ceb9a9b925849e4dc0eb127f9c22a8cb79a7a954
|
4
|
+
data.tar.gz: 649528bf60dda4bc787bcd01e1eac7b73b017aa63d6ede6ccf794b398bc8758c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f447cfd3536ed826acb9f597748242313259a38fecb473a2f6b1949e98f9d579fa70a8b29b1f3d3909f34ce287336e3305de7d4f73c684fc5d8708a0436073f
|
7
|
+
data.tar.gz: f966675b9465bca79b004207cd9861e7fe26129d4993394ffb7f02a80deab7e206027ff2ccfa18c81cb74aee0063da3f4de1e85446c24d6ea4623e26d156f69b
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ screenshots.
|
|
14
14
|
Features
|
15
15
|
--------------------------------------------------
|
16
16
|
|
17
|
-
- Crawls a website to any given depth and
|
17
|
+
- Crawls a website to any given depth and saves screenshots
|
18
18
|
- Can capture the full length of the page
|
19
19
|
- Can use a specific resolution for screenshots
|
20
20
|
- Skips capturing if the screenshot was already saved recently
|
@@ -34,11 +34,15 @@ Docker Image
|
|
34
34
|
You can run Snapcrawl by using this docker image (which contains all the
|
35
35
|
necessary prerequisites):
|
36
36
|
|
37
|
-
|
37
|
+
```
|
38
|
+
$ docker pull dannyben/snapcrawl
|
39
|
+
```
|
38
40
|
|
39
41
|
Then you can use it like this:
|
40
42
|
|
41
|
-
|
43
|
+
```
|
44
|
+
$ docker run --rm -it dannyben/snapcrawl --help
|
45
|
+
```
|
42
46
|
|
43
47
|
For more information refer to the [docker-snapcrawl][3] repository.
|
44
48
|
|
@@ -46,65 +50,71 @@ For more information refer to the [docker-snapcrawl][3] repository.
|
|
46
50
|
Install
|
47
51
|
--------------------------------------------------
|
48
52
|
|
49
|
-
|
53
|
+
```
|
54
|
+
$ gem install snapcrawl
|
55
|
+
```
|
50
56
|
|
51
57
|
|
52
58
|
Usage
|
53
59
|
--------------------------------------------------
|
54
60
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
61
|
+
```
|
62
|
+
$ snapcrawl --help
|
63
|
+
|
64
|
+
Snapcrawl
|
65
|
+
|
66
|
+
Usage:
|
67
|
+
snapcrawl URL [options]
|
68
|
+
snapcrawl -h | --help
|
69
|
+
snapcrawl -v | --version
|
70
|
+
|
71
|
+
Options:
|
72
|
+
-f, --folder PATH
|
73
|
+
Where to save screenshots [default: snaps]
|
74
|
+
|
75
|
+
-n, --name TEMPLATE
|
76
|
+
Filename template. Include the string '%{url}' anywhere in the name to
|
77
|
+
use the captured URL in the filename [default: %{url}]
|
78
|
+
|
79
|
+
-a, --age SECONDS
|
80
|
+
Number of seconds to consider screenshots fresh [default: 86400]
|
81
|
+
|
82
|
+
-d, --depth LEVELS
|
83
|
+
Number of levels to crawl [default: 1]
|
84
|
+
|
85
|
+
-W, --width PIXELS
|
86
|
+
Screen width in pixels [default: 1280]
|
87
|
+
|
88
|
+
-H, --height PIXELS
|
89
|
+
Screen height in pixels. Use 0 to capture the full page [default: 0]
|
90
|
+
|
91
|
+
-s, --selector SELECTOR
|
92
|
+
CSS selector to capture
|
93
|
+
|
94
|
+
-o, --only REGEX
|
95
|
+
Include only URLs that match REGEX
|
96
|
+
|
97
|
+
-h, --help
|
98
|
+
Show this screen
|
99
|
+
|
100
|
+
-v, --version
|
101
|
+
Show version number
|
102
|
+
|
103
|
+
Examples:
|
104
|
+
snapcrawl example.com
|
105
|
+
snapcrawl example.com -d2 -fscreens
|
106
|
+
snapcrawl example.com -d2 > out.txt 2> err.txt &
|
107
|
+
snapcrawl example.com -W360 -H480
|
108
|
+
snapcrawl example.com --selector "#main-content"
|
109
|
+
snapcrawl example.com --only "products|collections"
|
110
|
+
snapcrawl example.com --name "screenshot-%{url}"
|
111
|
+
snapcrawl example.com --name "`date +%Y%m%d`_%{url}"
|
112
|
+
```
|
105
113
|
|
106
114
|
---
|
107
115
|
|
108
116
|
[1]: http://phantomjs.org/download.html
|
109
117
|
[2]: https://imagemagick.org/script/download.php
|
110
118
|
[3]: https://github.com/DannyBen/docker-snapcrawl
|
119
|
+
|
120
|
+
|
data/lib/snapcrawl/crawler.rb
CHANGED
@@ -15,10 +15,8 @@ module Snapcrawl
|
|
15
15
|
class MissingImageMagick < StandardError; end
|
16
16
|
|
17
17
|
class Crawler
|
18
|
-
|
19
|
-
|
20
|
-
end
|
21
|
-
|
18
|
+
include Singleton
|
19
|
+
|
22
20
|
def initialize
|
23
21
|
@storefile = "snapcrawl.pstore"
|
24
22
|
@store = PStore.new(@storefile)
|
@@ -256,11 +254,11 @@ module Snapcrawl
|
|
256
254
|
# prints some output to stdout, this is why we override $stdout for
|
257
255
|
# the duration of the run.
|
258
256
|
def hide_output
|
259
|
-
|
257
|
+
keep_stdout, keep_stderr = $stdout, $stderr
|
260
258
|
$stdout, $stderr = StringIO.new, StringIO.new
|
261
259
|
yield
|
262
260
|
ensure
|
263
|
-
$stdout, $stderr =
|
261
|
+
$stdout, $stderr = keep_stdout, keep_stderr
|
264
262
|
end
|
265
263
|
end
|
266
264
|
end
|
data/lib/snapcrawl/templates/docopt.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Snapcrawl
|
2
2
|
|
3
3
|
Usage:
|
4
|
-
snapcrawl
|
4
|
+
snapcrawl URL [options]
|
5
5
|
snapcrawl -h | --help
|
6
6
|
snapcrawl -v | --version
|
7
7
|
|
@@ -38,11 +38,11 @@ Options:
|
|
38
38
|
Show version number
|
39
39
|
|
40
40
|
Examples:
|
41
|
-
snapcrawl
|
42
|
-
snapcrawl
|
43
|
-
snapcrawl
|
44
|
-
snapcrawl
|
45
|
-
snapcrawl
|
46
|
-
snapcrawl
|
47
|
-
snapcrawl
|
48
|
-
snapcrawl
|
41
|
+
snapcrawl example.com
|
42
|
+
snapcrawl example.com -d2 -fscreens
|
43
|
+
snapcrawl example.com -d2 > out.txt 2> err.txt &
|
44
|
+
snapcrawl example.com -W360 -H480
|
45
|
+
snapcrawl example.com --selector "#main-content"
|
46
|
+
snapcrawl example.com --only "products|collections"
|
47
|
+
snapcrawl example.com --name "screenshot-%{url}"
|
48
|
+
snapcrawl example.com --name "`date +%Y%m%d`_%{url}"
|
data/lib/snapcrawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: snapcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colsole
|
@@ -119,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
119
|
- !ruby/object:Gem::Version
|
120
120
|
version: '0'
|
121
121
|
requirements: []
|
122
|
-
rubygems_version: 3.0.
|
122
|
+
rubygems_version: 3.0.3
|
123
123
|
signing_key:
|
124
124
|
specification_version: 4
|
125
125
|
summary: Crawl a website and take screenshots (CLI + Library)
|