powerdlz23 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/grell/.rspec +2 -0
  2. package/grell/.travis.yml +28 -0
  3. package/grell/CHANGELOG.md +111 -0
  4. package/grell/Gemfile +7 -0
  5. package/grell/LICENSE.txt +22 -0
  6. package/grell/README.md +213 -0
  7. package/grell/Rakefile +2 -0
  8. package/grell/grell.gemspec +36 -0
  9. package/grell/lib/grell/capybara_driver.rb +44 -0
  10. package/grell/lib/grell/crawler.rb +83 -0
  11. package/grell/lib/grell/crawler_manager.rb +84 -0
  12. package/grell/lib/grell/grell_logger.rb +10 -0
  13. package/grell/lib/grell/page.rb +275 -0
  14. package/grell/lib/grell/page_collection.rb +62 -0
  15. package/grell/lib/grell/rawpage.rb +62 -0
  16. package/grell/lib/grell/reader.rb +18 -0
  17. package/grell/lib/grell/version.rb +3 -0
  18. package/grell/lib/grell.rb +11 -0
  19. package/grell/spec/lib/capybara_driver_spec.rb +38 -0
  20. package/grell/spec/lib/crawler_manager_spec.rb +174 -0
  21. package/grell/spec/lib/crawler_spec.rb +361 -0
  22. package/grell/spec/lib/page_collection_spec.rb +159 -0
  23. package/grell/spec/lib/page_spec.rb +418 -0
  24. package/grell/spec/lib/reader_spec.rb +43 -0
  25. package/grell/spec/spec_helper.rb +66 -0
  26. package/heartmagic/config.py +1 -0
  27. package/heartmagic/heart.py +3 -0
  28. package/heartmagic/pytransform/__init__.py +483 -0
  29. package/heartmagic/pytransform/_pytransform.dll +0 -0
  30. package/heartmagic/pytransform/_pytransform.so +0 -0
  31. package/httpStatusCode/README.md +2 -0
  32. package/httpStatusCode/httpStatusCode.js +4 -0
  33. package/httpStatusCode/reasonPhrases.js +344 -0
  34. package/httpStatusCode/statusCodes.js +344 -0
  35. package/package.json +1 -1
  36. package/snapcrawl/.changelog.old.md +157 -0
  37. package/snapcrawl/.gitattributes +1 -0
  38. package/snapcrawl/.github/workflows/test.yml +41 -0
  39. package/snapcrawl/.rspec +3 -0
  40. package/snapcrawl/.rubocop.yml +23 -0
  41. package/snapcrawl/CHANGELOG.md +182 -0
  42. package/snapcrawl/Gemfile +15 -0
  43. package/snapcrawl/LICENSE +21 -0
  44. package/snapcrawl/README.md +135 -0
  45. package/snapcrawl/Runfile +35 -0
  46. package/snapcrawl/bin/snapcrawl +25 -0
  47. package/snapcrawl/lib/snapcrawl/cli.rb +52 -0
  48. package/snapcrawl/lib/snapcrawl/config.rb +60 -0
  49. package/snapcrawl/lib/snapcrawl/crawler.rb +98 -0
  50. package/snapcrawl/lib/snapcrawl/dependencies.rb +21 -0
  51. package/snapcrawl/lib/snapcrawl/exceptions.rb +5 -0
  52. package/snapcrawl/lib/snapcrawl/log_helpers.rb +36 -0
  53. package/snapcrawl/lib/snapcrawl/page.rb +118 -0
  54. package/snapcrawl/lib/snapcrawl/pretty_logger.rb +11 -0
  55. package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb +26 -0
  56. package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb +13 -0
  57. package/snapcrawl/lib/snapcrawl/screenshot.rb +73 -0
  58. package/snapcrawl/lib/snapcrawl/templates/config.yml +49 -0
  59. package/snapcrawl/lib/snapcrawl/templates/docopt.txt +26 -0
  60. package/snapcrawl/lib/snapcrawl/version.rb +3 -0
  61. package/snapcrawl/lib/snapcrawl.rb +20 -0
  62. package/snapcrawl/snapcrawl.gemspec +27 -0
  63. package/snapcrawl/snapcrawl.yml +41 -0
  64. package/snapcrawl/spec/README.md +16 -0
  65. package/snapcrawl/spec/approvals/bin/help +26 -0
  66. package/snapcrawl/spec/approvals/bin/usage +4 -0
  67. package/snapcrawl/spec/approvals/cli/usage +4 -0
  68. package/snapcrawl/spec/approvals/config/defaults +15 -0
  69. package/snapcrawl/spec/approvals/config/minimal +15 -0
  70. package/snapcrawl/spec/approvals/integration/blacklist +14 -0
  71. package/snapcrawl/spec/approvals/integration/default-config +14 -0
  72. package/snapcrawl/spec/approvals/integration/depth-0 +6 -0
  73. package/snapcrawl/spec/approvals/integration/depth-3 +6 -0
  74. package/snapcrawl/spec/approvals/integration/log-color-no +6 -0
  75. package/snapcrawl/spec/approvals/integration/screenshot-error +3 -0
  76. package/snapcrawl/spec/approvals/integration/whitelist +14 -0
  77. package/snapcrawl/spec/approvals/models/pretty_logger/colors +1 -0
  78. package/snapcrawl/spec/fixtures/config/minimal.yml +4 -0
  79. package/snapcrawl/spec/server/config.ru +97 -0
  80. package/snapcrawl/spec/snapcrawl/bin_spec.rb +15 -0
  81. package/snapcrawl/spec/snapcrawl/cli_spec.rb +9 -0
  82. package/snapcrawl/spec/snapcrawl/config_spec.rb +26 -0
  83. package/snapcrawl/spec/snapcrawl/integration_spec.rb +65 -0
  84. package/snapcrawl/spec/snapcrawl/page_spec.rb +89 -0
  85. package/snapcrawl/spec/snapcrawl/pretty_logger_spec.rb +19 -0
  86. package/snapcrawl/spec/snapcrawl/refinements/pair_split_spec.rb +27 -0
  87. package/snapcrawl/spec/snapcrawl/refinements/string_refinements_spec.rb +29 -0
  88. package/snapcrawl/spec/snapcrawl/screenshot_spec.rb +62 -0
  89. package/snapcrawl/spec/spec_helper.rb +22 -0
  90. package/snapcrawl/spec/spec_mixin.rb +10 -0
@@ -0,0 +1,182 @@
1
+ Change Log
2
+ ========================================
3
+
4
+ v0.5.4 - 2023-07-27
5
+ ----------------------------------------
6
+
7
+ - Drop support for Ruby <= 2.6
8
+ - Upgrade dependencies and rubocop cleanup
9
+ - Fix css_selector option
10
+ - Drop support for Ruby 2.x
11
+
12
+
13
+ v0.5.3 - 2021-03-29
14
+ ----------------------------------------
15
+
16
+ - Add skip_ssl_verification config option
17
+ - Add screenshot_delay config option
18
+
19
+
20
+ v0.5.2 - 2021-02-25
21
+ ----------------------------------------
22
+
23
+ - Fix logging percent issue
24
+
25
+
26
+ ## [v0.5.1](https://github.com/DannyBen/snapcrawl/tree/v0.5.1) (2020-03-14)
27
+
28
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0...v0.5.1)
29
+
30
+ **Merged pull requests:**
31
+
32
+ - Add additional test cases and exception safeguards [\#30](https://github.com/DannyBen/snapcrawl/pull/30) ([DannyBen](https://github.com/DannyBen))
33
+
34
+ ## [v0.5.0](https://github.com/DannyBen/snapcrawl/tree/v0.5.0) (2020-03-14)
35
+
36
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0.rc1...v0.5.0)
37
+
38
+ **Merged pull requests:**
39
+
40
+ - Epic refactor [\#29](https://github.com/DannyBen/snapcrawl/pull/29) ([DannyBen](https://github.com/DannyBen))
41
+
42
+ ## [v0.5.0.rc1](https://github.com/DannyBen/snapcrawl/tree/v0.5.0.rc1) (2020-03-14)
43
+
44
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.4...v0.5.0.rc1)
45
+
46
+ ## [v0.4.4](https://github.com/DannyBen/snapcrawl/tree/v0.4.4) (2020-03-12)
47
+
48
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.3...v0.4.4)
49
+
50
+ **Merged pull requests:**
51
+
52
+ - Rescue imagemagick exceptions [\#28](https://github.com/DannyBen/snapcrawl/pull/28) ([DannyBen](https://github.com/DannyBen))
53
+ - Switch to github actions [\#27](https://github.com/DannyBen/snapcrawl/pull/27) ([DannyBen](https://github.com/DannyBen))
54
+
55
+ ## [v0.4.3](https://github.com/DannyBen/snapcrawl/tree/v0.4.3) (2020-01-09)
56
+
57
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.2...v0.4.3)
58
+
59
+ ## [v0.4.2](https://github.com/DannyBen/snapcrawl/tree/v0.4.2) (2020-01-09)
60
+
61
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.1...v0.4.2)
62
+
63
+ **Merged pull requests:**
64
+
65
+ - Improve handling of malformed URIs [\#26](https://github.com/DannyBen/snapcrawl/pull/26) ([DannyBen](https://github.com/DannyBen))
66
+
67
+ ## [v0.4.1](https://github.com/DannyBen/snapcrawl/tree/v0.4.1) (2020-01-09)
68
+
69
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.0...v0.4.1)
70
+
71
+ **Merged pull requests:**
72
+
73
+ - Updates for ruby 2.7 [\#25](https://github.com/DannyBen/snapcrawl/pull/25) ([DannyBen](https://github.com/DannyBen))
74
+ - Test with ruby 2.7 [\#23](https://github.com/DannyBen/snapcrawl/pull/23) ([DannyBen](https://github.com/DannyBen))
75
+ - Improve error handling [\#20](https://github.com/DannyBen/snapcrawl/pull/20) ([DannyBen](https://github.com/DannyBen))
76
+
77
+ ## [v0.4.0](https://github.com/DannyBen/snapcrawl/tree/v0.4.0) (2020-01-01)
78
+
79
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.1...v0.4.0)
80
+
81
+ **Merged pull requests:**
82
+
83
+ - Remove go subcommand [\#22](https://github.com/DannyBen/snapcrawl/pull/22) ([DannyBen](https://github.com/DannyBen))
84
+ - Make CI more consistent [\#21](https://github.com/DannyBen/snapcrawl/pull/21) ([DannyBen](https://github.com/DannyBen))
85
+
86
+ ## [v0.3.1](https://github.com/DannyBen/snapcrawl/tree/v0.3.1) (2019-09-11)
87
+
88
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.0...v0.3.1)
89
+
90
+ **Fixed bugs:**
91
+
92
+ - Try catch error instead of stopping script [\#19](https://github.com/DannyBen/snapcrawl/issues/19)
93
+ - error : Cliver::Dependency::VersionMismatch [\#18](https://github.com/DannyBen/snapcrawl/issues/18)
94
+ - RuntimeError redirection forbidden [\#16](https://github.com/DannyBen/snapcrawl/issues/16)
95
+
96
+ ## [v0.3.0](https://github.com/DannyBen/snapcrawl/tree/v0.3.0) (2019-09-10)
97
+
98
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.8...v0.3.0)
99
+
100
+ **Merged pull requests:**
101
+
102
+ - Fixes round [\#17](https://github.com/DannyBen/snapcrawl/pull/17) ([DannyBen](https://github.com/DannyBen))
103
+
104
+ ## [v0.2.8](https://github.com/DannyBen/snapcrawl/tree/v0.2.8) (2019-06-14)
105
+
106
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.7...v0.2.8)
107
+
108
+ **Closed issues:**
109
+
110
+ - Improve tests and run tests on Travis [\#13](https://github.com/DannyBen/snapcrawl/issues/13)
111
+ - Save all versions of snapshot? [\#11](https://github.com/DannyBen/snapcrawl/issues/11)
112
+
113
+ **Merged pull requests:**
114
+
115
+ - Add Travis CI [\#15](https://github.com/DannyBen/snapcrawl/pull/15) ([DannyBen](https://github.com/DannyBen))
116
+ - Add ability to set filename template [\#14](https://github.com/DannyBen/snapcrawl/pull/14) ([DannyBen](https://github.com/DannyBen))
117
+
118
+ ## [v0.2.7](https://github.com/DannyBen/snapcrawl/tree/v0.2.7) (2019-06-13)
119
+
120
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.6...v0.2.7)
121
+
122
+ **Closed issues:**
123
+
124
+ - Using snapcrawl via proxy? [\#10](https://github.com/DannyBen/snapcrawl/issues/10)
125
+
126
+ **Merged pull requests:**
127
+
128
+ - Fix ignored --folder parameter [\#12](https://github.com/DannyBen/snapcrawl/pull/12) ([DannyBen](https://github.com/DannyBen))
129
+
130
+ ## [v0.2.6](https://github.com/DannyBen/snapcrawl/tree/v0.2.6) (2019-04-18)
131
+
132
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.5...v0.2.6)
133
+
134
+ **Closed issues:**
135
+
136
+ - Screenshots not saving to default snaps folder on Windows machine [\#6](https://github.com/DannyBen/snapcrawl/issues/6)
137
+ - Add the ability to pass headers into the application [\#3](https://github.com/DannyBen/snapcrawl/issues/3)
138
+
139
+ **Merged pull requests:**
140
+
141
+ - Upgrade colsole to fix windows command\_exist [\#9](https://github.com/DannyBen/snapcrawl/pull/9) ([DannyBen](https://github.com/DannyBen))
142
+
143
+ ## [v0.2.5](https://github.com/DannyBen/snapcrawl/tree/v0.2.5) (2019-03-14)
144
+
145
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.4...v0.2.5)
146
+
147
+ **Fixed bugs:**
148
+
149
+ - Screenshots not saving to default or specified folder locations [\#4](https://github.com/DannyBen/snapcrawl/issues/4)
150
+
151
+ **Merged pull requests:**
152
+
153
+ - Alert when imagemagick is not installed [\#7](https://github.com/DannyBen/snapcrawl/pull/7) ([DannyBen](https://github.com/DannyBen))
154
+
155
+ ## [v0.2.4](https://github.com/DannyBen/snapcrawl/tree/v0.2.4) (2018-10-18)
156
+
157
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.3...v0.2.4)
158
+
159
+ **Merged pull requests:**
160
+
161
+ - Switch from screencap to webshot [\#5](https://github.com/DannyBen/snapcrawl/pull/5) ([DannyBen](https://github.com/DannyBen))
162
+ - Switch from minitest to rspec [\#2](https://github.com/DannyBen/snapcrawl/pull/2) ([DannyBen](https://github.com/DannyBen))
163
+
164
+ ## [v0.2.3](https://github.com/DannyBen/snapcrawl/tree/v0.2.3) (2017-03-15)
165
+
166
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.2...v0.2.3)
167
+
168
+ **Merged pull requests:**
169
+
170
+ - Fixes [\#1](https://github.com/DannyBen/snapcrawl/pull/1) ([DannyBen](https://github.com/DannyBen))
171
+
172
+ ## [v0.2.2](https://github.com/DannyBen/snapcrawl/tree/v0.2.2) (2015-12-05)
173
+
174
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.1...v0.2.2)
175
+
176
+ ## [v0.2.1](https://github.com/DannyBen/snapcrawl/tree/v0.2.1) (2015-12-05)
177
+
178
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.0...v0.2.1)
179
+
180
+ ## [v0.2.0](https://github.com/DannyBen/snapcrawl/tree/v0.2.0) (2015-12-05)
181
+
182
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/0710e5f8d5e45b5341ae4a9fa2212d5c76c72de4...v0.2.0)
@@ -0,0 +1,15 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'byebug'
7
+ gem 'lp'
8
+ gem 'puma'
9
+ gem 'rspec'
10
+ gem 'rspec_approvals'
11
+ gem 'runfile'
12
+ gem 'runfile-tasks'
13
+ gem 'simplecov'
14
+ gem 'sinatra'
15
+ end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Danny Ben Shitrit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,135 @@
1
+ # Snapcrawl - crawl a website and take screenshots
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/snapcrawl.svg)](http://badge.fury.io/rb/snapcrawl)
4
+ [![Build Status](https://github.com/DannyBen/snapcrawl/workflows/Test/badge.svg)](https://github.com/DannyBen/snapcrawl/actions?query=workflow%3ATest)
5
+ [![Code Climate](https://codeclimate.com/github/DannyBen/snapcrawl/badges/gpa.svg)](https://codeclimate.com/github/DannyBen/snapcrawl)
6
+
7
+ ---
8
+
9
+ Snapcrawl is a command line utility for crawling a website and saving
10
+ screenshots.
11
+
12
+
13
+ ## Features
14
+
15
+ - Crawls a website to any given depth and saves screenshots
16
+ - Can capture the full length of the page
17
+ - Can use a specific resolution for screenshots
18
+ - Skips capturing if the screenshot was already saved recently
19
+ - Uses local caching to avoid expensive crawl operations if not needed
20
+ - Reports broken links
21
+
22
+ ## Install
23
+
24
+ **Using Docker**
25
+
26
+ You can run Snapcrawl by using this docker image (which contains all the
27
+ necessary prerequisites):
28
+
29
+ ```shell
30
+ $ alias snapcrawl='docker run --rm -it --network host --volume "$PWD:/app" dannyben/snapcrawl'
31
+ ```
32
+
33
+ For more information on the Docker image, refer to the [docker-snapcrawl][3] repository.
34
+
35
+ **Using Ruby**
36
+
37
+ ```shell
38
+ $ gem install snapcrawl
39
+ ```
40
+
41
+ Note that Snapcrawl requires [PhantomJS][1] and [ImageMagick][2].
42
+
43
+ ## Usage
44
+
45
+ Snapcrawl can be configured either through a YAML configuration file or by specifying options on the command line.
46
+
47
+ ```shell
48
+ $ snapcrawl
49
+ Usage:
50
+ snapcrawl URL [--config FILE] [SETTINGS...]
51
+ snapcrawl -h | --help
52
+ snapcrawl -v | --version
53
+ ```
54
+
55
+ The default configuration filename is `snapcrawl.yml`.
56
+
57
+ If the file given to `--config` does not exist, Snapcrawl creates it from a template configuration file (the `.yml` extension is added when omitted):
58
+
59
+ ```shell
60
+ $ snapcrawl example.com --config snapcrawl
61
+ ```
62
+
63
+ ### Specifying options in the command line
64
+
65
+ All configuration options can be specified in the command line as `key=value` pairs:
66
+
67
+ ```shell
68
+ $ snapcrawl example.com log_level=0 depth=2 width=1024
69
+ ```
70
+
71
+ ### Sample configuration file
72
+
73
+ ```yaml
74
+ # All values below are the default values
75
+
76
+ # log level (0-4) 0=DEBUG 1=INFO 2=WARN 3=ERROR 4=FATAL
77
+ log_level: 1
78
+
79
+ # log_color (yes, no, auto)
80
+ # yes = always show log color
81
+ # no = never use colors
82
+ # auto = only use colors when running in an interactive terminal
83
+ log_color: auto
84
+
85
+ # number of levels to crawl, 0 means capture only the root URL
86
+ depth: 1
87
+
88
+ # screenshot width in pixels
89
+ width: 1280
90
+
91
+ # screenshot height in pixels, 0 means the entire height
92
+ height: 0
93
+
94
+ # number of seconds to consider the page cache and its screenshot fresh
95
+ cache_life: 86400
96
+
97
+ # where to store the HTML page cache
98
+ cache_dir: cache
99
+
100
+ # where to store screenshots
101
+ snaps_dir: snaps
102
+
103
+ # screenshot filename template, where '%{url}' will be replaced with a
104
+ # slug version of the URL (no need to include the .png extension)
105
+ name_template: '%{url}'
106
+
107
+ # urls not matching this regular expression will be ignored
108
+ url_whitelist:
109
+
110
+ # urls matching this regular expression will be ignored
111
+ url_blacklist:
112
+
113
+ # take a screenshot of this CSS selector only
114
+ css_selector:
115
+
116
+ # when true, ignore SSL related errors
117
+ skip_ssl_verification: false
118
+
119
+ # set to any number of seconds to wait for the page to load before taking
120
+ # a screenshot, leave empty to not wait at all (only needed for pages with
121
+ # animations or other post-load events).
122
+ screenshot_delay:
123
+ ```
124
+
125
+ ## Contributing / Support
126
+ If you experience any issue, have a question or a suggestion, or if you wish
127
+ to contribute, feel free to [open an issue][issues].
128
+
129
+ ---
130
+
131
+ [1]: http://phantomjs.org/download.html
132
+ [2]: https://imagemagick.org/script/download.php
133
+ [3]: https://github.com/DannyBen/docker-snapcrawl
134
+ [issues]: https://github.com/DannyBen/snapcrawl/issues
135
+
@@ -0,0 +1,35 @@
1
+ require 'snapcrawl/version'
2
+
3
# Runfile DSL declarations describing this task file.
title   'Snapcrawl Runfile'
summary 'Runfile tasks for building the Snapcrawl gem'
version Snapcrawl::VERSION

import_gem 'runfile-tasks/gem'
import 'debug'

help "Regenerate the command line output in the README file"
action :patchme do
  original = File.read 'README.md'

  # Capture the CLI help output and prefix every line with the literal below.
  usage_text = `bundle exec snapcrawl -h`.gsub(/^/, " ")

  # Replace everything between the help command line and the closing '---'
  # rule with the freshly captured usage text.
  # NOTE(review): the README shipped next to this file shows `$ snapcrawl`
  # rather than `$ snapcrawl --help`, so this pattern may no longer match and
  # the file would be rewritten unchanged - confirm.
  patched = original.gsub(/(\$ snapcrawl --help)(.*)(---\s*)/m) do
    "#{Regexp.last_match(1)}\n\n#{usage_text}\n#{Regexp.last_match(3)}"
  end

  File.write "README.md", patched
end

help "Generate changelog and append old changelog"
action :changelog do
  system("git changelog --save")
  # The older entries (from before the switch to git-changelog) live in a
  # separate file and are appended after the generated part.
  system("cat .changelog.old.md >> CHANGELOG.md")
end

usage "mockserver"
help "Start the mock server"
action :mockserver do
  begin
    # Run rackup from the directory that holds the spec server's config.ru.
    Dir.chdir('spec/server') { system 'rackup -p 3000 -o 0.0.0.0' }
  rescue Interrupt
    # Ctrl-C is the expected way to stop the server; exit with a short message.
    abort "\rBye"
  end
end
35
+
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'snapcrawl'
4
+ require 'colsole'
5
+
6
# On Ctrl-C, print a short farewell and terminate via Kernel#abort.
trap(:INT) { abort "\r\nGoodbye" }

include Snapcrawl
include Colsole

# Shared failure reporter: prints the error class and a message using the
# `ru` markup through say!, then exits with the given status code.
# +lead+ is the run of newlines printed before the error class.
bail = lambda do |error, message, status, lead|
  say! "#{lead}ru`#{error.class}`\n#{message}"
  exit status
end

begin
  CLI.new.call ARGV
rescue MissingPhantomJS => e
  # Exit status 2: the phantomjs executable is missing.
  bail.call e, 'Cannot find phantomjs executable in the path, please install it first.', 2, "\n\n"
rescue MissingImageMagick => e
  # Exit status 3: the ImageMagick `convert` executable is missing.
  bail.call e, 'Cannot find convert (ImageMagick) executable in the path, please install it first.', 3, "\n\n"
rescue => e
  # Any other StandardError: show the backtrace (outermost frame first) only
  # when DEBUG is set, then exit with status 1.
  puts e.backtrace.reverse if ENV['DEBUG']
  bail.call e, e.message, 1, "\n"
end
@@ -0,0 +1,52 @@
1
+ require 'colsole'
2
+ require 'docopt'
3
+ require 'fileutils'
4
+
5
module Snapcrawl
  # Command line front end: parses arguments with Docopt, applies
  # configuration, verifies dependencies and starts a crawl.
  class CLI
    include Colsole
    using StringRefinements
    using PairSplit

    # Parses +args+ against the bundled docopt usage text and executes the
    # resulting command. A Docopt::Exit is rescued and its message printed.
    def call(args = [])
      parsed = Docopt.docopt(docopt, version: VERSION, argv: args)
      execute parsed
    rescue Docopt::Exit => e
      puts e.message
    end

  private

    # Runs a crawl for the parsed docopt +args+ hash.
    # Reads the '--config', 'SETTINGS' and 'URL' entries.
    def execute(args)
      config_path = args['--config']
      Config.load(config_path) if config_path

      # key=value pairs from the command line override configured values.
      overrides = args['SETTINGS'].pair_split
      apply_tweaks(overrides) if overrides

      Dependencies.verify

      $logger.debug 'initializing cli'
      FileUtils.mkdir_p Config.snaps_dir

      Crawler.new(args['URL'].protocolize).crawl
    end

    # Usage text handed to Docopt; read once and cached.
    def docopt
      @docopt ||= File.read(docopt_path)
    end

    # Absolute path of the docopt template that ships next to this file.
    def docopt_path
      File.expand_path 'templates/docopt.txt', __dir__
    end

    # Writes each override into Config.settings. A 'log_level' override is
    # also applied to the global logger so it takes effect immediately.
    def apply_tweaks(tweaks)
      tweaks.each do |name, setting|
        Config.settings[name] = setting
        $logger.level = setting if name == 'log_level'
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'sting'
2
+ require 'fileutils'
3
+
4
module Snapcrawl
  # Application settings, built on Sting. Defaults are always pushed first;
  # an optional YAML file is pushed on top of them.
  class Config < Sting
    class << self
      # Resets the settings to the defaults and, when +file+ is given, pushes
      # that file on top. A missing '.yml'/'.yaml' extension is filled in
      # with '.yml'. If the file does not exist it is created from the
      # bundled template instead of being pushed.
      def load(file = nil)
        reset!
        push defaults
        return unless file

        path = with_yaml_extension(file)

        # FIXME: Cannot use logger here due to the "chicken and egg" with
        #        Config. The $logger is available, but it was not yet fully
        #        configured with log_level etc.
        File.exist?(path) ? push(path) : create_config(path)
      end

    private

      # Returns +file+ unchanged when it already ends in .yml or .yaml,
      # otherwise with '.yml' appended.
      def with_yaml_extension(file)
        return file if /\.ya?ml$/.match?(file)

        "#{file}.yml"
      end

      # Built-in default value for every supported setting.
      def defaults
        {
          depth:                 1,
          width:                 1280,
          height:                0,
          cache_life:            86_400,
          cache_dir:             'cache',
          snaps_dir:             'snaps',
          name_template:         '%{url}',
          url_whitelist:         nil,
          url_blacklist:         nil,
          css_selector:          nil,
          log_level:             1,
          log_color:             'auto',
          skip_ssl_verification: false,
          screenshot_delay:      nil,
        }
      end

      # Copies the bundled template to +file+, creating the parent directory
      # when needed. The template is read before anything is created.
      def create_config(file)
        template = File.read(config_template)
        FileUtils.mkdir_p File.dirname(file)
        File.write file, template
      end

      # Absolute path of the config template that ships next to this file.
      def config_template
        File.expand_path 'templates/config.yml', __dir__
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'fileutils'
2
+
3
module Snapcrawl
  # Crawls a site starting from a root URL and saves a screenshot of every
  # page it visits. Pages are processed in the order they were queued, and
  # sub pages are only followed while the page depth is below Config.depth.
  class Crawler
    using StringRefinements

    # The root URL this crawler was created with.
    attr_reader :url

    # @param url [String] the root URL to start crawling from
    def initialize(url)
      $logger.debug "initializing crawler with g`#{url}`"

      # Log a copy of the settings with the percent sign in name_template
      # doubled - presumably so a later format pass in the logger prints a
      # literal %{url} instead of trying to substitute it.
      config_for_display = Config.settings.dup
      config_for_display['name_template'] = '%%{url}'

      $logger.debug "config #{config_for_display}"
      @url = url
    end

    # Verifies the external dependencies, queues the root page and keeps
    # processing the queue until it is empty.
    def crawl
      Dependencies.verify
      todo[url] = Page.new url
      process_todo while todo.any?
    end

  private

    # Takes the oldest queued page, marks its URL as done, processes it and,
    # if it was valid and is not yet at the maximum depth, queues its sub
    # pages.
    def process_todo
      $logger.debug "processing queue: g`#{todo.count} remaining`"

      current_url, page = todo.shift
      done.push current_url

      return unless process_page page

      register_sub_pages page.pages if page.depth < Config.depth
    end

    # Queues every page in +pages+ that has not been seen yet and passes the
    # optional whitelist / blacklist filters.
    def register_sub_pages(pages)
      pages.each do |sub_page|
        # +todo+ is keyed by URL and +done+ holds URLs, so the lookup must use
        # the page's URL. Looking up the page object itself (as before) never
        # matched, which let already-seen pages be queued again.
        next if todo.key?(sub_page.url) || done.include?(sub_page.url)

        if Config.url_whitelist && sub_page.path !~ (/#{Config.url_whitelist}/)
          $logger.debug "ignoring mu`#{sub_page.url}`, reason: whitelist"
          next
        end

        if Config.url_blacklist && sub_page.path =~ (/#{Config.url_blacklist}/)
          $logger.debug "ignoring mu`#{sub_page.url}`, reason: blacklist"
          next
        end

        todo[sub_page.url] = sub_page
      end
    end

    # Captures a screenshot for +page+ unless a fresh one already exists.
    # Returns false when the page is invalid, true otherwise (including when
    # the capture itself failed, since that error is logged, not raised).
    def process_page(page)
      # The whole path is run through format, so %{url} in the name template
      # is replaced with the slug of the page URL.
      outfile = "#{Config.snaps_dir}/#{Config.name_template}.png" % { url: page.url.to_slug }

      $logger.info "processing mu`#{page.url}`, depth: #{page.depth}"

      unless page.valid?
        $logger.debug "page #{page.path} is invalid, aborting process"
        return false
      end

      if file_fresh? outfile
        $logger.info "screenshot for #{page.path} already exists"
      else
        $logger.info "gb`capturing screenshot for #{page.path}`"
        save_screenshot page, outfile
      end

      true
    end

    # Saves the screenshot; any StandardError is logged so one failing page
    # does not stop the crawl.
    def save_screenshot(page, outfile)
      page.save_screenshot outfile
    rescue => e
      $logger.error "screenshot error on mu`#{page.path}` - r`#{e.class}`: #{e.message}"
    end

    # True when caching is enabled (cache_life > 0), +file+ exists and it is
    # younger than Config.cache_life seconds.
    def file_fresh?(file)
      Config.cache_life.positive? && File.exist?(file) && file_age(file) < Config.cache_life
    end

    # Whole seconds since +file+ was last modified.
    def file_age(file)
      (Time.now - File.stat(file).mtime).to_i
    end

    # Pages waiting to be processed, keyed by URL (insertion ordered).
    def todo
      @todo ||= {}
    end

    # URLs that were already taken off the queue.
    def done
      @done ||= []
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'colsole'
2
+
3
module Snapcrawl
  # Checks that the external executables Snapcrawl relies on are available.
  class Dependencies
    class << self
      include Colsole

      # Raises MissingPhantomJS or MissingImageMagick when the respective
      # executable ('phantomjs', 'convert') is not found by command_exist?.
      # A successful check is remembered, so later calls return immediately.
      #
      # Returns true after a successful check, nil when already verified.
      def verify
        unless @verified
          $logger.debug 'verifying g`phantomjs` is present'
          command_exist?('phantomjs') || raise(MissingPhantomJS)

          $logger.debug 'verifying g`imagemagick` is present'
          command_exist?('convert') || raise(MissingImageMagick)

          @verified = true
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Snapcrawl
  # Raised by Dependencies.verify when the phantomjs executable is not found.
  class MissingPhantomJS < StandardError
  end

  # Raised by Dependencies.verify when the ImageMagick convert executable is
  # not found.
  class MissingImageMagick < StandardError
  end

  # Error type for screenshot failures.
  # NOTE(review): the raise site is not part of this file - confirm usage.
  class ScreenshotError < StandardError
  end
end
@@ -0,0 +1,36 @@
1
+ require 'colsole'
2
+
3
module Snapcrawl
  # Helpers for building a colored, fixed-width log line formatter.
  module LogHelpers
    include Colsole

    # Color code used for each severity label. Frozen so the shared table
    # cannot be mutated at runtime.
    SEVERITY_COLORS = {
      'INFO'  => :b,
      'WARN'  => :y,
      'ERROR' => :r,
      'FATAL' => :r,
      'DEBUG' => :c,
    }.freeze

    # Returns a proc taking (severity, time, progname, message) that renders
    # "<severity, right-aligned to 5 chars> : <message>\n". The severity is
    # wrapped in color markup when a color is mapped for it; the line is then
    # colorized or stripped of color markup depending on use_colors?.
    def log_formatter
      proc do |severity, _time, _prog, message|
        label = severity.rjust 5
        color = SEVERITY_COLORS[severity]

        # Severities without a mapped color (for example Logger's "ANY" label
        # for unknown-level messages) are printed plain; wrapping them in
        # backticks without a color code would leave stray backticks behind.
        tag = color ? "#{color}`#{label}`" : label

        line = "#{tag} : #{message}\n"
        use_colors? ? colorize(line) : strip_colors(line)
      end
    end

    # Whether log lines should be colorized: follows the terminal check when
    # Config.log_color is 'auto', otherwise uses the configured value.
    #
    # NOTE(review): because of `||=`, only a truthy result is cached; a falsy
    # result is re-evaluated on every call. Kept as is, since callers may rely
    # on the setting being picked up after it changes - confirm before
    # switching to a strict one-time memoization.
    def use_colors?
      @use_colors ||= (Config.log_color == 'auto' ? tty? : Config.log_color)
    end

    # True when output should be treated as an interactive terminal. The TTY
    # environment variable ('on' / 'off') overrides the check on $stdout.
    def tty?
      case ENV['TTY']
      when 'on'  then true
      when 'off' then false
      else
        $stdout.tty?
      end
    end
  end
end