powerdlz23 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/Spider/README.md +19 -0
  2. package/Spider/domain.py +18 -0
  3. package/Spider/general.py +51 -0
  4. package/Spider/link_finder.py +25 -0
  5. package/Spider/main.py +50 -0
  6. package/Spider/spider.py +74 -0
  7. package/crawler/.formatter.exs +5 -0
  8. package/crawler/.github/workflows/ci.yml +29 -0
  9. package/crawler/.recode.exs +33 -0
  10. package/crawler/.tool-versions +2 -0
  11. package/crawler/CHANGELOG.md +82 -0
  12. package/crawler/README.md +198 -0
  13. package/crawler/architecture.svg +4 -0
  14. package/crawler/config/config.exs +9 -0
  15. package/crawler/config/dev.exs +5 -0
  16. package/crawler/config/test.exs +5 -0
  17. package/crawler/examples/google_search/scraper.ex +37 -0
  18. package/crawler/examples/google_search/url_filter.ex +11 -0
  19. package/crawler/examples/google_search.ex +77 -0
  20. package/crawler/lib/crawler/dispatcher/worker.ex +14 -0
  21. package/crawler/lib/crawler/dispatcher.ex +20 -0
  22. package/crawler/lib/crawler/fetcher/header_preparer.ex +60 -0
  23. package/crawler/lib/crawler/fetcher/modifier.ex +45 -0
  24. package/crawler/lib/crawler/fetcher/policer.ex +77 -0
  25. package/crawler/lib/crawler/fetcher/recorder.ex +55 -0
  26. package/crawler/lib/crawler/fetcher/requester.ex +32 -0
  27. package/crawler/lib/crawler/fetcher/retrier.ex +43 -0
  28. package/crawler/lib/crawler/fetcher/url_filter.ex +26 -0
  29. package/crawler/lib/crawler/fetcher.ex +81 -0
  30. package/crawler/lib/crawler/http.ex +7 -0
  31. package/crawler/lib/crawler/linker/path_builder.ex +71 -0
  32. package/crawler/lib/crawler/linker/path_expander.ex +59 -0
  33. package/crawler/lib/crawler/linker/path_finder.ex +106 -0
  34. package/crawler/lib/crawler/linker/path_offliner.ex +59 -0
  35. package/crawler/lib/crawler/linker/path_prefixer.ex +46 -0
  36. package/crawler/lib/crawler/linker.ex +173 -0
  37. package/crawler/lib/crawler/options.ex +127 -0
  38. package/crawler/lib/crawler/parser/css_parser.ex +37 -0
  39. package/crawler/lib/crawler/parser/guarder.ex +38 -0
  40. package/crawler/lib/crawler/parser/html_parser.ex +41 -0
  41. package/crawler/lib/crawler/parser/link_parser/link_expander.ex +32 -0
  42. package/crawler/lib/crawler/parser/link_parser.ex +50 -0
  43. package/crawler/lib/crawler/parser.ex +122 -0
  44. package/crawler/lib/crawler/queue_handler.ex +45 -0
  45. package/crawler/lib/crawler/scraper.ex +28 -0
  46. package/crawler/lib/crawler/snapper/dir_maker.ex +45 -0
  47. package/crawler/lib/crawler/snapper/link_replacer.ex +95 -0
  48. package/crawler/lib/crawler/snapper.ex +82 -0
  49. package/crawler/lib/crawler/store/counter.ex +19 -0
  50. package/crawler/lib/crawler/store/page.ex +7 -0
  51. package/crawler/lib/crawler/store.ex +87 -0
  52. package/crawler/lib/crawler/worker.ex +62 -0
  53. package/crawler/lib/crawler.ex +91 -0
  54. package/crawler/mix.exs +78 -0
  55. package/crawler/mix.lock +40 -0
  56. package/crawler/test/fixtures/introducing-elixir.jpg +0 -0
  57. package/crawler/test/integration_test.exs +135 -0
  58. package/crawler/test/lib/crawler/dispatcher/worker_test.exs +7 -0
  59. package/crawler/test/lib/crawler/dispatcher_test.exs +5 -0
  60. package/crawler/test/lib/crawler/fetcher/header_preparer_test.exs +7 -0
  61. package/crawler/test/lib/crawler/fetcher/policer_test.exs +71 -0
  62. package/crawler/test/lib/crawler/fetcher/recorder_test.exs +9 -0
  63. package/crawler/test/lib/crawler/fetcher/requester_test.exs +9 -0
  64. package/crawler/test/lib/crawler/fetcher/retrier_test.exs +7 -0
  65. package/crawler/test/lib/crawler/fetcher/url_filter_test.exs +7 -0
  66. package/crawler/test/lib/crawler/fetcher_test.exs +153 -0
  67. package/crawler/test/lib/crawler/http_test.exs +47 -0
  68. package/crawler/test/lib/crawler/linker/path_builder_test.exs +7 -0
  69. package/crawler/test/lib/crawler/linker/path_expander_test.exs +7 -0
  70. package/crawler/test/lib/crawler/linker/path_finder_test.exs +7 -0
  71. package/crawler/test/lib/crawler/linker/path_offliner_test.exs +7 -0
  72. package/crawler/test/lib/crawler/linker/path_prefixer_test.exs +7 -0
  73. package/crawler/test/lib/crawler/linker_test.exs +7 -0
  74. package/crawler/test/lib/crawler/options_test.exs +7 -0
  75. package/crawler/test/lib/crawler/parser/css_parser_test.exs +7 -0
  76. package/crawler/test/lib/crawler/parser/guarder_test.exs +7 -0
  77. package/crawler/test/lib/crawler/parser/html_parser_test.exs +7 -0
  78. package/crawler/test/lib/crawler/parser/link_parser/link_expander_test.exs +7 -0
  79. package/crawler/test/lib/crawler/parser/link_parser_test.exs +7 -0
  80. package/crawler/test/lib/crawler/parser_test.exs +8 -0
  81. package/crawler/test/lib/crawler/queue_handler_test.exs +7 -0
  82. package/crawler/test/lib/crawler/scraper_test.exs +7 -0
  83. package/crawler/test/lib/crawler/snapper/dir_maker_test.exs +7 -0
  84. package/crawler/test/lib/crawler/snapper/link_replacer_test.exs +7 -0
  85. package/crawler/test/lib/crawler/snapper_test.exs +9 -0
  86. package/crawler/test/lib/crawler/worker_test.exs +5 -0
  87. package/crawler/test/lib/crawler_test.exs +295 -0
  88. package/crawler/test/support/test_case.ex +24 -0
  89. package/crawler/test/support/test_helpers.ex +28 -0
  90. package/crawler/test/test_helper.exs +7 -0
  91. package/grell/.rspec +2 -0
  92. package/grell/.travis.yml +28 -0
  93. package/grell/CHANGELOG.md +111 -0
  94. package/grell/Gemfile +7 -0
  95. package/grell/LICENSE.txt +22 -0
  96. package/grell/README.md +213 -0
  97. package/grell/Rakefile +2 -0
  98. package/grell/grell.gemspec +36 -0
  99. package/grell/lib/grell/capybara_driver.rb +44 -0
  100. package/grell/lib/grell/crawler.rb +83 -0
  101. package/grell/lib/grell/crawler_manager.rb +84 -0
  102. package/grell/lib/grell/grell_logger.rb +10 -0
  103. package/grell/lib/grell/page.rb +275 -0
  104. package/grell/lib/grell/page_collection.rb +62 -0
  105. package/grell/lib/grell/rawpage.rb +62 -0
  106. package/grell/lib/grell/reader.rb +18 -0
  107. package/grell/lib/grell/version.rb +3 -0
  108. package/grell/lib/grell.rb +11 -0
  109. package/grell/spec/lib/capybara_driver_spec.rb +38 -0
  110. package/grell/spec/lib/crawler_manager_spec.rb +174 -0
  111. package/grell/spec/lib/crawler_spec.rb +361 -0
  112. package/grell/spec/lib/page_collection_spec.rb +159 -0
  113. package/grell/spec/lib/page_spec.rb +418 -0
  114. package/grell/spec/lib/reader_spec.rb +43 -0
  115. package/grell/spec/spec_helper.rb +66 -0
  116. package/heartmagic/config.py +1 -0
  117. package/heartmagic/heart.py +3 -0
  118. package/heartmagic/pytransform/__init__.py +483 -0
  119. package/heartmagic/pytransform/_pytransform.dll +0 -0
  120. package/heartmagic/pytransform/_pytransform.so +0 -0
  121. package/httpStatusCode/README.md +2 -0
  122. package/httpStatusCode/httpStatusCode.js +4 -0
  123. package/httpStatusCode/reasonPhrases.js +344 -0
  124. package/httpStatusCode/statusCodes.js +344 -0
  125. package/package.json +1 -1
  126. package/rubyretriever/.rspec +2 -0
  127. package/rubyretriever/.travis.yml +7 -0
  128. package/rubyretriever/Gemfile +3 -0
  129. package/rubyretriever/Gemfile.lock +64 -0
  130. package/rubyretriever/LICENSE +20 -0
  131. package/rubyretriever/Rakefile +7 -0
  132. package/rubyretriever/bin/rr +79 -0
  133. package/rubyretriever/lib/retriever/cli.rb +25 -0
  134. package/rubyretriever/lib/retriever/core_ext.rb +13 -0
  135. package/rubyretriever/lib/retriever/fetch.rb +268 -0
  136. package/rubyretriever/lib/retriever/fetchfiles.rb +71 -0
  137. package/rubyretriever/lib/retriever/fetchseo.rb +18 -0
  138. package/rubyretriever/lib/retriever/fetchsitemap.rb +43 -0
  139. package/rubyretriever/lib/retriever/link.rb +47 -0
  140. package/rubyretriever/lib/retriever/openuri_redirect_patch.rb +8 -0
  141. package/rubyretriever/lib/retriever/page.rb +104 -0
  142. package/rubyretriever/lib/retriever/page_iterator.rb +21 -0
  143. package/rubyretriever/lib/retriever/target.rb +47 -0
  144. package/rubyretriever/lib/retriever/version.rb +4 -0
  145. package/rubyretriever/lib/retriever.rb +15 -0
  146. package/rubyretriever/readme.md +166 -0
  147. package/rubyretriever/rubyretriever.gemspec +41 -0
  148. package/rubyretriever/spec/link_spec.rb +77 -0
  149. package/rubyretriever/spec/page_spec.rb +94 -0
  150. package/rubyretriever/spec/retriever_spec.rb +84 -0
  151. package/rubyretriever/spec/spec_helper.rb +17 -0
  152. package/rubyretriever/spec/target_spec.rb +55 -0
  153. package/snapcrawl/.changelog.old.md +157 -0
  154. package/snapcrawl/.gitattributes +1 -0
  155. package/snapcrawl/.github/workflows/test.yml +41 -0
  156. package/snapcrawl/.rspec +3 -0
  157. package/snapcrawl/.rubocop.yml +23 -0
  158. package/snapcrawl/CHANGELOG.md +182 -0
  159. package/snapcrawl/Gemfile +15 -0
  160. package/snapcrawl/LICENSE +21 -0
  161. package/snapcrawl/README.md +135 -0
  162. package/snapcrawl/Runfile +35 -0
  163. package/snapcrawl/bin/snapcrawl +25 -0
  164. package/snapcrawl/lib/snapcrawl/cli.rb +52 -0
  165. package/snapcrawl/lib/snapcrawl/config.rb +60 -0
  166. package/snapcrawl/lib/snapcrawl/crawler.rb +98 -0
  167. package/snapcrawl/lib/snapcrawl/dependencies.rb +21 -0
  168. package/snapcrawl/lib/snapcrawl/exceptions.rb +5 -0
  169. package/snapcrawl/lib/snapcrawl/log_helpers.rb +36 -0
  170. package/snapcrawl/lib/snapcrawl/page.rb +118 -0
  171. package/snapcrawl/lib/snapcrawl/pretty_logger.rb +11 -0
  172. package/snapcrawl/lib/snapcrawl/refinements/pair_split.rb +26 -0
  173. package/snapcrawl/lib/snapcrawl/refinements/string_refinements.rb +13 -0
  174. package/snapcrawl/lib/snapcrawl/screenshot.rb +73 -0
  175. package/snapcrawl/lib/snapcrawl/templates/config.yml +49 -0
  176. package/snapcrawl/lib/snapcrawl/templates/docopt.txt +26 -0
  177. package/snapcrawl/lib/snapcrawl/version.rb +3 -0
  178. package/snapcrawl/lib/snapcrawl.rb +20 -0
  179. package/snapcrawl/snapcrawl.gemspec +27 -0
  180. package/snapcrawl/snapcrawl.yml +41 -0
  181. package/snapcrawl/spec/README.md +16 -0
  182. package/snapcrawl/spec/approvals/bin/help +26 -0
  183. package/snapcrawl/spec/approvals/bin/usage +4 -0
  184. package/snapcrawl/spec/approvals/cli/usage +4 -0
  185. package/snapcrawl/spec/approvals/config/defaults +15 -0
  186. package/snapcrawl/spec/approvals/config/minimal +15 -0
  187. package/snapcrawl/spec/approvals/integration/blacklist +14 -0
  188. package/snapcrawl/spec/approvals/integration/default-config +14 -0
  189. package/snapcrawl/spec/approvals/integration/depth-0 +6 -0
  190. package/snapcrawl/spec/approvals/integration/depth-3 +6 -0
  191. package/snapcrawl/spec/approvals/integration/log-color-no +6 -0
  192. package/snapcrawl/spec/approvals/integration/screenshot-error +3 -0
  193. package/snapcrawl/spec/approvals/integration/whitelist +14 -0
  194. package/snapcrawl/spec/approvals/models/pretty_logger/colors +1 -0
  195. package/snapcrawl/spec/fixtures/config/minimal.yml +4 -0
  196. package/snapcrawl/spec/server/config.ru +97 -0
  197. package/snapcrawl/spec/snapcrawl/bin_spec.rb +15 -0
  198. package/snapcrawl/spec/snapcrawl/cli_spec.rb +9 -0
  199. package/snapcrawl/spec/snapcrawl/config_spec.rb +26 -0
  200. package/snapcrawl/spec/snapcrawl/integration_spec.rb +65 -0
  201. package/snapcrawl/spec/snapcrawl/page_spec.rb +89 -0
  202. package/snapcrawl/spec/snapcrawl/pretty_logger_spec.rb +19 -0
  203. package/snapcrawl/spec/snapcrawl/refinements/pair_split_spec.rb +27 -0
  204. package/snapcrawl/spec/snapcrawl/refinements/string_refinements_spec.rb +29 -0
  205. package/snapcrawl/spec/snapcrawl/screenshot_spec.rb +62 -0
  206. package/snapcrawl/spec/spec_helper.rb +22 -0
  207. package/snapcrawl/spec/spec_mixin.rb +10 -0
@@ -0,0 +1,157 @@
1
+ ## [v0.5.1](https://github.com/DannyBen/snapcrawl/tree/v0.5.1) (2020-03-14)
2
+
3
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0...v0.5.1)
4
+
5
+ **Merged pull requests:**
6
+
7
+ - Add additional test cases and exception safeguards [\#30](https://github.com/DannyBen/snapcrawl/pull/30) ([DannyBen](https://github.com/DannyBen))
8
+
9
+ ## [v0.5.0](https://github.com/DannyBen/snapcrawl/tree/v0.5.0) (2020-03-14)
10
+
11
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0.rc1...v0.5.0)
12
+
13
+ **Merged pull requests:**
14
+
15
+ - Epic refactor [\#29](https://github.com/DannyBen/snapcrawl/pull/29) ([DannyBen](https://github.com/DannyBen))
16
+
17
+ ## [v0.5.0.rc1](https://github.com/DannyBen/snapcrawl/tree/v0.5.0.rc1) (2020-03-14)
18
+
19
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.4...v0.5.0.rc1)
20
+
21
+ ## [v0.4.4](https://github.com/DannyBen/snapcrawl/tree/v0.4.4) (2020-03-12)
22
+
23
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.3...v0.4.4)
24
+
25
+ **Merged pull requests:**
26
+
27
+ - Rescue imagemagick exceptions [\#28](https://github.com/DannyBen/snapcrawl/pull/28) ([DannyBen](https://github.com/DannyBen))
28
+ - Switch to github actions [\#27](https://github.com/DannyBen/snapcrawl/pull/27) ([DannyBen](https://github.com/DannyBen))
29
+
30
+ ## [v0.4.3](https://github.com/DannyBen/snapcrawl/tree/v0.4.3) (2020-01-09)
31
+
32
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.2...v0.4.3)
33
+
34
+ ## [v0.4.2](https://github.com/DannyBen/snapcrawl/tree/v0.4.2) (2020-01-09)
35
+
36
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.1...v0.4.2)
37
+
38
+ **Merged pull requests:**
39
+
40
+ - Improve handling of malformed URIs [\#26](https://github.com/DannyBen/snapcrawl/pull/26) ([DannyBen](https://github.com/DannyBen))
41
+
42
+ ## [v0.4.1](https://github.com/DannyBen/snapcrawl/tree/v0.4.1) (2020-01-09)
43
+
44
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.0...v0.4.1)
45
+
46
+ **Merged pull requests:**
47
+
48
+ - Updates for ruby 2.7 [\#25](https://github.com/DannyBen/snapcrawl/pull/25) ([DannyBen](https://github.com/DannyBen))
49
+ - Test with ruby 2.7 [\#23](https://github.com/DannyBen/snapcrawl/pull/23) ([DannyBen](https://github.com/DannyBen))
50
+ - Improve error handling [\#20](https://github.com/DannyBen/snapcrawl/pull/20) ([DannyBen](https://github.com/DannyBen))
51
+
52
+ ## [v0.4.0](https://github.com/DannyBen/snapcrawl/tree/v0.4.0) (2020-01-01)
53
+
54
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.1...v0.4.0)
55
+
56
+ **Merged pull requests:**
57
+
58
+ - Remove go subcommand [\#22](https://github.com/DannyBen/snapcrawl/pull/22) ([DannyBen](https://github.com/DannyBen))
59
+ - Make CI more consistent [\#21](https://github.com/DannyBen/snapcrawl/pull/21) ([DannyBen](https://github.com/DannyBen))
60
+
61
+ ## [v0.3.1](https://github.com/DannyBen/snapcrawl/tree/v0.3.1) (2019-09-11)
62
+
63
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.0...v0.3.1)
64
+
65
+ **Fixed bugs:**
66
+
67
+ - Try catch error instead of stopping script [\#19](https://github.com/DannyBen/snapcrawl/issues/19)
68
+ - error : Cliver::Dependency::VersionMismatch [\#18](https://github.com/DannyBen/snapcrawl/issues/18)
69
+ - RuntimeError redirection forbidden [\#16](https://github.com/DannyBen/snapcrawl/issues/16)
70
+
71
+ ## [v0.3.0](https://github.com/DannyBen/snapcrawl/tree/v0.3.0) (2019-09-10)
72
+
73
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.8...v0.3.0)
74
+
75
+ **Merged pull requests:**
76
+
77
+ - Fixes round [\#17](https://github.com/DannyBen/snapcrawl/pull/17) ([DannyBen](https://github.com/DannyBen))
78
+
79
+ ## [v0.2.8](https://github.com/DannyBen/snapcrawl/tree/v0.2.8) (2019-06-14)
80
+
81
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.7...v0.2.8)
82
+
83
+ **Closed issues:**
84
+
85
+ - Improve tests and run tests on Travis [\#13](https://github.com/DannyBen/snapcrawl/issues/13)
86
+ - Save all versions of snapshot? [\#11](https://github.com/DannyBen/snapcrawl/issues/11)
87
+
88
+ **Merged pull requests:**
89
+
90
+ - Add Travis CI [\#15](https://github.com/DannyBen/snapcrawl/pull/15) ([DannyBen](https://github.com/DannyBen))
91
+ - Add ability to set filename template [\#14](https://github.com/DannyBen/snapcrawl/pull/14) ([DannyBen](https://github.com/DannyBen))
92
+
93
+ ## [v0.2.7](https://github.com/DannyBen/snapcrawl/tree/v0.2.7) (2019-06-13)
94
+
95
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.6...v0.2.7)
96
+
97
+ **Closed issues:**
98
+
99
+ - Using snapcrawl via proxy? [\#10](https://github.com/DannyBen/snapcrawl/issues/10)
100
+
101
+ **Merged pull requests:**
102
+
103
+ - Fix ignored --folder parameter [\#12](https://github.com/DannyBen/snapcrawl/pull/12) ([DannyBen](https://github.com/DannyBen))
104
+
105
+ ## [v0.2.6](https://github.com/DannyBen/snapcrawl/tree/v0.2.6) (2019-04-18)
106
+
107
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.5...v0.2.6)
108
+
109
+ **Closed issues:**
110
+
111
+ - Screenshots not saving to default snaps folder on Windows machine [\#6](https://github.com/DannyBen/snapcrawl/issues/6)
112
+ - Add the ability to pass headers into the application [\#3](https://github.com/DannyBen/snapcrawl/issues/3)
113
+
114
+ **Merged pull requests:**
115
+
116
+ - Upgrade colsole to fix windows command\_exist [\#9](https://github.com/DannyBen/snapcrawl/pull/9) ([DannyBen](https://github.com/DannyBen))
117
+
118
+ ## [v0.2.5](https://github.com/DannyBen/snapcrawl/tree/v0.2.5) (2019-03-14)
119
+
120
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.4...v0.2.5)
121
+
122
+ **Fixed bugs:**
123
+
124
+ - Screenshots not saving to default or specified folder locations [\#4](https://github.com/DannyBen/snapcrawl/issues/4)
125
+
126
+ **Merged pull requests:**
127
+
128
+ - Alert when imagemagick is not installed [\#7](https://github.com/DannyBen/snapcrawl/pull/7) ([DannyBen](https://github.com/DannyBen))
129
+
130
+ ## [v0.2.4](https://github.com/DannyBen/snapcrawl/tree/v0.2.4) (2018-10-18)
131
+
132
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.3...v0.2.4)
133
+
134
+ **Merged pull requests:**
135
+
136
+ - Switch from screencap to webshot [\#5](https://github.com/DannyBen/snapcrawl/pull/5) ([DannyBen](https://github.com/DannyBen))
137
+ - Switch from minitest to rspec [\#2](https://github.com/DannyBen/snapcrawl/pull/2) ([DannyBen](https://github.com/DannyBen))
138
+
139
+ ## [v0.2.3](https://github.com/DannyBen/snapcrawl/tree/v0.2.3) (2017-03-15)
140
+
141
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.2...v0.2.3)
142
+
143
+ **Merged pull requests:**
144
+
145
+ - Fixes [\#1](https://github.com/DannyBen/snapcrawl/pull/1) ([DannyBen](https://github.com/DannyBen))
146
+
147
+ ## [v0.2.2](https://github.com/DannyBen/snapcrawl/tree/v0.2.2) (2015-12-05)
148
+
149
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.1...v0.2.2)
150
+
151
+ ## [v0.2.1](https://github.com/DannyBen/snapcrawl/tree/v0.2.1) (2015-12-05)
152
+
153
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.0...v0.2.1)
154
+
155
+ ## [v0.2.0](https://github.com/DannyBen/snapcrawl/tree/v0.2.0) (2015-12-05)
156
+
157
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/0710e5f8d5e45b5341ae4a9fa2212d5c76c72de4...v0.2.0)
@@ -0,0 +1 @@
1
+ Runfile linguist-language=Ruby
@@ -0,0 +1,41 @@
1
+ name: Test
2
+ on:
3
+ pull_request:
4
+ push: { branches: master }
5
+
6
+ jobs:
7
+ test:
8
+ name: Ruby ${{ matrix.ruby }}
9
+
10
+ runs-on: ubuntu-latest
11
+
12
+ strategy:
13
+ matrix: { ruby: ['3.0', '3.1', '3.2', head] }
14
+
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v3
18
+
19
+ # Rush needed for easy installation of phantomjs
20
+ - name: Install rush
21
+ run: curl -Ls http://get.dannyb.co/rush/setup | bash
22
+
23
+ - name: Install phantomjs
24
+ run: rush snatch dannyben phantomjs
25
+
26
+ - name: Install OS dependencies
27
+ run: sudo apt-get -y install libyaml-dev
28
+
29
+ - name: Setup Ruby
30
+ uses: ruby/setup-ruby@v1
31
+ with:
32
+ ruby-version: '${{ matrix.ruby }}'
33
+ bundler-cache: true
34
+
35
+ - name: Run mock server
36
+ run: |
37
+ nohup bundle exec run mockserver &
38
+ sleep 2
39
+
40
+ - name: Run tests
41
+ run: bundle exec rspec
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --fail-fast
@@ -0,0 +1,23 @@
1
+ require:
2
+ - rubocop-performance
3
+ - rubocop-rspec
4
+
5
+ inherit_gem:
6
+ rentacop:
7
+ - rentacop.yml
8
+ - rspec.yml
9
+
10
+ AllCops:
11
+ TargetRubyVersion: 3.0
12
+ Exclude:
13
+ - dev/**/*
14
+
15
+ Style/GlobalVars:
16
+ AllowedVariables:
17
+ - '$logger'
18
+
19
+ RSpec/AnyInstance:
20
+ Enabled: false
21
+
22
+ RSpec/InstanceVariable:
23
+ Enabled: false
@@ -0,0 +1,182 @@
1
+ Change Log
2
+ ========================================
3
+
4
+ v0.5.4 - 2023-07-27
5
+ ----------------------------------------
6
+
7
+ - Drop support for Ruby <= 2.6
8
+ - Upgrade dependencies and rubocop cleanup
9
+ - Fix css_selector option
10
+ - Drop support for Ruby 2.x
11
+
12
+
13
+ v0.5.3 - 2021-03-29
14
+ ----------------------------------------
15
+
16
+ - Add skip_ssl_verification config option
17
+ - Add screenshot_delay config option
18
+
19
+
20
+ v0.5.2 - 2021-02-25
21
+ ----------------------------------------
22
+
23
+ - Fix logging percent issue
24
+
25
+
26
+ ## [v0.5.1](https://github.com/DannyBen/snapcrawl/tree/v0.5.1) (2020-03-14)
27
+
28
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0...v0.5.1)
29
+
30
+ **Merged pull requests:**
31
+
32
+ - Add additional test cases and exception safeguards [\#30](https://github.com/DannyBen/snapcrawl/pull/30) ([DannyBen](https://github.com/DannyBen))
33
+
34
+ ## [v0.5.0](https://github.com/DannyBen/snapcrawl/tree/v0.5.0) (2020-03-14)
35
+
36
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.5.0.rc1...v0.5.0)
37
+
38
+ **Merged pull requests:**
39
+
40
+ - Epic refactor [\#29](https://github.com/DannyBen/snapcrawl/pull/29) ([DannyBen](https://github.com/DannyBen))
41
+
42
+ ## [v0.5.0.rc1](https://github.com/DannyBen/snapcrawl/tree/v0.5.0.rc1) (2020-03-14)
43
+
44
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.4...v0.5.0.rc1)
45
+
46
+ ## [v0.4.4](https://github.com/DannyBen/snapcrawl/tree/v0.4.4) (2020-03-12)
47
+
48
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.3...v0.4.4)
49
+
50
+ **Merged pull requests:**
51
+
52
+ - Rescue imagemagick exceptions [\#28](https://github.com/DannyBen/snapcrawl/pull/28) ([DannyBen](https://github.com/DannyBen))
53
+ - Switch to github actions [\#27](https://github.com/DannyBen/snapcrawl/pull/27) ([DannyBen](https://github.com/DannyBen))
54
+
55
+ ## [v0.4.3](https://github.com/DannyBen/snapcrawl/tree/v0.4.3) (2020-01-09)
56
+
57
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.2...v0.4.3)
58
+
59
+ ## [v0.4.2](https://github.com/DannyBen/snapcrawl/tree/v0.4.2) (2020-01-09)
60
+
61
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.1...v0.4.2)
62
+
63
+ **Merged pull requests:**
64
+
65
+ - Improve handling of malformed URIs [\#26](https://github.com/DannyBen/snapcrawl/pull/26) ([DannyBen](https://github.com/DannyBen))
66
+
67
+ ## [v0.4.1](https://github.com/DannyBen/snapcrawl/tree/v0.4.1) (2020-01-09)
68
+
69
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.4.0...v0.4.1)
70
+
71
+ **Merged pull requests:**
72
+
73
+ - Updates for ruby 2.7 [\#25](https://github.com/DannyBen/snapcrawl/pull/25) ([DannyBen](https://github.com/DannyBen))
74
+ - Test with ruby 2.7 [\#23](https://github.com/DannyBen/snapcrawl/pull/23) ([DannyBen](https://github.com/DannyBen))
75
+ - Improve error handling [\#20](https://github.com/DannyBen/snapcrawl/pull/20) ([DannyBen](https://github.com/DannyBen))
76
+
77
+ ## [v0.4.0](https://github.com/DannyBen/snapcrawl/tree/v0.4.0) (2020-01-01)
78
+
79
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.1...v0.4.0)
80
+
81
+ **Merged pull requests:**
82
+
83
+ - Remove go subcommand [\#22](https://github.com/DannyBen/snapcrawl/pull/22) ([DannyBen](https://github.com/DannyBen))
84
+ - Make CI more consistent [\#21](https://github.com/DannyBen/snapcrawl/pull/21) ([DannyBen](https://github.com/DannyBen))
85
+
86
+ ## [v0.3.1](https://github.com/DannyBen/snapcrawl/tree/v0.3.1) (2019-09-11)
87
+
88
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.3.0...v0.3.1)
89
+
90
+ **Fixed bugs:**
91
+
92
+ - Try catch error instead of stopping script [\#19](https://github.com/DannyBen/snapcrawl/issues/19)
93
+ - error : Cliver::Dependency::VersionMismatch [\#18](https://github.com/DannyBen/snapcrawl/issues/18)
94
+ - RuntimeError redirection forbidden [\#16](https://github.com/DannyBen/snapcrawl/issues/16)
95
+
96
+ ## [v0.3.0](https://github.com/DannyBen/snapcrawl/tree/v0.3.0) (2019-09-10)
97
+
98
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.8...v0.3.0)
99
+
100
+ **Merged pull requests:**
101
+
102
+ - Fixes round [\#17](https://github.com/DannyBen/snapcrawl/pull/17) ([DannyBen](https://github.com/DannyBen))
103
+
104
+ ## [v0.2.8](https://github.com/DannyBen/snapcrawl/tree/v0.2.8) (2019-06-14)
105
+
106
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.7...v0.2.8)
107
+
108
+ **Closed issues:**
109
+
110
+ - Improve tests and run tests on Travis [\#13](https://github.com/DannyBen/snapcrawl/issues/13)
111
+ - Save all versions of snapshot? [\#11](https://github.com/DannyBen/snapcrawl/issues/11)
112
+
113
+ **Merged pull requests:**
114
+
115
+ - Add Travis CI [\#15](https://github.com/DannyBen/snapcrawl/pull/15) ([DannyBen](https://github.com/DannyBen))
116
+ - Add ability to set filename template [\#14](https://github.com/DannyBen/snapcrawl/pull/14) ([DannyBen](https://github.com/DannyBen))
117
+
118
+ ## [v0.2.7](https://github.com/DannyBen/snapcrawl/tree/v0.2.7) (2019-06-13)
119
+
120
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.6...v0.2.7)
121
+
122
+ **Closed issues:**
123
+
124
+ - Using snapcrawl via proxy? [\#10](https://github.com/DannyBen/snapcrawl/issues/10)
125
+
126
+ **Merged pull requests:**
127
+
128
+ - Fix ignored --folder parameter [\#12](https://github.com/DannyBen/snapcrawl/pull/12) ([DannyBen](https://github.com/DannyBen))
129
+
130
+ ## [v0.2.6](https://github.com/DannyBen/snapcrawl/tree/v0.2.6) (2019-04-18)
131
+
132
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.5...v0.2.6)
133
+
134
+ **Closed issues:**
135
+
136
+ - Screenshots not saving to default snaps folder on Windows machine [\#6](https://github.com/DannyBen/snapcrawl/issues/6)
137
+ - Add the ability to pass headers into the application [\#3](https://github.com/DannyBen/snapcrawl/issues/3)
138
+
139
+ **Merged pull requests:**
140
+
141
+ - Upgrade colsole to fix windows command\_exist [\#9](https://github.com/DannyBen/snapcrawl/pull/9) ([DannyBen](https://github.com/DannyBen))
142
+
143
+ ## [v0.2.5](https://github.com/DannyBen/snapcrawl/tree/v0.2.5) (2019-03-14)
144
+
145
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.4...v0.2.5)
146
+
147
+ **Fixed bugs:**
148
+
149
+ - Screenshots not saving to default or specified folder locations [\#4](https://github.com/DannyBen/snapcrawl/issues/4)
150
+
151
+ **Merged pull requests:**
152
+
153
+ - Alert when imagemagick is not installed [\#7](https://github.com/DannyBen/snapcrawl/pull/7) ([DannyBen](https://github.com/DannyBen))
154
+
155
+ ## [v0.2.4](https://github.com/DannyBen/snapcrawl/tree/v0.2.4) (2018-10-18)
156
+
157
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.3...v0.2.4)
158
+
159
+ **Merged pull requests:**
160
+
161
+ - Switch from screencap to webshot [\#5](https://github.com/DannyBen/snapcrawl/pull/5) ([DannyBen](https://github.com/DannyBen))
162
+ - Switch from minitest to rspec [\#2](https://github.com/DannyBen/snapcrawl/pull/2) ([DannyBen](https://github.com/DannyBen))
163
+
164
+ ## [v0.2.3](https://github.com/DannyBen/snapcrawl/tree/v0.2.3) (2017-03-15)
165
+
166
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.2...v0.2.3)
167
+
168
+ **Merged pull requests:**
169
+
170
+ - Fixes [\#1](https://github.com/DannyBen/snapcrawl/pull/1) ([DannyBen](https://github.com/DannyBen))
171
+
172
+ ## [v0.2.2](https://github.com/DannyBen/snapcrawl/tree/v0.2.2) (2015-12-05)
173
+
174
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.1...v0.2.2)
175
+
176
+ ## [v0.2.1](https://github.com/DannyBen/snapcrawl/tree/v0.2.1) (2015-12-05)
177
+
178
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/v0.2.0...v0.2.1)
179
+
180
+ ## [v0.2.0](https://github.com/DannyBen/snapcrawl/tree/v0.2.0) (2015-12-05)
181
+
182
+ [Full Changelog](https://github.com/DannyBen/snapcrawl/compare/0710e5f8d5e45b5341ae4a9fa2212d5c76c72de4...v0.2.0)
@@ -0,0 +1,15 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'byebug'
7
+ gem 'lp'
8
+ gem 'puma'
9
+ gem 'rspec'
10
+ gem 'rspec_approvals'
11
+ gem 'runfile'
12
+ gem 'runfile-tasks'
13
+ gem 'simplecov'
14
+ gem 'sinatra'
15
+ end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Danny Ben Shitrit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,135 @@
1
+ # Snapcrawl - crawl a website and take screenshots
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/snapcrawl.svg)](http://badge.fury.io/rb/snapcrawl)
4
+ [![Build Status](https://github.com/DannyBen/snapcrawl/workflows/Test/badge.svg)](https://github.com/DannyBen/snapcrawl/actions?query=workflow%3ATest)
5
+ [![Code Climate](https://codeclimate.com/github/DannyBen/snapcrawl/badges/gpa.svg)](https://codeclimate.com/github/DannyBen/snapcrawl)
6
+
7
+ ---
8
+
9
+ Snapcrawl is a command line utility for crawling a website and saving
10
+ screenshots.
11
+
12
+
13
+ ## Features
14
+
15
+ - Crawls a website to any given depth and saves screenshots
16
+ - Can capture the full length of the page
17
+ - Can use a specific resolution for screenshots
18
+ - Skips capturing if the screenshot was already saved recently
19
+ - Uses local caching to avoid expensive crawl operations if not needed
20
+ - Reports broken links
21
+
22
+ ## Install
23
+
24
+ **Using Docker**
25
+
26
+ You can run Snapcrawl by using this docker image (which contains all the
27
+ necessary prerequisites):
28
+
29
+ ```shell
30
+ $ alias snapcrawl='docker run --rm -it --network host --volume "$PWD:/app" dannyben/snapcrawl'
31
+ ```
32
+
33
+ For more information on the Docker image, refer to the [docker-snapcrawl][3] repository.
34
+
35
+ **Using Ruby**
36
+
37
+ ```shell
38
+ $ gem install snapcrawl
39
+ ```
40
+
41
+ Note that Snapcrawl requires [PhantomJS][1] and [ImageMagick][2].
42
+
43
+ ## Usage
44
+
45
+ Snapcrawl can be configured either through a configuration file (YAML), or by specifying options in the command line.
46
+
47
+ ```shell
48
+ $ snapcrawl
49
+ Usage:
50
+ snapcrawl URL [--config FILE] [SETTINGS...]
51
+ snapcrawl -h | --help
52
+ snapcrawl -v | --version
53
+ ```
54
+
55
+ The default configuration filename is `snapcrawl.yml`.
56
+
57
+ Using the `--config` flag will create a template configuration file if the file does not already exist:
58
+
59
+ ```shell
60
+ $ snapcrawl example.com --config snapcrawl
61
+ ```
62
+
63
+ ### Specifying options in the command line
64
+
65
+ All configuration options can be specified in the command line as `key=value` pairs:
66
+
67
+ ```shell
68
+ $ snapcrawl example.com log_level=0 depth=2 width=1024
69
+ ```
70
+
71
+ ### Sample configuration file
72
+
73
+ ```yaml
74
+ # All values below are the default values
75
+
76
+ # log level (0-4) 0=DEBUG 1=INFO 2=WARN 3=ERROR 4=FATAL
77
+ log_level: 1
78
+
79
+ # log_color (yes, no, auto)
80
+ # yes = always show log color
81
+ # no = never use colors
82
+ # auto = only use colors when running in an interactive terminal
83
+ log_color: auto
84
+
85
+ # number of levels to crawl, 0 means capture only the root URL
86
+ depth: 1
87
+
88
+ # screenshot width in pixels
89
+ width: 1280
90
+
91
+ # screenshot height in pixels, 0 means the entire height
92
+ height: 0
93
+
94
+ # number of seconds to consider the page cache and its screenshot fresh
95
+ cache_life: 86400
96
+
97
+ # where to store the HTML page cache
98
+ cache_dir: cache
99
+
100
+ # where to store screenshots
101
+ snaps_dir: snaps
102
+
103
+ # screenshot filename template, where '%{url}' will be replaced with a
104
+ # slug version of the URL (no need to include the .png extension)
105
+ name_template: '%{url}'
106
+
107
+ # urls not matching this regular expression will be ignored
108
+ url_whitelist:
109
+
110
+ # urls matching this regular expression will be ignored
111
+ url_blacklist:
112
+
113
+ # take a screenshot of this CSS selector only
114
+ css_selector:
115
+
116
+ # when true, ignore SSL related errors
117
+ skip_ssl_verification: false
118
+
119
+ # set to any number of seconds to wait for the page to load before taking
120
+ # a screenshot, leave empty to not wait at all (only needed for pages with
121
+ # animations or other post-load events).
122
+ screenshot_delay:
123
+ ```
124
+
125
+ ## Contributing / Support
126
+ If you experience any issue, have a question or a suggestion, or if you wish
127
+ to contribute, feel free to [open an issue][issues].
128
+
129
+ ---
130
+
131
+ [1]: http://phantomjs.org/download.html
132
+ [2]: https://imagemagick.org/script/download.php
133
+ [3]: https://github.com/DannyBen/docker-snapcrawl
134
+ [issues]: https://github.com/DannyBen/snapcrawl/issues
135
+
@@ -0,0 +1,35 @@
1
+ require 'snapcrawl/version'  # provides Snapcrawl::VERSION, which is passed to `version` below
2
+
3
+ title 'Snapcrawl Runfile'
4
+ summary 'Runfile tasks for building the Snapcrawl gem'
5
+ version Snapcrawl::VERSION
6
+
7
+ import_gem 'runfile-tasks/gem'  # NOTE(review): presumably imports shared gem build tasks from the runfile-tasks gem - confirm
8
+ import 'debug'  # NOTE(review): presumably a sibling runfile named "debug"; it is not visible here - confirm
9
+
10
+ help "Regenerate the command line output in the README file"
11
+ action :patchme do  # rewrites README.md in place with freshly captured help output
12
+ readme = File.read 'README.md'
13
+ usage = `bundle exec snapcrawl -h`  # run the executable and capture its help text
14
+ usage.gsub!(/^/, " ")  # prefix every line of the captured help text with whitespace
15
+ readme.gsub!(/(\$ snapcrawl --help)(.*)(---\s*)/m) { "#{$1}\n\n#{usage}\n#{$3}" }  # NOTE(review): needs a literal "$ snapcrawl --help" in the README, otherwise nothing is replaced; (.*) is greedy under /m, so a match extends to the LAST "---" in the file - confirm both are intended
16
+ File.write "README.md", readme  # always written, even when the gsub! above matched nothing
17
+ end
18
+
19
+ help "Generate changelog and append old changelog"
20
+ action :changelog do  # shells out twice; neither `system` return value is checked
21
+ system "git changelog --save"  # NOTE(review): presumably (re)generates CHANGELOG.md via the git-changelog tool - confirm
22
+ # append older changelog (prior to switching to git-changelog)
23
+ system "cat .changelog.old.md >> CHANGELOG.md"  # runs even if the previous command failed
24
+ end
25
+
26
+ usage "mockserver"
27
+ help "Start the mock server"
28
+ action :mockserver do  # blocks until the rackup process exits or the user interrupts it
29
+ Dir.chdir 'spec/server' do  # block form: the working directory is restored when the block returns
30
+ system 'rackup -p 3000 -o 0.0.0.0'  # serve the Rack app found in spec/server on port 3000, bound to all interfaces
31
+ end
32
+ rescue Interrupt  # Ctrl-C while the server is running
33
+ abort "\rBye"  # print the message to stderr and exit with a failure status
34
+ end
35
+
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'snapcrawl'
4
+ require 'colsole'
5
+
6
+ trap(:INT) { abort "\r\nGoodbye" }  # on Ctrl-C, print a farewell to stderr and exit with a failure status
7
+
8
+ include Snapcrawl  # lets CLI and the Missing* error classes below be referenced unqualified (presumably defined in Snapcrawl)
9
+ include Colsole  # NOTE(review): presumably the source of say! - confirm
10
+
11
+ begin  # entry point: run the CLI and map each failure type to a distinct exit code
12
+ CLI.new.call ARGV
13
+ rescue MissingPhantomJS => e  # exit code 2: phantomjs executable not found
14
+ message = 'Cannot find phantomjs executable in the path, please install it first.'
15
+ say! "\n\nru`#{e.class}`\n#{message}"  # NOTE(review): the ru`...` wrapper looks like Colsole color markup - confirm
16
+ exit 2
17
+ rescue MissingImageMagick => e  # exit code 3: ImageMagick convert executable not found
18
+ message = 'Cannot find convert (ImageMagick) executable in the path, please install it first.'
19
+ say! "\n\nru`#{e.class}`\n#{message}"
20
+ exit 3
21
+ rescue => e  # exit code 1: any other StandardError
22
+ puts e.backtrace.reverse if ENV['DEBUG']  # with DEBUG set, print the backtrace in reverse order, so the innermost frame comes last
23
+ say! "\nru`#{e.class}`\n#{e.message}"
24
+ exit 1
25
+ end