wayfarer 0.0.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (339) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yaml +32 -0
  3. data/.gitignore +3 -4
  4. data/.rubocop.yml +25 -9
  5. data/.ruby-version +1 -1
  6. data/Dockerfile +5 -0
  7. data/Gemfile +1 -7
  8. data/Gemfile.lock +212 -0
  9. data/RELEASING.md +17 -0
  10. data/Rakefile +38 -90
  11. data/bin/wayfarer +1 -111
  12. data/docker-compose.yml +32 -0
  13. data/docs/cookbook/querying_html.md +42 -0
  14. data/docs/cookbook/screenshots.md +27 -0
  15. data/docs/cookbook/user_agent.md +7 -0
  16. data/docs/guides/browser_automation/capybara.md +3 -0
  17. data/docs/guides/browser_automation/ferrum.md +37 -0
  18. data/docs/guides/browser_automation/selenium.md +59 -0
  19. data/docs/guides/callbacks.md +27 -34
  20. data/docs/guides/configuration.md +20 -171
  21. data/docs/guides/error_handling.md +18 -33
  22. data/docs/guides/jobs.md +75 -0
  23. data/docs/guides/networking.md +20 -0
  24. data/docs/guides/pages.md +52 -0
  25. data/docs/guides/performance.md +22 -0
  26. data/docs/guides/tasks.md +38 -0
  27. data/docs/index.md +34 -68
  28. data/docs/reference/api/base.md +162 -0
  29. data/docs/reference/api/route.md +182 -0
  30. data/docs/reference/cli.md +139 -0
  31. data/docs/reference/environment_variables.md +85 -0
  32. data/lib/wayfarer/base.rb +60 -0
  33. data/lib/wayfarer/cli/base.rb +23 -0
  34. data/lib/wayfarer/cli/generate.rb +17 -0
  35. data/lib/wayfarer/cli/job.rb +58 -0
  36. data/lib/wayfarer/cli/route.rb +27 -0
  37. data/lib/wayfarer/cli/route_printer.rb +116 -0
  38. data/lib/wayfarer/cli/runner.rb +34 -0
  39. data/lib/wayfarer/cli/templates/Gemfile.tt +5 -0
  40. data/lib/wayfarer/cli/templates/job.rb.tt +8 -0
  41. data/lib/wayfarer/config.rb +67 -0
  42. data/lib/wayfarer/gc.rb +19 -0
  43. data/lib/wayfarer/middleware/chain.rb +19 -0
  44. data/lib/wayfarer/middleware/dedup.rb +25 -0
  45. data/lib/wayfarer/middleware/fetch.rb +32 -0
  46. data/lib/wayfarer/middleware/normalize.rb +25 -0
  47. data/lib/wayfarer/middleware/router.rb +21 -0
  48. data/lib/wayfarer/middleware/stage.rb +23 -0
  49. data/lib/wayfarer/middleware/worker.rb +47 -0
  50. data/lib/wayfarer/networking/ferrum.rb +70 -0
  51. data/lib/wayfarer/networking/healer.rb +21 -0
  52. data/lib/wayfarer/networking/net_http.rb +52 -0
  53. data/lib/wayfarer/networking/pool.rb +34 -0
  54. data/lib/wayfarer/networking/result.rb +18 -0
  55. data/lib/wayfarer/networking/selenium.rb +70 -0
  56. data/lib/wayfarer/page.rb +15 -71
  57. data/lib/wayfarer/parsing/json.rb +17 -0
  58. data/lib/wayfarer/parsing/xml.rb +17 -0
  59. data/lib/wayfarer/redis/barrier.rb +36 -0
  60. data/lib/wayfarer/redis/connection.rb +13 -0
  61. data/lib/wayfarer/redis/counter.rb +29 -0
  62. data/lib/wayfarer/redis/pool.rb +18 -0
  63. data/lib/wayfarer/redis/version.rb +19 -0
  64. data/lib/wayfarer/routing/custom_matcher.rb +21 -0
  65. data/lib/wayfarer/routing/dsl.rb +57 -0
  66. data/lib/wayfarer/routing/host_matcher.rb +23 -0
  67. data/lib/wayfarer/routing/path_finder.rb +46 -0
  68. data/lib/wayfarer/routing/path_matcher.rb +46 -0
  69. data/lib/wayfarer/routing/{query_rule.rb → query_matcher.rb} +24 -16
  70. data/lib/wayfarer/routing/result.rb +15 -0
  71. data/lib/wayfarer/routing/root_route.rb +7 -0
  72. data/lib/wayfarer/routing/route.rb +41 -0
  73. data/lib/wayfarer/routing/scheme_matcher.rb +21 -0
  74. data/lib/wayfarer/routing/suffix_matcher.rb +21 -0
  75. data/lib/wayfarer/routing/target_route.rb +7 -0
  76. data/lib/wayfarer/routing/url_matcher.rb +21 -0
  77. data/lib/wayfarer/serializer.rb +17 -0
  78. data/lib/wayfarer/stringify.rb +41 -0
  79. data/lib/wayfarer/task.rb +34 -0
  80. data/lib/wayfarer.rb +47 -58
  81. data/mkdocs.yml +47 -0
  82. data/requirements.txt +1 -0
  83. data/spec/base_spec.rb +219 -0
  84. data/spec/cli/generate_spec.rb +39 -0
  85. data/spec/cli/job_spec.rb +74 -0
  86. data/spec/cli/version_spec.rb +13 -0
  87. data/spec/config_spec.rb +144 -0
  88. data/spec/factories/queue/chain.rb +11 -0
  89. data/spec/factories/queue/middleware.rb +15 -0
  90. data/spec/factories/queue/page.rb +78 -0
  91. data/spec/factories/queue/task.rb +12 -0
  92. data/spec/fixtures/dummy_job.rb +7 -0
  93. data/spec/gc_spec.rb +61 -0
  94. data/spec/middleware/chain_spec.rb +96 -0
  95. data/spec/middleware/dedup_spec.rb +76 -0
  96. data/spec/middleware/fetch_spec.rb +72 -0
  97. data/spec/middleware/normalize_spec.rb +28 -0
  98. data/spec/middleware/router_spec.rb +46 -0
  99. data/spec/middleware/stage_spec.rb +39 -0
  100. data/spec/middleware/worker_spec.rb +90 -0
  101. data/spec/networking/adapter.rb +135 -0
  102. data/spec/networking/ferrum_spec.rb +28 -0
  103. data/spec/networking/healer_spec.rb +46 -0
  104. data/spec/networking/net_http_spec.rb +37 -0
  105. data/spec/networking/pool_spec.rb +42 -0
  106. data/spec/networking/selenium_spec.rb +28 -0
  107. data/spec/page_spec.rb +21 -12
  108. data/spec/{parsers/json_parser_spec.rb → parsing/json_spec.rb} +5 -4
  109. data/spec/{parsers/xml_parser_spec.rb → parsing/xml_spec.rb} +3 -2
  110. data/spec/redis/barrier_spec.rb +78 -0
  111. data/spec/redis/counter_spec.rb +32 -0
  112. data/spec/redis/pool_spec.rb +18 -0
  113. data/spec/redis/version_spec.rb +13 -0
  114. data/spec/routing/custom_matcher_spec.rb +31 -0
  115. data/spec/routing/dsl_spec.rb +98 -0
  116. data/spec/routing/host_matcher_spec.rb +49 -0
  117. data/spec/routing/integration_spec.rb +110 -0
  118. data/spec/routing/path_finder_spec.rb +33 -0
  119. data/spec/routing/path_matcher_spec.rb +43 -0
  120. data/spec/routing/{query_rule_spec.rb → query_matcher_spec.rb} +39 -26
  121. data/spec/routing/root_route_spec.rb +29 -0
  122. data/spec/routing/route_spec.rb +43 -0
  123. data/spec/routing/scheme_matcher_spec.rb +25 -0
  124. data/spec/routing/{filetypes_rule_spec.rb → suffix_matcher_spec.rb} +14 -13
  125. data/spec/routing/uri_matcher_spec.rb +27 -0
  126. data/spec/spec_helpers.rb +65 -38
  127. data/spec/stringify_spec.rb +23 -0
  128. data/{support → spec/support}/static/finders.html +0 -0
  129. data/{support → spec/support}/static/graph/details/a.html +0 -0
  130. data/{support → spec/support}/static/graph/details/b.html +0 -0
  131. data/{support → spec/support}/static/graph/index.html +0 -0
  132. data/{support → spec/support}/static/json/dummy.json +0 -0
  133. data/{support → spec/support}/static/links/links.html +0 -0
  134. data/{support → spec/support}/static/xml/dummy.xml +0 -0
  135. data/{support → spec/support}/test_app.rb +9 -2
  136. data/spec/task_spec.rb +27 -0
  137. data/spec/wayfarer_spec.rb +2 -13
  138. data/wayfarer.gemspec +39 -42
  139. metadata +191 -368
  140. data/.travis.yml +0 -5
  141. data/Changelog.md +0 -10
  142. data/README.md +0 -21
  143. data/benchmark/frontiers.rb +0 -143
  144. data/docs/.gitignore +0 -2
  145. data/docs/_config.yml +0 -15
  146. data/docs/_includes/base.html +0 -7
  147. data/docs/_includes/head.html +0 -10
  148. data/docs/_includes/navigation.html +0 -187
  149. data/docs/_layouts/default.html +0 -42
  150. data/docs/_sass/base.scss +0 -439
  151. data/docs/_sass/variables.scss +0 -24
  152. data/docs/_sass/vendor/bourbon/_bourbon-deprecate.scss +0 -19
  153. data/docs/_sass/vendor/bourbon/_bourbon-deprecated-upcoming.scss +0 -425
  154. data/docs/_sass/vendor/bourbon/_bourbon.scss +0 -90
  155. data/docs/_sass/vendor/bourbon/addons/_border-color.scss +0 -29
  156. data/docs/_sass/vendor/bourbon/addons/_border-radius.scss +0 -48
  157. data/docs/_sass/vendor/bourbon/addons/_border-style.scss +0 -28
  158. data/docs/_sass/vendor/bourbon/addons/_border-width.scss +0 -28
  159. data/docs/_sass/vendor/bourbon/addons/_buttons.scss +0 -69
  160. data/docs/_sass/vendor/bourbon/addons/_clearfix.scss +0 -25
  161. data/docs/_sass/vendor/bourbon/addons/_ellipsis.scss +0 -30
  162. data/docs/_sass/vendor/bourbon/addons/_font-stacks.scss +0 -31
  163. data/docs/_sass/vendor/bourbon/addons/_hide-text.scss +0 -27
  164. data/docs/_sass/vendor/bourbon/addons/_margin.scss +0 -29
  165. data/docs/_sass/vendor/bourbon/addons/_padding.scss +0 -29
  166. data/docs/_sass/vendor/bourbon/addons/_position.scss +0 -51
  167. data/docs/_sass/vendor/bourbon/addons/_prefixer.scss +0 -66
  168. data/docs/_sass/vendor/bourbon/addons/_retina-image.scss +0 -27
  169. data/docs/_sass/vendor/bourbon/addons/_size.scss +0 -56
  170. data/docs/_sass/vendor/bourbon/addons/_text-inputs.scss +0 -118
  171. data/docs/_sass/vendor/bourbon/addons/_timing-functions.scss +0 -34
  172. data/docs/_sass/vendor/bourbon/addons/_triangle.scss +0 -63
  173. data/docs/_sass/vendor/bourbon/addons/_word-wrap.scss +0 -29
  174. data/docs/_sass/vendor/bourbon/css3/_animation.scss +0 -61
  175. data/docs/_sass/vendor/bourbon/css3/_appearance.scss +0 -5
  176. data/docs/_sass/vendor/bourbon/css3/_backface-visibility.scss +0 -5
  177. data/docs/_sass/vendor/bourbon/css3/_background-image.scss +0 -44
  178. data/docs/_sass/vendor/bourbon/css3/_background.scss +0 -57
  179. data/docs/_sass/vendor/bourbon/css3/_border-image.scss +0 -61
  180. data/docs/_sass/vendor/bourbon/css3/_calc.scss +0 -6
  181. data/docs/_sass/vendor/bourbon/css3/_columns.scss +0 -67
  182. data/docs/_sass/vendor/bourbon/css3/_filter.scss +0 -6
  183. data/docs/_sass/vendor/bourbon/css3/_flex-box.scss +0 -327
  184. data/docs/_sass/vendor/bourbon/css3/_font-face.scss +0 -29
  185. data/docs/_sass/vendor/bourbon/css3/_font-feature-settings.scss +0 -6
  186. data/docs/_sass/vendor/bourbon/css3/_hidpi-media-query.scss +0 -12
  187. data/docs/_sass/vendor/bourbon/css3/_hyphens.scss +0 -6
  188. data/docs/_sass/vendor/bourbon/css3/_image-rendering.scss +0 -15
  189. data/docs/_sass/vendor/bourbon/css3/_keyframes.scss +0 -38
  190. data/docs/_sass/vendor/bourbon/css3/_linear-gradient.scss +0 -40
  191. data/docs/_sass/vendor/bourbon/css3/_perspective.scss +0 -12
  192. data/docs/_sass/vendor/bourbon/css3/_placeholder.scss +0 -10
  193. data/docs/_sass/vendor/bourbon/css3/_radial-gradient.scss +0 -40
  194. data/docs/_sass/vendor/bourbon/css3/_selection.scss +0 -44
  195. data/docs/_sass/vendor/bourbon/css3/_text-decoration.scss +0 -27
  196. data/docs/_sass/vendor/bourbon/css3/_transform.scss +0 -21
  197. data/docs/_sass/vendor/bourbon/css3/_transition.scss +0 -81
  198. data/docs/_sass/vendor/bourbon/css3/_user-select.scss +0 -5
  199. data/docs/_sass/vendor/bourbon/functions/_assign-inputs.scss +0 -16
  200. data/docs/_sass/vendor/bourbon/functions/_contains-falsy.scss +0 -25
  201. data/docs/_sass/vendor/bourbon/functions/_contains.scss +0 -31
  202. data/docs/_sass/vendor/bourbon/functions/_is-length.scss +0 -16
  203. data/docs/_sass/vendor/bourbon/functions/_is-light.scss +0 -26
  204. data/docs/_sass/vendor/bourbon/functions/_is-number.scss +0 -16
  205. data/docs/_sass/vendor/bourbon/functions/_is-size.scss +0 -23
  206. data/docs/_sass/vendor/bourbon/functions/_modular-scale.scss +0 -74
  207. data/docs/_sass/vendor/bourbon/functions/_px-to-em.scss +0 -24
  208. data/docs/_sass/vendor/bourbon/functions/_px-to-rem.scss +0 -26
  209. data/docs/_sass/vendor/bourbon/functions/_shade.scss +0 -24
  210. data/docs/_sass/vendor/bourbon/functions/_strip-units.scss +0 -22
  211. data/docs/_sass/vendor/bourbon/functions/_tint.scss +0 -24
  212. data/docs/_sass/vendor/bourbon/functions/_transition-property-name.scss +0 -37
  213. data/docs/_sass/vendor/bourbon/functions/_unpack.scss +0 -32
  214. data/docs/_sass/vendor/bourbon/helpers/_convert-units.scss +0 -26
  215. data/docs/_sass/vendor/bourbon/helpers/_directional-values.scss +0 -108
  216. data/docs/_sass/vendor/bourbon/helpers/_font-source-declaration.scss +0 -53
  217. data/docs/_sass/vendor/bourbon/helpers/_gradient-positions-parser.scss +0 -24
  218. data/docs/_sass/vendor/bourbon/helpers/_linear-angle-parser.scss +0 -35
  219. data/docs/_sass/vendor/bourbon/helpers/_linear-gradient-parser.scss +0 -51
  220. data/docs/_sass/vendor/bourbon/helpers/_linear-positions-parser.scss +0 -77
  221. data/docs/_sass/vendor/bourbon/helpers/_linear-side-corner-parser.scss +0 -41
  222. data/docs/_sass/vendor/bourbon/helpers/_radial-arg-parser.scss +0 -74
  223. data/docs/_sass/vendor/bourbon/helpers/_radial-gradient-parser.scss +0 -55
  224. data/docs/_sass/vendor/bourbon/helpers/_radial-positions-parser.scss +0 -28
  225. data/docs/_sass/vendor/bourbon/helpers/_render-gradients.scss +0 -31
  226. data/docs/_sass/vendor/bourbon/helpers/_shape-size-stripper.scss +0 -15
  227. data/docs/_sass/vendor/bourbon/helpers/_str-to-num.scss +0 -55
  228. data/docs/_sass/vendor/bourbon/settings/_asset-pipeline.scss +0 -7
  229. data/docs/_sass/vendor/bourbon/settings/_deprecation-warnings.scss +0 -8
  230. data/docs/_sass/vendor/bourbon/settings/_prefixer.scss +0 -9
  231. data/docs/_sass/vendor/bourbon/settings/_px-to-em.scss +0 -1
  232. data/docs/_sass/vendor/neat/_neat-helpers.scss +0 -11
  233. data/docs/_sass/vendor/neat/_neat.scss +0 -23
  234. data/docs/_sass/vendor/neat/functions/_new-breakpoint.scss +0 -49
  235. data/docs/_sass/vendor/neat/functions/_private.scss +0 -114
  236. data/docs/_sass/vendor/neat/grid/_box-sizing.scss +0 -15
  237. data/docs/_sass/vendor/neat/grid/_direction-context.scss +0 -33
  238. data/docs/_sass/vendor/neat/grid/_display-context.scss +0 -28
  239. data/docs/_sass/vendor/neat/grid/_fill-parent.scss +0 -22
  240. data/docs/_sass/vendor/neat/grid/_media.scss +0 -92
  241. data/docs/_sass/vendor/neat/grid/_omega.scss +0 -87
  242. data/docs/_sass/vendor/neat/grid/_outer-container.scss +0 -34
  243. data/docs/_sass/vendor/neat/grid/_pad.scss +0 -25
  244. data/docs/_sass/vendor/neat/grid/_private.scss +0 -35
  245. data/docs/_sass/vendor/neat/grid/_row.scss +0 -52
  246. data/docs/_sass/vendor/neat/grid/_shift.scss +0 -50
  247. data/docs/_sass/vendor/neat/grid/_span-columns.scss +0 -94
  248. data/docs/_sass/vendor/neat/grid/_to-deprecate.scss +0 -97
  249. data/docs/_sass/vendor/neat/grid/_visual-grid.scss +0 -42
  250. data/docs/_sass/vendor/neat/mixins/_clearfix.scss +0 -25
  251. data/docs/_sass/vendor/neat/settings/_disable-warnings.scss +0 -13
  252. data/docs/_sass/vendor/neat/settings/_grid.scss +0 -51
  253. data/docs/_sass/vendor/neat/settings/_visual-grid.scss +0 -27
  254. data/docs/_sass/vendor/normalize-3.0.2.scss +0 -427
  255. data/docs/_sass/vendor/pygments.scss +0 -356
  256. data/docs/automating_browsers/capybara.md +0 -70
  257. data/docs/css/screen.scss +0 -7
  258. data/docs/guides/cli.md +0 -52
  259. data/docs/guides/frontiers.md +0 -93
  260. data/docs/guides/halting.md +0 -23
  261. data/docs/guides/job_queues.md +0 -26
  262. data/docs/guides/locals.md +0 -36
  263. data/docs/guides/logging.md +0 -22
  264. data/docs/guides/page_objects.md +0 -67
  265. data/docs/guides/peeking.md +0 -46
  266. data/docs/guides/selenium_capybara.md +0 -100
  267. data/docs/guides/tutorial.md +0 -452
  268. data/docs/js/navigation.js +0 -11
  269. data/docs/misc/contributing.md +0 -20
  270. data/docs/misc/testing.md +0 -11
  271. data/docs/recipes/authentication.md +0 -23
  272. data/docs/recipes/csv.md +0 -29
  273. data/docs/recipes/javascript.md +0 -20
  274. data/docs/recipes/multiple_uris.md +0 -18
  275. data/docs/recipes/screenshots.md +0 -20
  276. data/docs/routing/custom_rules.md +0 -16
  277. data/docs/routing/filetypes_rules.md +0 -21
  278. data/docs/routing/host_rules.md +0 -24
  279. data/docs/routing/path_rules.md +0 -33
  280. data/docs/routing/protocol_rules.md +0 -17
  281. data/docs/routing/query_rules.md +0 -69
  282. data/docs/routing/routes.md +0 -96
  283. data/docs/routing/uri_rules.md +0 -18
  284. data/examples/collect_github_issues.rb +0 -65
  285. data/examples/find_foobar_on_wikipedia.rb +0 -23
  286. data/lib/wayfarer/configuration.rb +0 -86
  287. data/lib/wayfarer/crawl.rb +0 -79
  288. data/lib/wayfarer/crawl_observer.rb +0 -103
  289. data/lib/wayfarer/dispatcher.rb +0 -104
  290. data/lib/wayfarer/finders.rb +0 -61
  291. data/lib/wayfarer/frontiers/frontier.rb +0 -79
  292. data/lib/wayfarer/frontiers/memory_bloomfilter.rb +0 -32
  293. data/lib/wayfarer/frontiers/memory_frontier.rb +0 -76
  294. data/lib/wayfarer/frontiers/memory_trie_frontier.rb +0 -39
  295. data/lib/wayfarer/frontiers/normalize_uris.rb +0 -48
  296. data/lib/wayfarer/frontiers/redis_bloomfilter.rb +0 -34
  297. data/lib/wayfarer/frontiers/redis_frontier.rb +0 -83
  298. data/lib/wayfarer/http_adapters/adapter_pool.rb +0 -62
  299. data/lib/wayfarer/http_adapters/net_http_adapter.rb +0 -77
  300. data/lib/wayfarer/http_adapters/selenium_adapter.rb +0 -80
  301. data/lib/wayfarer/job.rb +0 -211
  302. data/lib/wayfarer/locals.rb +0 -40
  303. data/lib/wayfarer/parsers/json_parser.rb +0 -20
  304. data/lib/wayfarer/parsers/xml_parser.rb +0 -27
  305. data/lib/wayfarer/processor.rb +0 -103
  306. data/lib/wayfarer/routing/custom_rule.rb +0 -21
  307. data/lib/wayfarer/routing/filetypes_rule.rb +0 -20
  308. data/lib/wayfarer/routing/host_rule.rb +0 -19
  309. data/lib/wayfarer/routing/path_rule.rb +0 -54
  310. data/lib/wayfarer/routing/protocol_rule.rb +0 -21
  311. data/lib/wayfarer/routing/router.rb +0 -71
  312. data/lib/wayfarer/routing/rule.rb +0 -114
  313. data/lib/wayfarer/routing/uri_rule.rb +0 -21
  314. data/spec/configuration_spec.rb +0 -26
  315. data/spec/crawl_spec.rb +0 -48
  316. data/spec/finders_spec.rb +0 -49
  317. data/spec/frontiers/memory_bloomfilter_spec.rb +0 -6
  318. data/spec/frontiers/memory_frontier_spec.rb +0 -6
  319. data/spec/frontiers/memory_trie_frontier_spec.rb +0 -6
  320. data/spec/frontiers/normalize_uris_spec.rb +0 -59
  321. data/spec/frontiers/redis_bloomfilter_spec.rb +0 -6
  322. data/spec/frontiers/redis_frontier_spec.rb +0 -6
  323. data/spec/http_adapters/adapter_pool_spec.rb +0 -33
  324. data/spec/http_adapters/net_http_adapter_spec.rb +0 -83
  325. data/spec/http_adapters/selenium_adapter_spec.rb +0 -53
  326. data/spec/integration/callbacks_spec.rb +0 -42
  327. data/spec/integration/locals_spec.rb +0 -106
  328. data/spec/integration/peeking_spec.rb +0 -61
  329. data/spec/job_spec.rb +0 -122
  330. data/spec/processor_spec.rb +0 -31
  331. data/spec/routing/custom_rule_spec.rb +0 -26
  332. data/spec/routing/host_rule_spec.rb +0 -48
  333. data/spec/routing/path_rule_spec.rb +0 -66
  334. data/spec/routing/protocol_rule_spec.rb +0 -26
  335. data/spec/routing/router_spec.rb +0 -67
  336. data/spec/routing/rule_spec.rb +0 -251
  337. data/spec/routing/uri_rule_spec.rb +0 -24
  338. data/spec/shared/frontier.rb +0 -96
  339. data/wayfarer-jruby.gemspec +0 -49
data/.travis.yml DELETED
@@ -1,5 +0,0 @@
1
- language: ruby
2
- services: redis-server
3
- rvm:
4
- - 2.3.1
5
- - jruby-9.1.6.0
data/Changelog.md DELETED
@@ -1,10 +0,0 @@
1
- ## 0.0.2
2
-
3
- Features:
4
-
5
- * `Job#stage` now expands relative paths/URIs
6
- * Added `ProtocolRule` and `CustomRule`
7
-
8
- Bugfixes:
9
-
10
- * Fixed the CLI which relied on a removed method alias
data/README.md DELETED
@@ -1,21 +0,0 @@
1
- # Wayfarer
2
- [![Build Status](https://travis-ci.org/bauerd/wayfarer.svg)](https://travis-ci.org/bauerd/wayfarer)
3
- [![Code Climate](https://codeclimate.com/github/bauerd/wayfarer/badges/gpa.svg)](https://codeclimate.com/github/bauerd/wayfarer)
4
-
5
- Versatile web crawling with (J)Ruby
6
-
7
- * [__Usage and more__ on the website](https://bauerd.github.io/wayfarer/)
8
- * [__API documentation__ on Ruby-Doc.org](http://www.rubydoc.info/github/bauerd/wayfarer) (`master` branch)
9
- * __Releases__ on RubyGems.org:
10
- * [wayfarer](https://rubygems.org/gems/wayfarer)
11
- * [wayfarer-jruby](https://rubygems.org/gems/wayfarer-jruby)
12
-
13
- MRI:
14
- ```
15
- % [sudo] gem install wayfarer
16
- ```
17
-
18
- JRuby:
19
- ```
20
- % [sudo] gem install wayfarer-jruby
21
- ```
@@ -1,143 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "../lib/wayfarer"
4
-
5
- require "benchmark"
6
- require "parallel" unless RUBY_PLATFORM == "java"
7
- require "faker"
8
-
9
- include Wayfarer::Frontiers
10
-
11
- def print_separator
12
- puts "-" * `tput cols`.to_i
13
- end
14
-
15
- # URI count is kept low on purpose because this file is run on Travis CI.
16
- # Bump up the numbers when running locally.
17
- URI_COUNT = 10_000
18
- SAMPLE_COUNT = URI_COUNT * 0.01
19
- CYCLE_COUNT = 20
20
-
21
- FRONTIERS = RUBY_PLATFORM == "java" ? {
22
- MemoryFrontier => "memory",
23
- RedisFrontier => "redis"
24
- } : {
25
- MemoryBloomfilter => "memory_bloom",
26
- MemoryFrontier => "memory",
27
- MemoryTrieFrontier => "memory_trie",
28
- RedisBloomfilter => "redis_bloom",
29
- RedisFrontier => "redis"
30
- }
31
-
32
- label_width = FRONTIERS.reduce(0) { |acc, (_, label)|
33
- acc > (l = label.length) ? acc : l
34
- }
35
-
36
- puts "Generating #{URI_COUNT} URIs..."
37
-
38
- uris = if RUBY_PLATFORM == "java"
39
- Array.new(URI_COUNT) { Faker::Internet.url }
40
- else
41
- Parallel.map(URI_COUNT.times) { Faker::Internet.url }
42
- end
43
- samples = uris.sample(SAMPLE_COUNT)
44
-
45
- puts "Done generating URIs"
46
-
47
- print_separator
48
- puts "Cache insertion"
49
-
50
- Benchmark.bm do |bm|
51
- FRONTIERS.each do |(klass, label)|
52
- frontier = klass.new(Wayfarer::Configuration.new)
53
- frontier.cache(*uris)
54
-
55
- bm.report(label.ljust(label_width)) do
56
- frontier.cache(*samples)
57
- end
58
-
59
- frontier.free
60
- end
61
- end
62
-
63
- print_separator
64
- puts "Cache detection"
65
-
66
- Benchmark.bm do |bm|
67
- FRONTIERS.each do |(klass, label)|
68
- frontier = klass.new(Wayfarer::Configuration.new)
69
- frontier.cache(*uris)
70
-
71
- bm.report(label.ljust(label_width)) do
72
- samples.each { |sample| frontier.cached?(sample) }
73
- end
74
-
75
- frontier.free
76
- end
77
- end
78
-
79
- print_separator
80
- puts "Cycling"
81
-
82
- slices = samples.each_slice(CYCLE_COUNT)
83
-
84
- Benchmark.bm do |bm|
85
- FRONTIERS.each do |(klass, label)|
86
- frontier = klass.new(Wayfarer::Configuration.new)
87
-
88
- bm.report(label.ljust(label_width)) do
89
- slices.each do |samples|
90
- frontier.stage(*samples)
91
- frontier.cycle
92
- end
93
- end
94
-
95
- frontier.free
96
- end
97
- end
98
-
99
- print_separator
100
- puts "Cache detection false positives/negatives"
101
-
102
- trials = samples.count / 2
103
-
104
- positives = samples.sample(trials)
105
- negatives = if RUBY_PLATFORM == "java"
106
- Array.new(trials) { Faker::Internet.url }
107
- else
108
- Parallel.map(trials.times) { Faker::Internet.url }
109
- end
110
-
111
- FRONTIERS.each do |(klass, label)|
112
- frontier = klass.new(Wayfarer::Configuration.new)
113
-
114
- false_positives = false_negatives = 0
115
-
116
- positives.each do |positive| frontier.cache(positive) end
117
-
118
- positives.each do |positive|
119
- false_positives += 1 unless frontier.cached?(positive)
120
- end
121
-
122
- negatives.each do |negative|
123
- false_positives += 1 if frontier.cached?(negative)
124
- end
125
-
126
- fp_percentage = if false_positives.zero?
127
- 0
128
- else
129
- (trials.to_f / false_positives).round(3)
130
- end
131
-
132
- fn_percentage = if false_negatives.zero?
133
- 0
134
- else
135
- (trials.to_f / false_negatives).round(3)
136
- end
137
-
138
- puts "* #{label.ljust(label_width)}"
139
- puts " * False positives: #{false_positives} (#{fp_percentage}%)"
140
- puts " * False negatives: #{false_negatives} (#{fn_percentage}%)"
141
-
142
- frontier.free
143
- end
data/docs/.gitignore DELETED
@@ -1,2 +0,0 @@
1
- _site
2
- .sass-cache
data/docs/_config.yml DELETED
@@ -1,15 +0,0 @@
1
- gems:
2
- - bourbon
3
- - neat
4
-
5
- markdown: kramdown
6
- sass:
7
- style: :compressed
8
-
9
- kramdown:
10
- input: GFM
11
- syntax_highlighter: rouge
12
-
13
- highlighter: rouge
14
-
15
- title: Wayfarer
@@ -1,7 +0,0 @@
1
- <!-- See: https://ricostacruz.com/til/relative-paths-in-jekyll -->
2
- {% assign base = '' %}
3
- {% assign depth = page.url | split: '/' | size | minus: 1 %}
4
- {% if depth <= 1 %}{% assign base = '.' %}
5
- {% elsif depth == 2 %}{% assign base = '..' %}
6
- {% elsif depth == 3 %}{% assign base = '../..' %}
7
- {% elsif depth == 4 %}{% assign base = '../../..' %}{% endif %}
@@ -1,10 +0,0 @@
1
- <head>
2
- <title>{{page.title}} | Wayfarer</title>
3
- <meta charset="utf-8">
4
- <meta name="viewport" content="width=device-width">
5
- <link rel="stylesheet" href="{{base}}/css/screen.css">
6
- <link
7
- rel="stylesheet"
8
- href="//brick.a.ssl.fastly.net/Titillium:400,600/Roboto+Mono:400,700">
9
- <script src="{{base}}/js/navigation.js"></script>
10
- </head>
@@ -1,187 +0,0 @@
1
- <section class="navigation">
2
- <input id="navigation__toggle-checkbox" class="navigation__toggle-checkbox" type="checkbox">
3
- <label for="navigation__toggle-checkbox" class="navigation__toggle-label">
4
- <span class="show">Show navigation</span>
5
- <span class="hide">Hide navigation</span>
6
- </label>
7
- <nav class="navigation__main">
8
- <ul class="navigation__list">
9
- <li class="navigation__category">
10
- <h1 class="navigation__category__title">Guides</h1>
11
- <ul class="navigation__list">
12
- <li class="navigation__page">
13
- <a class="navigation__link" href="{{base}}/guides/tutorial.html">
14
- Tutorial
15
- </a>
16
- </li>
17
- <li class="navigation__page">
18
- <a class="navigation__link" href="{{base}}/guides/configuration.html">
19
- Configuration
20
- </a>
21
- </li>
22
- <li class="navigation__page">
23
- <a class="navigation__link" href="{{base}}/guides/halting.html">
24
- Halting
25
- </a>
26
- </li>
27
- <li class="navigation__page">
28
- <a class="navigation__link" href="{{base}}/guides/locals.html">
29
- Locals
30
- </a>
31
- </li>
32
- <li class="navigation__page">
33
- <a class="navigation__link" href="{{base}}/guides/page_objects.html">
34
- <code>Page</code> objects
35
- </a>
36
- </li>
37
- <li class="navigation__page">
38
- <a class="navigation__link" href="{{base}}/guides/error_handling.html">
39
- Error handling
40
- </a>
41
- </li>
42
- <li class="navigation__page">
43
- <a class="navigation__link" href="{{base}}/guides/frontiers.html">
44
- (Redis) Frontiers
45
- </a>
46
- </li>
47
- <li class="navigation__page">
48
- <a class="navigation__link" href="{{base}}/guides/peeking.html">
49
- Peeking
50
- </a>
51
- </li>
52
- <li class="navigation__page">
53
- <a class="navigation__link" href="{{base}}/guides/selenium_capybara.html">
54
- Selenium &amp; Capybara
55
- </a>
56
- </li>
57
- <li class="navigation__page">
58
- <a class="navigation__link" href="{{base}}/guides/callbacks.html">
59
- Callbacks
60
- </a>
61
- </li>
62
- <li class="navigation__page">
63
- <a class="navigation__link" href="{{base}}/guides/cli.html">
64
- CLI
65
- </a>
66
- </li>
67
- <li class="navigation__page">
68
- <a class="navigation__link" href="{{base}}/guides/job_queues.html">
69
- Job queues
70
- </a>
71
- </li>
72
- <li class="navigation__page">
73
- <a class="navigation__link" href="{{base}}/guides/logging.html">
74
- Logging
75
- </a>
76
- </li>
77
- </ul>
78
- </li>
79
-
80
- <li class="navigation__category">
81
- <h1 class="navigation__category__title">Routing</h1>
82
- <ul class="navigation__list">
83
- <li class="navigation__page">
84
- <a class="navigation__link" href="{{base}}/routing/routes.html">
85
- Routes
86
- </a>
87
- </li>
88
- <li class="navigation__page">
89
- <a class="navigation__link" href="{{base}}/routing/uri_rules.html">
90
- URI rules
91
- </a>
92
- </li>
93
- <li class="navigation__page">
94
- <a class="navigation__link" href="{{base}}/routing/host_rules.html">
95
- Host rules
96
- </a>
97
- </li>
98
- <li class="navigation__page">
99
- <a class="navigation__link" href="{{base}}/routing/path_rules.html">
100
- Path rules
101
- </a>
102
- </li>
103
- <li class="navigation__page">
104
- <a class="navigation__link" href="{{base}}/routing/query_rules.html">
105
- Query rules
106
- </a>
107
- </li>
108
- <li class="navigation__page">
109
- <a class="navigation__link" href="{{base}}/routing/protocol_rules.html">
110
- Protocol rules
111
- </a>
112
- </li>
113
- <li class="navigation__page">
114
- <a class="navigation__link" href="{{base}}/routing/filetypes_rules.html">
115
- Filetypes rules
116
- </a>
117
- </li>
118
- <li class="navigation__page">
119
- <a class="navigation__link" href="{{base}}/routing/custom_rules.html">
120
- Custom rules
121
- </a>
122
- </li>
123
- </ul>
124
- </li>
125
-
126
- <li class="navigation__category">
127
- <h1 class="navigation__category__title">Recipes</h1>
128
- <ul class="navigation__list">
129
- <li class="navigation__page">
130
- <a class="navigation__link" href="{{base}}/recipes/multiple_uris.html">
131
- Starting from multiple URIs
132
- </a>
133
- </li>
134
- <li class="navigation__page">
135
- <a class="navigation__link" href="{{base}}/recipes/authentication.html">
136
- Authentication
137
- </a>
138
- </li>
139
- <li class="navigation__page">
140
- <a class="navigation__link" href="{{base}}/recipes/screenshots.html">
141
- Taking screenshots
142
- </a>
143
- </li>
144
- <li class="navigation__page">
145
- <a class="navigation__link" href="{{base}}/recipes/javascript.html">
146
- Executing JavaScript
147
- </a>
148
- </li>
149
- <li class="navigation__page">
150
- <a class="navigation__link" href="{{base}}/recipes/csv.html">
151
- CSV output
152
- </a>
153
- </li>
154
- </ul>
155
- </li>
156
-
157
- <li class="navigation__category">
158
- <h1 class="navigation__category__title">Miscellaneous</h1>
159
- <ul class="navigation__list">
160
- <li class="navigation__page">
161
- <a class="navigation__link" href="//github.com/bauerd/wayfarer">
162
- Code on GitHub
163
- </a>
164
- </li>
165
- <li class="navigation__page">
166
- <a class="navigation__link" href="http://www.rubydoc.info/github/bauerd/wayfarer">
167
- API documentation
168
- </a>
169
- </li>
170
- <li class="navigation__page">
171
- <a class="navigation__link" href="{{base}}/misc/testing.html">
172
- Testing
173
- </a>
174
- </li>
175
- <li class="navigation__page">
176
- <a class="navigation__link" href="{{base}}/misc/contributing.html">
177
- Contributing
178
- </a>
179
- </li>
180
- </ul>
181
- </li>
182
-
183
- <label for="navigation__toggle-checkbox" class="navigation__close-bottom">
184
- Hide navigation
185
- </label>
186
- </nav>
187
- </section>
@@ -1,42 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- {% include base.html %}
4
- {% include head.html %}
5
-
6
- <body>
7
- <div class="wrapper">
8
- <aside class="sidebar">
9
- <header class="site-header">
10
- <h1 class="site-header__title">
11
- <a href="{{base}}" class="site-header__link">{{ site.title }}</a>
12
- </h1>
13
- <div class="site-header__version">
14
- 0.0.3
15
- </div>
16
- </header>
17
-
18
- <section class="badges">
19
- <iframe class="star-button" src="https://ghbtns.com/github-btn.html?user=bauerd&repo=wayfarer&type=star" frameborder="0" scrolling="0" width="50px" height="20px"></iframe>
20
- <img class="build-status" src="https://travis-ci.org/bauerd/wayfarer.svg?branch=master">
21
- </section>
22
-
23
- {% include navigation.html %}
24
- </aside>
25
-
26
- <main class="page-content">
27
- {{content}}
28
-
29
- <aside class="page-meta">
30
- Instructions unclear or not working? Please
31
- <a href="{{site.github.repository_url}}/edit/master/docs/{{page.path}}">
32
- edit this page
33
- </a>
34
- or
35
- <a href="{{site.github.repository_url}}/issues/new">
36
- file a bug
37
- </a>
38
- </aside>
39
- </main>
40
- </div>
41
- </body>
42
- </html>