wayfarer 0.0.3 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (369) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/ci.yaml +32 -0
  3. data/.gitignore +3 -4
  4. data/.rubocop.yml +25 -9
  5. data/.ruby-version +1 -1
  6. data/Dockerfile +5 -0
  7. data/Gemfile +1 -7
  8. data/Gemfile.lock +221 -0
  9. data/RELEASING.md +17 -0
  10. data/Rakefile +38 -90
  11. data/bin/wayfarer +1 -111
  12. data/docker-compose.yml +32 -0
  13. data/docs/cookbook/batch_routing.md +22 -0
  14. data/docs/cookbook/consent_screen.md +36 -0
  15. data/docs/cookbook/executing_javascript.md +41 -0
  16. data/docs/cookbook/querying_html.md +42 -0
  17. data/docs/cookbook/screenshots.md +27 -0
  18. data/docs/cookbook/user_agent.md +7 -0
  19. data/docs/guides/browser_automation/capybara.md +69 -0
  20. data/docs/guides/browser_automation/custom_adapters.md +100 -0
  21. data/docs/guides/browser_automation/ferrum.md +39 -0
  22. data/docs/guides/browser_automation/selenium.md +63 -0
  23. data/docs/guides/callbacks.md +131 -31
  24. data/docs/guides/configuration.md +24 -169
  25. data/docs/guides/debugging.md +17 -0
  26. data/docs/guides/error_handling.md +30 -45
  27. data/docs/guides/jobs.md +101 -0
  28. data/docs/guides/navigation.md +73 -0
  29. data/docs/guides/networking.md +94 -0
  30. data/docs/guides/pages.md +52 -0
  31. data/docs/guides/performance.md +130 -0
  32. data/docs/guides/reliability.md +41 -0
  33. data/docs/guides/routing/steering.md +30 -0
  34. data/docs/guides/tasks.md +14 -0
  35. data/docs/index.md +40 -66
  36. data/docs/reference/api/base.md +48 -0
  37. data/docs/reference/api/route.md +182 -0
  38. data/docs/reference/cli.md +61 -0
  39. data/docs/reference/configuration_keys.md +42 -0
  40. data/docs/reference/environment_variables.md +83 -0
  41. data/lib/wayfarer/base.rb +50 -0
  42. data/lib/wayfarer/callbacks.rb +71 -0
  43. data/lib/wayfarer/cli/base.rb +27 -0
  44. data/lib/wayfarer/cli/generate.rb +17 -0
  45. data/lib/wayfarer/cli/job.rb +60 -0
  46. data/lib/wayfarer/cli/route.rb +29 -0
  47. data/lib/wayfarer/cli/route_printer.rb +116 -0
  48. data/lib/wayfarer/cli/runner.rb +34 -0
  49. data/lib/wayfarer/cli/templates/Gemfile.tt +5 -0
  50. data/lib/wayfarer/cli/templates/job.rb.tt +10 -0
  51. data/lib/wayfarer/config/capybara.rb +10 -0
  52. data/lib/wayfarer/config/ferrum.rb +11 -0
  53. data/lib/wayfarer/config/networking.rb +26 -0
  54. data/lib/wayfarer/config/redis.rb +14 -0
  55. data/lib/wayfarer/config/root.rb +11 -0
  56. data/lib/wayfarer/config/selenium.rb +21 -0
  57. data/lib/wayfarer/config/strconv.rb +45 -0
  58. data/lib/wayfarer/config/struct.rb +72 -0
  59. data/lib/wayfarer/gc.rb +15 -0
  60. data/lib/wayfarer/middleware/chain.rb +19 -0
  61. data/lib/wayfarer/middleware/dedup.rb +25 -0
  62. data/lib/wayfarer/middleware/fetch.rb +47 -0
  63. data/lib/wayfarer/middleware/normalize.rb +25 -0
  64. data/lib/wayfarer/middleware/router.rb +53 -0
  65. data/lib/wayfarer/middleware/stage.rb +23 -0
  66. data/lib/wayfarer/middleware/worker.rb +30 -0
  67. data/lib/wayfarer/networking/capybara.rb +28 -0
  68. data/lib/wayfarer/networking/context.rb +36 -0
  69. data/lib/wayfarer/networking/ferrum.rb +35 -0
  70. data/lib/wayfarer/networking/http.rb +34 -0
  71. data/lib/wayfarer/networking/pool.rb +40 -0
  72. data/lib/wayfarer/networking/result.rb +18 -0
  73. data/lib/wayfarer/networking/selenium.rb +43 -0
  74. data/lib/wayfarer/networking/strategy.rb +38 -0
  75. data/lib/wayfarer/page.rb +17 -74
  76. data/lib/wayfarer/parsing/json.rb +17 -0
  77. data/lib/wayfarer/parsing/xml.rb +17 -0
  78. data/lib/wayfarer/redis/.#barrier.rb +1 -0
  79. data/lib/wayfarer/redis/barrier.rb +36 -0
  80. data/lib/wayfarer/redis/connection.rb +13 -0
  81. data/lib/wayfarer/redis/counter.rb +29 -0
  82. data/lib/wayfarer/redis/pool.rb +20 -0
  83. data/lib/wayfarer/redis/version.rb +19 -0
  84. data/lib/wayfarer/routing/dsl.rb +57 -0
  85. data/lib/wayfarer/routing/matchers/custom.rb +25 -0
  86. data/lib/wayfarer/routing/matchers/host.rb +19 -0
  87. data/lib/wayfarer/routing/matchers/path.rb +49 -0
  88. data/lib/wayfarer/routing/matchers/query.rb +63 -0
  89. data/lib/wayfarer/routing/matchers/scheme.rb +17 -0
  90. data/lib/wayfarer/routing/matchers/suffix.rb +17 -0
  91. data/lib/wayfarer/routing/matchers/url.rb +17 -0
  92. data/lib/wayfarer/routing/path_finder.rb +46 -0
  93. data/lib/wayfarer/routing/result.rb +15 -0
  94. data/lib/wayfarer/routing/root_route.rb +7 -0
  95. data/lib/wayfarer/routing/route.rb +47 -0
  96. data/lib/wayfarer/routing/router.rb +10 -54
  97. data/lib/wayfarer/routing/target_route.rb +7 -0
  98. data/lib/wayfarer/serializer.rb +17 -0
  99. data/lib/wayfarer/stringify.rb +47 -0
  100. data/lib/wayfarer/task.rb +34 -0
  101. data/lib/wayfarer.rb +48 -57
  102. data/mkdocs.yml +47 -0
  103. data/requirements.txt +1 -0
  104. data/spec/base_spec.rb +233 -0
  105. data/spec/callbacks_spec.rb +102 -0
  106. data/spec/cli/generate_spec.rb +39 -0
  107. data/spec/cli/job_spec.rb +74 -0
  108. data/spec/cli/version_spec.rb +13 -0
  109. data/spec/config/capybara_spec.rb +18 -0
  110. data/spec/config/ferrum_spec.rb +24 -0
  111. data/spec/config/networking_spec.rb +73 -0
  112. data/spec/config/redis_spec.rb +32 -0
  113. data/spec/config/root_spec.rb +31 -0
  114. data/spec/config/selenium_spec.rb +56 -0
  115. data/spec/config/strconv_spec.rb +58 -0
  116. data/spec/config/struct_spec.rb +66 -0
  117. data/spec/factories/middleware.rb +15 -0
  118. data/spec/factories/page.rb +78 -0
  119. data/spec/factories/task.rb +12 -0
  120. data/spec/fixtures/dummy_job.rb +7 -0
  121. data/spec/gc_spec.rb +63 -0
  122. data/spec/middleware/chain_spec.rb +96 -0
  123. data/spec/middleware/dedup_spec.rb +76 -0
  124. data/spec/middleware/fetch_spec.rb +100 -0
  125. data/spec/middleware/normalize_spec.rb +28 -0
  126. data/spec/middleware/router_spec.rb +80 -0
  127. data/spec/middleware/stage_spec.rb +39 -0
  128. data/spec/middleware/worker_spec.rb +117 -0
  129. data/spec/networking/capybara_spec.rb +12 -0
  130. data/spec/networking/context_spec.rb +127 -0
  131. data/spec/networking/ferrum_spec.rb +12 -0
  132. data/spec/networking/http_spec.rb +12 -0
  133. data/spec/networking/pool_spec.rb +67 -0
  134. data/spec/networking/selenium_spec.rb +12 -0
  135. data/spec/networking/strategy.rb +170 -0
  136. data/spec/page_spec.rb +21 -12
  137. data/spec/{parsers/json_parser_spec.rb → parsing/json_spec.rb} +5 -4
  138. data/spec/{parsers/xml_parser_spec.rb → parsing/xml_spec.rb} +3 -2
  139. data/spec/redis/barrier_spec.rb +78 -0
  140. data/spec/redis/counter_spec.rb +32 -0
  141. data/spec/redis/pool_spec.rb +18 -0
  142. data/spec/redis/version_spec.rb +13 -0
  143. data/spec/routing/dsl_spec.rb +98 -0
  144. data/spec/routing/integration_spec.rb +110 -0
  145. data/spec/routing/matchers/custom_spec.rb +31 -0
  146. data/spec/routing/matchers/host_spec.rb +49 -0
  147. data/spec/routing/matchers/path_spec.rb +43 -0
  148. data/spec/routing/matchers/query_spec.rb +137 -0
  149. data/spec/routing/matchers/scheme_spec.rb +25 -0
  150. data/spec/routing/{filetypes_rule_spec.rb → matchers/suffix_spec.rb} +14 -13
  151. data/spec/routing/matchers/uri_spec.rb +27 -0
  152. data/spec/routing/path_finder_spec.rb +33 -0
  153. data/spec/routing/root_route_spec.rb +29 -0
  154. data/spec/routing/route_spec.rb +43 -0
  155. data/spec/routing/router_spec.rb +13 -56
  156. data/spec/spec_helpers.rb +73 -38
  157. data/spec/stringify_spec.rb +23 -0
  158. data/{support → spec/support}/static/finders.html +0 -0
  159. data/{support → spec/support}/static/graph/details/a.html +0 -0
  160. data/{support → spec/support}/static/graph/details/b.html +0 -0
  161. data/{support → spec/support}/static/graph/index.html +0 -0
  162. data/{support → spec/support}/static/json/dummy.json +0 -0
  163. data/{support → spec/support}/static/links/links.html +0 -0
  164. data/{support → spec/support}/static/xml/dummy.xml +0 -0
  165. data/{support → spec/support}/test_app.rb +9 -2
  166. data/spec/task_spec.rb +27 -0
  167. data/spec/wayfarer_spec.rb +2 -13
  168. data/wayfarer.gemspec +40 -42
  169. metadata +234 -361
  170. data/.travis.yml +0 -5
  171. data/Changelog.md +0 -10
  172. data/README.md +0 -21
  173. data/benchmark/frontiers.rb +0 -143
  174. data/docs/.gitignore +0 -2
  175. data/docs/_config.yml +0 -15
  176. data/docs/_includes/base.html +0 -7
  177. data/docs/_includes/head.html +0 -10
  178. data/docs/_includes/navigation.html +0 -187
  179. data/docs/_layouts/default.html +0 -42
  180. data/docs/_sass/base.scss +0 -439
  181. data/docs/_sass/variables.scss +0 -24
  182. data/docs/_sass/vendor/bourbon/_bourbon-deprecate.scss +0 -19
  183. data/docs/_sass/vendor/bourbon/_bourbon-deprecated-upcoming.scss +0 -425
  184. data/docs/_sass/vendor/bourbon/_bourbon.scss +0 -90
  185. data/docs/_sass/vendor/bourbon/addons/_border-color.scss +0 -29
  186. data/docs/_sass/vendor/bourbon/addons/_border-radius.scss +0 -48
  187. data/docs/_sass/vendor/bourbon/addons/_border-style.scss +0 -28
  188. data/docs/_sass/vendor/bourbon/addons/_border-width.scss +0 -28
  189. data/docs/_sass/vendor/bourbon/addons/_buttons.scss +0 -69
  190. data/docs/_sass/vendor/bourbon/addons/_clearfix.scss +0 -25
  191. data/docs/_sass/vendor/bourbon/addons/_ellipsis.scss +0 -30
  192. data/docs/_sass/vendor/bourbon/addons/_font-stacks.scss +0 -31
  193. data/docs/_sass/vendor/bourbon/addons/_hide-text.scss +0 -27
  194. data/docs/_sass/vendor/bourbon/addons/_margin.scss +0 -29
  195. data/docs/_sass/vendor/bourbon/addons/_padding.scss +0 -29
  196. data/docs/_sass/vendor/bourbon/addons/_position.scss +0 -51
  197. data/docs/_sass/vendor/bourbon/addons/_prefixer.scss +0 -66
  198. data/docs/_sass/vendor/bourbon/addons/_retina-image.scss +0 -27
  199. data/docs/_sass/vendor/bourbon/addons/_size.scss +0 -56
  200. data/docs/_sass/vendor/bourbon/addons/_text-inputs.scss +0 -118
  201. data/docs/_sass/vendor/bourbon/addons/_timing-functions.scss +0 -34
  202. data/docs/_sass/vendor/bourbon/addons/_triangle.scss +0 -63
  203. data/docs/_sass/vendor/bourbon/addons/_word-wrap.scss +0 -29
  204. data/docs/_sass/vendor/bourbon/css3/_animation.scss +0 -61
  205. data/docs/_sass/vendor/bourbon/css3/_appearance.scss +0 -5
  206. data/docs/_sass/vendor/bourbon/css3/_backface-visibility.scss +0 -5
  207. data/docs/_sass/vendor/bourbon/css3/_background-image.scss +0 -44
  208. data/docs/_sass/vendor/bourbon/css3/_background.scss +0 -57
  209. data/docs/_sass/vendor/bourbon/css3/_border-image.scss +0 -61
  210. data/docs/_sass/vendor/bourbon/css3/_calc.scss +0 -6
  211. data/docs/_sass/vendor/bourbon/css3/_columns.scss +0 -67
  212. data/docs/_sass/vendor/bourbon/css3/_filter.scss +0 -6
  213. data/docs/_sass/vendor/bourbon/css3/_flex-box.scss +0 -327
  214. data/docs/_sass/vendor/bourbon/css3/_font-face.scss +0 -29
  215. data/docs/_sass/vendor/bourbon/css3/_font-feature-settings.scss +0 -6
  216. data/docs/_sass/vendor/bourbon/css3/_hidpi-media-query.scss +0 -12
  217. data/docs/_sass/vendor/bourbon/css3/_hyphens.scss +0 -6
  218. data/docs/_sass/vendor/bourbon/css3/_image-rendering.scss +0 -15
  219. data/docs/_sass/vendor/bourbon/css3/_keyframes.scss +0 -38
  220. data/docs/_sass/vendor/bourbon/css3/_linear-gradient.scss +0 -40
  221. data/docs/_sass/vendor/bourbon/css3/_perspective.scss +0 -12
  222. data/docs/_sass/vendor/bourbon/css3/_placeholder.scss +0 -10
  223. data/docs/_sass/vendor/bourbon/css3/_radial-gradient.scss +0 -40
  224. data/docs/_sass/vendor/bourbon/css3/_selection.scss +0 -44
  225. data/docs/_sass/vendor/bourbon/css3/_text-decoration.scss +0 -27
  226. data/docs/_sass/vendor/bourbon/css3/_transform.scss +0 -21
  227. data/docs/_sass/vendor/bourbon/css3/_transition.scss +0 -81
  228. data/docs/_sass/vendor/bourbon/css3/_user-select.scss +0 -5
  229. data/docs/_sass/vendor/bourbon/functions/_assign-inputs.scss +0 -16
  230. data/docs/_sass/vendor/bourbon/functions/_contains-falsy.scss +0 -25
  231. data/docs/_sass/vendor/bourbon/functions/_contains.scss +0 -31
  232. data/docs/_sass/vendor/bourbon/functions/_is-length.scss +0 -16
  233. data/docs/_sass/vendor/bourbon/functions/_is-light.scss +0 -26
  234. data/docs/_sass/vendor/bourbon/functions/_is-number.scss +0 -16
  235. data/docs/_sass/vendor/bourbon/functions/_is-size.scss +0 -23
  236. data/docs/_sass/vendor/bourbon/functions/_modular-scale.scss +0 -74
  237. data/docs/_sass/vendor/bourbon/functions/_px-to-em.scss +0 -24
  238. data/docs/_sass/vendor/bourbon/functions/_px-to-rem.scss +0 -26
  239. data/docs/_sass/vendor/bourbon/functions/_shade.scss +0 -24
  240. data/docs/_sass/vendor/bourbon/functions/_strip-units.scss +0 -22
  241. data/docs/_sass/vendor/bourbon/functions/_tint.scss +0 -24
  242. data/docs/_sass/vendor/bourbon/functions/_transition-property-name.scss +0 -37
  243. data/docs/_sass/vendor/bourbon/functions/_unpack.scss +0 -32
  244. data/docs/_sass/vendor/bourbon/helpers/_convert-units.scss +0 -26
  245. data/docs/_sass/vendor/bourbon/helpers/_directional-values.scss +0 -108
  246. data/docs/_sass/vendor/bourbon/helpers/_font-source-declaration.scss +0 -53
  247. data/docs/_sass/vendor/bourbon/helpers/_gradient-positions-parser.scss +0 -24
  248. data/docs/_sass/vendor/bourbon/helpers/_linear-angle-parser.scss +0 -35
  249. data/docs/_sass/vendor/bourbon/helpers/_linear-gradient-parser.scss +0 -51
  250. data/docs/_sass/vendor/bourbon/helpers/_linear-positions-parser.scss +0 -77
  251. data/docs/_sass/vendor/bourbon/helpers/_linear-side-corner-parser.scss +0 -41
  252. data/docs/_sass/vendor/bourbon/helpers/_radial-arg-parser.scss +0 -74
  253. data/docs/_sass/vendor/bourbon/helpers/_radial-gradient-parser.scss +0 -55
  254. data/docs/_sass/vendor/bourbon/helpers/_radial-positions-parser.scss +0 -28
  255. data/docs/_sass/vendor/bourbon/helpers/_render-gradients.scss +0 -31
  256. data/docs/_sass/vendor/bourbon/helpers/_shape-size-stripper.scss +0 -15
  257. data/docs/_sass/vendor/bourbon/helpers/_str-to-num.scss +0 -55
  258. data/docs/_sass/vendor/bourbon/settings/_asset-pipeline.scss +0 -7
  259. data/docs/_sass/vendor/bourbon/settings/_deprecation-warnings.scss +0 -8
  260. data/docs/_sass/vendor/bourbon/settings/_prefixer.scss +0 -9
  261. data/docs/_sass/vendor/bourbon/settings/_px-to-em.scss +0 -1
  262. data/docs/_sass/vendor/neat/_neat-helpers.scss +0 -11
  263. data/docs/_sass/vendor/neat/_neat.scss +0 -23
  264. data/docs/_sass/vendor/neat/functions/_new-breakpoint.scss +0 -49
  265. data/docs/_sass/vendor/neat/functions/_private.scss +0 -114
  266. data/docs/_sass/vendor/neat/grid/_box-sizing.scss +0 -15
  267. data/docs/_sass/vendor/neat/grid/_direction-context.scss +0 -33
  268. data/docs/_sass/vendor/neat/grid/_display-context.scss +0 -28
  269. data/docs/_sass/vendor/neat/grid/_fill-parent.scss +0 -22
  270. data/docs/_sass/vendor/neat/grid/_media.scss +0 -92
  271. data/docs/_sass/vendor/neat/grid/_omega.scss +0 -87
  272. data/docs/_sass/vendor/neat/grid/_outer-container.scss +0 -34
  273. data/docs/_sass/vendor/neat/grid/_pad.scss +0 -25
  274. data/docs/_sass/vendor/neat/grid/_private.scss +0 -35
  275. data/docs/_sass/vendor/neat/grid/_row.scss +0 -52
  276. data/docs/_sass/vendor/neat/grid/_shift.scss +0 -50
  277. data/docs/_sass/vendor/neat/grid/_span-columns.scss +0 -94
  278. data/docs/_sass/vendor/neat/grid/_to-deprecate.scss +0 -97
  279. data/docs/_sass/vendor/neat/grid/_visual-grid.scss +0 -42
  280. data/docs/_sass/vendor/neat/mixins/_clearfix.scss +0 -25
  281. data/docs/_sass/vendor/neat/settings/_disable-warnings.scss +0 -13
  282. data/docs/_sass/vendor/neat/settings/_grid.scss +0 -51
  283. data/docs/_sass/vendor/neat/settings/_visual-grid.scss +0 -27
  284. data/docs/_sass/vendor/normalize-3.0.2.scss +0 -427
  285. data/docs/_sass/vendor/pygments.scss +0 -356
  286. data/docs/automating_browsers/capybara.md +0 -70
  287. data/docs/css/screen.scss +0 -7
  288. data/docs/guides/cli.md +0 -52
  289. data/docs/guides/frontiers.md +0 -93
  290. data/docs/guides/halting.md +0 -23
  291. data/docs/guides/job_queues.md +0 -26
  292. data/docs/guides/locals.md +0 -36
  293. data/docs/guides/logging.md +0 -22
  294. data/docs/guides/page_objects.md +0 -67
  295. data/docs/guides/peeking.md +0 -46
  296. data/docs/guides/selenium_capybara.md +0 -100
  297. data/docs/guides/tutorial.md +0 -452
  298. data/docs/js/navigation.js +0 -11
  299. data/docs/misc/contributing.md +0 -20
  300. data/docs/misc/testing.md +0 -11
  301. data/docs/recipes/authentication.md +0 -23
  302. data/docs/recipes/csv.md +0 -29
  303. data/docs/recipes/javascript.md +0 -20
  304. data/docs/recipes/multiple_uris.md +0 -18
  305. data/docs/recipes/screenshots.md +0 -20
  306. data/docs/routing/custom_rules.md +0 -16
  307. data/docs/routing/filetypes_rules.md +0 -21
  308. data/docs/routing/host_rules.md +0 -24
  309. data/docs/routing/path_rules.md +0 -33
  310. data/docs/routing/protocol_rules.md +0 -17
  311. data/docs/routing/query_rules.md +0 -69
  312. data/docs/routing/routes.md +0 -96
  313. data/docs/routing/uri_rules.md +0 -18
  314. data/examples/collect_github_issues.rb +0 -65
  315. data/examples/find_foobar_on_wikipedia.rb +0 -23
  316. data/lib/wayfarer/configuration.rb +0 -86
  317. data/lib/wayfarer/crawl.rb +0 -79
  318. data/lib/wayfarer/crawl_observer.rb +0 -103
  319. data/lib/wayfarer/dispatcher.rb +0 -104
  320. data/lib/wayfarer/finders.rb +0 -61
  321. data/lib/wayfarer/frontiers/frontier.rb +0 -79
  322. data/lib/wayfarer/frontiers/memory_bloomfilter.rb +0 -32
  323. data/lib/wayfarer/frontiers/memory_frontier.rb +0 -76
  324. data/lib/wayfarer/frontiers/memory_trie_frontier.rb +0 -39
  325. data/lib/wayfarer/frontiers/normalize_uris.rb +0 -48
  326. data/lib/wayfarer/frontiers/redis_bloomfilter.rb +0 -34
  327. data/lib/wayfarer/frontiers/redis_frontier.rb +0 -83
  328. data/lib/wayfarer/http_adapters/adapter_pool.rb +0 -62
  329. data/lib/wayfarer/http_adapters/net_http_adapter.rb +0 -77
  330. data/lib/wayfarer/http_adapters/selenium_adapter.rb +0 -80
  331. data/lib/wayfarer/job.rb +0 -211
  332. data/lib/wayfarer/locals.rb +0 -40
  333. data/lib/wayfarer/parsers/json_parser.rb +0 -20
  334. data/lib/wayfarer/parsers/xml_parser.rb +0 -27
  335. data/lib/wayfarer/processor.rb +0 -103
  336. data/lib/wayfarer/routing/custom_rule.rb +0 -21
  337. data/lib/wayfarer/routing/filetypes_rule.rb +0 -20
  338. data/lib/wayfarer/routing/host_rule.rb +0 -19
  339. data/lib/wayfarer/routing/path_rule.rb +0 -54
  340. data/lib/wayfarer/routing/protocol_rule.rb +0 -21
  341. data/lib/wayfarer/routing/query_rule.rb +0 -59
  342. data/lib/wayfarer/routing/rule.rb +0 -114
  343. data/lib/wayfarer/routing/uri_rule.rb +0 -21
  344. data/spec/configuration_spec.rb +0 -26
  345. data/spec/crawl_spec.rb +0 -48
  346. data/spec/finders_spec.rb +0 -49
  347. data/spec/frontiers/memory_bloomfilter_spec.rb +0 -6
  348. data/spec/frontiers/memory_frontier_spec.rb +0 -6
  349. data/spec/frontiers/memory_trie_frontier_spec.rb +0 -6
  350. data/spec/frontiers/normalize_uris_spec.rb +0 -59
  351. data/spec/frontiers/redis_bloomfilter_spec.rb +0 -6
  352. data/spec/frontiers/redis_frontier_spec.rb +0 -6
  353. data/spec/http_adapters/adapter_pool_spec.rb +0 -33
  354. data/spec/http_adapters/net_http_adapter_spec.rb +0 -83
  355. data/spec/http_adapters/selenium_adapter_spec.rb +0 -53
  356. data/spec/integration/callbacks_spec.rb +0 -42
  357. data/spec/integration/locals_spec.rb +0 -106
  358. data/spec/integration/peeking_spec.rb +0 -61
  359. data/spec/job_spec.rb +0 -122
  360. data/spec/processor_spec.rb +0 -31
  361. data/spec/routing/custom_rule_spec.rb +0 -26
  362. data/spec/routing/host_rule_spec.rb +0 -48
  363. data/spec/routing/path_rule_spec.rb +0 -66
  364. data/spec/routing/protocol_rule_spec.rb +0 -26
  365. data/spec/routing/query_rule_spec.rb +0 -124
  366. data/spec/routing/rule_spec.rb +0 -251
  367. data/spec/routing/uri_rule_spec.rb +0 -24
  368. data/spec/shared/frontier.rb +0 -96
  369. data/wayfarer-jruby.gemspec +0 -49
@@ -1,356 +0,0 @@
1
- /* Generated by Pygments CSS Theme Builder - https://jwarby.github.io/jekyll-pygments-themes/builder.html */
2
- /* Base Style */
3
- .highlight pre {
4
- color: #333333;
5
- background-color: transparent;
6
- }
7
- /* Punctuation */
8
- .highlight .p {
9
- color: #333333;
10
- background-color: transparent;
11
- }
12
- /* Error */
13
- .highlight .err {
14
- color: #333333;
15
- background-color: transparent;
16
- }
17
- /* Base Style */
18
- .highlight .n {
19
- color: #333333;
20
- background-color: transparent;
21
- }
22
- /* Name Attribute */
23
- .highlight .na {
24
- color: #333333;
25
- background-color: transparent;
26
- }
27
- /* Name Builtin */
28
- .highlight .nb {
29
- color: #333333;
30
- background-color: transparent;
31
- }
32
- /* Name Class */
33
- .highlight .nc {
34
- color: #333333;
35
- background-color: transparent;
36
- }
37
- /* Name Constant */
38
- .highlight .no {
39
- color: #333333;
40
- background-color: transparent;
41
- }
42
- /* Name Decorator */
43
- .highlight .nd {
44
- color: #333333;
45
- background-color: transparent;
46
- }
47
- /* Name Entity */
48
- .highlight .ni {
49
- color: #a20e30;
50
- background-color: transparent;
51
- }
52
- /* Name Exception */
53
- .highlight .ne {
54
- color: #333333;
55
- background-color: transparent;
56
- }
57
- /* Name Function */
58
- .highlight .nf {
59
- color: #333333;
60
- background-color: transparent;
61
- }
62
- /* Name Label */
63
- .highlight .nl {
64
- color: #333333;
65
- background-color: transparent;
66
- }
67
- /* Name Namespace */
68
- .highlight .nn {
69
- color: #333333;
70
- background-color: transparent;
71
- }
72
- /* Name Other */
73
- .highlight .nx {
74
- color: #333333;
75
- background-color: transparent;
76
- }
77
- /* Name Property */
78
- .highlight .py {
79
- color: #333333;
80
- background-color: transparent;
81
- }
82
- /* Name Tag */
83
- .highlight .nt {
84
- color: #333333;
85
- background-color: transparent;
86
- }
87
- /* Name Variable */
88
- .highlight .nv {
89
- color: #333333;
90
- background-color: transparent;
91
- }
92
- /* Name Variable Class */
93
- .highlight .vc {
94
- color: #333333;
95
- background-color: transparent;
96
- }
97
- /* Name Variable Global */
98
- .highlight .vg {
99
- color: #333333;
100
- background-color: transparent;
101
- }
102
- /* Name Variable Instance */
103
- .highlight .vi {
104
- color: #333333;
105
- background-color: transparent;
106
- }
107
- /* Name Builtin Pseudo */
108
- .highlight .bp {
109
- color: #333333;
110
- background-color: transparent;
111
- }
112
- /* Base Style */
113
- .highlight .g {
114
- color: #333333;
115
- background-color: transparent;
116
- }
117
- /* */
118
- .highlight .gd {
119
- color: #333333;
120
- background-color: transparent;
121
- }
122
- /* Base Style */
123
- .highlight .o {
124
- color: #333333;
125
- background-color: transparent;
126
- }
127
- /* Operator Word */
128
- .highlight .ow {
129
- color: #333333;
130
- background-color: transparent;
131
- }
132
- /* Base Style */
133
- .highlight .c {
134
- color: #727273;
135
- background-color: transparent;
136
- }
137
- /* Comment Multiline */
138
- .highlight .cm {
139
- color: #727273;
140
- background-color: transparent;
141
- }
142
- /* Comment Preproc */
143
- .highlight .cp {
144
- color: #727273;
145
- background-color: transparent;
146
- }
147
- /* Comment Single */
148
- .highlight .c1 {
149
- color: #727273;
150
- background-color: transparent;
151
- }
152
- /* Comment Special */
153
- .highlight .cs {
154
- color: #727273;
155
- background-color: transparent;
156
- }
157
- /* Base Style */
158
- .highlight .k {
159
- color: #333333;
160
- background-color: transparent;
161
- }
162
- /* Keyword Constant */
163
- .highlight .kc {
164
- color: #333333;
165
- background-color: transparent;
166
- }
167
- /* Keyword Declaration */
168
- .highlight .kd {
169
- color: #2f3661;
170
- background-color: transparent;
171
- }
172
- /* Keyword Namespace */
173
- .highlight .kn {
174
- color: #333333;
175
- background-color: transparent;
176
- }
177
- /* Keyword Pseudo */
178
- .highlight .kp {
179
- color: #333333;
180
- background-color: transparent;
181
- }
182
- /* Keyword Reserved */
183
- .highlight .kr {
184
- color: #333333;
185
- background-color: transparent;
186
- }
187
- /* Keyword Type */
188
- .highlight .kt {
189
- color: #333333;
190
- background-color: transparent;
191
- }
192
- /* Base Style */
193
- .highlight .l {
194
- color: #a20e30;
195
- background-color: transparent;
196
- }
197
- /* Literal Date */
198
- .highlight .ld {
199
- color: #a20e30;
200
- background-color: transparent;
201
- }
202
- /* Literal Number */
203
- .highlight .m {
204
- color: #a20e30;
205
- background-color: transparent;
206
- }
207
- /* Literal Number Float */
208
- .highlight .mf {
209
- color: #a20e30;
210
- background-color: transparent;
211
- }
212
- /* Literal Number Hex */
213
- .highlight .mh {
214
- color: #333333;
215
- background-color: transparent;
216
- }
217
- /* Literal Number Integer */
218
- .highlight .mi {
219
- color: #a20e30;
220
- background-color: transparent;
221
- }
222
- /* Literal Number Oct */
223
- .highlight .mo {
224
- color: #a20e30;
225
- background-color: transparent;
226
- }
227
- /* Literal Number Integer Long */
228
- .highlight .il {
229
- color: #a20e30;
230
- background-color: transparent;
231
- }
232
- /* Literal String */
233
- .highlight .s {
234
- color: #a20e30;
235
- background-color: transparent;
236
- }
237
- /* Literal String Backtick */
238
- .highlight .sb {
239
- color: #a20e30;
240
- background-color: transparent;
241
- }
242
- /* Literal String Char */
243
- .highlight .sc {
244
- color: #a20e30;
245
- background-color: transparent;
246
- }
247
- /* Literal String Doc */
248
- .highlight .sd {
249
- color: #a20e30;
250
- background-color: transparent;
251
- }
252
- /* Literal String Double */
253
- .highlight .s2 {
254
- color: #a20e30;
255
- background-color: transparent;
256
- }
257
- /* Literal String Escape */
258
- .highlight .se {
259
- color: #a20e30;
260
- background-color: transparent;
261
- }
262
- /* Literal String Heredoc */
263
- .highlight .sh {
264
- color: #a20e30;
265
- background-color: transparent;
266
- }
267
- /* Literal String Interpol */
268
- .highlight .si {
269
- color: #a20e30;
270
- background-color: transparent;
271
- }
272
- /* Literal String Other */
273
- .highlight .sx {
274
- color: #a20e30;
275
- background-color: transparent;
276
- }
277
- /* Literal String Regex */
278
- .highlight .sr {
279
- color: #a20e30;
280
- background-color: transparent;
281
- }
282
- /* Literal String Single */
283
- .highlight .s1 {
284
- color: #a20e30;
285
- background-color: transparent;
286
- }
287
- /* Literal String Symbol */
288
- .highlight .ss {
289
- color: #a20e30;
290
- background-color: transparent;
291
- }
292
- /* Base Style */
293
- .highlight .g {
294
- color: #333333;
295
- background-color: transparent;
296
- }
297
- /* Generic Deleted */
298
- .highlight .gd {
299
- color: #333333;
300
- background-color: transparent;
301
- }
302
- /* Generic Emph */
303
- .highlight .ge {
304
- color: #333333;
305
- background-color: transparent;
306
- }
307
- /* Generic Error */
308
- .highlight .gr {
309
- color: #333333;
310
- background-color: transparent;
311
- }
312
- /* Generic Heading */
313
- .highlight .gh {
314
- color: #333333;
315
- background-color: transparent;
316
- }
317
- /* Generic Inserted */
318
- .highlight .gi {
319
- color: #333333;
320
- background-color: transparent;
321
- }
322
- /* Generic Output */
323
- .highlight .go {
324
- color: #333333;
325
- background-color: transparent;
326
- }
327
- /* Generic Prompt */
328
- .highlight .gp {
329
- color: #333333;
330
- background-color: transparent;
331
- }
332
- /* Generic Strong */
333
- .highlight .gs {
334
- color: #333333;
335
- background-color: transparent;
336
- }
337
- /* Generic Subheading */
338
- .highlight .gu {
339
- color: #333333;
340
- background-color: transparent;
341
- }
342
- /* Generic Traceback */
343
- .highlight .gt {
344
- color: #333333;
345
- background-color: transparent;
346
- }
347
- /* Other */
348
- .highlight .x {
349
- color: #333333;
350
- background-color: transparent;
351
- }
352
- /* Text Whitespace */
353
- .highlight .w {
354
- color: #333333;
355
- background-color: transparent;
356
- }
@@ -1,70 +0,0 @@
1
- ---
2
- layout: default
3
- title: Using Capybara
4
- ---
5
-
6
- # Using Capybara
7
- When the Selenium HTTP adapter is enabled, Wayfarer gives you access to the underlying Selenium driver. You can execute JavaScript, take screenshots, interact with the page, and so on. For an exhaustive list, see [the official API documentation](http://www.rubydoc.info/gems/selenium-webdriver/0.0.28/Selenium/WebDriver/Driver).
8
-
9
- See [examples/selenium.rb](../examples/selenium.rb).
10
-
11
- ## Setup
12
- Inside your instance methods, you have access to `#driver`, which returns a Selenium driver:
13
-
14
- {% highlight ruby %}
15
- class DummyJob < Wayfarer::Job
16
- config do |c|
17
- c.http_adapter = :selenium
18
- c.selenium_argv = [:firefox]
19
- c.connection_count = 4 # Number of instantiated WebDrivers
20
- end
21
-
22
- draw uri: "https://example.com"
23
- def foo
24
- driver # => #<Selenium::WebDriver::Driver:...>
25
- end
26
- end
27
- {% endhighlight %}
28
-
29
- ### Selenium Grid
30
- {% highlight ruby %}
31
- class DummyJob < Wayfarer::Job
32
- config do |c|
33
- c.http_adapter = :selenium
34
- c.selenium_argv = [
35
- :remote, url: "http://localhost:4444/wd/hub", desired_capabilities: :firefox
36
- ]
37
- end
38
- end
39
- {% endhighlight %}
40
-
41
- ## Executing JavaScript
42
- ```ruby
43
- class DummyJob < Wayfarer::Job
44
- config do |c|
45
- c.http_adapter = :selenium
46
- c.selenium_argv = [:firefox]
47
- end
48
-
49
- draw uri: "https://example.com"
50
- def example
51
- driver.execute_script("console.log('Hello from wayfarer!')")
52
- end
53
- end
54
- ```
55
-
56
- ## Taking screenshots
57
- {% highlight ruby %}
58
- class DummyJob < Wayfarer::Job
59
- config do |c|
60
- c.http_adapter = :selenium
61
- c.selenium_argv = [:firefox]
62
- c.window_size = [1024, 768]
63
- end
64
-
65
- draw uri: "https://example.com"
66
- def example
67
- driver.save_screenshot("/tmp/screenshot.png")
68
- end
69
- end
70
- {% endhighlight %}
data/docs/css/screen.scss DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- ---
3
-
4
- @charset "utf-8";
5
-
6
- @import "variables";
7
- @import "base";
data/docs/guides/cli.md DELETED
@@ -1,52 +0,0 @@
1
- ---
2
- layout: default
3
- title: CLI
4
- ---
5
-
6
- # Command-line interface
7
- Wayfarer ships with a small executable, `wayfarer`.
8
-
9
- Job classes are loaded by naming convention, e.g. if you pass `./directory/foo_bar.rb` as the `FILE` parameter, that file is expected to define the class `FooBar`. You can leave off the `.rb` extension.
10
-
11
- ## `% wayfarer route FILE URI`
12
- Loads the job defined in `FILE`, and prints the first matching route for `URI`.
13
-
14
- ## `% wayfarer enqueue FILE URI`
15
- Loads and enqueues the job in `FILE`, starting from `URI`.
16
-
17
- * `--log_level LEVEL`
18
- Option. Which log messages to print.
19
-
20
- * Default: `info`
21
- * Recognized values: `unknown`, `debug`, `error`, `fatal`, `info`, `warn`
22
-
23
- * `--queue_adapter ADAPTER`
24
- Option. Which ActiveJob queue adapter to use (e.g. `sidekiq`, `resque`).
25
- * Recognized values: strings, see [documentation](http://api.rubyonrails.org/)
26
-
27
- * `--wait VALUE`
28
- Option. The point in time at which the enqueued job should run.
29
-
30
- 1. If the value can be converted to an integer, it represents the seconds from now.
31
- 2. If the value can be parsed by `Time::parse`, the job gets scheduled at that point in time.
32
- 3. If the value is a human-readable time string that [Chronic](https://github.com/mojombo/chronic) can make sense of, the job is scheduled at that point in time.
33
-
34
- __Examples:__
35
-
36
- 60 seconds from now:
37
-
38
- ```
39
- % wayfarer enqueue ./foo_bar http://google.com --wait 60
40
- ```
41
-
42
- 6pm, today:
43
-
44
- ```
45
- % wayfarer enqueue ./foo_bar http://google.com --wait 18:00
46
- ```
47
-
48
- Tomorrow:
49
-
50
- ```
51
- % wayfarer enqueue ./foo_bar http://google.com --wait tomorrow
52
- ```
@@ -1,93 +0,0 @@
1
- ---
2
- layout: default
3
- title: Frontiers
4
- ---
5
-
6
- # Frontiers
7
-
8
- Frontiers keep track of three sets of URIs:
9
-
10
- * Current URIs that are being processed
11
- * Staged URIs that might be processed in the next cycle
12
- * Cached URIs that have been processed
13
-
14
- All frontiers expose the same behaviour.
15
-
16
- <pre class="illustration">
17
- ┌──────────────────────────────────────────────────────────┐
18
- │ STAGED │
19
- │ {https://alpha.com, https://beta.com} │
20
- └──────────────────────────────────────────────────────────┘
21
- ┌──────────────────────────────────────────────────────────┐
22
- │ CURRENT │
23
- │ {https://gamma.com} │
24
- └──────────────────────────────────────────────────────────┘
25
- ┌──────────────────────────────────────────────────────────┐
26
- │ CACHED │
27
- │ {https://beta.com} │
28
- └──────────────────────────────────────────────────────────┘
29
-
30
- Cycle
31
-
32
-
33
- ┌──────────────────────────────────────────────────────────┐
34
- │ STAGED' │
35
- │ {...} │
36
- └──────────────────────────────────────────────────────────┘
37
- ┌──────────────────────────────────────────────────────────┐
38
- │ CURRENT' = STAGED \ CACHED │
39
- │ {https://alpha.com} │
40
- └──────────────────────────────────────────────────────────┘
41
- ┌──────────────────────────────────────────────────────────┐
42
- │ CACHED' = CACHED ∪ CURRENT │
43
- │ {https://beta.com, https://gamma.com} │
44
- └──────────────────────────────────────────────────────────┘
45
- </pre>
46
-
47
- ## Available frontiers
48
- Currently, there are 5 frontiers available:
49
-
50
- 2. `:memory` (default): Uses sets from the standard lib.
51
- 4. `:redis`: Uses Redis sets.
52
- 3. `:memory_bloom`: Uses a [Bloom filter](https://github.com/igrigorik/bloomfilter-rb).
53
- 5. `:redis_bloom`: Uses a Redis-backed Bloom filter.
54
- 1. `:memory_trie`: Uses a [trie](https://github.com/tyler/trie) and sets.
55
-
56
- | Frontier | MRI support | JRuby support |
57
- | --- | --- | --- |
58
- | `:memory` | Yes | Yes
59
- | `:redis` | Yes | Yes
60
- | `:memory_bloom` | Yes | No
61
- | `:redis_bloom` | Yes | No
62
- | `:memory_trie` | Yes | No
63
-
64
- ## Setting the frontier
65
-
66
- Set the `:frontier` configuration key:
67
-
68
- {% highlight ruby %}
69
- class DummyJob < Wayfarer::Job
70
- config.frontier = :foobar
71
- end
72
- {% endhighlight %}
73
-
74
- ### Using a Redis frontier
75
-
76
- Set the `:redis_opts` and `:frontier` configuration keys:
77
-
78
- {% highlight ruby %}
79
- class DummyJob < Wayfarer::Job
80
- config.redis_opts = { port: 4242 }
81
- config.frontier = :redis
82
- end
83
- {% endhighlight %}
84
-
85
- ### Setting bloomfilter parameters
86
-
87
- Set the `:bloomfilter_opts` configuration key:
88
-
89
- {% highlight ruby %}
90
- class DummyJob < Wayfarer::Job
91
- config.bloomfilter_opts = { ... }
92
- end
93
- {% endhighlight %}
@@ -1,23 +0,0 @@
1
- ---
2
- layout: default
3
- title: Halting
4
- ---
5
-
6
- # Halting
7
- Processing can be stopped by calling `#halt` within actions.
8
-
9
- Calling `#halt` does not exit the action immediately. Instead, it sets a halting flag internally, and once the action returns, all threads will stop instead of processing further URIs.
10
-
11
- Job instances run in separate threads. When a job signals that it wants to halt, all other threads will finish their current work, but will not process any further URIs. All instances have the chance to get their current work done.
12
-
13
- {% highlight ruby %}
14
- class DummyJob < Wayfarer::Job
15
- def example
16
- halt
17
- puts "This will be printed!"
18
-
19
- return halt
20
- puts "This will not be printed!"
21
- end
22
- end
23
- {% endhighlight %}
@@ -1,26 +0,0 @@
1
- ---
2
- layout: default
3
- title: Job queues
4
- ---
5
-
6
- # Job queues
7
-
8
- Thanks to [ActiveJob](http://edgeguides.rubyonrails.org/active_job_basics.html), jobs can be enqueued with various backends, e.g. Sidekiq or Resque:
9
-
10
- {% highlight ruby %}
11
- class DummyJob < Wayfarer::Job
12
- # Overrides ActiveJob's global setting
13
- self.queue_adapter = :resque
14
-
15
- # Identifier for enqueued jobs
16
- queue_as :dummy_job
17
-
18
- # Alternatively, pass a block
19
- queue_as do
20
- [:first, :second].sample
21
- end
22
- end
23
-
24
- # Alternatively, set the queue explicitly on call:
25
- DummyJob.set(queue: :something_else).perform_later(*uris)
26
- {% endhighlight %}
@@ -1,36 +0,0 @@
1
- ---
2
- layout: default
3
- title: Locals
4
- ---
5
-
6
- # Locals
7
-
8
- Locals are Wayfarer's replacement for job instance variables. Both `let` and `let!` declare variables that are accessible within [callbacks]({{base}}/callbacks.html) and actions.
9
-
10
- Even though you might recognise them from RSpec, they have differing semantics: Values in `let` blocks will be replaced with thread-safe counterparts once the job is run. `let!` skips this. Both evaluate their block immediately.
11
-
12
- | Standard lib | Counterpart |
13
- | --- | --- |
14
- | Booleans | [`Concurrent::AtomicBoolean`](http://ruby-concurrency.github.io/concurrent-ruby/Concurrent/AtomicBoolean.html) |
15
- | `Fixnum` | [`Concurrent::AtomicFixnum`](http://ruby-concurrency.github.io/concurrent-ruby/Concurrent/AtomicFixnum.html) |
16
- | `Hash` | [`Concurrent::Hash`](http://ruby-concurrency.github.io/concurrent-ruby/Concurrent/Hash.html) |
17
- | `Array` | [`Concurrent::Array`](http://ruby-concurrency.github.io/concurrent-ruby/Concurrent/Array.html) |
18
- | Everything else | Untouched |
19
-
20
- {% highlight ruby %}
21
- class DummyJob < Wayfarer::Job
22
- let(:values) { [1, 2, 3] }
23
-
24
- before_crawl do
25
- values.reverse!
26
- end
27
-
28
- after_crawl do
29
- values # => [3, 2, 1, 0]
30
- end
31
-
32
- def some_action
33
- values << 0
34
- end
35
- end
36
- {% endhighlight %}
@@ -1,22 +0,0 @@
1
- ---
2
- layout: default
3
- title: Logging
4
- ---
5
-
6
- # Logging
7
-
8
- {% highlight ruby %}
9
- # Global configuration serves as the template
10
- Wayfarer.logger.level = :fatal
11
-
12
- class DummyJob < Wayfarer::Job
13
- # Jobs can tweak their logger
14
- config.logger.level = :warn
15
- config.logger.progname = "dummy-job"
16
-
17
- def example
18
- logger.info "No"
19
- logger.warn "Yes"
20
- end
21
- end
22
- {% endhighlight %}