tanakai 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.travis.yml +5 -0
  4. data/CHANGELOG.md +118 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +2038 -0
  8. data/Rakefile +10 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/tanakai +6 -0
  12. data/lib/tanakai/automation/deploy.yml +54 -0
  13. data/lib/tanakai/automation/setup/chromium_chromedriver.yml +26 -0
  14. data/lib/tanakai/automation/setup/firefox_geckodriver.yml +20 -0
  15. data/lib/tanakai/automation/setup/phantomjs.yml +33 -0
  16. data/lib/tanakai/automation/setup/ruby_environment.yml +124 -0
  17. data/lib/tanakai/automation/setup.yml +45 -0
  18. data/lib/tanakai/base/saver.rb +106 -0
  19. data/lib/tanakai/base/storage.rb +54 -0
  20. data/lib/tanakai/base.rb +326 -0
  21. data/lib/tanakai/base_helper.rb +22 -0
  22. data/lib/tanakai/browser_builder/apparition_builder.rb +58 -0
  23. data/lib/tanakai/browser_builder/cuprite_builder.rb +54 -0
  24. data/lib/tanakai/browser_builder/mechanize_builder.rb +154 -0
  25. data/lib/tanakai/browser_builder/poltergeist_phantomjs_builder.rb +175 -0
  26. data/lib/tanakai/browser_builder/selenium_chrome_builder.rb +199 -0
  27. data/lib/tanakai/browser_builder/selenium_firefox_builder.rb +204 -0
  28. data/lib/tanakai/browser_builder.rb +20 -0
  29. data/lib/tanakai/capybara_configuration.rb +10 -0
  30. data/lib/tanakai/capybara_ext/apparition/driver.rb +13 -0
  31. data/lib/tanakai/capybara_ext/cuprite/driver.rb +13 -0
  32. data/lib/tanakai/capybara_ext/driver/base.rb +62 -0
  33. data/lib/tanakai/capybara_ext/mechanize/driver.rb +71 -0
  34. data/lib/tanakai/capybara_ext/poltergeist/driver.rb +13 -0
  35. data/lib/tanakai/capybara_ext/selenium/driver.rb +34 -0
  36. data/lib/tanakai/capybara_ext/session/config.rb +22 -0
  37. data/lib/tanakai/capybara_ext/session.rb +249 -0
  38. data/lib/tanakai/cli/ansible_command_builder.rb +71 -0
  39. data/lib/tanakai/cli/generator.rb +57 -0
  40. data/lib/tanakai/cli.rb +183 -0
  41. data/lib/tanakai/core_ext/array.rb +14 -0
  42. data/lib/tanakai/core_ext/hash.rb +5 -0
  43. data/lib/tanakai/core_ext/numeric.rb +19 -0
  44. data/lib/tanakai/core_ext/string.rb +7 -0
  45. data/lib/tanakai/pipeline.rb +33 -0
  46. data/lib/tanakai/runner.rb +60 -0
  47. data/lib/tanakai/template/.gitignore +18 -0
  48. data/lib/tanakai/template/Gemfile +28 -0
  49. data/lib/tanakai/template/README.md +3 -0
  50. data/lib/tanakai/template/config/application.rb +37 -0
  51. data/lib/tanakai/template/config/automation.yml +13 -0
  52. data/lib/tanakai/template/config/boot.rb +22 -0
  53. data/lib/tanakai/template/config/initializers/.keep +0 -0
  54. data/lib/tanakai/template/config/schedule.rb +57 -0
  55. data/lib/tanakai/template/db/.keep +0 -0
  56. data/lib/tanakai/template/helpers/application_helper.rb +3 -0
  57. data/lib/tanakai/template/lib/.keep +0 -0
  58. data/lib/tanakai/template/log/.keep +0 -0
  59. data/lib/tanakai/template/pipelines/saver.rb +11 -0
  60. data/lib/tanakai/template/pipelines/validator.rb +24 -0
  61. data/lib/tanakai/template/spiders/application_spider.rb +143 -0
  62. data/lib/tanakai/template/tmp/.keep +0 -0
  63. data/lib/tanakai/version.rb +3 -0
  64. data/lib/tanakai.rb +54 -0
  65. data/tanakai.gemspec +50 -0
  66. metadata +382 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '0363335680ba18ca855d2413e4efdce1957decab5f31c954e2b04f4f91660ac6'
4
+ data.tar.gz: 3fee8a56e284ef3bae724d1ffe3cc7f1614ad374efd59d60a002a7f71353cf06
5
+ SHA512:
6
+ metadata.gz: fabeeb2270349d0961294de34abe055906c38477cd4f744da9e033c626939e2672b86b30053a3d8c89bea1889f6392370e1b58296a0327f1a891d4915132478c
7
+ data.tar.gz: 8e34927825ef45893de6e00c676621823b4d3ca7c28210c73720409000c04bb228b1ea0dd75293144afd1dbff6f4f370ca0312847d781895434896374bb77f7b
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ Gemfile.lock
10
+
11
+ *.retry
12
+ .tags*
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.2
data/CHANGELOG.md ADDED
@@ -0,0 +1,118 @@
1
+ # CHANGELOG
2
+
3
+ ## 1.5.0
4
+ ### New
5
+ * First release as Tanakai
6
+ * Add support for [Apparition](https://github.com/twalpole/apparition)
7
+ * Add support for [Cuprite](https://github.com/rubycdp/cuprite)
8
+
9
+ ## 1.4.0
10
+ ### New
11
+ * Add `encoding` config option (see [All available config options](https://github.com/vifreefly/kimuraframework#all-available-config-options))
12
+ * Validate url before processing a request (Base#request_to)
13
+
14
+ ### Fixes
15
+ * Fix console command bug (see [issue 21](https://github.com/vifreefly/kimuraframework/issues/21))
16
+
17
+ ## 1.3.2
18
+ ### Fixes
19
+ * In the project template, set Ruby version as >= 2.5 (previously hard-coded to 2.5.1)
20
+ * Remove .ruby-version file (was hard-coded to 2.5.1) from the project template
21
+
22
+ ## 1.3.1
23
+ ### Fixes
24
+ * Fixed bug in Base#save_to
25
+
26
+ ## 1.3.0
27
+ ### Breaking changes 1.3.0
28
+ * Remove persistence database feature (because it's slow and makes things complicated)
29
+
30
+ ### New
31
+ * Add `--include` and `--exclude` options to CLI#runner
32
+ * Add Base `#create_browser` method to easily create additional browser instances
33
+ * Add Capybara::Session `#scroll_to_bottom`
34
+ * Add skip_on_failure feature to `retry_request_errors` config option
35
+ * Add info about `add_event` method to the README
36
+
37
+ ### Fixes and improvements
38
+ * Improve Runner
39
+ * Fix time helper in schedule.rb
40
+ * Add proxy validation to browser builders
41
+ * Allow passing different arguments to the `Base.parse` method
42
+
43
+ ## 1.2.0
44
+ ### New
45
+ * Add possibility to add array of values to the storage (`Base::Storage#add`)
46
+ * Add `exception_on_fail` option to `Base.crawl!`
47
+ * Add possibility to pass request hash to the `start_urls` (You can use array of hashes as well, like: `@start_urls = [{ url: "https://example.com/cat?id=1", data: { category: "First Category" } }]`)
48
+ * Implement `skip_request_errors` config feature. Added [Handle request errors](https://github.com/vifreefly/kimuraframework#handle-request-errors) chapter to the README.
49
+ * Add option to choose response type for `Session#current_response` (`:html` default, or `:json`)
50
+ * Add option to provide custom chrome and chromedriver paths
51
+
52
+ ### Improvements
53
+ * Refactor `Runner`
54
+
55
+ ### Fixes
56
+ * Fix `Base#Saver` (automatically create the file if it doesn't exist when using the persistence database)
57
+ * Do not deep merge config's `headers:` option
58
+
59
+ ## 1.1.0
60
+ ### Breaking changes 1.1.0
61
+ The `browser` config option is deprecated. All sub-options that were inside `browser` should now be placed directly into the `@config` hash, without the `browser` parent key. Example:
62
+
63
+ ```ruby
64
+ # Was:
65
+ @config = {
66
+ browser: {
67
+ retry_request_errors: [Net::ReadTimeout],
68
+ restart_if: {
69
+ memory_limit: 350_000,
70
+ requests_limit: 100
71
+ },
72
+ before_request: {
73
+ change_proxy: true,
74
+ change_user_agent: true,
75
+ clear_cookies: true,
76
+ clear_and_set_cookies: true,
77
+ delay: 1..3
78
+ }
79
+ }
80
+ }
81
+
82
+ # Now:
83
+ @config = {
84
+ retry_request_errors: [Net::ReadTimeout],
85
+ restart_if: {
86
+ memory_limit: 350_000,
87
+ requests_limit: 100
88
+ },
89
+ before_request: {
90
+ change_proxy: true,
91
+ change_user_agent: true,
92
+ clear_cookies: true,
93
+ clear_and_set_cookies: true,
94
+ delay: 1..3
95
+ }
96
+ }
97
+ ```
98
+
99
+ ### New
100
+ * Add `storage` object with additional methods and persistence database feature
101
+ * Add events feature to `run_info`
102
+ * Add `skip_duplicate_requests` config option to automatically skip already visited urls when using `request_to`
103
+ * Add `extensions` config option to allow injecting JS code into the browser (supported only by the poltergeist_phantomjs engine)
104
+ * Add Capybara::Session#within_new_window_by method
105
+
106
+ ### Improvements
107
+ * Add the last backtrace line to pipeline output when item was dropped
108
+ * Do not destroy the driver if it doesn't exist (for the Base.parse! method)
109
+ * Handle possible Net::ReadTimeout error while trying to #quit driver
110
+
111
+ ### Fixes
112
+ * Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
113
+ * Fix requests retries logic
114
+
115
+
116
+ ## 1.0.1
117
+ * Add missing `logger` method to pipeline
118
+ * Fix `set_proxy` in Mechanize and Poltergeist builders
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in tanakai.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Victor Afanasev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.