tanakai 1.6.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.gitignore +1 -0
 - data/CHANGELOG.md +19 -6
 - data/README.md +6 -0
 - data/lib/tanakai/base/saver.rb +4 -6
 - data/lib/tanakai/base.rb +5 -5
 - data/lib/tanakai/cli/ansible_command_builder.rb +1 -1
 - data/lib/tanakai/cli/generator.rb +1 -1
 - data/lib/tanakai/cli.rb +1 -1
 - data/lib/tanakai/version.rb +1 -1
 - metadata +2 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: f639e8c843d1effdd2fa5268fe01ee0fda5adcb741ecda49b5e8f2c8a51f55a3
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: d03287426dc9e1e802ef149ad9edf5f83813ea067d22f9f6fb8fa7ec33bb3c5a
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 12b9a122343c1599c87caf97cd527bf98c83db50e4f5fab40b4657932c41b41c5f3437a26297141a2e19f064f5083ba9b12c099d4bc20f537b0cf440aa92d9e2
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 756e98178ef2c1fe80d9dfca936eab248e46e1c199665c23973fa9b96ad513a9bad2e4d0498e6ea3d3442a3aa76bf744109d91161e39c89aa0b29cdf83385ed0
         
     | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -1,18 +1,32 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # CHANGELOG
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            ## Next
         
     | 
| 
      
 4 
     | 
    
         
            +
            * Your contribution here
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            ## 1.7.1
         
     | 
| 
      
 7 
     | 
    
         
            +
            ### Fixes
         
     | 
| 
      
 8 
     | 
    
         
            +
            * [#5](https://github.com/glaucocustodio/tanakai/pull/5): Replace `File.exists`/`Dir.exists` that have been removed on ruby 3.2 - [MrChriss](https://github.com/MrChriss)
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            ## 1.7.0
         
     | 
| 
      
 11 
     | 
    
         
            +
            ### New
         
     | 
| 
      
 12 
     | 
    
         
            +
            * Allow passing `data:` to `crawl!` - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            ### Fixes
         
     | 
| 
      
 15 
     | 
    
         
            +
            * [#4](https://github.com/glaucocustodio/tanakai/pull/4): Fix keyword args on `crawl!` - [milk1000cc](https://github.com/milk1000cc)
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
       3 
17 
     | 
    
         
             
            ## 1.6.0
         
     | 
| 
       4 
18 
     | 
    
         
             
            ### New
         
     | 
| 
       5 
     | 
    
         
            -
            * Add support to Ruby 3
         
     | 
| 
      
 19 
     | 
    
         
            +
            * Add support to Ruby 3 - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
       6 
20 
     | 
    
         | 
| 
       7 
21 
     | 
    
         
             
            ## 1.5.1
         
     | 
| 
       8 
22 
     | 
    
         
             
            ### New
         
     | 
| 
       9 
     | 
    
         
            -
            * Add `response_type` to `in_parallel`
         
     | 
| 
      
 23 
     | 
    
         
            +
            * Add `response_type` to `in_parallel` - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
       10 
24 
     | 
    
         | 
| 
       11 
25 
     | 
    
         
             
            ## 1.5.0
         
     | 
| 
       12 
26 
     | 
    
         
             
            ### New
         
     | 
| 
       13 
     | 
    
         
            -
            * First release as Tanakai
         
     | 
| 
       14 
     | 
    
         
            -
            * Add support to [Apparition](https://github.com/twalpole/apparition)
         
     | 
| 
       15 
     | 
    
         
            -
            * Add support to [Cuprite](https://github.com/rubycdp/cuprite)
         
     | 
| 
      
 27 
     | 
    
         
            +
            * First release as Tanakai - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
      
 28 
     | 
    
         
            +
            * Add support to [Apparition](https://github.com/twalpole/apparition) - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
      
 29 
     | 
    
         
            +
            * Add support to [Cuprite](https://github.com/rubycdp/cuprite) - [glaucocustodio](https://github.com/glaucocustodio)
         
     | 
| 
       16 
30 
     | 
    
         | 
| 
       17 
31 
     | 
    
         
             
            ## 1.4.0
         
     | 
| 
       18 
32 
     | 
    
         
             
            ### New
         
     | 
| 
         @@ -120,7 +134,6 @@ 
     | 
|
| 
       120 
134 
     | 
    
         
             
            * Fix Mechanize::Driver#proxy (there was a bug while using proxy for mechanize engine without authorization)
         
     | 
| 
       121 
135 
     | 
    
         
             
            * Fix requests retries logic
         
     | 
| 
       122 
136 
     | 
    
         | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
137 
     | 
    
         
             
            ## 1.0.1
         
     | 
| 
       125 
138 
     | 
    
         
             
            * Add missing `logger` method to pipeline
         
     | 
| 
       126 
139 
     | 
    
         
             
            * Fix `set_proxy` in Mechanize and Poltergeist builders
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -1355,6 +1355,12 @@ end # => 
     | 
|
| 
       1355 
1355 
     | 
    
         
             
            # {:spider_name=>"example_spider", :status=>:completed, :environment=>"development", :start_time=>2018-08-22 18:49:22 +0400, :stop_time=>2018-08-22 18:49:23 +0400, :running_time=>0.801, :visits=>{:requests=>1, :responses=>1}, :items=>{:sent=>0, :processed=>0}, :error=>nil}
         
     | 
| 
       1356 
1356 
     | 
    
         
             
            ```
         
     | 
| 
       1357 
1357 
     | 
    
         | 
| 
      
 1358 
     | 
    
         
            +
            You can also pass `data` to `crawl!`:
         
     | 
| 
      
 1359 
     | 
    
         
            +
             
     | 
| 
      
 1360 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 1361 
     | 
    
         
            +
            ExampleSpider.crawl!(data: { foo: "bar" })
         
     | 
| 
      
 1362 
     | 
    
         
            +
            ```
         
     | 
| 
      
 1363 
     | 
    
         
            +
             
     | 
| 
       1358 
1364 
     | 
    
         
             
            So what if you don't care about stats and just want to process a request to a particular spider method and get the returning value from this method? Use `.parse!` instead:
         
     | 
| 
       1359 
1365 
     | 
    
         | 
| 
       1360 
1366 
     | 
    
         
             
            #### `.parse!(:method_name, url:)` method
         
     | 
    
        data/lib/tanakai/base/saver.rb
    CHANGED
    
    | 
         @@ -42,7 +42,7 @@ module Tanakai 
     | 
|
| 
       42 
42 
     | 
    
         
             
                  def save_to_json(item)
         
     | 
| 
       43 
43 
     | 
    
         
             
                    data = JSON.generate([item])
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
     | 
    
         
            -
                    if @index > 1 || append && File.exists?(path)
     | 
| 
      
 45 
     | 
    
         
            +
                    if @index > 1 || append && File.exist?(path)
         
     | 
| 
       46 
46 
     | 
    
         
             
                      file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
         
     | 
| 
       47 
47 
     | 
    
         
             
                      File.open(path, "w") do |f|
         
     | 
| 
       48 
48 
     | 
    
         
             
                        f.write(file_content + data.sub(/\A\[/, ""))
         
     | 
| 
         @@ -55,7 +55,7 @@ module Tanakai 
     | 
|
| 
       55 
55 
     | 
    
         
             
                  def save_to_pretty_json(item)
         
     | 
| 
       56 
56 
     | 
    
         
             
                    data = JSON.pretty_generate([item])
         
     | 
| 
       57 
57 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
                    if @index > 1 || append && File.exists?(path)
     | 
| 
      
 58 
     | 
    
         
            +
                    if @index > 1 || append && File.exist?(path)
         
     | 
| 
       59 
59 
     | 
    
         
             
                      file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
         
     | 
| 
       60 
60 
     | 
    
         
             
                      File.open(path, "w") do |f|
         
     | 
| 
       61 
61 
     | 
    
         
             
                        f.write(file_content + data.sub(/\A\[\n/, ""))
         
     | 
| 
         @@ -68,7 +68,7 @@ module Tanakai 
     | 
|
| 
       68 
68 
     | 
    
         
             
                  def save_to_jsonlines(item)
         
     | 
| 
       69 
69 
     | 
    
         
             
                    data = JSON.generate(item)
         
     | 
| 
       70 
70 
     | 
    
         | 
| 
       71 
     | 
    
         
            -
                    if @index > 1 || append && File.exists?(path)
     | 
| 
      
 71 
     | 
    
         
            +
                    if @index > 1 || append && File.exist?(path)
         
     | 
| 
       72 
72 
     | 
    
         
             
                      File.open(path, "a") { |file| file.write("\n" + data) }
         
     | 
| 
       73 
73 
     | 
    
         
             
                    else
         
     | 
| 
       74 
74 
     | 
    
         
             
                      File.open(path, "w") { |file| file.write(data) }
         
     | 
| 
         @@ -78,7 +78,7 @@ module Tanakai 
     | 
|
| 
       78 
78 
     | 
    
         
             
                  def save_to_csv(item)
         
     | 
| 
       79 
79 
     | 
    
         
             
                    data = flatten_hash(item)
         
     | 
| 
       80 
80 
     | 
    
         | 
| 
       81 
     | 
    
         
            -
                    if @index > 1 || append && File.exists?(path)
     | 
| 
      
 81 
     | 
    
         
            +
                    if @index > 1 || append && File.exist?(path)
         
     | 
| 
       82 
82 
     | 
    
         
             
                      CSV.open(path, "a+", force_quotes: true) do |csv|
         
     | 
| 
       83 
83 
     | 
    
         
             
                        csv << data.values
         
     | 
| 
       84 
84 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -102,5 +102,3 @@ module Tanakai 
     | 
|
| 
       102 
102 
     | 
    
         
             
                end
         
     | 
| 
       103 
103 
     | 
    
         
             
              end
         
     | 
| 
       104 
104 
     | 
    
         
             
            end
         
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
             
     | 
    
        data/lib/tanakai/base.rb
    CHANGED
    
    | 
         @@ -100,7 +100,7 @@ module Tanakai 
     | 
|
| 
       100 
100 
     | 
    
         
             
                  end
         
     | 
| 
       101 
101 
     | 
    
         
             
                end
         
     | 
| 
       102 
102 
     | 
    
         | 
| 
       103 
     | 
    
         
            -
                def self.crawl!(exception_on_fail: true)
         
     | 
| 
      
 103 
     | 
    
         
            +
                def self.crawl!(exception_on_fail: true, data: {})
         
     | 
| 
       104 
104 
     | 
    
         
             
                  logger.error "Spider: already running: #{name}" and return false if running?
         
     | 
| 
       105 
105 
     | 
    
         | 
| 
       106 
106 
     | 
    
         
             
                  @storage = Storage.new
         
     | 
| 
         @@ -124,13 +124,13 @@ module Tanakai 
     | 
|
| 
       124 
124 
     | 
    
         
             
                  if start_urls
         
     | 
| 
       125 
125 
     | 
    
         
             
                    start_urls.each do |start_url|
         
     | 
| 
       126 
126 
     | 
    
         
             
                      if start_url.class == Hash
         
     | 
| 
       127 
     | 
    
         
            -
                        spider.request_to(:parse, start_url)
         
     | 
| 
      
 127 
     | 
    
         
            +
                        spider.request_to(:parse, url: start_url, data: data)
         
     | 
| 
       128 
128 
     | 
    
         
             
                      else
         
     | 
| 
       129 
     | 
    
         
            -
                        spider.request_to(:parse, url: start_url)
         
     | 
| 
      
 129 
     | 
    
         
            +
                        spider.request_to(:parse, url: start_url, data: data)
         
     | 
| 
       130 
130 
     | 
    
         
             
                      end
         
     | 
| 
       131 
131 
     | 
    
         
             
                    end
         
     | 
| 
       132 
132 
     | 
    
         
             
                  else
         
     | 
| 
       133 
     | 
    
         
            -
                    spider.parse
         
     | 
| 
      
 133 
     | 
    
         
            +
                    spider.parse(data: data)
         
     | 
| 
       134 
134 
     | 
    
         
             
                  end
         
     | 
| 
       135 
135 
     | 
    
         
             
                rescue StandardError, SignalException, SystemExit => e
         
     | 
| 
       136 
136 
     | 
    
         
             
                  @run_info.merge!(status: :failed, error: e.inspect)
         
     | 
| 
         @@ -160,7 +160,7 @@ module Tanakai 
     | 
|
| 
       160 
160 
     | 
    
         
             
                  if args.present?
         
     | 
| 
       161 
161 
     | 
    
         
             
                    spider.public_send(handler, *args)
         
     | 
| 
       162 
162 
     | 
    
         
             
                  elsif request.present?
         
     | 
| 
       163 
     | 
    
         
            -
                    spider.request_to(handler, request)
         
     | 
| 
      
 163 
     | 
    
         
            +
                    spider.request_to(handler, **request)
         
     | 
| 
       164 
164 
     | 
    
         
             
                  else
         
     | 
| 
       165 
165 
     | 
    
         
             
                    spider.public_send(handler)
         
     | 
| 
       166 
166 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/tanakai/cli/ansible_command_builder.rb
    CHANGED
    
    | 
         @@ -31,7 +31,7 @@ module Tanakai 
     | 
|
| 
       31 
31 
     | 
    
         
             
                      "--extra-vars", "ansible_python_interpreter=/usr/bin/python3"
         
     | 
| 
       32 
32 
     | 
    
         
             
                    ]
         
     | 
| 
       33 
33 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
                    if File.exists? "config/automation.yml"
     | 
| 
      
 34 
     | 
    
         
            +
                    if File.exist? "config/automation.yml"
         
     | 
| 
       35 
35 
     | 
    
         
             
                      require 'yaml'
         
     | 
| 
       36 
36 
     | 
    
         
             
                      if config = YAML.load_file("config/automation.yml").dig(@playbook)
         
     | 
| 
       37 
37 
     | 
    
         
             
                        config.each { |key, value| @vars[key] = value unless @vars[key] }
         
     | 
    
        data/lib/tanakai/cli/generator.rb
    CHANGED
    
    | 
         @@ -17,7 +17,7 @@ module Tanakai 
     | 
|
| 
       17 
17 
     | 
    
         | 
| 
       18 
18 
     | 
    
         
             
                  def generate_spider(spider_name, in_project:)
         
     | 
| 
       19 
19 
     | 
    
         
             
                    spider_path = in_project ? "spiders/#{spider_name}.rb" : "./#{spider_name}.rb"
         
     | 
| 
       20 
     | 
    
         
            -
                    raise "Spider #{spider_path} already exists" if File.exists? spider_path
     | 
| 
      
 20 
     | 
    
         
            +
                    raise "Spider #{spider_path} already exist?" if File.exist? spider_path
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
22 
     | 
    
         
             
                    spider_class = to_spider_class(spider_name)
         
     | 
| 
       23 
23 
     | 
    
         
             
                    create_file spider_path do
         
     | 
    
        data/lib/tanakai/cli.rb
    CHANGED
    
    
    
        data/lib/tanakai/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: tanakai
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.6.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.7.1
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Victor Afanasev
         
     | 
| 
         @@ -9,7 +9,7 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire:
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2023-02 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2023-11-02 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: thor
         
     |