scrapbot 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/scrapbot +3 -4
 - data/lib/scrapbot.rb +2 -1
 - data/lib/scrapbot/downloader.rb +1 -39
 - data/lib/scrapbot/storage.rb +10 -4
 - data/lib/scrapbot/version.rb +1 -1
 - metadata +2 -2
 
    
        data/bin/scrapbot
    CHANGED
    
    | 
         @@ -1,12 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            #!/usr/bin/env ruby
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require File.expand_path('../../lib/scrapbot', __FILE__)
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
       4 
5 
     | 
    
         
             
            require 'thor'
         
     | 
| 
       5 
     | 
    
         
            -
            require 'settingslogic'
         
     | 
| 
       6 
6 
     | 
    
         
             
            require 'configliere'
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
9 
     | 
    
         
             
            module Scrapbot
         
     | 
| 
       11 
10 
     | 
    
         
             
              Settings.use :commandline, :config_file
         
     | 
| 
       12 
11 
     | 
    
         | 
| 
         @@ -17,8 +16,9 @@ module Scrapbot 
     | 
|
| 
       17 
16 
     | 
    
         | 
| 
       18 
17 
     | 
    
         
             
                desc "server", "start watching urls"
         
     | 
| 
       19 
18 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
                # 
     | 
| 
      
 19 
     | 
    
         
            +
                # File with url list
         
     | 
| 
       21 
20 
     | 
    
         
             
                method_options :url_file => :string, :required => true
         
     | 
| 
      
 21 
     | 
    
         
            +
                # Yaml configuration file
         
     | 
| 
       22 
22 
     | 
    
         
             
                method_options :conf_file => :string, :required => true
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
24 
     | 
    
         
             
                def watch
         
     | 
| 
         @@ -49,4 +49,3 @@ end 
     | 
|
| 
       49 
49 
     | 
    
         
             
            Scrapbot::Init.start
         
     | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
51 
     | 
    
         
             
            # scrapbot watch --url_file="www.wp.pl"
         
     | 
| 
       52 
     | 
    
         
            -
             
     | 
    
        data/lib/scrapbot.rb
    CHANGED
    
    
    
        data/lib/scrapbot/downloader.rb
    CHANGED
    
    | 
         @@ -1,5 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
            require 'settingslogic'
         
     | 
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
       3 
2 
     | 
    
         | 
| 
       4 
3 
     | 
    
         
             
            module Scrapbot
         
     | 
| 
       5 
4 
     | 
    
         
             
              #  This class is responsible for downloading page
         
     | 
| 
         @@ -28,41 +27,4 @@ module Scrapbot 
     | 
|
| 
       28 
27 
     | 
    
         
             
                  end
         
     | 
| 
       29 
28 
     | 
    
         
             
                end
         
     | 
| 
       30 
29 
     | 
    
         
             
              end
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
              #  module Downloader
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
              #    def archive(urls = [],options = {})
         
     | 
| 
       38 
     | 
    
         
            -
              #      #      This method download given urls
         
     | 
| 
       39 
     | 
    
         
            -
              #      urls.each do |url|
         
     | 
| 
       40 
     | 
    
         
            -
              #        response = Typhoeus::Request.get(url)
         
     | 
| 
       41 
     | 
    
         
            -
              #        Storage.new(response).save
         
     | 
| 
       42 
     | 
    
         
            -
              #      end
         
     | 
| 
       43 
     | 
    
         
            -
              #    end
         
     | 
| 
       44 
     | 
    
         
            -
              #
         
     | 
| 
       45 
     | 
    
         
            -
              #
         
     | 
| 
       46 
     | 
    
         
            -
              #    # Create or switch to branch named response.effective_url
         
     | 
| 
       47 
     | 
    
         
            -
              #    # Grit::Repo(path_to_repo)
         
     | 
| 
       48 
     | 
    
         
            -
              #
         
     | 
| 
       49 
     | 
    
         
            -
              #
         
     | 
| 
       50 
     | 
    
         
            -
              #    class Storage
         
     | 
| 
       51 
     | 
    
         
            -
              #      attr_accessor :response
         
     | 
| 
       52 
     | 
    
         
            -
              #
         
     | 
| 
       53 
     | 
    
         
            -
              #      def initialize(typh_url)
         
     | 
| 
       54 
     | 
    
         
            -
              #        self.response = typh_url
         
     | 
| 
       55 
     | 
    
         
            -
              #      end
         
     | 
| 
       56 
     | 
    
         
            -
              #
         
     | 
| 
       57 
     | 
    
         
            -
              #      def save
         
     | 
| 
       58 
     | 
    
         
            -
              #
         
     | 
| 
       59 
     | 
    
         
            -
              #      end
         
     | 
| 
       60 
     | 
    
         
            -
              #
         
     | 
| 
       61 
     | 
    
         
            -
              #      private
         
     | 
| 
       62 
     | 
    
         
            -
              #
         
     | 
| 
       63 
     | 
    
         
            -
              #      def find_or_create_branch
         
     | 
| 
       64 
     | 
    
         
            -
              #
         
     | 
| 
       65 
     | 
    
         
            -
              #      end
         
     | 
| 
       66 
     | 
    
         
            -
              #    end
         
     | 
| 
       67 
     | 
    
         
            -
              #  end
         
     | 
| 
       68 
30 
     | 
    
         
             
            end
         
     | 
    
        data/lib/scrapbot/storage.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'grit'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'uri'
         
     | 
| 
       2 
3 
     | 
    
         | 
| 
       3 
4 
     | 
    
         
             
            module Scrapbot
         
     | 
| 
       4 
5 
     | 
    
         
             
              class Storage
         
     | 
| 
         @@ -12,7 +13,7 @@ module Scrapbot 
     | 
|
| 
       12 
13 
     | 
    
         | 
| 
       13 
14 
     | 
    
         
             
                def save_all
         
     | 
| 
       14 
15 
     | 
    
         
             
                  self.magazine.each do |response|
         
     | 
| 
       15 
     | 
    
         
            -
                    File.open(save_path,'wb') do |f|
         
     | 
| 
      
 16 
     | 
    
         
            +
                    File.open(save_path(response),'wb') do |f|
         
     | 
| 
       16 
17 
     | 
    
         
             
                      f.write(response.body)
         
     | 
| 
       17 
18 
     | 
    
         
             
                    end
         
     | 
| 
       18 
19 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -20,9 +21,14 @@ module Scrapbot 
     | 
|
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
                private
         
     | 
| 
       22 
23 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
                def save_path
         
     | 
| 
       24 
     | 
    
         
            -
                   
     | 
| 
      
 24 
     | 
    
         
            +
                def save_path(response)
         
     | 
| 
      
 25 
     | 
    
         
            +
                  filename = URI.parse(response.effective_url).host
         
     | 
| 
      
 26 
     | 
    
         
            +
                  File.join(Settings[:path_to_git],"#{filename}.html")
         
     | 
| 
       25 
27 
     | 
    
         
             
                end
         
     | 
| 
       26 
28 
     | 
    
         | 
| 
       27 
29 
     | 
    
         
             
              end
         
     | 
| 
       28 
     | 
    
         
            -
            end
         
     | 
| 
      
 30 
     | 
    
         
            +
            end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            #require 'grit'
         
     | 
| 
      
 33 
     | 
    
         
            +
            #include Grit
         
     | 
| 
      
 34 
     | 
    
         
            +
            #r = Repo.new("/home/lewy/programowanie/projekty/downloads")
         
     | 
    
        data/lib/scrapbot/version.rb
    CHANGED