rubyretriever 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/retriever/fetch.rb +2 -3
 - data/lib/retriever/version.rb +1 -1
 - data/readme.md +1 -1
 - metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: f27793b1294bd489c3338aa5bc739dc6058d479a
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: e48491765a741087ba7bf708d62a8bbf8aa3cf80
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 60c017cf5dda8c659b12c3146f565df743dc4f6cf0e8436889bebb86cdccd5b128cca233d76b1413f526c21d1d99478c8149e6c43772e5dd0db1067f8dea5263
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: ec8142f3a8cbd75861c74322dd0358da51ee45388376ca0118d0998b81cf302b182eef93c39d88ef17dd35ef80a6253bf7a058e2e2052ea60ea3266d401892ab
         
     | 
    
        data/lib/retriever/fetch.rb
    CHANGED
    
    | 
         @@ -155,7 +155,7 @@ module Retriever 
     | 
|
| 
       155 
155 
     | 
    
         
             
            				@linkStack.concat(new_links_arr)
         
     | 
| 
       156 
156 
     | 
    
         
             
            				@sitemap.concat(new_links_arr) if @s
         
     | 
| 
       157 
157 
     | 
    
         
             
            			end
         
     | 
| 
       158 
     | 
    
         
            -
            			@progressbar.finish
         
     | 
| 
      
 158 
     | 
    
         
            +
            			@progressbar.finish if @prgrss
         
     | 
| 
       159 
159 
     | 
    
         
             
            		end
         
     | 
| 
       160 
160 
     | 
    
         
             
            		def asyncGetWave() #send a new wave of GET requests, using current @linkStack
         
     | 
| 
       161 
161 
     | 
    
         
             
            			new_stuff = []
         
     | 
| 
         @@ -167,11 +167,10 @@ module Retriever 
     | 
|
| 
       167 
167 
     | 
    
         
             
            			    	if @already_crawled.include?(url)
         
     | 
| 
       168 
168 
     | 
    
         
             
            			    		@linkStack.delete(url)
         
     | 
| 
       169 
169 
     | 
    
         
             
            			    		next
         
     | 
| 
       170 
     | 
    
         
            -
            			    	else
         
     | 
| 
       171 
     | 
    
         
            -
            			    		@already_crawled.insert(url)
         
     | 
| 
       172 
170 
     | 
    
         
             
            			    	end
         
     | 
| 
       173 
171 
     | 
    
         
             
            			    	resp = EventMachine::HttpRequest.new(url).get
         
     | 
| 
       174 
172 
     | 
    
         
             
            					lg("URL Crawled: #{url}")
         
     | 
| 
      
 173 
     | 
    
         
            +
            			    	@already_crawled.insert(url)
         
     | 
| 
       175 
174 
     | 
    
         
             
            					if @prgrss
         
     | 
| 
       176 
175 
     | 
    
         
             
            						@progressbar.increment if @already_crawled.size < @maxPages
         
     | 
| 
       177 
176 
     | 
    
         
             
            					end
         
     | 
    
        data/lib/retriever/version.rb
    CHANGED
    
    
    
        data/readme.md
    CHANGED