scrapbot 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/scrapbot CHANGED
@@ -1,12 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require File.expand_path('../../lib/scrapbot', __FILE__)
4
+
4
5
  require 'thor'
5
- require 'settingslogic'
6
6
  require 'configliere'
7
7
 
8
8
 
9
-
10
9
  module Scrapbot
11
10
  Settings.use :commandline, :config_file
12
11
 
@@ -17,8 +16,9 @@ module Scrapbot
17
16
 
18
17
  desc "server", "start watching urls"
19
18
 
20
- # Url file
19
+ # File containing the list of URLs
21
20
  method_options :url_file => :string, :required => true
21
+ # YAML configuration file
22
22
  method_options :conf_file => :string, :required => true
23
23
 
24
24
  def watch
@@ -49,4 +49,3 @@ end
49
49
  Scrapbot::Init.start
50
50
 
51
51
  # scrapbot watch --url_file="www.wp.pl"
52
-
data/lib/scrapbot.rb CHANGED
@@ -1,7 +1,8 @@
1
1
  require 'scrapbot/downloader'
2
+ require 'scrapbot/storage'
2
3
  require 'typhoeus'
3
4
 
4
5
 
5
6
  module Scrapbot
6
- PATH = '/home/lewy/programowanie/projekty/downloads'
7
+
7
8
  end
@@ -1,5 +1,4 @@
1
- require_relative 'storage'
2
- require 'settingslogic'
1
+
3
2
 
4
3
  module Scrapbot
5
4
  # This class is responsible for downloading pages
@@ -28,41 +27,4 @@ module Scrapbot
28
27
  end
29
28
  end
30
29
  end
31
-
32
-
33
-
34
-
35
- # module Downloader
36
-
37
- # def archive(urls = [],options = {})
38
- # # This method download given urls
39
- # urls.each do |url|
40
- # response = Typhoeus::Request.get(url)
41
- # Storage.new(response).save
42
- # end
43
- # end
44
- #
45
- #
46
- # # Create or switch to branch named response.effective_url
47
- # # Grit::Repo(path_to_repo)
48
- #
49
- #
50
- # class Storage
51
- # attr_accessor :response
52
- #
53
- # def initialize(typh_url)
54
- # self.response = typh_url
55
- # end
56
- #
57
- # def save
58
- #
59
- # end
60
- #
61
- # private
62
- #
63
- # def find_or_create_branch
64
- #
65
- # end
66
- # end
67
- # end
68
30
  end
@@ -1,4 +1,5 @@
1
1
  require 'grit'
2
+ require 'uri'
2
3
 
3
4
  module Scrapbot
4
5
  class Storage
@@ -12,7 +13,7 @@ module Scrapbot
12
13
 
13
14
  def save_all
14
15
  self.magazine.each do |response|
15
- File.open(save_path,'wb') do |f|
16
+ File.open(save_path(response),'wb') do |f|
16
17
  f.write(response.body)
17
18
  end
18
19
  end
@@ -20,9 +21,14 @@ module Scrapbot
20
21
 
21
22
  private
22
23
 
23
- def save_path
24
- File.join(Settings[:path_to_git],"index.html")
24
+ def save_path(response)
25
+ filename = URI.parse(response.effective_url).host
26
+ File.join(Settings[:path_to_git],"#{filename}.html")
25
27
  end
26
28
 
27
29
  end
28
- end
30
+ end
31
+
32
+ #require 'grit'
33
+ #include Grit
34
+ #r = Repo.new("/home/lewy/programowanie/projekty/downloads")
@@ -1,3 +1,3 @@
1
1
  module Scrapbot
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 1
9
- version: 0.0.1
8
+ - 2
9
+ version: 0.0.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Pawel Lewinski