scrapbot 0.0.1 → 0.0.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- data/bin/scrapbot +3 -4
- data/lib/scrapbot.rb +2 -1
- data/lib/scrapbot/downloader.rb +1 -39
- data/lib/scrapbot/storage.rb +10 -4
- data/lib/scrapbot/version.rb +1 -1
- metadata +2 -2
data/bin/scrapbot
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require File.expand_path('../../lib/scrapbot', __FILE__)
|
4
|
+
|
4
5
|
require 'thor'
|
5
|
-
require 'settingslogic'
|
6
6
|
require 'configliere'
|
7
7
|
|
8
8
|
|
9
|
-
|
10
9
|
module Scrapbot
|
11
10
|
Settings.use :commandline, :config_file
|
12
11
|
|
@@ -17,8 +16,9 @@ module Scrapbot
|
|
17
16
|
|
18
17
|
desc "server", "start watching urls"
|
19
18
|
|
20
|
-
#
|
19
|
+
# File with url list
|
21
20
|
method_options :url_file => :string, :required => true
|
21
|
+
# Yaml configuration file
|
22
22
|
method_options :conf_file => :string, :required => true
|
23
23
|
|
24
24
|
def watch
|
@@ -49,4 +49,3 @@ end
|
|
49
49
|
Scrapbot::Init.start
|
50
50
|
|
51
51
|
# scrapbot watch --url_file="www.wp.pl"
|
52
|
-
|
data/lib/scrapbot.rb
CHANGED
data/lib/scrapbot/downloader.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
require 'settingslogic'
|
1
|
+
|
3
2
|
|
4
3
|
module Scrapbot
|
5
4
|
# This class is responsible for downloading page
|
@@ -28,41 +27,4 @@ module Scrapbot
|
|
28
27
|
end
|
29
28
|
end
|
30
29
|
end
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
# module Downloader
|
36
|
-
|
37
|
-
# def archive(urls = [],options = {})
|
38
|
-
# # This method download given urls
|
39
|
-
# urls.each do |url|
|
40
|
-
# response = Typhoeus::Request.get(url)
|
41
|
-
# Storage.new(response).save
|
42
|
-
# end
|
43
|
-
# end
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# # Create or switch to branch named response.effective_url
|
47
|
-
# # Grit::Repo(path_to_repo)
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# class Storage
|
51
|
-
# attr_accessor :response
|
52
|
-
#
|
53
|
-
# def initialize(typh_url)
|
54
|
-
# self.response = typh_url
|
55
|
-
# end
|
56
|
-
#
|
57
|
-
# def save
|
58
|
-
#
|
59
|
-
# end
|
60
|
-
#
|
61
|
-
# private
|
62
|
-
#
|
63
|
-
# def find_or_create_branch
|
64
|
-
#
|
65
|
-
# end
|
66
|
-
# end
|
67
|
-
# end
|
68
30
|
end
|
data/lib/scrapbot/storage.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'grit'
|
2
|
+
require 'uri'
|
2
3
|
|
3
4
|
module Scrapbot
|
4
5
|
class Storage
|
@@ -12,7 +13,7 @@ module Scrapbot
|
|
12
13
|
|
13
14
|
def save_all
|
14
15
|
self.magazine.each do |response|
|
15
|
-
File.open(save_path,'wb') do |f|
|
16
|
+
File.open(save_path(response),'wb') do |f|
|
16
17
|
f.write(response.body)
|
17
18
|
end
|
18
19
|
end
|
@@ -20,9 +21,14 @@ module Scrapbot
|
|
20
21
|
|
21
22
|
private
|
22
23
|
|
23
|
-
def save_path
|
24
|
-
|
24
|
+
def save_path(response)
|
25
|
+
filename = URI.parse(response.effective_url).host
|
26
|
+
File.join(Settings[:path_to_git],"#{filename}.html")
|
25
27
|
end
|
26
28
|
|
27
29
|
end
|
28
|
-
end
|
30
|
+
end
|
31
|
+
|
32
|
+
#require 'grit'
|
33
|
+
#include Grit
|
34
|
+
#r = Repo.new("/home/lewy/programowanie/projekty/downloads")
|
data/lib/scrapbot/version.rb
CHANGED