scrapbot 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,5 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
+ nbproject/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source "http://rubygems.org"
+
+ # Specify your gem's dependencies in scrapbot.gemspec
+ gemspec
data/README.rdoc ADDED
@@ -0,0 +1,15 @@
+ = Scrapbot
+
+ == Introduction
+
+ A bot which watches given URLs and copies them to your computer whenever it detects a change.
+
+ == Installation
+
+   gem install scrapbot
+
+ == Usage
+
+ An example configuration file is provided in the example folder.
+
+   scrapbot watch --url_file=urls.txt --conf_file=conf.yml
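The two flags point at plain files resolved against the current working directory: --url_file lists one URL per line and --conf_file is a one-key YAML file naming the directory where snapshots are written. A minimal pair, mirroring the gem's example folder shown further down (the /tmp path is illustrative, not part of the gem):

  # urls.txt -- one URL per line
  www.google.com
  www.msn.com

  # conf.yml -- where downloaded copies are written
  :path_to_git: /tmp/scrapbot-downloads/

  scrapbot watch --url_file=urls.txt --conf_file=conf.yml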
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
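Bundler::GemHelper.install_tasks provides Bundler's standard release tasks, so from a checkout the gem can be built and installed without calling gem directly:

  rake build     # packages scrapbot-0.0.1.gem into pkg/
  rake install   # builds the gem and installs it into the local gem set
  rake release   # tags the version and pushes the gem to rubygems.org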
data/bin/scrapbot ADDED
@@ -0,0 +1,52 @@
+ #!/usr/bin/env ruby
+
+ require File.expand_path('../../lib/scrapbot', __FILE__)
+ require 'thor'
+ require 'settingslogic'
+ require 'configliere'
+
+
+
+ module Scrapbot
+   Settings.use :commandline, :config_file
+
+
+   class Init < Thor
+
+     attr_accessor :urls
+
+     desc "watch", "start watching urls"
+
+     # URL list and YAML configuration file, both required
+     method_option :url_file,  :type => :string, :required => true
+     method_option :conf_file, :type => :string, :required => true
+
+     def watch
+
+       load_settings(File.join(Dir.getwd, options[:conf_file]))
+       load_urls(File.join(Dir.getwd, options[:url_file]))
+
+       s = Scrapbot::Downloader.new(self.urls)
+       s.start
+     end
+
+     private
+
+     def load_settings(path)
+       Settings.read path
+       Settings.resolve!
+     end
+
+     def load_urls(path)
+       # read the URL file, one URL per line, skipping blanks
+       self.urls = File.open(path).readlines.map(&:strip).reject(&:empty?)
+     end
+
+   end
+
+ end
+
+ Scrapbot::Init.start
+
+ # scrapbot watch --url_file=urls.txt --conf_file=conf.yml
+
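Because both file paths are joined onto Dir.getwd, the command has to be run from the directory holding the URL list and the configuration file, for instance from the gem's example folder (after pointing :path_to_git at a directory that exists on your machine):

  cd example
  scrapbot watch --url_file=urls.txt --conf_file=conf.yml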
data/example/conf.yml ADDED
@@ -0,0 +1 @@
+ :path_to_git: /home/lewy/programowanie/projekty/downloads/
data/example/urls.txt ADDED
@@ -0,0 +1,2 @@
+ www.google.com
+ www.msn.com
data/lib/scrapbot.rb ADDED
@@ -0,0 +1,7 @@
+ require 'scrapbot/downloader'
+ require 'typhoeus'
+
+
+ module Scrapbot
+   PATH = '/home/lewy/programowanie/projekty/downloads'
+ end
data/lib/scrapbot/downloader.rb ADDED
@@ -0,0 +1,68 @@
+ require_relative 'storage'
+ require 'settingslogic'
+
+ module Scrapbot
+   # This class is responsible for downloading pages
+   class Downloader
+     attr_accessor :urls
+
+     def initialize(urls)
+       self.urls = urls
+     end
+
+     def start
+       s = Storage.new
+
+       urls_list do |response|
+         s.magazine << response
+       end
+
+       s.save_all
+     end
+
+     private
+
+     def urls_list(&block)
+       self.urls.each do |url|
+         yield Typhoeus::Request.get(url)
+       end
+     end
+   end
+
+
+
+
+   # module Downloader
+
+   #   def archive(urls = [], options = {})
+   #     # This method downloads the given urls
+   #     urls.each do |url|
+   #       response = Typhoeus::Request.get(url)
+   #       Storage.new(response).save
+   #     end
+   #   end
+   #
+   #
+   #   # Create or switch to branch named response.effective_url
+   #   # Grit::Repo(path_to_repo)
+   #
+   #
+   #   class Storage
+   #     attr_accessor :response
+   #
+   #     def initialize(typh_url)
+   #       self.response = typh_url
+   #     end
+   #
+   #     def save
+   #
+   #     end
+   #
+   #     private
+   #
+   #     def find_or_create_branch
+   #
+   #     end
+   #   end
+   # end
+ end
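The Downloader can also be driven without the Thor executable. A minimal sketch, assuming the gem and its dependencies (typhoeus, configliere, settingslogic, grit) are installed; the /tmp path and the inline URL list are illustrative, not part of the gem:

  require 'scrapbot'
  require 'configliere'

  # normally filled in from conf.yml by bin/scrapbot; the directory must already exist
  Settings[:path_to_git] = '/tmp/scrapbot-downloads'

  # fetch each URL with Typhoeus and hand the responses to Storage#save_all
  Scrapbot::Downloader.new(['www.google.com', 'www.msn.com']).start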
data/lib/scrapbot/storage.rb ADDED
@@ -0,0 +1,28 @@
+ require 'grit'
+
+ module Scrapbot
+   class Storage
+
+     attr_accessor :magazine
+
+     def initialize
+       self.magazine = []
+
+     end
+
+     def save_all
+       self.magazine.each do |response|
+         File.open(save_path,'wb') do |f|
+           f.write(response.body)
+         end
+       end
+     end
+
+     private
+
+     def save_path
+       File.join(Settings[:path_to_git],"index.html")
+     end
+
+   end
+ end
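Note that save_path always resolves to the same index.html under Settings[:path_to_git], so each stored response overwrites the previous one. Storage only calls #body on the objects it holds, so anything response-like works; a minimal sketch with a stand-in response (the OpenStruct and the /tmp path are illustrative):

  require 'ostruct'
  require 'configliere'
  require 'scrapbot/storage'   # also pulls in grit

  Settings[:path_to_git] = '/tmp/scrapbot-downloads'   # directory must already exist

  storage = Scrapbot::Storage.new
  storage.magazine << OpenStruct.new(:body => '<html>snapshot</html>')
  storage.save_all   # writes /tmp/scrapbot-downloads/index.html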
data/lib/scrapbot/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Scrapbot
+   VERSION = "0.0.1"
+ end
data/scrapbot.gemspec ADDED
@@ -0,0 +1,27 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "scrapbot/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "scrapbot"
+   s.version     = Scrapbot::VERSION
+   s.platform    = Gem::Platform::RUBY
+   s.authors     = ["Pawel Lewinski"]
+   s.email       = ["lewy313@gmail.com"]
+   s.homepage    = ""
+   s.summary     = %q{Scraping Bot}
+   s.description = %q{Bot for archiving pages}
+
+   s.add_development_dependency "grit"
+   s.add_development_dependency "typhoeus", ">=0.2"
+   s.add_development_dependency "configliere", ">=0.3"
+
+   s.executables = ["scrapbot"]
+
+   s.rubyforge_project = "scrapbot"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+ end
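Since the gemspec builds its file list with git ls-files, the gem has to be packaged from inside the git checkout; the manual equivalent of the Rakefile's rake build and rake install is:

  gem build scrapbot.gemspec
  gem install scrapbot-0.0.1.gem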
metadata ADDED
@@ -0,0 +1,116 @@
+ --- !ruby/object:Gem::Specification
+ name: scrapbot
+ version: !ruby/object:Gem::Version
+   prerelease: false
+   segments:
+   - 0
+   - 0
+   - 1
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Pawel Lewinski
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2011-03-17 00:00:00 +01:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: grit
+   prerelease: false
+   requirement: &id001 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         segments:
+         - 0
+         version: "0"
+   type: :development
+   version_requirements: *id001
+ - !ruby/object:Gem::Dependency
+   name: typhoeus
+   prerelease: false
+   requirement: &id002 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         segments:
+         - 0
+         - 2
+         version: "0.2"
+   type: :development
+   version_requirements: *id002
+ - !ruby/object:Gem::Dependency
+   name: configliere
+   prerelease: false
+   requirement: &id003 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         segments:
+         - 0
+         - 3
+         version: "0.3"
+   type: :development
+   version_requirements: *id003
+ description: Bot for archiving pages
+ email:
+ - lewy313@gmail.com
+ executables:
+ - scrapbot
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - .gitignore
+ - Gemfile
+ - README.rdoc
+ - Rakefile
+ - bin/scrapbot
+ - example/conf.yml
+ - example/urls.txt
+ - lib/scrapbot.rb
+ - lib/scrapbot/downloader.rb
+ - lib/scrapbot/storage.rb
+ - lib/scrapbot/version.rb
+ - scrapbot.gemspec
+ has_rdoc: true
+ homepage: ""
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project: scrapbot
+ rubygems_version: 1.3.7
+ signing_key:
+ specification_version: 3
+ summary: Scraping Bot
+ test_files: []
+