scrapbot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ nbproject/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in scrapbot.gemspec
4
+ gemspec
@@ -0,0 +1,15 @@
1
+ = Scrapbot
2
+
3
+ == Introduction
4
+
5
+ A bot that watches the given URLs and saves a copy of each page to your computer whenever it detects a change.
6
+
7
+ == Installation
8
+
9
+ gem install scrapbot
10
+
11
+ == Usage
12
+
13
+ An example configuration file is provided in the example folder.
14
+
15
+ scrapbot watch --url_file=urls.txt --conf_file=conf.yml
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require File.expand_path('../../lib/scrapbot', __FILE__)
4
+ require 'thor'
5
+ require 'settingslogic'
6
+ require 'configliere'
7
+
8
+
9
+
10
module Scrapbot
  # Turn on the command-line and config-file sources of the global Settings
  # object (presumably Configliere's Settings -- confirm against the requires).
  Settings.use :commandline, :config_file

  # Thor-based command-line entry point.
  #
  # Usage:
  #   scrapbot watch --url_file=urls.txt --conf_file=conf.yml
  class Init < Thor

    # List of URLs read from the --url_file file; filled in by #watch.
    # NOTE(review): Thor may treat these public accessors as commands and warn
    # about a missing description -- confirm and wrap in no_tasks if so.
    attr_accessor :urls

    # The usage string must name the method below, otherwise `scrapbot help`
    # advertises a command that does not exist.
    desc "watch", "start watching urls"

    # Each option is declared separately so that :required applies to the
    # option itself instead of defining an extra option called "required".
    method_option :url_file,  :type => :string, :required => true,
                              :desc => "text file with one URL per line"
    method_option :conf_file, :type => :string, :required => true,
                              :desc => "settings file to read"

    # Loads the settings and the URL list, then downloads every URL once.
    def watch
      # expand_path resolves relative names against the working directory and
      # leaves absolute paths untouched.
      load_settings(File.expand_path(options[:conf_file], Dir.getwd))
      load_urls(File.expand_path(options[:url_file], Dir.getwd))

      Scrapbot::Downloader.new(urls).start
    end

    private

    # Reads the settings file at +path+ and resolves the Settings object.
    def load_settings(path)
      Settings.read path
      Settings.resolve!
    end

    # Reads +path+ line by line, strips whitespace, drops blank lines and
    # stores the result in #urls. File.readlines closes the file for us.
    def load_urls(path)
      self.urls = File.readlines(path).map(&:strip).reject(&:empty?)
    end

  end

end
48
+
49
+ Scrapbot::Init.start
50
+
51
+ # Example: scrapbot watch --url_file=urls.txt --conf_file=conf.yml
52
+
@@ -0,0 +1 @@
1
+ :path_to_git : /home/lewy/programowanie/projekty/downloads/
@@ -0,0 +1,2 @@
1
+ www.google.com
2
+ www.msn.com
@@ -0,0 +1,7 @@
1
+ require 'scrapbot/downloader'
2
+ require 'typhoeus'
3
+
4
+
5
module Scrapbot
  # Default download directory.
  # NOTE(review): this is an absolute path on the author's machine and no code
  # visible in this package reads it (Storage uses Settings[:path_to_git]) --
  # confirm whether it can be removed.
  # Frozen so the shared constant cannot be mutated in place.
  PATH = '/home/lewy/programowanie/projekty/downloads'.freeze
end
@@ -0,0 +1,68 @@
1
+ require_relative 'storage'
2
+ require 'settingslogic'
3
+
4
module Scrapbot
  # Downloads every URL it was given and hands the responses to Storage.
  class Downloader
    # URLs to fetch. An Array is expected; a single URL string or nil is
    # tolerated as well (see #urls_list).
    attr_accessor :urls

    # urls - the list of URLs to download.
    def initialize(urls)
      self.urls = urls
    end

    # Fetches all URLs, queues each response in a fresh Storage and then asks
    # the storage to write everything out. Returns whatever Storage#save_all
    # returns.
    def start
      storage = Storage.new

      urls_list { |response| storage.magazine << response }

      storage.save_all
    end

    private

    # Yields the Typhoeus response for each URL in turn.
    # Array() turns nil into an empty list and wraps a lone URL string, so a
    # missing or single-valued list no longer raises NoMethodError.
    def urls_list
      Array(urls).each do |url|
        yield Typhoeus::Request.get(url)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'grit'
2
+
3
module Scrapbot
  # Collects downloaded responses and writes their bodies to disk.
  class Storage

    # Queue of responses waiting to be written; callers append with <<.
    # Each entry must respond to #body.
    attr_accessor :magazine

    def initialize
      self.magazine = []
    end

    # Writes the body of every queued response into the directory named by
    # Settings[:path_to_git]. Each response gets its own file; previously all
    # of them were written to the same index.html, so only the last one
    # survived. Returns the magazine.
    def save_all
      magazine.each_with_index do |response, position|
        File.open(save_path(position), 'wb') do |file|
          file.write(response.body)
        end
      end
    end

    private

    # Target file for the response at +position+ in the magazine. The first
    # response keeps the original name "index.html"; later ones become
    # "index_1.html", "index_2.html", and so on.
    def save_path(position = 0)
      file_name = position.zero? ? "index.html" : "index_#{position}.html"
      File.join(Settings[:path_to_git], file_name)
    end

  end
end
@@ -0,0 +1,3 @@
1
module Scrapbot
  # Gem version; read by scrapbot.gemspec. Frozen so it cannot be mutated in
  # place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "scrapbot/version"
4
+
5
# Gem specification for scrapbot.
Gem::Specification.new do |s|
  s.name        = "scrapbot"
  s.version     = Scrapbot::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Pawel Lewinski"]
  s.email       = ["lewy313@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Scrapping Bot}
  s.description = %q{Bot for archive pages}

  # These libraries are required by bin/scrapbot and lib/ at run time, so they
  # must be runtime dependencies; as development dependencies they are not
  # installed by `gem install scrapbot` and the executable fails on `require`.
  s.add_dependency "grit"
  s.add_dependency "typhoeus", ">=0.2"
  s.add_dependency "configliere", ">=0.3"
  # Required by bin/scrapbot but previously not declared at all.
  s.add_dependency "thor"
  s.add_dependency "settingslogic"

  s.rubyforge_project = "scrapbot"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Single source of truth for executables; an earlier hard-coded assignment
  # was overwritten by this line and has been dropped.
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrapbot
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Pawel Lewinski
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-03-17 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: grit
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: typhoeus
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ - 2
44
+ version: "0.2"
45
+ type: :development
46
+ version_requirements: *id002
47
+ - !ruby/object:Gem::Dependency
48
+ name: configliere
49
+ prerelease: false
50
+ requirement: &id003 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ - 3
58
+ version: "0.3"
59
+ type: :development
60
+ version_requirements: *id003
61
+ description: Bot for archive pages
62
+ email:
63
+ - lewy313@gmail.com
64
+ executables:
65
+ - scrapbot
66
+ extensions: []
67
+
68
+ extra_rdoc_files: []
69
+
70
+ files:
71
+ - .gitignore
72
+ - Gemfile
73
+ - README.rdoc
74
+ - Rakefile
75
+ - bin/scrapbot
76
+ - example/conf.yml
77
+ - example/urls.txt
78
+ - lib/scrapbot.rb
79
+ - lib/scrapbot/downloader.rb
80
+ - lib/scrapbot/storage.rb
81
+ - lib/scrapbot/version.rb
82
+ - scrapbot.gemspec
83
+ has_rdoc: true
84
+ homepage: ""
85
+ licenses: []
86
+
87
+ post_install_message:
88
+ rdoc_options: []
89
+
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ segments:
106
+ - 0
107
+ version: "0"
108
+ requirements: []
109
+
110
+ rubyforge_project: scrapbot
111
+ rubygems_version: 1.3.7
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Scrapping Bot
115
+ test_files: []
116
+