scrapbot 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/Gemfile +4 -0
- data/README.rdoc +15 -0
- data/Rakefile +2 -0
- data/bin/scrapbot +52 -0
- data/example/conf.yml +1 -0
- data/example/urls.txt +2 -0
- data/lib/scrapbot.rb +7 -0
- data/lib/scrapbot/downloader.rb +68 -0
- data/lib/scrapbot/storage.rb +28 -0
- data/lib/scrapbot/version.rb +3 -0
- data/scrapbot.gemspec +27 -0
- metadata +116 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
= Scrapbot
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
A bot that watches the given URLs and copies the pages to your computer whenever it detects a change.
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
gem install scrapbot
|
10
|
+
|
11
|
+
== Usage
|
12
|
+
|
13
|
+
An example configuration file and an example URL list are provided in the example folder.
|
14
|
+
|
15
|
+
scrapbot watch --url_file=urls.txt --conf_file=conf.yml
|
data/Rakefile
ADDED
data/bin/scrapbot
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require File.expand_path('../../lib/scrapbot', __FILE__)
|
4
|
+
require 'thor'
|
5
|
+
require 'settingslogic'
|
6
|
+
require 'configliere'
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
module Scrapbot
  # Switch on the :commandline and :config_file features of the global
  # Settings object (presumably the one provided by configliere, which is
  # required at the top of this file).
  Settings.use :commandline, :config_file

  # Command-line interface of scrapbot, built on Thor.
  #
  # Usage:
  #   scrapbot watch --url_file=urls.txt --conf_file=conf.yml
  class Init < Thor

    # Thor turns every public instance method into a task and warns about
    # public methods that have no description, so the accessor is declared
    # inside no_tasks to keep it out of the task list.
    no_tasks do
      # List of URLs read from the url file (one String per URL).
      attr_accessor :urls
    end

    desc "watch", "start watching urls"

    # Both files are mandatory. Paths may be absolute or relative to the
    # current working directory.
    method_option :url_file,  :type => :string, :required => true
    method_option :conf_file, :type => :string, :required => true

    # Loads the settings and the URL list, then downloads every URL.
    def watch
      # File.expand_path resolves relative paths against the current working
      # directory and leaves absolute paths untouched.
      load_settings(File.expand_path(options[:conf_file]))
      load_urls(File.expand_path(options[:url_file]))

      Scrapbot::Downloader.new(urls).start
    end

    private

    # Reads the configuration file at +path+ into Settings and resolves it.
    def load_settings(path)
      Settings.read path
      Settings.resolve!
    end

    # Reads the text file at +path+ and stores its non-blank lines,
    # stripped of surrounding whitespace, in #urls.
    def load_urls(path)
      # File.readlines closes the file itself, unlike File.open(path).readlines.
      self.urls = File.readlines(path).map(&:strip).reject(&:empty?)
    end
  end
end
|
48
|
+
|
49
|
+
# Entry point of the executable. `start` is not defined in this file; it is
# inherited from Thor and presumably dispatches the command-line arguments to
# the matching task (e.g. `watch`).
Scrapbot::Init.start
|
50
|
+
|
51
|
+
# Example: scrapbot watch --url_file=urls.txt --conf_file=conf.yml
|
52
|
+
|
data/example/conf.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
:path_to_git : /home/lewy/programowanie/projekty/downloads/
|
data/example/urls.txt
ADDED
data/lib/scrapbot.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require_relative 'storage'
|
2
|
+
require 'settingslogic'
|
3
|
+
|
4
|
+
module Scrapbot
  # Downloads every configured URL and hands the responses over to Storage.
  class Downloader
    # URLs to download: an Array of Strings, a single String, or nil.
    attr_accessor :urls

    # urls - the URL or list of URLs to download.
    def initialize(urls)
      self.urls = urls
    end

    # Fetches all URLs, collects the responses in a fresh Storage and asks
    # it to write them out.
    #
    # Returns whatever Storage#save_all returns.
    def start
      storage = Storage.new

      urls_list { |response| storage.magazine << response }

      storage.save_all
    end

    private

    # Yields the response of a GET request for each URL, in order.
    # Array() makes a nil list behave as empty and accepts a single URL.
    def urls_list
      Array(urls).each do |url|
        yield Typhoeus::Request.get(url)
      end
    end
  end

  # TODO: an earlier draft (previously kept here as commented-out code)
  # planned to save each page on a git branch named after
  # response.effective_url, using Grit. That idea is not implemented yet.
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'grit'
|
2
|
+
|
3
|
+
module Scrapbot
  # Collects downloaded responses and writes the body of each one to its
  # own file inside the directory configured as Settings[:path_to_git].
  class Storage
    # File name used for the first response when no name can be derived
    # from its URL.
    DEFAULT_FILE_NAME = "index.html"

    # Responses waiting to be saved. Each must respond to #body; when a
    # response also responds to #effective_url the file name is derived
    # from that URL.
    attr_accessor :magazine

    def initialize
      self.magazine = []
    end

    # Writes every collected response body to disk, one file per response,
    # so that later responses no longer overwrite earlier ones.
    #
    # Returns the magazine.
    # Raises ArgumentError when Settings[:path_to_git] is not set.
    def save_all
      magazine.each_with_index do |response, position|
        File.open(save_path(response, position), 'wb') do |f|
          f.write(response.body)
        end
      end
    end

    private

    # Full path of the file for +response+. Called without arguments it
    # returns the historical "<path_to_git>/index.html".
    def save_path(response = nil, position = 0)
      File.join(target_directory, file_name_for(response, position))
    end

    # Directory all files are written to, taken from Settings[:path_to_git].
    def target_directory
      directory = Settings[:path_to_git].to_s
      raise ArgumentError, "path_to_git is not configured" if directory.empty?
      directory
    end

    # Builds a file name from the response's effective URL: the scheme is
    # dropped and every run of characters other than letters, digits, '.',
    # '_' and '-' becomes a single '_', e.g. "http://www.wp.pl/" gives
    # "www.wp.pl.html". Distinct URLs that differ only in such characters
    # map to the same name.
    #
    # Falls back to index.html / index_<position>.html when the response
    # exposes no usable URL.
    def file_name_for(response, position)
      url = response.respond_to?(:effective_url) ? response.effective_url.to_s : ""
      name = url.sub(%r{\A[a-z][a-z0-9+.\-]*://}i, "")
      name = name.gsub(/[^A-Za-z0-9._-]+/, "_").gsub(/\A_+|_+\z/, "")
      return "#{name}.html" unless name.empty?

      position.zero? ? DEFAULT_FILE_NAME : "index_#{position}.html"
    end
  end
end
|
data/scrapbot.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "scrapbot/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
  s.name        = "scrapbot"
  s.version     = Scrapbot::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Pawel Lewinski"]
  s.email       = ["lewy313@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Scrapping Bot}
  s.description = %q{Bot for archive pages}

  # These libraries are required by the gem's own code when it runs, so
  # they are runtime dependencies. As development dependencies they would
  # not be installed by `gem install scrapbot`.
  s.add_dependency "grit"
  s.add_dependency "typhoeus", ">=0.2"
  s.add_dependency "configliere", ">=0.3"
  # Required by bin/scrapbot and lib/scrapbot/downloader.rb but previously
  # not declared at all.
  s.add_dependency "thor"
  s.add_dependency "settingslogic"

  s.rubyforge_project = "scrapbot"

  # File lists come from git, so the gem must be built inside a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Single source for the executables; the earlier hard-coded assignment
  # was overwritten by this one and has been removed.
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scrapbot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Pawel Lewinski
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-03-17 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: grit
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :development
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: typhoeus
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
- 2
|
44
|
+
version: "0.2"
|
45
|
+
type: :development
|
46
|
+
version_requirements: *id002
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: configliere
|
49
|
+
prerelease: false
|
50
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 0
|
57
|
+
- 3
|
58
|
+
version: "0.3"
|
59
|
+
type: :development
|
60
|
+
version_requirements: *id003
|
61
|
+
description: Bot for archive pages
|
62
|
+
email:
|
63
|
+
- lewy313@gmail.com
|
64
|
+
executables:
|
65
|
+
- scrapbot
|
66
|
+
extensions: []
|
67
|
+
|
68
|
+
extra_rdoc_files: []
|
69
|
+
|
70
|
+
files:
|
71
|
+
- .gitignore
|
72
|
+
- Gemfile
|
73
|
+
- README.rdoc
|
74
|
+
- Rakefile
|
75
|
+
- bin/scrapbot
|
76
|
+
- example/conf.yml
|
77
|
+
- example/urls.txt
|
78
|
+
- lib/scrapbot.rb
|
79
|
+
- lib/scrapbot/downloader.rb
|
80
|
+
- lib/scrapbot/storage.rb
|
81
|
+
- lib/scrapbot/version.rb
|
82
|
+
- scrapbot.gemspec
|
83
|
+
has_rdoc: true
|
84
|
+
homepage: ""
|
85
|
+
licenses: []
|
86
|
+
|
87
|
+
post_install_message:
|
88
|
+
rdoc_options: []
|
89
|
+
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
segments:
|
98
|
+
- 0
|
99
|
+
version: "0"
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
segments:
|
106
|
+
- 0
|
107
|
+
version: "0"
|
108
|
+
requirements: []
|
109
|
+
|
110
|
+
rubyforge_project: scrapbot
|
111
|
+
rubygems_version: 1.3.7
|
112
|
+
signing_key:
|
113
|
+
specification_version: 3
|
114
|
+
summary: Scrapping Bot
|
115
|
+
test_files: []
|
116
|
+
|