scrapey 0.0.3 → 0.0.4
- data/README.md +81 -15
- data/examples/imdb.rb +14 -0
- data/examples/multi.rb +3 -5
- data/examples/multi2.rb +25 -0
- data/examples/redis.rb +20 -0
- data/examples/status_check.rb +10 -0
- data/lib/scrapey.rb +2 -7
- data/lib/scrapey/cache.rb +9 -17
- data/lib/scrapey/cache/disk.rb +21 -0
- data/lib/scrapey/cache/redis.rb +20 -0
- data/lib/scrapey/constants.rb +3 -1
- data/lib/scrapey/database.rb +16 -1
- data/lib/scrapey/multi.rb +16 -4
- data/lib/scrapey/scrapey.rb +5 -1
- data/lib/scrapey/template.rb +5 -0
- data/scrapey.gemspec +2 -2
- data/template/Gemfile +2 -0
- data/template/Rakefile +22 -0
- data/template/icon.ico +0 -0
- data/template/src/schema.rb +16 -0
- data/template/src/template.rb +3 -1
- data/template/template.iss +12 -0
- metadata +18 -8
- data/lib/scrapey/version.rb +0 -3
data/README.md
CHANGED
@@ -1,37 +1,103 @@
 # Scrapey
 
-
+A simple framework for solving common scraping problems
 
-##
+## Install latest version
+### Add to Gemfile
 
-
+gem "scrapey", :git => 'https://github.com/monkeysuffrage/scrapey.git'
 
-
+### Then run:
+$ bundle install
 
-
+## Create a new scrapey project
 
-$
+$ scrapey my_scraper
 
-
+## Examples
 
-
+### CSV
 
-
+```ruby
+require 'scrapey'
+# By default scrapey will save as 'output.csv'
+# You can change this with:
+# @output = 'mycsv.csv'
 
-
+page = get 'http://www.alexa.com/topsites'
+page.search('li.site-listing').each do |li|
+  save [li.at('a').text, li.at('.description').text, li.at('.stars')[:title]]
+end
+```
+
+### Database
+```ruby
+require 'scrapey'
+# if you created a scrapey project you can fill out the database connection
+# information in config/config.yml
+
+tables 'Movie', 'Actor' # create ActiveRecord models
+
+page = get 'http://www.imdb.com/movies-in-theaters/'
+
+page.search('div.list_item').each do |div|
+  movie = Movie.find_or_create_by_title div.at('h4 a').text
+  div.search('span[@itemprop="actors"] a').each do |a|
+    actor = Actor.find_or_create_by_name a.text
+  end
+end
+```
+
+### Caching
+Scrapey can cache responses so that next time they don't hit the network
+```ruby
+use_cache
+```
 
+You can use redis for caching if you have lots of memory
+```ruby
+require 'redis'
+use_cache :redis => Redis.new
+```
+
+### Retries
+Retry downloads on error a max of 3 times and sleep 30 seconds between retries.
+```ruby
+get 'some_url', :retries => 3, :sleep => 30
+```
+Or just handle errors in an on_error method (Scrapey will call it automatically if it's defined)
+```ruby
+def on_error e, method, url, options, *args
+  puts "retrying #{url} again in 30 seconds..."
+  sleep 30
+  send method, url, options, *args
+end
+```
+
+### Proxy switching
+
+```ruby
+def on_error e, method, url, options, *args
+  host, port = @config['proxies'].sample.split(':')
+  set_proxy host, port.to_i
+  send method, url, options, *args
+end
+
+get 'some_throttled_website_url'
+```
+
+### Concurrent downloads
+Scrapey will ensure that the callbacks are threadsafe
 ```ruby
 require 'scrapey'
 require 'scrapey/multi'
 
 fields 'url', 'title'
 
-def scrape url, response
+def scrape url, response, header
   doc = Nokogiri::HTML response
-
+  save({'url' => url, 'title' => doc.at('title').text})
 end
 
-
-multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], 3, :scrape
-@items.each{|item| save item}
+multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], :threads => 3, :callback => :scrape
 ```
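Note for upgrading scripts: the `multi_get` signature is the breaking change in this release. A before/after sketch assembled from the diff above (the URL list is a placeholder):

```ruby
require 'scrapey'
require 'scrapey/multi'

fields 'url', 'title'
urls = ['http://www.yahoo.com/', 'http://www.bing.com/']  # placeholder list

# 0.0.3: positional thread count and callback, rows collected into @items:
#   multi_get urls, 3, :scrape
#   @items.each{|item| save item}

# 0.0.4: an options hash, and the callback saves its own rows (the gem
# serializes callback invocations, so save is safe to call here)
def scrape url, response, header
  doc = Nokogiri::HTML response
  save({'url' => url, 'title' => doc.at('title').text})
end

multi_get urls, :threads => 3, :callback => :scrape
```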
data/examples/imdb.rb
ADDED
@@ -0,0 +1,14 @@
+require 'scrapey'
+# if you created a scrapey project you can fill out the database connection
+# information in config/config.yml
+
+tables 'Movie', 'Actor' # create ActiveRecord models
+
+page = get 'http://www.imdb.com/movies-in-theaters/'
+
+page.search('div.list_item').each do |div|
+  movie = Movie.find_or_create_by_title div.at('h4 a').text
+  div.search('span[@itemprop="actors"] a').each do |a|
+    actor = Actor.find_or_create_by_name a.text
+  end
+end
data/examples/multi.rb
CHANGED
@@ -3,11 +3,9 @@ require 'scrapey/multi'
 
 fields 'url', 'title'
 
-def scrape url, response
+def scrape url, response, header
   doc = Nokogiri::HTML response
-
+  save({'url' => url, 'title' => doc.at('title').text})
 end
 
-
-multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], 3, :scrape
-@items.each{|item| save item}
+multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], :threads => 3, :callback => :scrape
data/examples/multi2.rb
ADDED
@@ -0,0 +1,25 @@
+require 'scrapey'
+require 'scrapey/multi'
+
+fields 'url', 'title'
+
+def scrape url, response, header
+  doc = Nokogiri::HTML response
+  save({'url' => url, 'title' => doc.at('title').text})
+  puts "scraped #{url}."
+end
+
+options = {
+  :threads => 3,
+  :callback => :scrape,
+  :proxy => {:host => 'localhost', :port => 8888},
+  :head => {
+    "Accept" => "*/*",
+    #"User-Agent" => "Scrapey #{Scrapey::VERSION}",
+    "Keep-alive" => "true"
+  }
+}
+
+multi_get ['http://www.yahoo.com/', 'http://www.google.com/', 'http://www.bing.com/'], options
+
+puts "this happens after all callbacks."
data/examples/redis.rb
ADDED
@@ -0,0 +1,20 @@
+require 'scrapey'
+require 'redis'
+require 'pry'
+
+@debug = true
+
+
+
+
+
+
+
+use_cache :redis => Redis.new
+
+url = 'http://www.yahoo.com/'
+google = get url
+puts google.at('title').text, (x = google.encoding rescue 'foo'), (y = google.body.encoding rescue 'foo'), '--'
+
+google = get url
+puts google.at('title').text, (x = google.encoding rescue 'foo'), (y = google.body.encoding rescue 'foo'), '--'
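Since the redis backend keys cache entries by the raw URL (see lib/scrapey/cache/redis.rb below), the cache hit on the second `get` can be checked against redis directly. A quick sketch, assuming the same local redis-server:

```ruby
require 'redis'

r = Redis.new
puts r.exists('http://www.yahoo.com/')    # truthy once the first get has run
puts r.get('http://www.yahoo.com/').size  # size of the marshaled markup
```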
data/examples/status_check.rb
ADDED
@@ -0,0 +1,10 @@
+require 'scrapey'
+require 'scrapey/multi'
+
+fields 'url', 'status'
+
+def scrape url, response, header
+  save({'url' => url, 'status' => header.status})
+end
+
+multi_head ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/', 'http://www.bing.com/404.html'], :threads => 4, :callback => :scrape
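The new third callback argument is em-http-request's response header object, which is how this example reads `header.status`. A variant callback that records only failures (a sketch, not part of the gem):

```ruby
# only record responses whose status code isn't 200
def scrape url, response, header
  save({'url' => url, 'status' => header.status}) unless header.status == 200
end
```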
data/lib/scrapey.rb
CHANGED
@@ -4,7 +4,6 @@ require 'json'
 require 'yaml'
 
 require "scrapey/scrapey"
-require "scrapey/version"
 require "scrapey/constants"
 require "scrapey/cache"
 require "scrapey/database"
@@ -13,7 +12,7 @@ include Scrapey
 
 # some defaults that I like
 @agent ||= Mechanize.new{|a| a.history.max_size = 10}
-@agent.user_agent =
+@agent.user_agent = "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"
 
 # default output file
 @output = 'output.csv'
@@ -22,8 +21,4 @@ include Scrapey
 config_file = "#{BASEDIR}/config/config.yml"
 @config = File.exists?(config_file) ? YAML::load(File.open(config_file)) : {}
 
-if @config['database']
-  ['active_record', @config['database']['adapter'], 'tzinfo', 'active_support/all'].each{|lib| require lib}
-  ActiveRecord::Base.establish_connection(@config['database'])
-end
-
+init_db if @config['database']
data/lib/scrapey/cache.rb
CHANGED
@@ -1,22 +1,14 @@
 module Scrapey
-  def use_cache
-    @use_cache = true
-    @config['cache_dir'] ||= "#{BASEDIR}/cache"
-    FileUtils.mkdir_p @config['cache_dir']
-  end
-
-  def cache_filename url
-    @config['cache_dir'] + "/" + Digest::MD5.hexdigest(url) + ".cache"
-  end
 
-  def
-
-
-
-
+  def use_cache options = {}
+    @use_cache = true
+    if @redis = options.delete(:redis)
+      require 'scrapey/cache/redis'
+    else
+      require 'scrapey/cache/disk'
+      @config['cache_dir'] ||= "#{BASEDIR}/cache"
+      FileUtils.mkdir_p @config['cache_dir']
+    end
   end
 
-  def save_cache url,doc
-    File.open(cache_filename(url), 'wb') {|f| f.write(doc) }
-  end
 end
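`use_cache` now chooses a backend at call time: a `:redis` option loads `scrapey/cache/redis` and keeps the client in `@redis`, anything else loads the disk backend and creates the cache directory. Both backends define the same `is_cached?` / `load_cache` / `save_cache` trio, so calling code only differs in the one line:

```ruby
require 'scrapey'

use_cache                        # disk: #{BASEDIR}/cache/<md5-of-url>.cache files
# ...or, with the redis gem installed and a server running:
# require 'redis'
# use_cache :redis => Redis.new  # redis: keys are the raw URLs
```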
data/lib/scrapey/cache/disk.rb
ADDED
@@ -0,0 +1,21 @@
+module Scrapey
+
+  def cache_filename url
+    @config['cache_dir'] + "/" + Digest::MD5.hexdigest(url) + ".cache"
+  end
+
+  def is_cached? url
+    File.exists? cache_filename(url)
+  end
+
+  def load_cache url
+    filename = cache_filename url
+    return nil unless File::exists?(filename)
+    debug "Loading #{filename} from cache"
+    Nokogiri::HTML Marshal.load(File.read(filename))
+  end
+
+  def save_cache url, doc, options = {}
+    File.open(cache_filename(url), "w") {|f| f << Marshal.dump(doc) }
+  end
+end
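A roundtrip sketch of the disk backend: `save_cache` writes a `Marshal` dump keyed by the URL's MD5 and `load_cache` re-parses it with Nokogiri (assumes `use_cache` was called first so `@config['cache_dir']` exists):

```ruby
url = 'http://example.com/'
save_cache url, '<html><title>hi</title></html>'
puts is_cached?(url)       # => true
doc = load_cache url       # => Nokogiri::HTML::Document
puts doc.at('title').text  # => "hi"
```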
data/lib/scrapey/cache/redis.rb
ADDED
@@ -0,0 +1,20 @@
+require 'redis'
+
+module Scrapey
+
+  def is_cached? url
+    !!@redis.get(url)
+  end
+
+  def load_cache url
+    debug "Loading #{url} from cache"
+    return nil unless str = @redis.get(url)
+    debug "found it"
+    #binding.pry
+    Nokogiri::HTML Marshal.load(str)
+  end
+
+  def save_cache url, body, options = {}
+    @redis.set url, Marshal.dump(body)
+  end
+end
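The redis backend `set`s raw URLs with no TTL or key namespace, so a long crawl grows the redis dataset until it is cleared by hand; a cleanup sketch (note `flushdb` wipes every key in the current redis database, not just scrapey's):

```ruby
require 'redis'

Redis.new.flushdb  # drops every key in the current database
```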
data/lib/scrapey/constants.rb
CHANGED
@@ -1,4 +1,6 @@
 module Scrapey
+  VERSION = "0.0.4"
   BASEDIR = File.expand_path(File.dirname($0)).gsub(/\/src$/,'')
-
+  URL = "https://github.com/monkeysuffrage/scrapey"
+  #ENV['SSL_FILE'] = "#{Gem.dir}/gems/scrapey-#{Scrapey::VERSION}/ssl/cacert.pem"
 end
data/lib/scrapey/database.rb
CHANGED
@@ -1,13 +1,28 @@
 module Scrapey
+  def check_db_config
+    raise 'No database configured' unless @config['database']
+  end
+
   def tables *args
+    check_db_config
+    missing_tables = false
     args.each do |arg|
-      Object.const_set(arg, Class.new(ActiveRecord::Base) {})
+      model = Object.const_set(arg, Class.new(ActiveRecord::Base) {})
+      missing_tables = true unless model.table_exists?
     end
+    schema = "#{BASEDIR}/src/schema.rb"
+    require schema if missing_tables && File.exists?(schema)
   end
 
   def truncate *args
+    check_db_config
     args.each do |arg|
       ActiveRecord::Base.connection.execute("TRUNCATE TABLE #{Object.const_get(arg).table_name}")
     end
   end
+
+  def init_db
+    ['active_record', @config['database']['adapter'], 'tzinfo', 'active_support/all', 'active_support/multibyte/chars'].each{|lib| require lib}
+    ActiveRecord::Base.establish_connection(@config['database'])
+  end
 end
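`init_db` pulls the connection settings from `@config['database']`, i.e. the `database:` section of `config/config.yml`, and passes them straight to `ActiveRecord::Base.establish_connection`. The exact keys are whatever the adapter expects; a guess at a typical MySQL setup, written as the hash `YAML::load` would produce (key names follow ActiveRecord convention, not scrapey documentation):

```ruby
# stand-in for a config/config.yml along the lines of:
#   database:
#     adapter: mysql2
#     host: localhost
#     database: scrapey_dev
#     username: root
#     password: ''
@config = {'database' => {
  'adapter'  => 'mysql2',  # the adapter gem must be installed
  'host'     => 'localhost',
  'database' => 'scrapey_dev',
  'username' => 'root',
  'password' => ''
}}
init_db
```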
data/lib/scrapey/multi.rb
CHANGED
@@ -1,18 +1,25 @@
 require 'em-http-request'
 
 module Scrapey
-  def
-
+  def multi_get_or_post method, all_urls, options = {}
+    request_options = {:redirects => 10, :head => {"User-Agent" => "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"}.merge(options.delete(:head))}
+    threads = options[:threads] || 20
+    callback = options[:callback] || :save_cache
+    all_urls.reject!{|url| is_cached? url} if @use_cache
+    @lock = Mutex.new
+    all_urls.each_slice(threads) do |urls|
       next unless urls.size > 0
       EventMachine.run do
         multi = EventMachine::MultiRequest.new
         urls.each_with_index do |url, i|
-          multi.add i, EventMachine::HttpRequest.new(url).
+          multi.add i, EventMachine::HttpRequest.new(url, options).send(method, request_options)
         end
         multi.callback do
           (0...multi.requests.length).each do |i|
             if multi.responses[:callback][i]
-
+              @lock.synchronize do
+                send callback, urls[i], multi.responses[:callback][i].response, multi.responses[:callback][i].response_header
+              end
             else
               puts "problem downloading #{urls[i]}!"
             end
@@ -22,4 +29,9 @@ module Scrapey
       end
     end
   end
+
+  def multi_get *args; multi_get_or_post 'get', *args; end
+  def multi_post *args; multi_get_or_post 'post', *args; end
+  def multi_head *args; multi_get_or_post 'head', *args; end
+
 end
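All options are now optional: batches default to 20 URLs (`:threads`) and the callback defaults to `:save_cache`, so with caching enabled a bare `multi_get` pre-warms the cache. A sketch; it passes `:head` explicitly because `request_options` merges `options.delete(:head)` as-is:

```ruby
require 'scrapey'
require 'scrapey/multi'

use_cache  # default :callback is :save_cache, so responses land in the cache

# already-cached URLs are rejected up front; the rest are fetched 20 at a
# time and written to the cache as each response arrives
multi_get ['http://www.yahoo.com/', 'http://www.bing.com/'],
          :head => {'Accept' => '*/*'}
```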
data/lib/scrapey/scrapey.rb
CHANGED
@@ -15,7 +15,7 @@ module Scrapey
     return doc if doc
 
     page = agent.send *new_args
-    save_cache(url, page.
+    save_cache(url, page.root.to_s) if @use_cache
 
     #exit if Object.const_defined? :Ocra
     page
@@ -64,6 +64,10 @@ module Scrapey
     false
   end
 
+  def debug msg
+    puts msg if @debug
+  end
+
   def ts
     Time.now.to_i.to_s
   end
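The new `debug` helper prints only when `@debug` is truthy, and nothing in the library sets it; scripts opt in themselves, as examples/redis.rb above does:

```ruby
@debug = true       # opt in; the library leaves @debug unset
debug 'cache miss'  # printed only while @debug is truthy
```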
data/lib/scrapey/template.rb
CHANGED
@@ -7,9 +7,14 @@ module Scrapey
     template = File.expand_path('../../../template', __FILE__)
     FileUtils.cp_r template, name
     Dir.chdir name
+
     Dir.glob(['*/*.*', '*.*']).grep(/template/).each do |fn|
       FileUtils.mv fn, fn.gsub('template', name)
     end
+    buf = File.read "#{name}.iss"
+    buf.gsub! /Template/, "rightmove_rentals".tr('_', ' ').gsub(/\w+/){|x| x.capitalize}
+    buf.gsub! /template/, name
+    File.open("#{name}.iss", 'w'){|f| f << buf}
 
   end
 end
data/scrapey.gemspec
CHANGED
@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
-require File.expand_path('../lib/scrapey/
+require File.expand_path('../lib/scrapey/constants', __FILE__)
 
 Gem::Specification.new do |gem|
   gem.authors = ["P Guardiario"]
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
   gem.name = "scrapey"
   gem.require_paths = ["lib"]
   gem.version = Scrapey::VERSION
-  gem.add_dependency(%q<mechanize
+  gem.add_dependency(%q<mechanize>)
   gem.add_dependency(%q<json>, ["~> 1.7.0"])
 end
 
data/template/Gemfile
ADDED
data/template/Rakefile
ADDED
@@ -0,0 +1,22 @@
+#!/usr/bin/env rake
+#Rake.application.options.trace = true
+require 'fileutils'
+
+def name
+  @name ||= Dir.pwd[/[^\/]+$/]
+end
+
+desc "Build project with ocra"
+task 'build' do
+  system "ocra --icon icon.ico src/#{name}.rb --no-lzma --chdir-first --no-autoload --innosetup #{name}.iss"
+end
+
+desc "Copy installer to dropbox folder"
+task 'dropbox' do
+  raise 'no dropbox folder!' unless ENV['DROPBOX']
+  folder = [ENV['DROPBOX'], name].join('/').squeeze('/')
+  FileUtils.mkdir(folder) unless File.exists?(folder)
+  FileUtils.cp "Output/setup.exe", folder
+  url = [ENV['DROPBOX_public_url'], name, 'setup.exe'].join('/').squeeze('/')
+  puts "uploaded to #{url}"
+end
data/template/icon.ico
ADDED
Binary file
data/template/src/schema.rb
ADDED
@@ -0,0 +1,16 @@
+=begin
+# put table schemas here. this will be included if the table is not found.
+ActiveRecord::Schema.define do
+  create_table "items" do |t|
+    t.string "string_field"
+    t.text "text_field"
+    t.integer "number_field"
+    t.boolean "boolean_field"
+    t.float "float_field"
+    t.date "created_at"
+    t.datetime "created_on"
+  end
+
+  add_index "items", ["number_field"], :name => "number_field_idx", :unique => true
+end
+=end
data/template/src/template.rb
CHANGED
@@ -1,4 +1,6 @@
 require 'scrapey'
+# require 'scrapey/multi' #=> requires em-http-request
 
-# customizations...
+# sample customizations...
+# @agent.user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.56 Safari/536.5'
 # @output = Time.now.strftime("#{BASEDIR}/Output/output_%Y_%m_%d_%H_%M_%S.csv")
data/template/template.iss
ADDED
@@ -0,0 +1,12 @@
+[Setup]
+AppName=Template Scraper
+AppVersion=1.0
+DefaultDirName={localappdata}\Template Scraper
+DefaultGroupName=Template Scraper
+
+[Files]
+Source: "config\*"; DestDir: "{app}\config";
+Source: "src\*"; DestDir: "{app}\src";
+
+[Icons]
+Name: "{group}\Template Scraper"; Filename: "{app}\template.exe"
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrapey
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 prerelease:
 platform: ruby
 authors:
@@ -9,24 +9,24 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-
+date: 2012-08-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
   requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - -
+    - - ! '>='
       - !ruby/object:Gem::Version
-        version:
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     none: false
    requirements:
-    - -
+    - - ! '>='
      - !ruby/object:Gem::Version
-        version:
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: json
   requirement: !ruby/object:Gem::Requirement
@@ -56,18 +56,28 @@ files:
 - README.md
 - Rakefile
 - bin/scrapey
+- examples/imdb.rb
 - examples/multi.rb
+- examples/multi2.rb
+- examples/redis.rb
+- examples/status_check.rb
+- lib/scrapey/cache/disk.rb
+- lib/scrapey/cache/redis.rb
 - lib/scrapey/cache.rb
 - lib/scrapey/constants.rb
 - lib/scrapey/database.rb
 - lib/scrapey/multi.rb
 - lib/scrapey/scrapey.rb
 - lib/scrapey/template.rb
-- lib/scrapey/version.rb
 - lib/scrapey.rb
 - scrapey.gemspec
 - template/config/config.yml
+- template/Gemfile
+- template/icon.ico
+- template/Rakefile
+- template/src/schema.rb
 - template/src/template.rb
+- template/template.iss
 homepage: ''
 licenses: []
 post_install_message:
@@ -88,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: A simple scraping framework
data/lib/scrapey/version.rb
DELETED