scrapey 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +81 -15
- data/examples/imdb.rb +14 -0
- data/examples/multi.rb +3 -5
- data/examples/multi2.rb +25 -0
- data/examples/redis.rb +20 -0
- data/examples/status_check.rb +10 -0
- data/lib/scrapey.rb +2 -7
- data/lib/scrapey/cache.rb +9 -17
- data/lib/scrapey/cache/disk.rb +21 -0
- data/lib/scrapey/cache/redis.rb +20 -0
- data/lib/scrapey/constants.rb +3 -1
- data/lib/scrapey/database.rb +16 -1
- data/lib/scrapey/multi.rb +16 -4
- data/lib/scrapey/scrapey.rb +5 -1
- data/lib/scrapey/template.rb +5 -0
- data/scrapey.gemspec +2 -2
- data/template/Gemfile +2 -0
- data/template/Rakefile +22 -0
- data/template/icon.ico +0 -0
- data/template/src/schema.rb +16 -0
- data/template/src/template.rb +3 -1
- data/template/template.iss +12 -0
- metadata +18 -8
- data/lib/scrapey/version.rb +0 -3
data/README.md
CHANGED
@@ -1,37 +1,103 @@
  # Scrapey
  
- 
+ A simple framework for solving common scraping problems
  
- ##
+ ## Install latest version
+ ### Add to Gemfile
  
- 
+ gem "scrapey", :git => 'https://github.com/monkeysuffrage/scrapey.git'
  
- 
+ ### Then run:
+ $ bundle install
  
- 
+ ## Create a new scrapey project
  
- $
+ $ scrapey my_scraper
  
- 
+ ## Examples
  
- 
+ ### CSV
  
- 
+ ```ruby
+ require 'scrapey'
+ # By default scrapey will save as 'output.csv'
+ # You can change this with:
+ # @output = 'mycsv.csv'
  
- 
+ page = get 'http://www.alexa.com/topsites'
+ page.search('li.site-listing').each do |li|
+   save [li.at('a').text, li.at('.description').text, li.at('.stars')[:title]]
+ end
+ ```
+ 
+ ### Database
+ ```ruby
+ require 'scrapey'
+ # if you created a scrapey project you can fill out the database connection
+ # information in config/config.yml
+ 
+ tables 'Movie', 'Actor' # create ActiveRecord models
+ 
+ page = get 'http://www.imdb.com/movies-in-theaters/'
+ 
+ page.search('div.list_item').each do |div|
+   movie = Movie.find_or_create_by_title div.at('h4 a').text
+   div.search('span[@itemprop="actors"] a').each do |a|
+     actor = Actor.find_or_create_by_name a.text
+   end
+ end
+ ```
+ 
+ ### Caching
+ Scrapey can cache responses so that next time they don't hit the network
+ ```ruby
+ use_cache
+ ```
  
+ You can use redis for caching if you have lots of memory
+ ```ruby
+ require 'redis'
+ use_cache :redis => Redis.new
+ ```
+ 
+ ### Retries
+ Retry downloads on error a max of 3 times and sleep 30 seconds between retries.
+ ```ruby
+ get 'some_url', :retries => 3, :sleep => 30
+ ```
+ Or just handle errors in an on_error method (Scrapey will call it automatically if it's defined)
+ ```ruby
+ def on_error e, method, url, options, *args
+   puts "retrying #{url} again in 30 seconds..."
+   sleep 30
+   send method, url, options, *args
+ end
+ ```
+ 
+ ### Proxy switching
+ 
+ ```ruby
+ def on_error e, method, url, options, *args
+   host, port = @config['proxies'].sample.split(':')
+   set_proxy host, port.to_i
+   send method, url, options, *args
+ end
+ 
+ get 'some_throttled_website_url'
+ ```
+ 
+ ### Concurrent downloads
+ Scrapey will ensure that the callbacks are threadsafe
  ```ruby
  require 'scrapey'
  require 'scrapey/multi'
  
  fields 'url', 'title'
  
- def scrape url, response
+ def scrape url, response, header
    doc = Nokogiri::HTML response
- 
+   save({'url' => url, 'title' => doc.at('title').text})
  end
  
- 
- multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], 3, :scrape
- @items.each{|item| save item}
+ multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], :threads => 3, :callback => :scrape
  ```
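Note: the concurrent-download section above documents an API change in 0.0.4: multi_get now takes an options hash instead of positional thread-count and callback arguments, and rows are saved from inside the callback rather than collected in @items. A minimal migration sketch (the URLs are placeholders, not from the diff):

```ruby
# 0.0.3 style -- positional arguments, results collected in @items:
#   multi_get urls, 3, :scrape
#   @items.each{|item| save item}

# 0.0.4 style -- keyword options; call save inside the callback:
multi_get ['http://example.com/a', 'http://example.com/b'], :threads => 3, :callback => :scrape
```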
data/examples/imdb.rb
ADDED
@@ -0,0 +1,14 @@
+ require 'scrapey'
+ # if you created a scrapey project you can fill out the database connection
+ # information in config/config.yml
+ 
+ tables 'Movie', 'Actor' # create ActiveRecord models
+ 
+ page = get 'http://www.imdb.com/movies-in-theaters/'
+ 
+ page.search('div.list_item').each do |div|
+   movie = Movie.find_or_create_by_title div.at('h4 a').text
+   div.search('span[@itemprop="actors"] a').each do |a|
+     actor = Actor.find_or_create_by_name a.text
+   end
+ end
data/examples/multi.rb
CHANGED
@@ -3,11 +3,9 @@ require 'scrapey/multi'
  
  fields 'url', 'title'
  
- def scrape url, response
+ def scrape url, response, header
    doc = Nokogiri::HTML response
- 
+   save({'url' => url, 'title' => doc.at('title').text})
  end
  
- 
- multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], 3, :scrape
- @items.each{|item| save item}
+ multi_get ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/'], :threads => 3, :callback => :scrape
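The multi examples also pick up the new three-argument callback signature: the third argument is em-http-request's response header object, which examples/status_check.rb below uses to read HTTP status codes. A hedged sketch of a callback that uses all three arguments (only the `status` accessor is shown by this diff; nothing else about the header object is assumed):

```ruby
def scrape url, response, header
  puts "#{url} responded with status #{header.status}"  # header comes from em-http-request
  doc = Nokogiri::HTML response
  save({'url' => url, 'title' => doc.at('title').text})
end
```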
data/examples/multi2.rb
ADDED
@@ -0,0 +1,25 @@
+ require 'scrapey'
+ require 'scrapey/multi'
+ 
+ fields 'url', 'title'
+ 
+ def scrape url, response, header
+   doc = Nokogiri::HTML response
+   save({'url' => url, 'title' => doc.at('title').text})
+   puts "scraped #{url}."
+ end
+ 
+ options = {
+   :threads => 3,
+   :callback => :scrape,
+   :proxy => {:host => 'localhost', :port => 8888},
+   :head => {
+     "Accept" => "*/*",
+     #"User-Agent" => "Scrapey #{Scrapey::VERSION}",
+     "Keep-alive" => "true"
+   }
+ }
+ 
+ multi_get ['http://www.yahoo.com/', 'http://www.google.com/', 'http://www.bing.com/'], options
+ 
+ puts "this happens after all callbacks."
data/examples/redis.rb
ADDED
@@ -0,0 +1,20 @@
+ require 'scrapey'
+ require 'redis'
+ require 'pry'
+ 
+ @debug = true
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ use_cache :redis => Redis.new
+ 
+ url = 'http://www.yahoo.com/'
+ google = get url
+ puts google.at('title').text, (x = google.encoding rescue 'foo'), (y = google.body.encoding rescue 'foo'), '--'
+ 
+ google = get url
+ puts google.at('title').text, (x = google.encoding rescue 'foo'), (y = google.body.encoding rescue 'foo'), '--'
data/examples/status_check.rb
ADDED
@@ -0,0 +1,10 @@
+ require 'scrapey'
+ require 'scrapey/multi'
+ 
+ fields 'url', 'status'
+ 
+ def scrape url, response, header
+   save({'url' => url, 'status' => header.status})
+ end
+ 
+ multi_head ['http://www.yahoo.com/', 'http://www.google.com.', 'http://www.bing.com/', 'http://www.bing.com/404.html'], :threads => 4, :callback => :scrape
data/lib/scrapey.rb
CHANGED
@@ -4,7 +4,6 @@ require 'json'
  require 'yaml'
  
  require "scrapey/scrapey"
- require "scrapey/version"
  require "scrapey/constants"
  require "scrapey/cache"
  require "scrapey/database"
@@ -13,7 +12,7 @@ include Scrapey
  
  # some defaults that I like
  @agent ||= Mechanize.new{|a| a.history.max_size = 10}
- @agent.user_agent =
+ @agent.user_agent = "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"
  
  # default output file
  @output = 'output.csv'
@@ -22,8 +21,4 @@ include Scrapey
  config_file = "#{BASEDIR}/config/config.yml"
  @config = File.exists?(config_file) ? YAML::load(File.open(config_file)) : {}
  
- if @config['database']
-   ['active_record', @config['database']['adapter'], 'tzinfo', 'active_support/all'].each{|lib| require lib}
-   ActiveRecord::Base.establish_connection(@config['database'])
- end
- 
+ init_db if @config['database']
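The database bootstrap that used to run inline now lives in init_db and is driven by the `database` key of config/config.yml. A sketch of the hash init_db expects; only the 'adapter' key is read directly by the gem, the remaining keys are ordinary ActiveRecord connection options and the values here are assumptions:

```ruby
@config['database'] = {
  'adapter'  => 'mysql2',      # init_db requires this library by name
  'host'     => 'localhost',   # assumed: standard ActiveRecord options
  'database' => 'my_scraper',
  'username' => 'root'
}
init_db  # requires active_record, the adapter, tzinfo, active_support, then connects
```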
data/lib/scrapey/cache.rb
CHANGED
@@ -1,22 +1,14 @@
  module Scrapey
- def use_cache
-   @use_cache = true
-   @config['cache_dir'] ||= "#{BASEDIR}/cache"
-   FileUtils.mkdir_p @config['cache_dir']
- end
- 
- def cache_filename url
-   @config['cache_dir'] + "/" + Digest::MD5.hexdigest(url) + ".cache"
- end
  
- def
- 
- 
- 
- 
+ def use_cache options = {}
+   @use_cache = true
+   if @redis = options.delete(:redis)
+     require 'scrapey/cache/redis'
+   else
+     require 'scrapey/cache/disk'
+     @config['cache_dir'] ||= "#{BASEDIR}/cache"
+     FileUtils.mkdir_p @config['cache_dir']
+   end
  end
  
- def save_cache url,doc
-   File.open(cache_filename(url), 'wb') {|f| f.write(doc) }
- end
  end
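use_cache is now just a dispatcher: it flips @use_cache and requires one of two backend files, each of which defines the same three methods (is_cached?, load_cache, save_cache). A hypothetical in-memory backend, not part of the gem, shown only to illustrate the contract the disk and redis backends implement:

```ruby
module Scrapey
  # store Marshal dumps in a plain hash, mirroring the redis backend's shape
  def is_cached? url
    (@memory_cache ||= {}).key? url
  end

  def load_cache url
    return nil unless str = (@memory_cache ||= {})[url]
    Nokogiri::HTML Marshal.load(str)
  end

  def save_cache url, doc, options = {}
    (@memory_cache ||= {})[url] = Marshal.dump(doc)
  end
end
```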
data/lib/scrapey/cache/disk.rb
ADDED
@@ -0,0 +1,21 @@
+ module Scrapey
+ 
+   def cache_filename url
+     @config['cache_dir'] + "/" + Digest::MD5.hexdigest(url) + ".cache"
+   end
+ 
+   def is_cached? url
+     File.exists? cache_filename(url)
+   end
+ 
+   def load_cache url
+     filename = cache_filename url
+     return nil unless File::exists?(filename)
+     debug "Loading #{filename} from cache"
+     Nokogiri::HTML Marshal.load(File.read(filename))
+   end
+ 
+   def save_cache url, doc, options = {}
+     File.open(cache_filename(url), "w") {|f| f << Marshal.dump(doc) }
+   end
+ end
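The disk backend writes a Marshal dump of the page's HTML to "#{cache_dir}/<md5-of-url>.cache" and re-parses it with Nokogiri on load. A minimal round-trip sketch, assuming use_cache has already created the cache directory; the URL and markup are placeholders:

```ruby
use_cache                                          # disk backend
url = 'http://example.com/'                        # placeholder
save_cache url, '<html><title>hi</title></html>'   # Marshal dump hits the disk
page = load_cache url                              # back as a Nokogiri document
puts page.at('title').text                         #=> "hi"
```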
data/lib/scrapey/cache/redis.rb
ADDED
@@ -0,0 +1,20 @@
+ require 'redis'
+ 
+ module Scrapey
+ 
+   def is_cached? url
+     !!@redis.get(url)
+   end
+ 
+   def load_cache url
+     debug "Loading #{url} from cache"
+     return nil unless str = @redis.get(url)
+     debug "found it"
+     #binding.pry
+     Nokogiri::HTML Marshal.load(str)
+   end
+ 
+   def save_cache url, body, options = {}
+     @redis.set url, Marshal.dump(body)
+   end
+ end
data/lib/scrapey/constants.rb
CHANGED
@@ -1,4 +1,6 @@
  module Scrapey
+   VERSION = "0.0.4"
    BASEDIR = File.expand_path(File.dirname($0)).gsub(/\/src$/,'')
- 
+   URL = "https://github.com/monkeysuffrage/scrapey"
+   #ENV['SSL_FILE'] = "#{Gem.dir}/gems/scrapey-#{Scrapey::VERSION}/ssl/cacert.pem"
  end
data/lib/scrapey/database.rb
CHANGED
@@ -1,13 +1,28 @@
  module Scrapey
+   def check_db_config
+     raise 'No database configured' unless @config['database']
+   end
+ 
    def tables *args
+     check_db_config
+     missing_tables = false
      args.each do |arg|
-       Object.const_set(arg, Class.new(ActiveRecord::Base) {})
+       model = Object.const_set(arg, Class.new(ActiveRecord::Base) {})
+       missing_tables = true unless model.table_exists?
      end
+     schema = "#{BASEDIR}/src/schema.rb"
+     require schema if missing_tables && File.exists?(schema)
    end
  
    def truncate *args
+     check_db_config
      args.each do |arg|
        ActiveRecord::Base.connection.execute("TRUNCATE TABLE #{Object.const_get(arg).table_name}")
      end
    end
+ 
+   def init_db
+     ['active_record', @config['database']['adapter'], 'tzinfo', 'active_support/all', 'active_support/multibyte/chars'].each{|lib| require lib}
+     ActiveRecord::Base.establish_connection(@config['database'])
+   end
  end
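tables now probes each generated model with table_exists? and requires "#{BASEDIR}/src/schema.rb" when anything is missing, which is what the new template/src/schema.rb stub further down is for. A sketch of the interplay; the table and column names are illustrative, not prescribed by the gem:

```ruby
# src/schema.rb -- only loaded when a declared model's table is missing
ActiveRecord::Schema.define do
  create_table "movies" do |t|
    t.string "title"
  end
end

# elsewhere: the first run finds no movies table, so src/schema.rb runs and creates it
tables 'Movie'
```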
data/lib/scrapey/multi.rb
CHANGED
@@ -1,18 +1,25 @@
  require 'em-http-request'
  
  module Scrapey
-   def
- 
+   def multi_get_or_post method, all_urls, options = {}
+     request_options = {:redirects => 10, :head => {"User-Agent" => "Scrapey v#{Scrapey::VERSION} - #{Scrapey::URL}"}.merge(options.delete(:head))}
+     threads = options[:threads] || 20
+     callback = options[:callback] || :save_cache
+     all_urls.reject!{|url| is_cached? url} if @use_cache
+     @lock = Mutex.new
+     all_urls.each_slice(threads) do |urls|
        next unless urls.size > 0
        EventMachine.run do
          multi = EventMachine::MultiRequest.new
          urls.each_with_index do |url, i|
-           multi.add i, EventMachine::HttpRequest.new(url).
+           multi.add i, EventMachine::HttpRequest.new(url, options).send(method, request_options)
          end
          multi.callback do
            (0...multi.requests.length).each do |i|
              if multi.responses[:callback][i]
- 
+               @lock.synchronize do
+                 send callback, urls[i], multi.responses[:callback][i].response, multi.responses[:callback][i].response_header
+               end
              else
                puts "problem downloading #{urls[i]}!"
              end
@@ -22,4 +29,9 @@ module Scrapey
        end
      end
    end
+ 
+   def multi_get *args; multi_get_or_post 'get', *args; end
+   def multi_post *args; multi_get_or_post 'post', *args; end
+   def multi_head *args; multi_get_or_post 'head', *args; end
+ 
  end
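Because the callback defaults to :save_cache and already-cached URLs are rejected up front, multi_get with caching enabled and no explicit callback acts as a cache pre-warmer. A hedged sketch (URLs are placeholders; :head is passed explicitly because request_options merges the caller's :head hash):

```ruby
require 'scrapey'
require 'scrapey/multi'

use_cache
# no :callback, so responses fall through to save_cache; cached URLs are skipped
multi_get ['http://example.com/a', 'http://example.com/b'],
          :threads => 2, :head => {"Accept" => "*/*"}

page = get 'http://example.com/a'  # now served from the cache
```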
data/lib/scrapey/scrapey.rb
CHANGED
@@ -15,7 +15,7 @@ module Scrapey
    return doc if doc
  
    page = agent.send *new_args
-   save_cache(url, page.
+   save_cache(url, page.root.to_s) if @use_cache
  
    #exit if Object.const_defined? :Ocra
    page
@@ -64,6 +64,10 @@ module Scrapey
    false
  end
  
+ def debug msg
+   puts msg if @debug
+ end
+ 
  def ts
    Time.now.to_i.to_s
  end
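The new debug helper is the logging hook the cache backends call; it prints only when @debug is truthy, as set in examples/redis.rb:

```ruby
@debug = true
debug "loading page 1"   # printed
@debug = false
debug "loading page 2"   # silent
```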
data/lib/scrapey/template.rb
CHANGED
@@ -7,9 +7,14 @@ module Scrapey
    template = File.expand_path('../../../template', __FILE__)
    FileUtils.cp_r template, name
    Dir.chdir name
+ 
    Dir.glob(['*/*.*', '*.*']).grep(/template/).each do |fn|
      FileUtils.mv fn, fn.gsub('template', name)
    end
+   buf = File.read "#{name}.iss"
+   buf.gsub! /Template/, "rightmove_rentals".tr('_', ' ').gsub(/\w+/){|x| x.capitalize}
+   buf.gsub! /template/, name
+   File.open("#{name}.iss", 'w'){|f| f << buf}
  
  end
  end
data/scrapey.gemspec
CHANGED
@@ -1,5 +1,5 @@
  # -*- encoding: utf-8 -*-
- require File.expand_path('../lib/scrapey/
+ require File.expand_path('../lib/scrapey/constants', __FILE__)
  
  Gem::Specification.new do |gem|
    gem.authors = ["P Guardiario"]
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
    gem.name = "scrapey"
    gem.require_paths = ["lib"]
    gem.version = Scrapey::VERSION
-   gem.add_dependency(%q<mechanize
+   gem.add_dependency(%q<mechanize>)
    gem.add_dependency(%q<json>, ["~> 1.7.0"])
  end
  
data/template/Gemfile
ADDED
data/template/Rakefile
ADDED
@@ -0,0 +1,22 @@
+ #!/usr/bin/env rake
+ #Rake.application.options.trace = true
+ require 'fileutils'
+ 
+ def name
+   @name ||= Dir.pwd[/[^\/]+$/]
+ end
+ 
+ desc "Build project with ocra"
+ task 'build' do
+   system "ocra --icon icon.ico src/#{name}.rb --no-lzma --chdir-first --no-autoload --innosetup #{name}.iss"
+ end
+ 
+ desc "Copy installer to dropbox folder"
+ task 'dropbox' do
+   raise 'no dropbox folder!' unless ENV['DROPBOX']
+   folder = [ENV['DROPBOX'], name].join('/').squeeze('/')
+   FileUtils.mkdir(folder) unless File.exists?(folder)
+   FileUtils.cp "Output/setup.exe", folder
+   url = [ENV['DROPBOX_public_url'], name, 'setup.exe'].join('/').squeeze('/')
+   puts "uploaded to #{url}"
+ end
data/template/icon.ico
ADDED
Binary file
data/template/src/schema.rb
ADDED
@@ -0,0 +1,16 @@
+ =begin
+ # put table schemas here. this will be included if the table is not found.
+ ActiveRecord::Schema.define do
+   create_table "items" do |t|
+     t.string "string_field"
+     t.text "text_field"
+     t.integer "number_field"
+     t.boolean "boolean_field"
+     t.float "float_field"
+     t.date "created_at"
+     t.datetime "created_on"
+   end
+ 
+   add_index "items", ["number_field"], :name => "number_field_idx", :unique => true
+ end
+ =end
data/template/src/template.rb
CHANGED
@@ -1,4 +1,6 @@
  require 'scrapey'
+ # require 'scrapey/multi' #=> requires em-http-request
  
- # customizations...
+ # sample customizations...
+ # @agent.user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.56 Safari/536.5'
  # @output = Time.now.strftime("#{BASEDIR}/Output/output_%Y_%m_%d_%H_%M_%S.csv")
data/template/template.iss
ADDED
@@ -0,0 +1,12 @@
+ [Setup]
+ AppName=Template Scraper
+ AppVersion=1.0
+ DefaultDirName={localappdata}\Template Scraper
+ DefaultGroupName=Template Scraper
+ 
+ [Files]
+ Source: "config\*"; DestDir: "{app}\config";
+ Source: "src\*"; DestDir: "{app}\src";
+ 
+ [Icons]
+ Name: "{group}\Template Scraper"; Filename: "{app}\template.exe"
metadata
CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: scrapey
  version: !ruby/object:Gem::Version
-   version: 0.0.3
+   version: 0.0.4
  prerelease:
  platform: ruby
  authors:
@@ -9,24 +9,24 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2012-
+ date: 2012-08-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: mechanize
    requirement: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - -
+     - - ! '>='
      - !ruby/object:Gem::Version
-       version:
+       version: '0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      none: false
      requirements:
-     - -
+     - - ! '>='
      - !ruby/object:Gem::Version
-       version:
+       version: '0'
  - !ruby/object:Gem::Dependency
    name: json
    requirement: !ruby/object:Gem::Requirement
@@ -56,18 +56,28 @@ files:
  - README.md
  - Rakefile
  - bin/scrapey
+ - examples/imdb.rb
  - examples/multi.rb
+ - examples/multi2.rb
+ - examples/redis.rb
+ - examples/status_check.rb
+ - lib/scrapey/cache/disk.rb
+ - lib/scrapey/cache/redis.rb
  - lib/scrapey/cache.rb
  - lib/scrapey/constants.rb
  - lib/scrapey/database.rb
  - lib/scrapey/multi.rb
  - lib/scrapey/scrapey.rb
  - lib/scrapey/template.rb
- - lib/scrapey/version.rb
  - lib/scrapey.rb
  - scrapey.gemspec
  - template/config/config.yml
+ - template/Gemfile
+ - template/icon.ico
+ - template/Rakefile
+ - template/src/schema.rb
  - template/src/template.rb
+ - template/template.iss
  homepage: ''
  licenses: []
  post_install_message:
@@ -88,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 1.8.
+ rubygems_version: 1.8.24
  signing_key:
  specification_version: 3
  summary: A simple scraping framework
data/lib/scrapey/version.rb
DELETED