spidr 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,12 @@
1
+ ### 0.3.2 / 2011-06-20
2
+
3
+ * Added separate initialize methods for {Spidr::Actions}, {Spidr::Events},
4
+ {Spidr::Filters} and {Spidr::Sanitizers}.
5
+ * Aliased {Spidr::Events#urls_like} to {Spidr::Events#every_url_like}.
6
+ * Reduce usage of `self.included` and `module_eval`.
7
+ * Reduce usage of nested-blocks.
8
+ * Reduce usage of `return`.
9
+
1
10
  ### 0.3.1 / 2011-04-22
2
11
 
3
12
  * Require `set` in `spidr/headers.rb`.
data/Gemfile CHANGED
@@ -3,7 +3,7 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development do
6
- gem 'rake', '~> 0.8.7'
6
+ gem 'rake', '~> 0.8'
7
7
 
8
8
  gem 'ore-tasks', '~> 0.4'
9
9
  gem 'rspec', '~> 2.4'
@@ -8,12 +8,6 @@ module Spidr
8
8
  # spidering of links.
9
9
  #
10
10
  module Actions
11
- def initialize(options={})
12
- @paused = false
13
-
14
- super(options)
15
- end
16
-
17
11
  #
18
12
  # Continue spidering.
19
13
  #
@@ -79,5 +73,11 @@ module Spidr
79
73
  def skip_page!
80
74
  raise(SkipPage)
81
75
  end
76
+
77
+ protected
78
+
79
+ def initialize_actions(options={})
80
+ @paused = false
81
+ end
82
82
  end
83
83
  end
@@ -115,15 +115,15 @@ module Spidr
115
115
  @host_headers.merge!(options[:host_headers])
116
116
  end
117
117
 
118
- @user_agent = (options[:user_agent] || Spidr.user_agent)
118
+ @user_agent = options.fetch(:user_agent,Spidr.user_agent)
119
119
  @referer = options[:referer]
120
120
 
121
- @sessions = SessionCache.new(options[:proxy] || Spidr.proxy)
121
+ @sessions = SessionCache.new(options.fetch(:proxy,Spidr.proxy))
122
122
  @cookies = CookieJar.new
123
123
  @authorized = AuthStore.new
124
124
 
125
125
  @running = false
126
- @delay = (options[:delay] || 0)
126
+ @delay = options.fetch(:delay,0)
127
127
  @history = Set[]
128
128
  @failures = Set[]
129
129
  @queue = []
@@ -131,7 +131,10 @@ module Spidr
131
131
  @levels = Hash.new(0)
132
132
  @max_depth = options[:max_depth]
133
133
 
134
- super(options)
134
+ initialize_sanitizers(options)
135
+ initialize_filters(options)
136
+ initialize_actions(options)
137
+ initialize_events(options)
135
138
 
136
139
  yield self if block_given?
137
140
  end
@@ -152,19 +155,16 @@ module Spidr
152
155
  # @yieldparam [Agent] agent
153
156
  # The newly created agent.
154
157
  #
155
- def self.start_at(url,options={})
156
- self.new(options) do |spider|
157
- yield spider if block_given?
158
-
159
- spider.start_at(url)
160
- end
158
+ def self.start_at(url,options={},&block)
159
+ agent = new(options,&block)
160
+ agent.start_at(url)
161
161
  end
162
162
 
163
163
  #
164
- # Creates a new agent and spiders the given host.
164
+ # Creates a new agent and spiders the web-site located at the given URL.
165
165
  #
166
- # @param [String]
167
- # The host-name to spider.
166
+ # @param [URI::HTTP, String] url
167
+ # The web-site to spider.
168
168
  #
169
169
  # @param [Hash] options
170
170
  # Additional options. See {Agent#initialize}.
@@ -176,19 +176,18 @@ module Spidr
176
176
  # @yieldparam [Agent] agent
177
177
  # The newly created agent.
178
178
  #
179
- def self.host(name,options={})
180
- self.new(options.merge(:host => name)) do |spider|
181
- yield spider if block_given?
179
+ def self.site(url,options={},&block)
180
+ url = URI(url.to_s) unless url.kind_of?(URI)
182
181
 
183
- spider.start_at("http://#{name}/")
184
- end
182
+ agent = new(options.merge(:host => url.host),&block)
183
+ agent.start_at(url)
185
184
  end
186
185
 
187
186
  #
188
- # Creates a new agent and spiders the web-site located at the given URL.
187
+ # Creates a new agent and spiders the given host.
189
188
  #
190
- # @param [URI::HTTP, String] url
191
- # The web-site to spider.
189
+ # @param [String] name
190
+ # The host-name to spider.
192
191
  #
193
192
  # @param [Hash] options
194
193
  # Additional options. See {Agent#initialize}.
@@ -200,14 +199,8 @@ module Spidr
200
199
  # @yieldparam [Agent] agent
201
200
  # The newly created agent.
202
201
  #
203
- def self.site(url,options={})
204
- url = URI(url.to_s)
205
-
206
- return self.new(options.merge(:host => url.host)) do |spider|
207
- yield spider if block_given?
208
-
209
- spider.start_at(url)
210
- end
202
+ def self.host(name,options={},&block)
203
+ site(URI::HTTP.build(:host => name, :path => '/'),options,&block)
211
204
  end
212
205
 
213
206
  #
@@ -234,7 +227,6 @@ module Spidr
234
227
  #
235
228
  def start_at(url,&block)
236
229
  enqueue(url)
237
-
238
230
  return run(&block)
239
231
  end
240
232
 
@@ -261,7 +253,6 @@ module Spidr
261
253
  end
262
254
 
263
255
  @running = false
264
-
265
256
  @sessions.clear
266
257
  return self
267
258
  end
@@ -387,10 +378,10 @@ module Spidr
387
378
 
388
379
  new_failures.each do |url|
389
380
  @failures << unless url.kind_of?(URI)
390
- URI(url.to_s)
391
- else
392
- url
393
- end
381
+ URI(url.to_s)
382
+ else
383
+ url
384
+ end
394
385
  end
395
386
 
396
387
  return @failures
@@ -471,7 +462,7 @@ module Spidr
471
462
  begin
472
463
  @every_url_blocks.each { |url_block| url_block.call(url) }
473
464
 
474
- @urls_like_blocks.each do |pattern,url_blocks|
465
+ @every_url_like_blocks.each do |pattern,url_blocks|
475
466
  match = case pattern
476
467
  when Regexp
477
468
  link =~ pattern
@@ -653,12 +644,11 @@ module Spidr
653
644
  def prepare_request(url,&block)
654
645
  host = url.host
655
646
  port = url.port
656
-
657
- unless url.path.empty?
658
- path = url.path
659
- else
660
- path = '/'
661
- end
647
+ path = unless url.path.empty?
648
+ url.path
649
+ else
650
+ '/'
651
+ end
662
652
 
663
653
  # append the URL query to the path
664
654
  path += "?#{url.query}" if url.query
@@ -724,7 +714,7 @@ module Spidr
724
714
  # Specifies whether the given URL should be visited.
725
715
  #
726
716
  def visit?(url)
727
- !(visited?(url)) &&
717
+ !visited?(url) &&
728
718
  visit_scheme?(url.scheme) &&
729
719
  visit_host?(url.host) &&
730
720
  visit_port?(url.port) &&
@@ -24,16 +24,15 @@ module Spidr
24
24
  # @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
25
25
  #
26
26
  def doc
27
- return nil if body.empty?
28
-
29
- begin
30
- if html?
31
- return @doc ||= Nokogiri::HTML(body)
32
- elsif (xml? || xsl? || rss? || atom?)
33
- return @doc ||= Nokogiri::XML(body)
27
+ unless body.empty?
28
+ begin
29
+ if html?
30
+ @doc ||= Nokogiri::HTML(body)
31
+ elsif (rss? || atom? || xml? || xsl?)
32
+ @doc ||= Nokogiri::XML(body)
33
+ end
34
+ rescue
34
35
  end
35
- rescue
36
- return nil
37
36
  end
38
37
  end
39
38
 
@@ -5,17 +5,6 @@ module Spidr
5
5
  # they are visited.
6
6
  #
7
7
  module Events
8
- def initialize(options={})
9
- super(options)
10
-
11
- @every_url_blocks = []
12
- @every_failed_url_blocks = []
13
- @urls_like_blocks = Hash.new { |hash,key| hash[key] = [] }
14
-
15
- @every_page_blocks = []
16
- @every_link_blocks = []
17
- end
18
-
19
8
  #
20
9
  # Pass each URL from each page visited to the given block.
21
10
  #
@@ -57,11 +46,20 @@ module Spidr
57
46
  # @yieldparam [URI::HTTP] url
58
47
  # A matching URL.
59
48
  #
60
- def urls_like(pattern,&block)
61
- @urls_like_blocks[pattern] << block
49
+ # @since 0.3.2
50
+ #
51
+ def every_url_like(pattern,&block)
52
+ @every_url_like_blocks[pattern] << block
62
53
  return self
63
54
  end
64
55
 
56
+ #
57
+ # @see #every_url_like
58
+ #
59
+ def urls_like(pattern,&block)
60
+ every_url_like(pattern,&block)
61
+ end
62
+
65
63
  #
66
64
  # Pass the headers from every response the agent receives to a given
67
65
  # block.
@@ -524,5 +522,16 @@ module Spidr
524
522
  @every_link_blocks << block
525
523
  return self
526
524
  end
525
+
526
+ protected
527
+
528
+ def initialize_events(options={})
529
+ @every_url_blocks = []
530
+ @every_failed_url_blocks = []
531
+ @every_url_like_blocks = Hash.new { |hash,key| hash[key] = [] }
532
+
533
+ @every_page_blocks = []
534
+ @every_link_blocks = []
535
+ end
527
536
  end
528
537
  end
@@ -6,110 +6,8 @@ module Spidr
6
6
  # URLs the agent will visit.
7
7
  #
8
8
  module Filters
9
- def self.included(base)
10
- base.module_eval do
11
- # List of acceptable URL schemes to follow
12
- attr_reader :schemes
13
- end
14
- end
15
-
16
- #
17
- # Initializes filtering rules.
18
- #
19
- # @param [Hash] options
20
- # Additional options.
21
- #
22
- # @option options [Array] :schemes (['http', 'https'])
23
- # The list of acceptable URI schemes to visit.
24
- # The `https` scheme will be ignored if `net/https` cannot be loaded.
25
- #
26
- # @option options [String] :host
27
- # The host-name to visit.
28
- #
29
- # @option options [Array<String, Regexp, Proc>] :hosts
30
- # The patterns which match the host-names to visit.
31
- #
32
- # @option options [Array<String, Regexp, Proc>] :ignore_hosts
33
- # The patterns which match the host-names to not visit.
34
- #
35
- # @option options [Array<Integer, Regexp, Proc>] :ports
36
- # The patterns which match the ports to visit.
37
- #
38
- # @option options [Array<Integer, Regexp, Proc>] :ignore_ports
39
- # The patterns which match the ports to not visit.
40
- #
41
- # @option options [Array<String, Regexp, Proc>] :links
42
- # The patterns which match the links to visit.
43
- #
44
- # @option options [Array<String, Regexp, Proc>] :ignore_links
45
- # The patterns which match the links to not visit.
46
- #
47
- # @option options [Array<String, Regexp, Proc>] :urls
48
- # The patterns which match the URLs to visit.
49
- #
50
- # @option options [Array<String, Regexp, Proc>] :ignore_urls
51
- # The patterns which match the URLs to not visit.
52
- #
53
- # @option options [Array<String, Regexp, Proc>] :exts
54
- # The patterns which match the URI path extensions to visit.
55
- #
56
- # @option options [Array<String, Regexp, Proc>] :ignore_exts
57
- # The patterns which match the URI path extensions to not visit.
58
- #
59
- def initialize(options={})
60
- super(options)
61
-
62
- @schemes = []
63
-
64
- if options[:schemes]
65
- @schemes += options[:schemes]
66
- else
67
- @schemes << 'http'
68
-
69
- begin
70
- require 'net/https'
71
-
72
- @schemes << 'https'
73
- rescue Gem::LoadError => e
74
- raise(e)
75
- rescue ::LoadError
76
- STDERR.puts "Warning: cannot load 'net/https', https support disabled"
77
- end
78
- end
79
-
80
- @host_rules = Rules.new(
81
- :accept => options[:hosts],
82
- :reject => options[:ignore_hosts]
83
- )
84
- @port_rules = Rules.new(
85
- :accept => options[:ports],
86
- :reject => options[:ignore_ports]
87
- )
88
- @link_rules = Rules.new(
89
- :accept => options[:links],
90
- :reject => options[:ignore_links]
91
- )
92
- @url_rules = Rules.new(
93
- :accept => options[:urls],
94
- :reject => options[:ignore_urls]
95
- )
96
- @ext_rules = Rules.new(
97
- :accept => options[:exts],
98
- :reject => options[:ignore_exts]
99
- )
100
-
101
- if options[:host]
102
- visit_hosts_like(options[:host])
103
- end
104
-
105
- if options[:queue]
106
- self.queue = options[:queue]
107
- end
108
-
109
- if options[:history]
110
- self.history = options[:history]
111
- end
112
- end
9
+ # List of acceptable URL schemes to follow
10
+ attr_reader :schemes
113
11
 
114
12
  #
115
13
  # Sets the list of acceptable URL schemes to visit.
@@ -458,6 +356,102 @@ module Spidr
458
356
 
459
357
  protected
460
358
 
359
+ #
360
+ # Initializes filtering rules.
361
+ #
362
+ # @param [Hash] options
363
+ # Additional options.
364
+ #
365
+ # @option options [Array] :schemes (['http', 'https'])
366
+ # The list of acceptable URI schemes to visit.
367
+ # The `https` scheme will be ignored if `net/https` cannot be loaded.
368
+ #
369
+ # @option options [String] :host
370
+ # The host-name to visit.
371
+ #
372
+ # @option options [Array<String, Regexp, Proc>] :hosts
373
+ # The patterns which match the host-names to visit.
374
+ #
375
+ # @option options [Array<String, Regexp, Proc>] :ignore_hosts
376
+ # The patterns which match the host-names to not visit.
377
+ #
378
+ # @option options [Array<Integer, Regexp, Proc>] :ports
379
+ # The patterns which match the ports to visit.
380
+ #
381
+ # @option options [Array<Integer, Regexp, Proc>] :ignore_ports
382
+ # The patterns which match the ports to not visit.
383
+ #
384
+ # @option options [Array<String, Regexp, Proc>] :links
385
+ # The patterns which match the links to visit.
386
+ #
387
+ # @option options [Array<String, Regexp, Proc>] :ignore_links
388
+ # The patterns which match the links to not visit.
389
+ #
390
+ # @option options [Array<String, Regexp, Proc>] :urls
391
+ # The patterns which match the URLs to visit.
392
+ #
393
+ # @option options [Array<String, Regexp, Proc>] :ignore_urls
394
+ # The patterns which match the URLs to not visit.
395
+ #
396
+ # @option options [Array<String, Regexp, Proc>] :exts
397
+ # The patterns which match the URI path extensions to visit.
398
+ #
399
+ # @option options [Array<String, Regexp, Proc>] :ignore_exts
400
+ # The patterns which match the URI path extensions to not visit.
401
+ #
402
+ def initialize_filters(options={})
403
+ @schemes = []
404
+
405
+ if options[:schemes]
406
+ @schemes += options[:schemes]
407
+ else
408
+ @schemes << 'http'
409
+
410
+ begin
411
+ require 'net/https'
412
+
413
+ @schemes << 'https'
414
+ rescue Gem::LoadError => e
415
+ raise(e)
416
+ rescue ::LoadError
417
+ STDERR.puts "Warning: cannot load 'net/https', https support disabled"
418
+ end
419
+ end
420
+
421
+ @host_rules = Rules.new(
422
+ :accept => options[:hosts],
423
+ :reject => options[:ignore_hosts]
424
+ )
425
+ @port_rules = Rules.new(
426
+ :accept => options[:ports],
427
+ :reject => options[:ignore_ports]
428
+ )
429
+ @link_rules = Rules.new(
430
+ :accept => options[:links],
431
+ :reject => options[:ignore_links]
432
+ )
433
+ @url_rules = Rules.new(
434
+ :accept => options[:urls],
435
+ :reject => options[:ignore_urls]
436
+ )
437
+ @ext_rules = Rules.new(
438
+ :accept => options[:exts],
439
+ :reject => options[:ignore_exts]
440
+ )
441
+
442
+ if options[:host]
443
+ visit_hosts_like(options[:host])
444
+ end
445
+
446
+ if options[:queue]
447
+ self.queue = options[:queue]
448
+ end
449
+
450
+ if options[:history]
451
+ self.history = options[:history]
452
+ end
453
+ end
454
+
461
455
  #
462
456
  # Determines if a given URI scheme should be visited.
463
457
  #
@@ -295,9 +295,9 @@ module Spidr
295
295
  cookie.split('; ').each do |key_value|
296
296
  key, value = key_value.split('=',2)
297
297
 
298
- next if RESERVED_COOKIE_NAMES.include?(key)
299
-
300
- params[key] = (value || '')
298
+ unless RESERVED_COOKIE_NAMES.include?(key)
299
+ params[key] = (value || '')
300
+ end
301
301
  end
302
302
  end
303
303
 
@@ -85,7 +85,7 @@ module Spidr
85
85
  location.each(&block)
86
86
  else
87
87
  # usually the location header contains a single String
88
- block.call(location)
88
+ yield location
89
89
  end
90
90
  end
91
91
 
@@ -40,17 +40,9 @@ module Spidr
40
40
  #
41
41
  def accept?(data)
42
42
  unless @accept.empty?
43
- @accept.each do |rule|
44
- return true if test_data(data,rule)
45
- end
46
-
47
- return false
43
+ @accept.any? { |rule| test_data(data,rule) }
48
44
  else
49
- @reject.each do |rule|
50
- return false if test_data(data,rule)
51
- end
52
-
53
- return true
45
+ !@reject.any? { |rule| test_data(data,rule) }
54
46
  end
55
47
  end
56
48
 
@@ -62,7 +54,7 @@ module Spidr
62
54
  # rejection patterns.
63
55
  #
64
56
  def reject?(data)
65
- !(accept?(data))
57
+ !accept?(data)
66
58
  end
67
59
 
68
60
  protected
@@ -75,11 +67,11 @@ module Spidr
75
67
  #
76
68
  def test_data(data,rule)
77
69
  if rule.kind_of?(Proc)
78
- return (rule.call(data) == true)
70
+ rule.call(data) == true
79
71
  elsif rule.kind_of?(Regexp)
80
- return !((data.to_s =~ rule).nil?)
72
+ !((data.to_s =~ rule).nil?)
81
73
  else
82
- return data == rule
74
+ data == rule
83
75
  end
84
76
  end
85
77
 
@@ -6,39 +6,11 @@ module Spidr
6
6
  # sanitation of incoming links.
7
7
  #
8
8
  module Sanitizers
9
- def self.included(base)
10
- base.module_eval do
11
- # Specifies whether the Agent will strip URI fragments
12
- attr_accessor :strip_fragments
9
+ # Specifies whether the Agent will strip URI fragments
10
+ attr_accessor :strip_fragments
13
11
 
14
- # Specifies whether the Agent will strip URI queries
15
- attr_accessor :strip_query
16
- end
17
- end
18
-
19
- #
20
- # Initializes the Sanitizer rules.
21
- #
22
- # @param [Hash] options
23
- # Additional options.
24
- #
25
- # @option options [Boolean] :strip_fragments (true)
26
- # Specifies whether or not to strip the fragment component from URLs.
27
- #
28
- # @option options [Boolean] :strip_query (false)
29
- # Specifies whether or not to strip the query component from URLs.
30
- #
31
- # @since 0.2.2
32
- #
33
- def initialize(options={})
34
- @strip_fragments = true
35
-
36
- if options.has_key?(:strip_fragments)
37
- @strip_fragments = options[:strip_fragments]
38
- end
39
-
40
- @strip_query = (options[:strip_query] || false)
41
- end
12
+ # Specifies whether the Agent will strip URI queries
13
+ attr_accessor :strip_query
42
14
 
43
15
  #
44
16
  # Sanitizes a URL based on filtering options.
@@ -59,5 +31,26 @@ module Spidr
59
31
 
60
32
  return url
61
33
  end
34
+
35
+ protected
36
+
37
+ #
38
+ # Initializes the Sanitizer rules.
39
+ #
40
+ # @param [Hash] options
41
+ # Additional options.
42
+ #
43
+ # @option options [Boolean] :strip_fragments (true)
44
+ # Specifies whether or not to strip the fragment component from URLs.
45
+ #
46
+ # @option options [Boolean] :strip_query (false)
47
+ # Specifies whether or not to strip the query component from URLs.
48
+ #
49
+ # @since 0.2.2
50
+ #
51
+ def initialize_sanitizers(options={})
52
+ @strip_fragments = options.fetch(:strip_fragments,true)
53
+ @strip_query = options.fetch(:strip_query,false)
54
+ end
62
55
  end
63
56
  end
@@ -1,4 +1,4 @@
1
1
  module Spidr
2
2
  # Spidr version
3
- VERSION = '0.3.1'
3
+ VERSION = '0.3.2'
4
4
  end
@@ -1,15 +1,127 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- begin
4
- Ore::Specification.new do |gemspec|
5
- # custom logic here
6
- end
7
- rescue NameError
8
- begin
9
- require 'ore/specification'
10
- retry
11
- rescue LoadError
12
- STDERR.puts "The '#{__FILE__}' file requires Ore."
13
- STDERR.puts "Run `gem install ore-core` to install Ore."
1
+ # encoding: utf-8
2
+
3
+ require 'yaml'
4
+
5
+ Gem::Specification.new do |gemspec|
6
+ files = if File.directory?('.git')
7
+ `git ls-files`.split($/)
8
+ elsif File.directory?('.hg')
9
+ `hg manifest`.split($/)
10
+ elsif File.directory?('.svn')
11
+ `svn ls -R`.split($/).select { |path| File.file?(path) }
12
+ else
13
+ Dir['{**/}{.*,*}'].select { |path| File.file?(path) }
14
+ end
15
+
16
+ filter_files = lambda { |paths|
17
+ case paths
18
+ when Array
19
+ (files & paths)
20
+ when String
21
+ (files & Dir[paths])
22
+ end
23
+ }
24
+
25
+ version = {
26
+ :file => 'lib/spidr/version.rb',
27
+ :constant => 'Spidr::VERSION'
28
+ }
29
+
30
+ defaults = {
31
+ 'name' => File.basename(File.dirname(__FILE__)),
32
+ 'files' => files,
33
+ 'executables' => filter_files['bin/*'].map { |path| File.basename(path) },
34
+ 'test_files' => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
35
+ 'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
36
+ }
37
+
38
+ metadata = defaults.merge(YAML.load_file('gemspec.yml'))
39
+
40
+ gemspec.name = metadata.fetch('name',defaults[:name])
41
+ gemspec.version = if metadata['version']
42
+ metadata['version']
43
+ elsif File.file?(version[:file])
44
+ require File.join('.',version[:file])
45
+ eval(version[:constant])
46
+ end
47
+
48
+ gemspec.summary = metadata.fetch('summary',metadata['description'])
49
+ gemspec.description = metadata.fetch('description',metadata['summary'])
50
+
51
+ case metadata['license']
52
+ when Array
53
+ gemspec.licenses = metadata['license']
54
+ when String
55
+ gemspec.license = metadata['license']
56
+ end
57
+
58
+ case metadata['authors']
59
+ when Array
60
+ gemspec.authors = metadata['authors']
61
+ when String
62
+ gemspec.author = metadata['authors']
63
+ end
64
+
65
+ gemspec.email = metadata['email']
66
+ gemspec.homepage = metadata['homepage']
67
+
68
+ case metadata['require_paths']
69
+ when Array
70
+ gemspec.require_paths = metadata['require_paths']
71
+ when String
72
+ gemspec.require_path = metadata['require_paths']
73
+ end
74
+
75
+ gemspec.files = filter_files[metadata['files']]
76
+
77
+ gemspec.executables = metadata['executables']
78
+ gemspec.extensions = metadata['extensions']
79
+
80
+ if Gem::VERSION < '1.7.'
81
+ gemspec.default_executable = gemspec.executables.first
82
+ end
83
+
84
+ gemspec.test_files = filter_files[metadata['test_files']]
85
+
86
+ unless gemspec.files.include?('.document')
87
+ gemspec.extra_rdoc_files = metadata['extra_doc_files']
88
+ end
89
+
90
+ gemspec.post_install_message = metadata['post_install_message']
91
+ gemspec.requirements = metadata['requirements']
92
+
93
+ if gemspec.respond_to?(:required_ruby_version=)
94
+ gemspec.required_ruby_version = metadata['required_ruby_version']
95
+ end
96
+
97
+ if gemspec.respond_to?(:required_rubygems_version=)
98
+ gemspec.required_rubygems_version = metadata['required_ruby_version']
99
+ end
100
+
101
+ parse_versions = lambda { |versions|
102
+ case versions
103
+ when Array
104
+ versions.map { |v| v.to_s }
105
+ when String
106
+ versions.split(/,\s*/)
107
+ end
108
+ }
109
+
110
+ if metadata['dependencies']
111
+ metadata['dependencies'].each do |name,versions|
112
+ gemspec.add_dependency(name,parse_versions[versions])
113
+ end
114
+ end
115
+
116
+ if metadata['runtime_dependencies']
117
+ metadata['runtime_dependencies'].each do |name,versions|
118
+ gemspec.add_runtime_dependency(name,parse_versions[versions])
119
+ end
120
+ end
121
+
122
+ if metadata['development_dependencies']
123
+ metadata['development_dependencies'].each do |name,versions|
124
+ gemspec.add_development_dependency(name,parse_versions[versions])
125
+ end
14
126
  end
15
127
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: spidr
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.3.1
5
+ version: 0.3.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - Postmodern
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-22 00:00:00 Z
13
+ date: 2011-06-20 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
128
  requirements: []
129
129
 
130
130
  rubyforge_project: spidr
131
- rubygems_version: 1.7.2
131
+ rubygems_version: 1.8.5
132
132
  signing_key:
133
133
  specification_version: 3
134
134
  summary: A versatile Ruby web spidering library