fizx-rwget 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.homepage = "http://github.com/fizx/rwget"
11
11
  gem.authors = ["Kyle Maxwell"]
12
12
  gem.add_dependency("curb", ["> 0.0.0"])
13
- gem.add_dependency("hpricot", ["> 0.0.0", "< 0.7"])
13
+ gem.add_dependency("hpricot", ["> 0.0.0"])
14
14
  gem.add_dependency("fizx-robots", [">= 0.3.1"])
15
15
  gem.add_dependency("bloomfilter", ["> 0.0.0"])
16
16
  gem.add_dependency("libxml-ruby", ["> 0.9"])
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.1
1
+ 0.5.2
@@ -78,6 +78,7 @@ class RWGet::Controller
78
78
  puts "storing at #{key}"
79
79
  @store.put(key, tmpfile)
80
80
  sleep options[:wait]
81
+ tmpfile.close rescue nil
81
82
  else
82
83
  puts "unable to download"
83
84
  end
@@ -102,12 +103,13 @@ class RWGet::Controller
102
103
 
103
104
  def key_for(uri)
104
105
  arr = []
105
- arr << options[:prefix] if options[:prefix]
106
- arr << @start_time if options[:timestampize]
107
- arr << uri.scheme if options[:protocol_directories]
108
- arr << uri.host unless options[:no_host_directories]
106
+ arr << options[:prefix] if options[:prefix]
107
+ arr << @start_time if options[:timestampize]
108
+ arr << uri.scheme if options[:protocol_directories]
109
+ arr << uri.host unless options[:no_host_directories]
109
110
  paths = uri.path.split("/")
110
- paths.shift if paths.first.to_s.empty?
111
+ paths << paths.pop + "?" + uri.query if uri.query
112
+ paths.shift if paths.first.to_s.empty?
111
113
  File.join(arr + paths)
112
114
  end
113
115
 
@@ -9,7 +9,8 @@ class RWGetOptionParser < OptionParser
9
9
 
10
10
  def parse!
11
11
  super
12
- options[:seeds] = ARGV
12
+ options[:seeds] ||= []
13
+ options[:seeds] += ARGV
13
14
  end
14
15
 
15
16
  def initialize
@@ -49,10 +50,6 @@ class RWGetOptionParser < OptionParser
49
50
  options[:reject_patterns] ||= []
50
51
  options[:reject_patterns] << Regexp.new(r)
51
52
  end
52
-
53
- opts.on("--require=RUBY_SCRIPT", "Will execute 'require RUBY_SCRIPT'") do |s|
54
- require s
55
- end
56
53
 
57
54
  opts.on("--limit-rate=RATE", "limit download rate to RATE.") do |r|
58
55
  rate = r.to_i
@@ -74,23 +71,27 @@ class RWGetOptionParser < OptionParser
74
71
  options[:proxy_password] = p
75
72
  end
76
73
 
77
- opts.on("--fetch-class=RUBY_CLASS", "Must implement fetch(uri, user_agent_string) #=> [final_redirected_url, file_object]") do |c|
74
+ opts.on("--require=RUBY_SCRIPT", "Will execute 'require RUBY_SCRIPT'") do |s|
75
+ require s
76
+ end
77
+
78
+ opts.on("--fetch-class=RUBY_CLASS", "Must implement fetch(uri, user_agent_string) #=> [final_redirected_url, file_object] (Load the class with --require)") do |c|
78
79
  options[:fetch_class] = c
79
80
  end
80
81
 
81
- opts.on("--store-class=RUBY_CLASS", "Must implement put(key_string, temp_file)") do |c|
82
+ opts.on("--store-class=RUBY_CLASS", "Must implement put(key_string, temp_file) (Load the class with --require)") do |c|
82
83
  options[:store_class] = c
83
84
  end
84
85
 
85
- opts.on("--dupes-class=RUBY_CLASS", "Must implement dupe?(uri)") do |c|
86
+ opts.on("--dupes-class=RUBY_CLASS", "Must implement dupe?(uri) (Load the class with --require)") do |c|
86
87
  options[:dupes_class] = c
87
88
  end
88
89
 
89
- opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int]") do |c|
90
+ opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int] (Load the class with --require)") do |c|
90
91
  options[:queue_class] = c
91
92
  end
92
93
 
93
- opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int]") do |c|
94
+ opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int] (Load the class with --require)") do |c|
94
95
  options[:queue_class] = c
95
96
  end
96
97
 
@@ -99,6 +100,7 @@ class RWGetOptionParser < OptionParser
99
100
  end
100
101
 
101
102
  opts.on("-S", "--sitemap=URL", "URL of a sitemap to crawl (will ignore inter-page links)") do |url|
103
+ options[:seeds] ||= []
102
104
  options[:seeds] << url
103
105
  options[:links_class] = "RWGet::SitemapLinks"
104
106
  end
@@ -11,7 +11,15 @@ class RWGet::Store
11
11
  def put(key, tmpfile)
12
12
  path = File.join(@root, key)
13
13
  path = File.join(path, "index.html") unless path.split("/").last =~ /\.|\?/
14
- mkdir_p(File.dirname(path))
14
+ dir = File.dirname(path)
15
+ if(File.file?(dir))
16
+ tmp = "#{dir}.index.html.#{Time.now.to_f}"
17
+ mv dir, tmp
18
+ mkdir_p(dir)
19
+ mv tmp, File.join(dir, "index.html")
20
+ else
21
+ mkdir_p(dir)
22
+ end
15
23
  mv tmpfile.path, path
16
24
  end
17
25
  end
@@ -1,12 +1,15 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
1
4
  # -*- encoding: utf-8 -*-
2
5
 
3
6
  Gem::Specification.new do |s|
4
7
  s.name = %q{rwget}
5
- s.version = "0.5.1"
8
+ s.version = "0.5.2"
6
9
 
7
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
11
  s.authors = ["Kyle Maxwell"]
9
- s.date = %q{2009-06-19}
12
+ s.date = %q{2009-09-10}
10
13
  s.default_executable = %q{rwget}
11
14
  s.email = %q{kyle@kylemaxwell.com}
12
15
  s.executables = ["rwget"]
@@ -42,11 +45,10 @@ Gem::Specification.new do |s|
42
45
  "test/sitemap_links_test.rb",
43
46
  "test/store_test.rb"
44
47
  ]
45
- s.has_rdoc = true
46
48
  s.homepage = %q{http://github.com/fizx/rwget}
47
49
  s.rdoc_options = ["--charset=UTF-8"]
48
50
  s.require_paths = ["lib"]
49
- s.rubygems_version = %q{1.3.2}
51
+ s.rubygems_version = %q{1.3.5}
50
52
  s.summary = %q{Ruby port of wget, emphasis on recursive/crawler}
51
53
  s.test_files = [
52
54
  "test/controller_test.rb",
@@ -65,20 +67,20 @@ Gem::Specification.new do |s|
65
67
 
66
68
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
67
69
  s.add_runtime_dependency(%q<curb>, ["> 0.0.0"])
68
- s.add_runtime_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
70
+ s.add_runtime_dependency(%q<hpricot>, ["> 0.0.0"])
69
71
  s.add_runtime_dependency(%q<fizx-robots>, [">= 0.3.1"])
70
72
  s.add_runtime_dependency(%q<bloomfilter>, ["> 0.0.0"])
71
73
  s.add_runtime_dependency(%q<libxml-ruby>, ["> 0.9"])
72
74
  else
73
75
  s.add_dependency(%q<curb>, ["> 0.0.0"])
74
- s.add_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
76
+ s.add_dependency(%q<hpricot>, ["> 0.0.0"])
75
77
  s.add_dependency(%q<fizx-robots>, [">= 0.3.1"])
76
78
  s.add_dependency(%q<bloomfilter>, ["> 0.0.0"])
77
79
  s.add_dependency(%q<libxml-ruby>, ["> 0.9"])
78
80
  end
79
81
  else
80
82
  s.add_dependency(%q<curb>, ["> 0.0.0"])
81
- s.add_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
83
+ s.add_dependency(%q<hpricot>, ["> 0.0.0"])
82
84
  s.add_dependency(%q<fizx-robots>, [">= 0.3.1"])
83
85
  s.add_dependency(%q<bloomfilter>, ["> 0.0.0"])
84
86
  s.add_dependency(%q<libxml-ruby>, ["> 0.9"])
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-rwget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-19 00:00:00 -07:00
12
+ date: 2009-09-10 00:00:00 -07:00
13
13
  default_executable: rwget
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -31,9 +31,6 @@ dependencies:
31
31
  - - ">"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.0.0
34
- - - <
35
- - !ruby/object:Gem::Version
36
- version: "0.7"
37
34
  version:
38
35
  - !ruby/object:Gem::Dependency
39
36
  name: fizx-robots
@@ -101,8 +98,9 @@ files:
101
98
  - test/server.rb
102
99
  - test/sitemap_links_test.rb
103
100
  - test/store_test.rb
104
- has_rdoc: true
101
+ has_rdoc: false
105
102
  homepage: http://github.com/fizx/rwget
103
+ licenses:
106
104
  post_install_message:
107
105
  rdoc_options:
108
106
  - --charset=UTF-8
@@ -123,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
123
121
  requirements: []
124
122
 
125
123
  rubyforge_project:
126
- rubygems_version: 1.2.0
124
+ rubygems_version: 1.3.5
127
125
  signing_key:
128
126
  specification_version: 3
129
127
  summary: Ruby port of wget, emphasis on recursive/crawler