fizx-rwget 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.homepage = "http://github.com/fizx/rwget"
11
11
  gem.authors = ["Kyle Maxwell"]
12
12
  gem.add_dependency("curb", ["> 0.0.0"])
13
- gem.add_dependency("hpricot", ["> 0.0.0", "< 0.7"])
13
+ gem.add_dependency("hpricot", ["> 0.0.0"])
14
14
  gem.add_dependency("fizx-robots", [">= 0.3.1"])
15
15
  gem.add_dependency("bloomfilter", ["> 0.0.0"])
16
16
  gem.add_dependency("libxml-ruby", ["> 0.9"])
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.1
1
+ 0.5.2
@@ -78,6 +78,7 @@ class RWGet::Controller
78
78
  puts "storing at #{key}"
79
79
  @store.put(key, tmpfile)
80
80
  sleep options[:wait]
81
+ tmpfile.close rescue nil
81
82
  else
82
83
  puts "unable to download"
83
84
  end
@@ -102,12 +103,13 @@ class RWGet::Controller
102
103
 
103
104
  def key_for(uri)
104
105
  arr = []
105
- arr << options[:prefix] if options[:prefix]
106
- arr << @start_time if options[:timestampize]
107
- arr << uri.scheme if options[:protocol_directories]
108
- arr << uri.host unless options[:no_host_directories]
106
+ arr << options[:prefix] if options[:prefix]
107
+ arr << @start_time if options[:timestampize]
108
+ arr << uri.scheme if options[:protocol_directories]
109
+ arr << uri.host unless options[:no_host_directories]
109
110
  paths = uri.path.split("/")
110
- paths.shift if paths.first.to_s.empty?
111
+ paths << paths.pop + "?" + uri.query if uri.query
112
+ paths.shift if paths.first.to_s.empty?
111
113
  File.join(arr + paths)
112
114
  end
113
115
 
@@ -9,7 +9,8 @@ class RWGetOptionParser < OptionParser
9
9
 
10
10
  def parse!
11
11
  super
12
- options[:seeds] = ARGV
12
+ options[:seeds] ||= []
13
+ options[:seeds] += ARGV
13
14
  end
14
15
 
15
16
  def initialize
@@ -49,10 +50,6 @@ class RWGetOptionParser < OptionParser
49
50
  options[:reject_patterns] ||= []
50
51
  options[:reject_patterns] << Regexp.new(r)
51
52
  end
52
-
53
- opts.on("--require=RUBY_SCRIPT", "Will execute 'require RUBY_SCRIPT'") do |s|
54
- require s
55
- end
56
53
 
57
54
  opts.on("--limit-rate=RATE", "limit download rate to RATE.") do |r|
58
55
  rate = r.to_i
@@ -74,23 +71,27 @@ class RWGetOptionParser < OptionParser
74
71
  options[:proxy_password] = p
75
72
  end
76
73
 
77
- opts.on("--fetch-class=RUBY_CLASS", "Must implement fetch(uri, user_agent_string) #=> [final_redirected_url, file_object]") do |c|
74
+ opts.on("--require=RUBY_SCRIPT", "Will execute 'require RUBY_SCRIPT'") do |s|
75
+ require s
76
+ end
77
+
78
+ opts.on("--fetch-class=RUBY_CLASS", "Must implement fetch(uri, user_agent_string) #=> [final_redirected_url, file_object] (Load the class with --require)") do |c|
78
79
  options[:fetch_class] = c
79
80
  end
80
81
 
81
- opts.on("--store-class=RUBY_CLASS", "Must implement put(key_string, temp_file)") do |c|
82
+ opts.on("--store-class=RUBY_CLASS", "Must implement put(key_string, temp_file) (Load the class with --require)") do |c|
82
83
  options[:store_class] = c
83
84
  end
84
85
 
85
- opts.on("--dupes-class=RUBY_CLASS", "Must implement dupe?(uri)") do |c|
86
+ opts.on("--dupes-class=RUBY_CLASS", "Must implement dupe?(uri) (Load the class with --require)") do |c|
86
87
  options[:dupes_class] = c
87
88
  end
88
89
 
89
- opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int]") do |c|
90
+ opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int] (Load the class with --require)") do |c|
90
91
  options[:queue_class] = c
91
92
  end
92
93
 
93
- opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int]") do |c|
94
+ opts.on("--queue-class=RUBY_CLASS", "Must implement put(key_string, depth_int) and get() #=> [key_string, depth_int] (Load the class with --require)") do |c|
94
95
  options[:queue_class] = c
95
96
  end
96
97
 
@@ -99,6 +100,7 @@ class RWGetOptionParser < OptionParser
99
100
  end
100
101
 
101
102
  opts.on("-S", "--sitemap=URL", "URL of a sitemap to crawl (will ignore inter-page links)") do |url|
103
+ options[:seeds] ||= []
102
104
  options[:seeds] << url
103
105
  options[:links_class] = "RWGet::SitemapLinks"
104
106
  end
@@ -11,7 +11,15 @@ class RWGet::Store
11
11
  def put(key, tmpfile)
12
12
  path = File.join(@root, key)
13
13
  path = File.join(path, "index.html") unless path.split("/").last =~ /\.|\?/
14
- mkdir_p(File.dirname(path))
14
+ dir = File.dirname(path)
15
+ if(File.file?(dir))
16
+ tmp = "#{dir}.index.html.#{Time.now.to_f}"
17
+ mv dir, tmp
18
+ mkdir_p(dir)
19
+ mv tmp, File.join(dir, "index.html")
20
+ else
21
+ mkdir_p(dir)
22
+ end
15
23
  mv tmpfile.path, path
16
24
  end
17
25
  end
@@ -1,12 +1,15 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
1
4
  # -*- encoding: utf-8 -*-
2
5
 
3
6
  Gem::Specification.new do |s|
4
7
  s.name = %q{rwget}
5
- s.version = "0.5.1"
8
+ s.version = "0.5.2"
6
9
 
7
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
11
  s.authors = ["Kyle Maxwell"]
9
- s.date = %q{2009-06-19}
12
+ s.date = %q{2009-09-10}
10
13
  s.default_executable = %q{rwget}
11
14
  s.email = %q{kyle@kylemaxwell.com}
12
15
  s.executables = ["rwget"]
@@ -42,11 +45,10 @@ Gem::Specification.new do |s|
42
45
  "test/sitemap_links_test.rb",
43
46
  "test/store_test.rb"
44
47
  ]
45
- s.has_rdoc = true
46
48
  s.homepage = %q{http://github.com/fizx/rwget}
47
49
  s.rdoc_options = ["--charset=UTF-8"]
48
50
  s.require_paths = ["lib"]
49
- s.rubygems_version = %q{1.3.2}
51
+ s.rubygems_version = %q{1.3.5}
50
52
  s.summary = %q{Ruby port of wget, emphasis on recursive/crawler}
51
53
  s.test_files = [
52
54
  "test/controller_test.rb",
@@ -65,20 +67,20 @@ Gem::Specification.new do |s|
65
67
 
66
68
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
67
69
  s.add_runtime_dependency(%q<curb>, ["> 0.0.0"])
68
- s.add_runtime_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
70
+ s.add_runtime_dependency(%q<hpricot>, ["> 0.0.0"])
69
71
  s.add_runtime_dependency(%q<fizx-robots>, [">= 0.3.1"])
70
72
  s.add_runtime_dependency(%q<bloomfilter>, ["> 0.0.0"])
71
73
  s.add_runtime_dependency(%q<libxml-ruby>, ["> 0.9"])
72
74
  else
73
75
  s.add_dependency(%q<curb>, ["> 0.0.0"])
74
- s.add_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
76
+ s.add_dependency(%q<hpricot>, ["> 0.0.0"])
75
77
  s.add_dependency(%q<fizx-robots>, [">= 0.3.1"])
76
78
  s.add_dependency(%q<bloomfilter>, ["> 0.0.0"])
77
79
  s.add_dependency(%q<libxml-ruby>, ["> 0.9"])
78
80
  end
79
81
  else
80
82
  s.add_dependency(%q<curb>, ["> 0.0.0"])
81
- s.add_dependency(%q<hpricot>, ["> 0.0.0", "< 0.7"])
83
+ s.add_dependency(%q<hpricot>, ["> 0.0.0"])
82
84
  s.add_dependency(%q<fizx-robots>, [">= 0.3.1"])
83
85
  s.add_dependency(%q<bloomfilter>, ["> 0.0.0"])
84
86
  s.add_dependency(%q<libxml-ruby>, ["> 0.9"])
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fizx-rwget
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kyle Maxwell
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-19 00:00:00 -07:00
12
+ date: 2009-09-10 00:00:00 -07:00
13
13
  default_executable: rwget
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -31,9 +31,6 @@ dependencies:
31
31
  - - ">"
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.0.0
34
- - - <
35
- - !ruby/object:Gem::Version
36
- version: "0.7"
37
34
  version:
38
35
  - !ruby/object:Gem::Dependency
39
36
  name: fizx-robots
@@ -101,8 +98,9 @@ files:
101
98
  - test/server.rb
102
99
  - test/sitemap_links_test.rb
103
100
  - test/store_test.rb
104
- has_rdoc: true
101
+ has_rdoc: false
105
102
  homepage: http://github.com/fizx/rwget
103
+ licenses:
106
104
  post_install_message:
107
105
  rdoc_options:
108
106
  - --charset=UTF-8
@@ -123,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
123
121
  requirements: []
124
122
 
125
123
  rubyforge_project:
126
- rubygems_version: 1.2.0
124
+ rubygems_version: 1.3.5
127
125
  signing_key:
128
126
  specification_version: 3
129
127
  summary: Ruby port of wget, emphasis on recursive/crawler