web_crawler 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -4,11 +4,15 @@ gem 'thor', '>=0.14.6'
4
4
  gem 'mime-types', '>=1.16'
5
5
  gem 'parallel', '>=0.5.5'
6
6
  gem 'activesupport'
7
+ gem "rake", '0.8.7'
8
+ gem 'i18n'
9
+ gem 'hpricot'
7
10
 
8
11
  # Specify your gem's dependencies in web_crawler.gemspec
9
12
  gemspec
10
13
 
11
14
  group :development, :test do
15
+ gem "rake", '0.8.7'
12
16
  gem "rspec", ">=2.6"
13
17
  gem "autotest"
14
18
  gem "autotest-growl"
data/README CHANGED
@@ -1,22 +1,32 @@
1
1
  Web crawler helps you parse and collect data from the web
2
2
 
3
- #TODO
3
+ ==How it works.
4
4
 
5
- Base web crawler class for API present
6
- Its showld work like this:
5
+ class StackoverflowCrawler < WebCrawler::Base
7
6
 
8
- class MyCrawler < WebCrawler::Base
7
+ target "http://stackoverflow.com/questions/tagged/:tag", :tag=> %w{ruby ruby-on-rails ruby-on-rails-3}
8
+ logger "path/to/log/file" # or Logger.new(...)
9
9
 
10
- target "www.example.com"
11
- target "www.example.com/page2"
12
- target %[www.example.com/contacts www.example.com/about]
13
- target "www.example.com/category_:category/page:page/", :categories => [1,2,3,4], :page => 1..100
10
+ cache_to '/tmp/cache/stackoverflow'
14
11
 
15
- target { call_advanced_logic_for_url_generating }
12
+ context "#questions .question-summary", :jobs do
16
13
 
17
- logger "path/to/log/file" # or Logger.new(...)
14
+ #TODO: defaults :format => lambda{ |v| v.to_i }
18
15
 
16
+ map '.vote-count-post strong', :to => :vote_count, :format => lambda{ |v| v.to_i }
17
+ map '.views', :to => :view_count, :format => lambda{ |v| v.match(/\d+/)[0].to_i }
18
+ map '.status strong', :to => :answer_count, :format => lambda{ |v| v.to_i }
19
+ map '.summary h3 a', :to => :title, :format => lambda{ |v| v.to_i }
20
+ map '.summary .excerpt', :to => :excerpt, :format => lambda{ |v| v.to_i }
21
+ map '.user-action-time .relativetime', :to => :posted_at, :on => [:attr, :title]
22
+ map '.tags .post-tag', :to => :tags, :format => lambda{ |v| v.to_i }
19
23
 
20
-
24
+ end
25
+ end
21
26
 
22
- end
27
+
28
+ #TODO
29
+ 1. Add documentation
30
+ 2. ...
31
+ 3. PROFIT!!!1
32
+ (:
data/bin/wcrawler CHANGED
@@ -1,5 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
-
3
2
  $:.unshift File.expand_path("../../lib", __FILE__)
4
3
 
5
4
  # Check if an older version of bundler is installed
data/gem_graph.png ADDED
Binary file
@@ -1,6 +1,9 @@
1
1
  module WebCrawler
2
2
  class Application < CLI
3
3
 
4
+ map '-V' => 'version'
5
+ map '-v' => 'version'
6
+
4
7
  desc "test", "Test task"
5
8
 
6
9
  def test
@@ -72,6 +75,11 @@ module WebCrawler
72
75
  end
73
76
  end
74
77
 
78
+ desc '-v or -V or version', 'Show gem version'
79
+ def version
80
+ WebCrawler::VERSION::STRING
81
+ end
82
+
75
83
  protected
76
84
  def allow_format(*allow)
77
85
  allow.flatten.select { |f| f == @options[:format] }.first
@@ -2,7 +2,7 @@ module WebCrawler
2
2
  module VERSION
3
3
  MAJOR = 0
4
4
  MINOR = 5
5
- TINY = 0
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -17,6 +17,8 @@ module WebCrawler::View
17
17
  class Base
18
18
  attr_reader :input
19
19
 
20
+ delegate :logger, :to => WebCrawler.logger
21
+
20
22
  class << self
21
23
  attr_accessor :default_options
22
24
 
@@ -52,11 +54,22 @@ module WebCrawler::View
52
54
  @present_output = if override && override.respond_to?(:puts)
53
55
  override
54
56
  elsif @options['output'].is_a?(String)
55
- File.open(@options['output'], 'w+')
57
+ output_to_file(@options['output'])
56
58
  elsif @options['output'].respond_to? :puts
57
59
  @options['output']
58
60
  end
59
61
  end
62
+
63
+ def output_to_file(filename)
64
+ path = Pathname.new(filename)
65
+
66
+ unless path.dirname.exist?
67
+ info("#{path.dirname} not exist, try to create...")
68
+ path.dirname.mkpath
69
+ end
70
+
71
+ path.open('w+')
72
+ end
60
73
  end
61
74
 
62
75
  end
data/lib/web_crawler.rb CHANGED
@@ -7,6 +7,7 @@ require "ext/hash"
7
7
  require "ext/array"
8
8
  require "ext/http_response"
9
9
  require "active_support/core_ext"
10
+ require "web_crawler/version"
10
11
 
11
12
  module WebCrawler
12
13
  autoload :Request, 'web_crawler/request'
data/web_crawler.gemspec CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Anton Sozontov"]
10
10
  s.email = ["a.sozontov@gmail.com"]
11
- s.homepage = ""
11
+ s.homepage = "https://github.com/webgago/web_crawler"
12
12
  s.summary = %q{Web crawler help you with parse and collect data from the web}
13
13
  s.description = %q{Web crawler help you with parse and collect data from the web}
14
14
 
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
26
26
  s.add_dependency 'thor', '>=0.14.6'
27
27
  s.add_dependency 'mime-types', '>=1.16'
28
28
  s.add_dependency 'parallel', '>=0.5.5'
29
- s.add_dependency 'activesupport'
29
+ s.add_dependency 'activesupport', '>=3.0'
30
30
 
31
31
  s.add_development_dependency(%q<rspec>, [">=2.6"])
32
32
  s.add_development_dependency(%q<fakeweb>)
metadata CHANGED
@@ -1,12 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web_crawler
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 5
8
- - 0
9
- version: 0.5.0
4
+ prerelease:
5
+ version: 0.5.2
10
6
  platform: ruby
11
7
  authors:
12
8
  - Anton Sozontov
@@ -14,8 +10,7 @@ autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
12
 
17
- date: 2011-06-14 00:00:00 +04:00
18
- default_executable:
13
+ date: 2011-06-24 00:00:00 Z
19
14
  dependencies:
20
15
  - !ruby/object:Gem::Dependency
21
16
  name: thor
@@ -25,10 +20,6 @@ dependencies:
25
20
  requirements:
26
21
  - - ">="
27
22
  - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 14
31
- - 6
32
23
  version: 0.14.6
33
24
  type: :runtime
34
25
  version_requirements: *id001
@@ -40,9 +31,6 @@ dependencies:
40
31
  requirements:
41
32
  - - ">="
42
33
  - !ruby/object:Gem::Version
43
- segments:
44
- - 1
45
- - 16
46
34
  version: "1.16"
47
35
  type: :runtime
48
36
  version_requirements: *id002
@@ -54,10 +42,6 @@ dependencies:
54
42
  requirements:
55
43
  - - ">="
56
44
  - !ruby/object:Gem::Version
57
- segments:
58
- - 0
59
- - 5
60
- - 5
61
45
  version: 0.5.5
62
46
  type: :runtime
63
47
  version_requirements: *id003
@@ -69,9 +53,7 @@ dependencies:
69
53
  requirements:
70
54
  - - ">="
71
55
  - !ruby/object:Gem::Version
72
- segments:
73
- - 0
74
- version: "0"
56
+ version: "3.0"
75
57
  type: :runtime
76
58
  version_requirements: *id004
77
59
  - !ruby/object:Gem::Dependency
@@ -82,9 +64,6 @@ dependencies:
82
64
  requirements:
83
65
  - - ">="
84
66
  - !ruby/object:Gem::Version
85
- segments:
86
- - 2
87
- - 6
88
67
  version: "2.6"
89
68
  type: :development
90
69
  version_requirements: *id005
@@ -96,8 +75,6 @@ dependencies:
96
75
  requirements:
97
76
  - - ">="
98
77
  - !ruby/object:Gem::Version
99
- segments:
100
- - 0
101
78
  version: "0"
102
79
  type: :development
103
80
  version_requirements: *id006
@@ -117,6 +94,7 @@ files:
117
94
  - README
118
95
  - Rakefile
119
96
  - bin/wcrawler
97
+ - gem_graph.png
120
98
  - lib/ext/array.rb
121
99
  - lib/ext/hash.rb
122
100
  - lib/ext/http_response.rb
@@ -167,8 +145,7 @@ files:
167
145
  - spec/web_crawler/view_spec.rb
168
146
  - spec/web_crawler/web_crawler_api_base_class_spec.rb
169
147
  - web_crawler.gemspec
170
- has_rdoc: false
171
- homepage: ""
148
+ homepage: https://github.com/webgago/web_crawler
172
149
  licenses: []
173
150
 
174
151
  post_install_message:
@@ -181,21 +158,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
181
158
  requirements:
182
159
  - - ">="
183
160
  - !ruby/object:Gem::Version
184
- segments:
185
- - 0
186
161
  version: "0"
187
162
  required_rubygems_version: !ruby/object:Gem::Requirement
188
163
  none: false
189
164
  requirements:
190
165
  - - ">="
191
166
  - !ruby/object:Gem::Version
192
- segments:
193
- - 0
194
167
  version: "0"
195
168
  requirements: []
196
169
 
197
170
  rubyforge_project: web_crawler
198
- rubygems_version: 1.3.7
171
+ rubygems_version: 1.8.5
199
172
  signing_key:
200
173
  specification_version: 3
201
174
  summary: Web crawler help you with parse and collect data from the web