ro_crawler 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ro_crawler.rb +1 -3
- data/lib/ro_crawler/base.rb +14 -10
- data/lib/ro_crawler/version.rb +1 -1
- data/spec/ro_crawler/base_spec.rb +17 -3
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bc31485c675436fdef8b64dfc058c8109509de6
|
4
|
+
data.tar.gz: 3090f40f7e0ce91a531b4b29c26fdda660e64988
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8c5d1fa858d404688779e1b58618259736e27bc024b6a935c3fba53d32b9d70c720b875a524efe9b41a8da6bc0013ab13efa9624fb380adc84610bbd923f327
|
7
|
+
data.tar.gz: 2b71ebef44fec7ce565550c3322d9400087f0ed95733720153655a6931c553421d2429d4bf80fc67c8581a05c72bdf520ffed224bc4a7f76c79658bec12b40e0
|
data/lib/ro_crawler.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# require all files in the directory whose name matches __FILE__
|
2
|
-
|
3
|
-
autoload :Array, 'ro_support/array'
|
4
|
-
autoload :Log, 'ro_support/log'
|
2
|
+
require 'ro_support/array'
|
5
3
|
|
6
4
|
include RoSupport::FileActions
|
7
5
|
include RoSupport::Array
|
data/lib/ro_crawler/base.rb
CHANGED
@@ -18,8 +18,8 @@ module RoCrawler
|
|
18
18
|
|
19
19
|
def get_contents
|
20
20
|
ro_raise(err "@link_titles is nil", output: ['@url']) if @link_titles.nil?
|
21
|
-
@
|
22
|
-
@
|
21
|
+
@offers = @link_titles.dup
|
22
|
+
@offers.each do |link_content|
|
23
23
|
if link_content[0][/http/]
|
24
24
|
link = link_content[0]
|
25
25
|
else
|
@@ -28,14 +28,23 @@ module RoCrawler
|
|
28
28
|
|
29
29
|
link_content << intr = get_tags_attrs_from(link, @intr_selector, 'text')
|
30
30
|
unless intr.is_a? String
|
31
|
-
|
31
|
+
puts_log 'intr must be a string', 'ro_crawler_base.log'
|
32
32
|
end
|
33
33
|
link_content
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
def handler
|
38
|
-
|
37
|
+
def handler
|
38
|
+
ro_raise(err '@offers is empty') if @offers.empty?
|
39
|
+
if block_given?
|
40
|
+
if @offers.is_a?(Array)
|
41
|
+
@offers.each do |offer|
|
42
|
+
yield offer
|
43
|
+
end
|
44
|
+
else
|
45
|
+
raise "@offers is not a Array, @offers is #{@offers.class}"
|
46
|
+
end
|
47
|
+
end
|
39
48
|
end
|
40
49
|
|
41
50
|
def open_browser(driver)
|
@@ -126,11 +135,6 @@ module RoCrawler
|
|
126
135
|
end
|
127
136
|
|
128
137
|
def handle(results)
|
129
|
-
if results.is_a?(Array)
|
130
|
-
results.each do |result|
|
131
|
-
yield result
|
132
|
-
end
|
133
|
-
end
|
134
138
|
end
|
135
139
|
|
136
140
|
def get_home_url(url)
|
data/lib/ro_crawler/version.rb
CHANGED
data/spec/ro_crawler/base_spec.rb
CHANGED
@@ -90,13 +90,13 @@ describe RoCrawler::Base do
|
|
90
90
|
end
|
91
91
|
|
92
92
|
|
93
|
-
it '
|
93
|
+
it 'get_tags_from_url' do
|
94
94
|
expect(
|
95
95
|
@b.get_tags_from("http://baidu.com", 'body').inner_html
|
96
96
|
).not_to be_nil
|
97
97
|
end
|
98
98
|
|
99
|
-
describe '
|
99
|
+
describe 'get_attrs_in_tags' do
|
100
100
|
it 'when get two attribute' do
|
101
101
|
tags = @b.get_tags_from("http://baidu.com", '#m p#nv a')
|
102
102
|
@b.get_attrs_in(tags, 'href', 'text').each do |attrs|
|
@@ -121,9 +121,23 @@ describe RoCrawler::Base do
|
|
121
121
|
end
|
122
122
|
end
|
123
123
|
|
124
|
-
it '
|
124
|
+
it 'v2ex' do
|
125
125
|
@b.get_tags_attrs_from("http://www.v2ex.com/t/80954#reply6", '.topic_content', 'text')
|
126
126
|
end
|
127
|
+
|
128
|
+
it 'ruby_china' do
|
129
|
+
url = "http://ruby-china.org/topics/node25"
|
130
|
+
anchor_selector = ".title>a"
|
131
|
+
results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
|
132
|
+
expect(results.count).to be == 15
|
133
|
+
end
|
134
|
+
|
135
|
+
it 'yingjiesheng' do
|
136
|
+
url = "http://s.yingjiesheng.com/result.jsp?keyword=%E5%89%8D%E7%AB%AF&city=0&jobtype=0&do=1&stype=0"
|
137
|
+
anchor_selector = "h3.title>a"
|
138
|
+
results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
|
139
|
+
expect(results.count).to be == 10
|
140
|
+
end
|
127
141
|
end
|
128
142
|
|
129
143
|
it 'handle result' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ro_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-10-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: ro_support
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: term-ansicolor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: watir-rails
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
153
|
version: '0'
|
126
154
|
requirements: []
|
127
155
|
rubyforge_project:
|
128
|
-
rubygems_version: 2.
|
156
|
+
rubygems_version: 2.1.5
|
129
157
|
signing_key:
|
130
158
|
specification_version: 4
|
131
159
|
summary: ''
|