ro_crawler 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 138a31aa60488f939724b71f2d74b2abcfa07e16
4
- data.tar.gz: eb14725c6bce8e6150eaf11cfe66d13a120c934c
3
+ metadata.gz: 2bc31485c675436fdef8b64dfc058c8109509de6
4
+ data.tar.gz: 3090f40f7e0ce91a531b4b29c26fdda660e64988
5
5
  SHA512:
6
- metadata.gz: 0b38276470529131ac0fd360f31a2cf232fc8d026d51a27abd815af4f203cef29733226b6e9d4c1e974e82611bea4869a63ce01d4b954f6cf57477818c0cbda4
7
- data.tar.gz: 9fcdee015c33106ac2f82831136e6dddb314516143caab342a3e43bd460c33b95561cb00e9b2fbcca0987e6ed6fcf36a5dd74bc09cf6119eb188f1f90a0e9f68
6
+ metadata.gz: f8c5d1fa858d404688779e1b58618259736e27bc024b6a935c3fba53d32b9d70c720b875a524efe9b41a8da6bc0013ab13efa9624fb380adc84610bbd923f327
7
+ data.tar.gz: 2b71ebef44fec7ce565550c3322d9400087f0ed95733720153655a6931c553421d2429d4bf80fc67c8581a05c72bdf520ffed224bc4a7f76c79658bec12b40e0
@@ -1,7 +1,5 @@
1
1
  # require all files in dir name is same with __FILE_-
2
- autoload :FileActions, 'ro_support/file_actions'
3
- autoload :Array, 'ro_support/array'
4
- autoload :Log, 'ro_support/log'
2
+ require 'ro_support/array'
5
3
 
6
4
  include RoSupport::FileActions
7
5
  include RoSupport::Array
@@ -18,8 +18,8 @@ module RoCrawler
18
18
 
19
19
  def get_contents
20
20
  ro_raise(err "@link_titles is nil", output: ['@url']) if @link_titles.nil?
21
- @link_title_contents = @link_titles.dup
22
- @link_title_contents.each do |link_content|
21
+ @offers = @link_titles.dup
22
+ @offers.each do |link_content|
23
23
  if link_content[0][/http/]
24
24
  link = link_content[0]
25
25
  else
@@ -28,14 +28,23 @@ module RoCrawler
28
28
 
29
29
  link_content << intr = get_tags_attrs_from(link, @intr_selector, 'text')
30
30
  unless intr.is_a? String
31
- raise_log 'intr must be a string', 'ro_crawler_base.log'
31
+ puts_log 'intr must be a string', 'ro_crawler_base.log'
32
32
  end
33
33
  link_content
34
34
  end
35
35
  end
36
36
 
37
- def handler(&blk)
38
- handle @link_title_contents, &blk
37
+ def handler
38
+ ro_raise(err '@offers is empty') if @offers.empty?
39
+ if block_given?
40
+ if @offers.is_a?(Array)
41
+ @offers.each do |offer|
42
+ yield offer
43
+ end
44
+ else
45
+ raise "@offers is not a Array, @offers is #{@offers.class}"
46
+ end
47
+ end
39
48
  end
40
49
 
41
50
  def open_browser(driver)
@@ -126,11 +135,6 @@ module RoCrawler
126
135
  end
127
136
 
128
137
  def handle(results)
129
- if results.is_a?(Array)
130
- results.each do |result|
131
- yield result
132
- end
133
- end
134
138
  end
135
139
 
136
140
  def get_home_url(url)
@@ -1,3 +1,3 @@
1
1
  module RoCrawler
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -90,13 +90,13 @@ describe RoCrawler::Base do
90
90
  end
91
91
 
92
92
 
93
- it 'get tags from url' do
93
+ it 'get_tags_from_url' do
94
94
  expect(
95
95
  @b.get_tags_from("http://baidu.com", 'body').inner_html
96
96
  ).not_to be_nil
97
97
  end
98
98
 
99
- describe 'get attrs in tags' do
99
+ describe 'get_attrs_in_tags' do
100
100
  it 'when get two attribute' do
101
101
  tags = @b.get_tags_from("http://baidu.com", '#m p#nv a')
102
102
  @b.get_attrs_in(tags, 'href', 'text').each do |attrs|
@@ -121,9 +121,23 @@ describe RoCrawler::Base do
121
121
  end
122
122
  end
123
123
 
124
- it 'case2' do
124
+ it 'v2ex' do
125
125
  @b.get_tags_attrs_from("http://www.v2ex.com/t/80954#reply6", '.topic_content', 'text')
126
126
  end
127
+
128
+ it 'ruby_china' do
129
+ url = "http://ruby-china.org/topics/node25"
130
+ anchor_selector = ".title>a"
131
+ results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
132
+ expect(results.count).to be == 15
133
+ end
134
+
135
+ it 'yingjiesheng' do
136
+ url = "http://s.yingjiesheng.com/result.jsp?keyword=%E5%89%8D%E7%AB%AF&city=0&jobtype=0&do=1&stype=0"
137
+ anchor_selector = "h3.title>a"
138
+ results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
139
+ expect(results.count).to be == 10
140
+ end
127
141
  end
128
142
 
129
143
  it 'handle result' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ro_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - ro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-26 00:00:00.000000000 Z
11
+ date: 2013-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: ro_support
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: term-ansicolor
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: watir-rails
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -125,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
153
  version: '0'
126
154
  requirements: []
127
155
  rubyforge_project:
128
- rubygems_version: 2.0.7
156
+ rubygems_version: 2.1.5
129
157
  signing_key:
130
158
  specification_version: 4
131
159
  summary: ''