ro_crawler 0.0.4 → 0.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 138a31aa60488f939724b71f2d74b2abcfa07e16
4
- data.tar.gz: eb14725c6bce8e6150eaf11cfe66d13a120c934c
3
+ metadata.gz: 2bc31485c675436fdef8b64dfc058c8109509de6
4
+ data.tar.gz: 3090f40f7e0ce91a531b4b29c26fdda660e64988
5
5
  SHA512:
6
- metadata.gz: 0b38276470529131ac0fd360f31a2cf232fc8d026d51a27abd815af4f203cef29733226b6e9d4c1e974e82611bea4869a63ce01d4b954f6cf57477818c0cbda4
7
- data.tar.gz: 9fcdee015c33106ac2f82831136e6dddb314516143caab342a3e43bd460c33b95561cb00e9b2fbcca0987e6ed6fcf36a5dd74bc09cf6119eb188f1f90a0e9f68
6
+ metadata.gz: f8c5d1fa858d404688779e1b58618259736e27bc024b6a935c3fba53d32b9d70c720b875a524efe9b41a8da6bc0013ab13efa9624fb380adc84610bbd923f327
7
+ data.tar.gz: 2b71ebef44fec7ce565550c3322d9400087f0ed95733720153655a6931c553421d2429d4bf80fc67c8581a05c72bdf520ffed224bc4a7f76c79658bec12b40e0
@@ -1,7 +1,5 @@
1
1
  # require all files in dir name is same with __FILE_-
2
- autoload :FileActions, 'ro_support/file_actions'
3
- autoload :Array, 'ro_support/array'
4
- autoload :Log, 'ro_support/log'
2
+ require 'ro_support/array'
5
3
 
6
4
  include RoSupport::FileActions
7
5
  include RoSupport::Array
@@ -18,8 +18,8 @@ module RoCrawler
18
18
 
19
19
  def get_contents
20
20
  ro_raise(err "@link_titles is nil", output: ['@url']) if @link_titles.nil?
21
- @link_title_contents = @link_titles.dup
22
- @link_title_contents.each do |link_content|
21
+ @offers = @link_titles.dup
22
+ @offers.each do |link_content|
23
23
  if link_content[0][/http/]
24
24
  link = link_content[0]
25
25
  else
@@ -28,14 +28,23 @@ module RoCrawler
28
28
 
29
29
  link_content << intr = get_tags_attrs_from(link, @intr_selector, 'text')
30
30
  unless intr.is_a? String
31
- raise_log 'intr must be a string', 'ro_crawler_base.log'
31
+ puts_log 'intr must be a string', 'ro_crawler_base.log'
32
32
  end
33
33
  link_content
34
34
  end
35
35
  end
36
36
 
37
- def handler(&blk)
38
- handle @link_title_contents, &blk
37
+ def handler
38
+ ro_raise(err '@offers is empty') if @offers.empty?
39
+ if block_given?
40
+ if @offers.is_a?(Array)
41
+ @offers.each do |offer|
42
+ yield offer
43
+ end
44
+ else
45
+ raise "@offers is not a Array, @offers is #{@offers.class}"
46
+ end
47
+ end
39
48
  end
40
49
 
41
50
  def open_browser(driver)
@@ -126,11 +135,6 @@ module RoCrawler
126
135
  end
127
136
 
128
137
  def handle(results)
129
- if results.is_a?(Array)
130
- results.each do |result|
131
- yield result
132
- end
133
- end
134
138
  end
135
139
 
136
140
  def get_home_url(url)
@@ -1,3 +1,3 @@
1
1
  module RoCrawler
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -90,13 +90,13 @@ describe RoCrawler::Base do
90
90
  end
91
91
 
92
92
 
93
- it 'get tags from url' do
93
+ it 'get_tags_from_url' do
94
94
  expect(
95
95
  @b.get_tags_from("http://baidu.com", 'body').inner_html
96
96
  ).not_to be_nil
97
97
  end
98
98
 
99
- describe 'get attrs in tags' do
99
+ describe 'get_attrs_in_tags' do
100
100
  it 'when get two attribute' do
101
101
  tags = @b.get_tags_from("http://baidu.com", '#m p#nv a')
102
102
  @b.get_attrs_in(tags, 'href', 'text').each do |attrs|
@@ -121,9 +121,23 @@ describe RoCrawler::Base do
121
121
  end
122
122
  end
123
123
 
124
- it 'case2' do
124
+ it 'v2ex' do
125
125
  @b.get_tags_attrs_from("http://www.v2ex.com/t/80954#reply6", '.topic_content', 'text')
126
126
  end
127
+
128
+ it 'ruby_china' do
129
+ url = "http://ruby-china.org/topics/node25"
130
+ anchor_selector = ".title>a"
131
+ results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
132
+ expect(results.count).to be == 15
133
+ end
134
+
135
+ it 'yingjiesheng' do
136
+ url = "http://s.yingjiesheng.com/result.jsp?keyword=%E5%89%8D%E7%AB%AF&city=0&jobtype=0&do=1&stype=0"
137
+ anchor_selector = "h3.title>a"
138
+ results = @b.get_tags_attrs_from(url, anchor_selector, 'text')
139
+ expect(results.count).to be == 10
140
+ end
127
141
  end
128
142
 
129
143
  it 'handle result' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ro_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - ro
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-26 00:00:00.000000000 Z
11
+ date: 2013-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: ro_support
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: term-ansicolor
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: watir-rails
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -125,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
153
  version: '0'
126
154
  requirements: []
127
155
  rubyforge_project:
128
- rubygems_version: 2.0.7
156
+ rubygems_version: 2.1.5
129
157
  signing_key:
130
158
  specification_version: 4
131
159
  summary: ''