baiduserp 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -5
- data/lib/baiduserp/parser.rb +3 -5
- data/lib/baiduserp/version.rb +1 -1
- metadata +3 -4
data/README.md
CHANGED
@@ -6,17 +6,17 @@
|
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
9
|
-
1
|
9
|
+
1 系统要求
|
10
10
|
|
11
11
|
Linux或Mac. Linux最好使用新版本的Ubuntu或Fedora系列.
|
12
12
|
|
13
|
-
2
|
13
|
+
2 安装ruby环境
|
14
14
|
|
15
15
|
只支持ruby1.9及以上. 最好的安装ruby的方法是通过[RVM](https://rvm.io/),RVM的使用方法可以搜索一下,有很多教程.
|
16
16
|
|
17
17
|
在最新的Ubuntu或Fedora系列的Linux中,也可以通过apt-get或yum安装ruby1.9.
|
18
18
|
|
19
|
-
3
|
19
|
+
3 安装gem依赖
|
20
20
|
|
21
21
|
需要依赖nokogiri这个gem.而这个gem需要系统中的两个库.
|
22
22
|
所以在ubuntu或者fedora下需要
|
@@ -30,7 +30,7 @@ $ sudo yum install libxml2-devel libxml2 libxslt libxslt-devel # fedora
|
|
30
30
|
|
31
31
|
`$ gem install nokogiri`
|
32
32
|
|
33
|
-
4
|
33
|
+
4 最后我们安装 baiduserp gem
|
34
34
|
|
35
35
|
`$ gem install baiduserp`
|
36
36
|
|
@@ -190,4 +190,4 @@ $ baiduserp -s 香港
|
|
190
190
|
4. Push to the branch (`git push origin my-new-feature`)
|
191
191
|
5. Create new Pull Request
|
192
192
|
|
193
|
-
或者可以到Issue页面提交问题,可以提BUG,新的需求,各种建议,等等.
|
193
|
+
或者可以到Issue页面提交问题,可以提BUG,新的需求,各种建议,等等.
|
data/lib/baiduserp/parser.rb
CHANGED
@@ -5,8 +5,6 @@ require 'open-uri'
|
|
5
5
|
|
6
6
|
module Baiduserp
|
7
7
|
class Parser
|
8
|
-
BAIDU_RESULT = /找到相关结果(.*)个/
|
9
|
-
|
10
8
|
def parse(html)
|
11
9
|
@html = html
|
12
10
|
@doc = Nokogiri::HTML(@html)
|
@@ -27,7 +25,7 @@ module Baiduserp
|
|
27
25
|
else
|
28
26
|
html = open(URI.escape(file_path))
|
29
27
|
end
|
30
|
-
html = html.read.encode('UTF-8')
|
28
|
+
html = html.read.encode!('UTF-8','UTF-8',:invalid => :replace)
|
31
29
|
parse html
|
32
30
|
end
|
33
31
|
|
@@ -106,7 +104,7 @@ module Baiduserp
|
|
106
104
|
result[:title] = get_content_safe(table.css('h3'))
|
107
105
|
result[:content] = parse_serp_content(id)
|
108
106
|
|
109
|
-
if id
|
107
|
+
if id >= 3000 # sem ads
|
110
108
|
result[:paid] = 1
|
111
109
|
else # organic results
|
112
110
|
result[:paid] = 0
|
@@ -151,7 +149,7 @@ module Baiduserp
|
|
151
149
|
end
|
152
150
|
|
153
151
|
def parse_serp_result_num
|
154
|
-
str = @html.scan(BAIDU_RESULT).join
|
152
|
+
str = @html.scan(/找到相关结果(.*)个/).join
|
155
153
|
str = str.gsub('约','')
|
156
154
|
if str.include?('万')
|
157
155
|
parts = str.split('万')
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -60,9 +60,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
60
60
|
version: '0'
|
61
61
|
requirements: []
|
62
62
|
rubyforge_project:
|
63
|
-
rubygems_version: 1.8.
|
63
|
+
rubygems_version: 1.8.25
|
64
64
|
signing_key:
|
65
65
|
specification_version: 3
|
66
66
|
summary: Baidu SERP
|
67
67
|
test_files: []
|
68
|
-
has_rdoc:
|