query 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -5
- data/Gemfile +0 -2
- data/README.md +1 -0
- data/lib/query/engine/baidu.rb +67 -64
- data/lib/query/version.rb +1 -1
- data/query.gemspec +1 -0
- data/spec/spec_helper.rb +0 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e0dd534e82e96f66579c0d5b9533be3d3f63983
|
4
|
+
data.tar.gz: ccc1e5baee2d1fe441b98bc68fed3be70d603142
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26b548b92c64dd7431b459344c591634a3d24ebe79f6ce2c9632503f4f801ef8f8a502cfbaf3d897190cc954dcc90ce01e4fbf1477d68d4507da0c1a7778be75
|
7
|
+
data.tar.gz: 055fac0339f710c6f931778f1e7072f127684173a16d32bedbc1aedfa05422d7bb4c581a6c5be9994dc6dbf9510966508eb6587f0bdb88e3872c14ebfd0275f2
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
data/lib/query/engine/baidu.rb
CHANGED
@@ -1,24 +1,25 @@
|
|
1
1
|
module Query
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
2
|
+
module Engine
|
3
|
+
class Baidu
|
4
|
+
include Query::Engine
|
5
|
+
Host = 'www.baidu.com'
|
6
|
+
BaseUri = 'http://www.baidu.com/s?'
|
7
|
+
Options = {
|
8
|
+
:headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
|
9
|
+
}
|
10
|
+
|
11
|
+
def self.suggestions(query)
|
12
|
+
require 'json'
|
13
|
+
query = URI.encode(query)
|
14
|
+
suggestions = HTTParty.get("https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=#{query}&json=1&p=3&sid=&req=2&csor=0&cb=jQuery1102036467162938788533_1437556180622&_=#{(Time.now.to_f*1000).to_i}")
|
15
|
+
suggestions.force_encoding('GB18030').encode('UTF-8').delete('jQuery1102036467162938788533_1437556180622(').delete(')').scan(/"q": "([^"]+)"/).flatten.uniq
|
16
|
+
end
|
17
|
+
#to find out the real url for something like 'www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS'
|
18
|
+
# def url(id)
|
19
|
+
# a = Mechanize.new
|
20
|
+
# a.redirect_ok=false
|
21
|
+
# return a.head("http://www.baidu.com/link?url=#{id}").header['location']
|
22
|
+
# end
|
22
23
|
|
23
24
|
=begin
|
24
25
|
def extend(words,level=3,sleeptime=1)
|
@@ -38,33 +39,35 @@ module Query
|
|
38
39
|
end
|
39
40
|
=end
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
def self.popular?(wd)
|
43
|
+
return HTTParty.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").include?"boxFlash"
|
44
|
+
end
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
46
|
+
def self.query(wd,params={})
|
47
|
+
q = Array.new
|
48
|
+
q << "wd=#{URI.encode(wd)}"
|
49
|
+
q << "rn=#{@perpage.to_i}" if @perpage
|
50
|
+
params.each do |k,v|
|
51
|
+
q << "#{k.to_s}=#{v.to_s}"
|
52
|
+
end
|
53
|
+
queryStr = q.join("&")
|
54
|
+
#uri = URI.encode((BaseUri + queryStr).encode('GBK'))
|
55
|
+
# uri = URI.encode((BaseUri + queryStr))
|
56
|
+
uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
|
57
|
+
# begin
|
58
|
+
# @page = @a.get uri
|
59
|
+
p uri
|
60
|
+
p Options
|
61
|
+
@page = HTTParty.get(uri,Options)
|
62
|
+
r = Query::Result::Baidu.new(@page)
|
63
|
+
r.baseuri = uri
|
64
|
+
r.pagenumber = 1
|
65
|
+
r.perpage = @perpage
|
66
|
+
r
|
67
|
+
# rescue Exception => e
|
68
|
+
# warn e.to_s
|
69
|
+
# return false
|
70
|
+
# end
|
68
71
|
=begin
|
69
72
|
query = "#{query}"
|
70
73
|
@uri = BaseUri+URI.encode(query.encode('GBK'))
|
@@ -75,26 +78,26 @@ module Query
|
|
75
78
|
@maxpage =10 if @maxpage>10
|
76
79
|
@currpage =0
|
77
80
|
=end
|
78
|
-
|
81
|
+
end
|
79
82
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
83
|
+
#site:xxx.yyy.com
|
84
|
+
def self.query_within(host,query)
|
85
|
+
self.query("#{query} site:#{host}")
|
86
|
+
end
|
84
87
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
+
def self.pages(host)
|
89
|
+
self.query("site:#{host}")
|
90
|
+
end
|
88
91
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
92
|
+
#domain:xxx.yyy.com/path/file.html
|
93
|
+
def self.links(uri)
|
94
|
+
self.query("domain:\"#{uri}\"")
|
95
|
+
end
|
93
96
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
97
|
+
#site:xxx.yyy.com inurl:zzz
|
98
|
+
def self.pages_with(host,string)
|
99
|
+
self.query("site:#{host} inurl:#{string}")
|
100
|
+
end
|
99
101
|
end
|
100
|
-
end
|
102
|
+
end
|
103
|
+
end
|
data/lib/query/version.rb
CHANGED
data/query.gemspec
CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rspec"
|
22
23
|
spec.add_development_dependency "rake"
|
23
24
|
spec.add_dependency "nokogiri"
|
24
25
|
spec.add_dependency "addressable"
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: query
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoaqua
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rake
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
168
|
version: '0'
|
155
169
|
requirements: []
|
156
170
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.
|
171
|
+
rubygems_version: 2.4.8
|
158
172
|
signing_key:
|
159
173
|
specification_version: 4
|
160
174
|
summary: I dont have time to write the document yet. Usage is almost within rspec
|