newrank 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/assets/newrank_md5.js +4 -4
- data/lib/newrank.rb +20 -8
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60f464166521ea508b11fd9bc823aa8b7c734009
|
4
|
+
data.tar.gz: 85f6bd6eba6af40f71c611f0e721d08f79b0685d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c474215faa650391ab334620d62559f470f5627c93f7b49d84390302a46685ead07c69fa41396b8862a275561323a7df8a0a8da5ab2f18f70d8dc83d4b8cd53e
|
7
|
+
data.tar.gz: 9b73e56483539d5c5c9a3e5e1ab70e6cf35250c2bef41da4f7fe75b16cd99c4222bc92752f654b28c900a2969ecc55d62555c73165e81ee4f494e0623b39b95b
|
data/lib/assets/newrank_md5.js
CHANGED
@@ -40,9 +40,9 @@ function newrank_md5(a) {
|
|
40
40
|
a[(b + 64 >>> 9 << 4) + 14] = b;
|
41
41
|
for (var c = 1732584193, d = -271733879, e = -1732584194, f = 271733878, g = 0; g < a.length; g += 16) {
|
42
42
|
var h = c
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
, i = d
|
44
|
+
, o = e
|
45
|
+
, p = f;
|
46
46
|
c = j(c, d, e, f, a[g + 0], 7, -680876936),
|
47
47
|
f = j(f, c, d, e, a[g + 1], 12, -389564586),
|
48
48
|
e = j(e, f, c, d, a[g + 2], 17, 606105819),
|
@@ -131,7 +131,7 @@ function newrank_md5(a) {
|
|
131
131
|
}
|
132
132
|
function n(a, b) {
|
133
133
|
var c = (65535 & a) + (65535 & b)
|
134
|
-
|
134
|
+
, d = (a >> 16) + (b >> 16) + (c >> 16);
|
135
135
|
return d << 16 | 65535 & c
|
136
136
|
}
|
137
137
|
function o(a, b) {
|
data/lib/newrank.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
require 'open-uri'
|
3
2
|
require 'rkelly'
|
4
3
|
require 'nokogiri'
|
@@ -46,7 +45,9 @@ class Newrank
|
|
46
45
|
nonce = gen_nonce
|
47
46
|
xyz = gen_xyz(nonce, uuid)
|
48
47
|
|
49
|
-
|
48
|
+
wait_for_seconds
|
49
|
+
|
50
|
+
posts = JSON.parse(RestClient.post("http://www.newrank.cn/xdnphb/detail/getAccountArticle", {uuid: uuid, nonce: nonce, xyz: xyz, flag: true}, {"User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36"}))
|
50
51
|
end
|
51
52
|
|
52
53
|
# crawl week data
|
@@ -67,6 +68,7 @@ class Newrank
|
|
67
68
|
|
68
69
|
# get Nokogiri Document
|
69
70
|
def document(newrank_account)
|
71
|
+
wait_for_seconds
|
70
72
|
url = 'http://www.newrank.cn/public/info/detail.html?account=' + newrank_account
|
71
73
|
Nokogiri::HTML(open(url, "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1", :read_timeout => 10), nil, 'utf-8')
|
72
74
|
end
|
@@ -75,11 +77,15 @@ class Newrank
|
|
75
77
|
def score_and_uuid(doc)
|
76
78
|
score, uuid = nil
|
77
79
|
|
78
|
-
script = doc.css("script")[0]
|
80
|
+
script = doc.css("script[type='text/javascript']")[0]
|
79
81
|
if !script.nil?
|
80
82
|
parser = RKelly::Parser.new
|
81
83
|
ast = parser.parse(script.text.strip)
|
84
|
+
|
85
|
+
# Find the first array node
|
82
86
|
array_node = ast.pointcut(RKelly::Nodes::ArrayNode).matches.first
|
87
|
+
|
88
|
+
# Find the first Element Node inside the array node and look up the score
|
83
89
|
element_node = array_node.pointcut(RKelly::Nodes::ElementNode).matches.first
|
84
90
|
json_data = element_node.nil? ? {} : JSON.parse(element_node.to_ecma)
|
85
91
|
if json_data["new_rank_index_mark"]
|
@@ -87,9 +93,15 @@ class Newrank
|
|
87
93
|
else
|
88
94
|
score = 0.0
|
89
95
|
end
|
90
|
-
|
91
|
-
|
92
|
-
|
96
|
+
|
97
|
+
# Find the node that holds the UUID
|
98
|
+
object_node = ast.pointcut(RKelly::Nodes::VarDeclNode).matches.select{|node| node.name == "fgkcdg"}.first
|
99
|
+
unless object_node.nil?
|
100
|
+
node = object_node.pointcut(RKelly::Nodes::PropertyNode).matches.select{|n| n.name == '"uuid"'}.first.value
|
101
|
+
uuid = node.value[1..-2]
|
102
|
+
else
|
103
|
+
uuid = "uuid nil"
|
104
|
+
end
|
93
105
|
end
|
94
106
|
|
95
107
|
return score, uuid
|
@@ -98,7 +110,7 @@ class Newrank
|
|
98
110
|
# wait for seconds
|
99
111
|
# instead of request too much
|
100
112
|
def wait_for_seconds
|
101
|
-
sleep(1 * rand)
|
113
|
+
sleep(1 * rand + 1)
|
102
114
|
end
|
103
115
|
|
104
116
|
# generate parameter nonce
|
@@ -120,7 +132,7 @@ class Newrank
|
|
120
132
|
|
121
133
|
# generate parameter xyz
|
122
134
|
def gen_xyz(nonce, uuid)
|
123
|
-
h = "/xdnphb/detail/getAccountArticle?AppKey=joker&uuid=#{uuid}&nonce=#{nonce}"
|
135
|
+
h = "/xdnphb/detail/getAccountArticle?AppKey=joker&flag=true&uuid=#{uuid}&nonce=#{nonce}"
|
124
136
|
_md5(h)
|
125
137
|
end
|
126
138
|
|