trackablaze 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/trackers/pagerank.rb +144 -0
- data/trackers/pagerank.yml +12 -0
- data/version.rb +1 -1
- metadata +3 -1
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require 'socket'
|
|
2
|
+
|
|
3
|
+
module Trackablaze
|
|
4
|
+
class Pagerank < Tracker
|
|
5
|
+
# Computes metrics for every tracker item.
#
# Returns an array holding one metrics hash per element of +tracker_items+,
# in the same order, as produced by get_metrics_single.
def get_metrics(tracker_items)
  tracker_items.map do |item|
    get_metrics_single(item)
  end
end
|
|
8
|
+
|
|
9
|
+
# Builds the metrics hash for a single tracker item.
#
# Reads the "domain" entry of +tracker_item.params+. When it is missing or
# empty, or when no pagerank could be obtained for it, an error is recorded
# on the hash through add_error; otherwise the value is stored under the
# 'pagerank' key. The hash is returned in every case.
def get_metrics_single(tracker_item)
  result = {}
  domain = tracker_item.params["domain"]

  if domain.nil? || domain.empty?
    add_error(result, "No domain supplied", "domain")
    return result
  end

  # Best effort: any StandardError raised by the lookup is treated the same
  # as "no rank found" and reported through the error entry below.
  rank =
    begin
      getpr(domain)
    rescue
      nil
    end

  if rank.nil?
    add_error(result, "Could not find pagerank. Is domain specified properly (e.g. google.com)?", "domain")
  else
    result['pagerank'] = rank
  end

  result
end
|
|
32
|
+
|
|
33
|
+
#------------------------------
|
|
34
|
+
# Ruby conversion by Amol Kelkar of
|
|
35
|
+
# PageRank Lookup v1.1 by HM2K at http://www.hm2k.com/projects/pagerank
|
|
36
|
+
# based on an algorithm found here: http://pagerank.gamesaga.net/
|
|
37
|
+
#------------------------------
|
|
38
|
+
|
|
39
|
+
#convert a string to a 32-bit integer
|
|
40
|
+
# Folds the bytes of +str+ into a rolling checksum.
#
# For every byte the running value is multiplied by +magic+, reduced modulo
# 2**32 once it reaches that bound, and then the byte value is added.
#
# str   - String to hash; it is processed byte by byte so that multi-byte
#         characters hash the same way as in the byte-oriented PHP routine
#         this code was converted from.
# check - Integer seed for the running value.
# magic - Integer multiplier applied before each byte is added.
#
# Returns the final Integer. Because the last byte is added after the
# reduction, the result may slightly exceed 2**32 - 1.
def str_to_num(str, check, magic)
  int32_unit = 4_294_967_296 # 2**32

  str.each_byte do |byte|
    check *= magic

    # Ruby integers never overflow, so the 32-bit wrap-around has to be
    # applied explicitly. After this reduction the value lies in
    # 0...2**32, so no separate negative-range correction is needed.
    check %= int32_unit if check >= int32_unit

    check += byte
  end

  check
end
|
|
59
|
+
|
|
60
|
+
# generate a hash for a url
|
|
61
|
+
# Derives the integer hash of +str+ that check_hash turns into the
# checksum string.
#
# Two str_to_num checksums are computed with different seeds and
# multipliers. The first is compacted by three shift-and-mask steps, then
# its bits are interleaved with selected bits of the second.
def hash_url(str)
  primary = str_to_num(str, 0x1505, 0x21)
  secondary = str_to_num(str, 0, 0x1003F)

  mixed = primary >> 2
  # Each step keeps the low bits selected by the second mask and moves the
  # remaining bits four places down under the first mask.
  [
    [0x3FFFFC0, 0x3F],
    [0x3FFC00, 0x3FF],
    [0x3C000, 0x3FFF]
  ].each do |upper_mask, lower_mask|
    mixed = ((mixed >> 4) & upper_mask) | (mixed & lower_mask)
  end

  low_part = ((((mixed & 0x3C0) << 4) | (mixed & 0x3C)) << 2) | (secondary & 0xF0F)
  high_part = ((((mixed & 0xFFFFC000) << 4) | (mixed & 0x3C00)) << 0xA) | (secondary & 0xF0F0000)

  low_part | high_part
end
|
|
75
|
+
|
|
76
|
+
# generate a checksum for the hash string
|
|
77
|
+
# Builds the checksum string for +hashnum+.
#
# The decimal digits of +hashnum+ are summed from right to left, with every
# second digit doubled and replaced by the sum of its own digits. A single
# check digit is derived from that sum.
#
# Returns a String made of "7", the check digit and the decimal digits.
def check_hash(hashnum)
  digits = sprintf('%u', hashnum)

  total = 0
  digits.reverse.each_char.each_with_index do |char, position|
    value = char.to_i
    if position.odd?
      doubled = value + value
      value = (doubled / 10) + (doubled % 10)
    end
    total += value
  end

  check_byte = total % 10
  unless check_byte.zero?
    check_byte = 10 - check_byte
    # With an odd number of digits the check digit is halved, after
    # adding 9 when it is odd.
    if digits.length.odd?
      check_byte += 9 if check_byte.odd?
      check_byte >>= 1
    end
  end

  "7#{check_byte}#{digits}"
end
|
|
107
|
+
|
|
108
|
+
# return the pagerank checksum hash
|
|
109
|
+
# Returns the checksum string for +url+: the result of check_hash applied
# to the value hash_url computes for it.
def getch(url)
  url_hash = hash_url(url)
  check_hash(url_hash)
end
|
|
112
|
+
|
|
113
|
+
# return the pagerank figure
|
|
114
|
+
# Queries the toolbar endpoint for the pagerank figure of +url+.
#
# Opens a plain TCP connection to port 80 of the query host, sends a
# hand-built HTTP/1.1 GET request carrying the getch checksum for +url+,
# and scans the response line by line for the "Rank_" marker.
#
# url - String appended to "info:" in the query.
#
# Returns the rank as an Integer, or nil when no line of the response
# contains "Rank_". Errors raised while connecting, writing or reading
# propagate to the caller. The socket is closed on every exit path.
def getpr(url)
  google_host = 'toolbarqueries.google.com'
  google_ua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5'

  checksum = getch(url)

  # NOTE(review): url is interpolated into the request line without any
  # escaping; confirm callers only pass plain host names.
  request = "GET /search?client=navclient-auto&ch=#{checksum}&features=Rank&q=info:#{url} HTTP/1.1\r\n" \
            "User-Agent: #{google_ua}\r\n" \
            "Host: #{google_host}\r\n" \
            "Connection: Close\r\n\r\n"

  socket = TCPSocket.open(google_host, 80)
  begin
    socket.write(request)

    while (line = socket.gets)
      marker_at = line.index("Rank_")
      next if marker_at.nil?

      # Take up to two characters starting nine characters after the start
      # of the marker (presumably a line of the form "Rank_1:1:<n>").
      return line.slice(marker_at + 9, 2).to_i
    end

    nil
  ensure
    # Runs on the early return above and on exceptions as well, so the
    # connection is never left open.
    socket.close
  end
end
|
|
139
|
+
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
|
data/version.rb
CHANGED
metadata
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: trackablaze
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease:
|
|
5
|
-
version: 0.1.2
|
|
5
|
+
version: 0.1.3
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
8
8
|
- Amol Kelkar
|
|
@@ -118,6 +118,8 @@ files:
|
|
|
118
118
|
- lib/trackablaze.rb
|
|
119
119
|
- trackers/facebook_page.rb
|
|
120
120
|
- trackers/facebook_page.yml
|
|
121
|
+
- trackers/pagerank.rb
|
|
122
|
+
- trackers/pagerank.yml
|
|
121
123
|
- trackers/twitter.rb
|
|
122
124
|
- trackers/twitter.yml
|
|
123
125
|
- README.markdown
|