linkscape 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ module Linkscape
2
+ module Constants
3
+ module LinkMetrics
4
+
5
+ RequestBits = {
6
+ :flags => {
7
+ :name => 'Flags',
8
+ :flag => 2,
9
+ :desc => %Q[A bit field indicating a variety of attributes which apply to this link.]
10
+ },
11
+ :text => {
12
+ :name => 'Anchor Text',
13
+ :flag => 4,
14
+ :desc => %Q[The anchor text of the link, including any markup (e.g. image tags with alt text).]
15
+ },
16
+ :mozrank => {
17
+ :name => 'mozRank Passed',
18
+ :flag => 16,
19
+ :desc => %Q[The amount of mozRank passed by the link. Requesting this metric will provide both the pretty 10-point score and the raw score.]
20
+ }
21
+ }
22
+ RequestBits[:all] = {
23
+ :name => 'All columnts',
24
+ :flag => RequestBits.keys.inject(0) {|sum,k| sum + RequestBits[k][:flag]},
25
+ :desc => %Q[Requests all known columns from the API]
26
+ }
27
+
28
+ ResponseFlags = {
29
+ :no_follow => {
30
+ :name => 'No Follow',
31
+ :flag => 1,
32
+ :desc => %Q[The link in question bore a "rel=nofollow" directive indicating that no juice should flow over the link.]
33
+ },
34
+ :same_subdomain => {
35
+ :name => 'Same Subdomain',
36
+ :flag => 2,
37
+ :desc => %Q[The link is between two pages on the same domain. This is an internal link.]
38
+ },
39
+ :meta_refresh => {
40
+ :name => 'Meta Refresh',
41
+ :flag => 4,
42
+ :desc => %Q[The link is actually a meta refresh from the source page to the target.]
43
+ },
44
+ :same_ip_address => {
45
+ :name => 'Same IP Address',
46
+ :flag => 8,
47
+ :desc => %Q[The link is between two pages hosted on the same IP address, strongly indicating a potential administrative relationship between the two.]
48
+ },
49
+ :same_c_block => {
50
+ :name => 'Same C-Block',
51
+ :flag => 16,
52
+ :desc => %Q[The link is between two pages hosted on the same C Block of IP addresses, indicating a potential administrative relationship between the two.]
53
+ },
54
+ :redirect301 => {
55
+ :name => '301',
56
+ :flag => 64,
57
+ :desc => %Q[The link is a 301 redirect. The source page returned a 301 redirect to our crawler, indicating that the resource was available on the target.]
58
+ },
59
+ :redirect302 => {
60
+ :name => '302',
61
+ :flag => 128,
62
+ :desc => %Q[The link is a 302 redirect. The source page returned a 302 redirect to our crawler, indicating that the resource was temporarily available on the target.]
63
+ },
64
+ :no_script => {
65
+ :name => 'No Script',
66
+ :flag => 256,
67
+ :desc => %Q[The link was located within a noscript html block. This means the link may not have been visible to users using javascript.]
68
+ },
69
+ :off_screen => {
70
+ :name => 'Off Screen',
71
+ :flag => 512,
72
+ :desc => %Q[We determined that the link likely appears offscreen. This means that the link may not have been visible to most users.]
73
+ },
74
+ :meta_no_follow => {
75
+ :name => 'Meta No Follow',
76
+ :flag => 2048,
77
+ :desc => %Q[The link appeared on a page using a page level (meta) no follow directive. This link passes no juice.]
78
+ },
79
+ :same_root_domain => {
80
+ :name => 'Same Root Domain',
81
+ :flag => 4096,
82
+ :desc => %Q[The link is between two pages on the same root domain. The link is not internal, but this strongly indicates an administrative relationship between the two pages.]
83
+ },
84
+ :feed_autodiscovery => {
85
+ :name => 'Feed Autodiscovery',
86
+ :flag => 16384,
87
+ :desc => %Q[The link indicates a syndication feed (e.g. rss or atom) for the source page.]
88
+ },
89
+ :rel_canonical => {
90
+ :name => 'Rel Canonical',
91
+ :flag => 32768,
92
+ :desc => %Q[The link indicates a canonical form of the page using the rel=canonical directive]
93
+ }
94
+ }
95
+ ResponseFlagMap = {} # reverse lookup: flag bit value => flag key
96
+ ResponseFlags.each_pair {|flag_key,meta| ResponseFlagMap[meta[:flag]] = flag_key }
97
+
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,183 @@
1
+ module Linkscape
2
+ module Constants
3
+ module URLMetrics
4
+
5
+ RequestBits = {
6
+ :title => {
7
+ :name => 'Title',
8
+ :flag => 1,
9
+ :desc => %Q[The title of the page if available. For example: "Request-Response Format"]
10
+ },
11
+ :url => {
12
+ :name => 'URL',
13
+ :flag => 4,
14
+ :desc => %Q[The url of the page. For example: "apiwiki.seomoz.org/Request-Response+Format"]
15
+ },
16
+ :fq_domain => {
17
+ :name => 'Subdomain',
18
+ :flag => 8,
19
+ :desc => %Q[The subdomain of the url. For example: "apiwiki.seomoz.org"]
20
+ },
21
+ :pl_domain => {
22
+ :name => 'Root Domain',
23
+ :flag => 16,
24
+ :desc => %Q[The root domain of the url. For example: "seomoz.org"]
25
+ },
26
+ :external_links => {
27
+ :name => 'External Links',
28
+ :flag => 32,
29
+ :desc => %Q[The number of juice-passing external links to the url.]
30
+ },
31
+ :fq_domain_external_links => {
32
+ :name => 'Subdomain External Links',
33
+ :flag => 64,
34
+ :desc => %Q[The number of juice-passing external links to the subdomain of the url.]
35
+ },
36
+ :pl_domain_external_links => {
37
+ :name => 'Root Domain External Links',
38
+ :flag => 128,
39
+ :desc => %Q[The number of juice-passing external links to the root domain of the url.]
40
+ },
41
+ :juice_links => {
42
+ :name => 'Juice-Passing Links',
43
+ :flag => 256,
44
+ :desc => %Q[The number of juice-passing links (internal or external) to the url.]
45
+ },
46
+ :fq_domains_linking => {
47
+ :name => 'Subdomains Linking',
48
+ :flag => 512,
49
+ :desc => %Q[The number of subdomains with any pages linking to the url.]
50
+ },
51
+ :pl_domains_linking => {
52
+ :name => 'Root Domains Linking',
53
+ :flag => 1024,
54
+ :desc => %Q[The number of root domains with any pages linking to the url.]
55
+ },
56
+ :links => {
57
+ :name => 'Links',
58
+ :flag => 2048,
59
+ :desc => %Q[The number of links (juice-passing or not, internal or external) to the url.]
60
+ },
61
+ :fq_domain_links => {
62
+ :name => 'Subdomain Links',
63
+ :flag => 4294967296,
64
+ :desc => %Q[The number of links to any page on the subdomain of the url.]
65
+ },
66
+ :fq_domain_fq_domains_linking => {
67
+ :name => 'Subdomain Subdomains Linking',
68
+ :flag => 4096,
69
+ :desc => %Q[The number of subdomains with any pages linking to the subdomain of the url.]
70
+ },
71
+ :fq_domain_pl_domains_linking => {
72
+ :name => 'Subdomain Root Domains Linking',
73
+ :flag => 17179869184,
74
+ :desc => %Q[The number of domains with any pages linking to the subdomain of the url.]
75
+ },
76
+ :pl_domain_links => {
77
+ :name => 'Root Domain Links',
78
+ :flag => 8589934592,
79
+ :desc => %Q[The number of links to any page on the root domain of the url.]
80
+ },
81
+ :pl_domain_pl_domains_linking => {
82
+ :name => 'Root Domain Root Domains Linking',
83
+ :flag => 8192,
84
+ :desc => %Q[The number of root domains with any pages linking to the root domain of the url.]
85
+ },
86
+ :mozrank => {
87
+ :name => 'mozRank',
88
+ :flag => 16384,
89
+ :desc => %Q[The mozRank of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
90
+ },
91
+ :fq_domain_mozrank => {
92
+ :name => 'Subdomain mozRank',
93
+ :flag => 32768,
94
+ :desc => %Q[The mozRank of the subdomain of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
95
+ },
96
+ :pl_domain_mozrank => {
97
+ :name => 'Root Domain mozRank',
98
+ :flag => 65536,
99
+ :desc => %Q[The mozRank of the Root Domain of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
100
+ },
101
+ :moztrust => {
102
+ :name => 'mozTrust',
103
+ :flag => 131072,
104
+ :desc => %Q[The mozTrust of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
105
+ },
106
+ :fq_domain_moztrust => {
107
+ :name => 'Subdomain mozTrust',
108
+ :flag => 262144,
109
+ :desc => %Q[The mozTrust of the subdomain of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
110
+ },
111
+ :pl_domain_moztrust => {
112
+ :name => 'Root Domain mozTrust',
113
+ :flag => 524288,
114
+ :desc => %Q[The mozTrust of the root domain of the url. Requesting this metric will provide both the pretty 10-point score and the raw score.]
115
+ },
116
+ :external_mozrank => {
117
+ :name => 'External mozRank',
118
+ :flag => 1048576,
119
+ :desc => %Q[The portion of the url's mozRank coming from external links. Requesting this metric will provide both the pretty 10-point score and the raw score.]
120
+ },
121
+ :fq_domain_external_mozrank_sum_raw => {
122
+ :name => 'Subdomain External Domain Juice',
123
+ :flag => 2097152,
124
+ :desc => %Q[The portion of the mozRank of all pages on the subdomain coming from external links. Requesting this metric will provide both the pretty 10-point score and the raw score.]
125
+ },
126
+ :pl_domain_external_mozrank_sum_raw => {
127
+ :name => 'Root Domain External Domain Juice',
128
+ :flag => 4194304,
129
+ :desc => %Q[The portion of the mozRank of all pages on the root domain coming from external links. Requesting this metric will provide both the pretty 10-point score and the raw score.]
130
+ # source.External mozRank sum of all PL Domain Pages (raw) - 9.8334596959365e-11
131
+ },
132
+ :fq_domain_mozrank_sum_raw => {
133
+ :name => 'Subdomain Domain Juice',
134
+ :flag => 8388608,
135
+ :desc => %Q[The mozRank of all pages on the subdomain combined. Requesting this metric will provide both the pretty 10-point score and the raw score.]
136
+ },
137
+ :pl_domain_mozrank_sum_raw => {
138
+ :name => 'Root Domain Domain Juice',
139
+ :flag => 16777216,
140
+ :desc => %Q[The mozRank of all pages on the root domain combined. Requesting this metric will provide both the pretty 10-point score and the raw score.]
141
+ },
142
+ :canonical_url => {
143
+ :name => 'Canonical URL',
144
+ :flag => 268435456,
145
+ :desc => %Q[If the url canaonicalizes to a different form, that canonical form will be available in this field]
146
+ },
147
+ :status => {
148
+ :name => 'HTTP Status Code',
149
+ :flag => 536870912,
150
+ :desc => %Q[The HTTP status code recorded by Linkscape for this URL (if available)]
151
+ },
152
+
153
+ :page_authority => {
154
+ :name => 'Page Authority',
155
+ :flag => 34359738368,
156
+ :desc => %Q[The page authority of this URL. This will return the pretty 100-point score.]
157
+ },
158
+ :domain_authority => {
159
+ :name => 'Domain Authority',
160
+ :flag => 68719476736,
161
+ :desc => %Q[The page authority of all pages on the root domain. This will return the pretty 100-point score.]
162
+ },
163
+ :page_authority_raw => {
164
+ :name => 'Raw Page Authority',
165
+ :flag => 137438953472,
166
+ :desc => %Q[The page authority of this URL. This will return the raw score.]
167
+ },
168
+ :domain_authority_raw => {
169
+ :name => 'Raw Domain Authority',
170
+ :flag => 274877906944,
171
+ :desc => %Q[The page authority of all pages on the root domain. This will return the raw score.]
172
+ },
173
+
174
+ }
175
+ RequestBits[:all] = {
176
+ :name => 'All columnts',
177
+ :flag => RequestBits.keys.inject(0) {|sum,k| sum + RequestBits[k][:flag]},
178
+ :desc => %Q[Requests all known columns from the API]
179
+ }
180
+
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,4 @@
1
+ class RecursionError < StandardError; end # raised by Linkscape::Request#fetch when the redirect budget reaches 0
2
+ class AuthenticationError < StandardError; end # not raised in the visible source; presumably signals rejected API credentials
3
+ class InvalidArgument < StandardError; end # not raised in the visible source; presumably for malformed caller input
4
+ class MissingArgument < StandardError; end # not raised in the visible source; presumably for absent required options
@@ -0,0 +1,80 @@
1
+ module Linkscape
2
+ class Request
3
+ require 'net/http'
4
+ require 'uri'
5
+ require 'cgi'
6
+ # require 'base64'
7
+ # require 'rubygems'
8
+ # require 'hmac-sha1'
9
+
10
+ attr_accessor :requestURL
11
+
12
+ URL_TEMPLATE = %Q[http://:apiHost:/:apiRoot:/:api:/:url:?AccessID=:accessID:&Expires=:expiration:&Signature=:signature:]
13
+
14
+ def self.run(options)
15
+ self.new(options).run
16
+ end
17
+
18
+ def initialize(options)
19
+
20
+ case options[:url]
21
+ when String
22
+ new_vals = {:url => CGI::escape(options[:url].sub(/^https?:\/\//, '')) }
23
+ when Array
24
+ @body = options[:url].collect{ |u| u.sub(/^https?:\/\//, '') }
25
+ new_vals = {:url => ""}
26
+ else
27
+ raise "URL most be a String or an Array"
28
+ end
29
+
30
+ @requestURL = URL_TEMPLATE.template(signRequest(options.merge(new_vals)))
31
+ @requestURL += "&" + options[:query].collect{|k,v| "#{CGI::escape(k.to_s)}=#{CGI::escape(v.to_s)}"}.join('&') if options[:query] && Hash === options[:query]
32
+ @requestURL += "&" + options[:query] if options[:query] && String === options[:query]
33
+
34
+ options[:offset] = 0 if options[:offset] && options[:offset] < 0
35
+ @requestURL += "&Offset=#{options[:offset]}" if options[:offset]
36
+
37
+ options[:limit] = 1000 if options[:limit] && options[:limit] > 1000
38
+ @requestURL += "&Limit=#{options[:limit]}" if options[:limit]
39
+ end
40
+
41
+ def run
42
+ res = fetch(URI.parse(@requestURL))
43
+ # res = fetch(URI.parse('http://martian.at/other/ose.json'))
44
+ return Response.new(self, res)
45
+ end
46
+
47
+ def inspect
48
+ #<Linkscape::Request:0x1016228a0 @requestURL="http://lsapi.seomoz.com/linkscape/mozrank/www.martian.at%2F?AccessID=ose&Expires=1258772331&Signature=Hfwssn0ZbWMe9MEf6%2FWoHOGFHzQ%3D">
49
+ %Q[#<#{self.class}="#{@requestURL}">]
50
+ end
51
+
52
+ private
53
+ def signRequest options
54
+ Linkscape::Signer.signParams(options)
55
+ options
56
+ end
57
+
58
+ def fetch(uri, limit = 10)
59
+ # You should choose better exception.
60
+ raise RecursionError, 'HTTP redirect too deep' if limit == 0
61
+
62
+ # Fetch with a POST of thers is a body
63
+ response = if @body
64
+ http = Net::HTTP.new(uri.host, uri.port)
65
+ request = Net::HTTP::Post.new(uri.request_uri)
66
+ request.body = @body.to_json
67
+ http.request(request)
68
+ else
69
+ Net::HTTP.get_response(uri)
70
+ end
71
+
72
+
73
+
74
+ return fetch(response['location'], limit - 1) if Net::HTTPSuccess == response
75
+
76
+ response
77
+ end
78
+
79
+ end
80
+ end
@@ -0,0 +1,138 @@
1
+ require 'forwardable'
2
+ module Linkscape
3
+ class Response
4
+ extend Forwardable
5
+
6
+ class ResponseData
7
+ include Enumerable
8
+ extend Forwardable
9
+
10
+ attr_reader :type, :subjects
11
+
12
+ class Flags
13
+ def initialize(bitfield, type)
14
+ @value = bitfield
15
+ @flags = Linkscape::Constants::LinkMetrics::ResponseFlags.to_a.collect{|k,vv| k if (@value & vv[:flag]) == vv[:flag]}.compact if type == :link
16
+ @flags = Linkscape::Constants::AnchorMetrics::ResponseFlags.to_a.collect{|k,vv| k if (@value & vv[:flag]) == vv[:flag]}.compact if type == :anchor
17
+ end
18
+ def [](key); @flags.include? key.to_sym; end
19
+ def to_a; @flags; end
20
+ def to_hash; @flags.inject({}){|h,f|h[f]=true;h}; end
21
+ def to_s
22
+ %Q[#{@value}=#{self.to_a.inspect}]
23
+ end
24
+ end
25
+
26
+ def initialize(data, type=nil)
27
+ @data = data
28
+ @type = if type
29
+ type.to_sym
30
+ elsif Hash === @data
31
+ :hash
32
+ elsif Array === @data
33
+ :array
34
+ end
35
+ @data.symbolize_keys! if Hash === @data
36
+ @data = @data.collect{|d|ResponseData.new(d)} if Array === @data
37
+
38
+ if @type == :hash
39
+ subdatas = {}
40
+ @data.each do |k,v|
41
+ if field = Linkscape::Constants::ResponseFields[k]
42
+ if subject = field[:subject]
43
+ subdatas[subject] ||= {}
44
+ v = ResponseData::Flags.new(v, field[:bitfield]) if field[:bitfield]
45
+ subdatas[subject][field[:key]] = v
46
+ end
47
+ end
48
+ end
49
+ @subjects = []
50
+ subdatas.each do |k,v|
51
+ @data[k] = ResponseData.new(v, k)
52
+ @subjects.push k
53
+ end
54
+ end
55
+
56
+ end
57
+
58
+ def_delegators :@data, :length, :each, :each_index, :map, :collect, :select, :keys
59
+ def [](*args)
60
+ if Array === @data
61
+ @data[*args]
62
+ else
63
+ k = args.first.to_sym
64
+ if @subjects && @subjects.length == 1 && @data[@subjects.first][k]
65
+ @data[@subjects.first][k]
66
+ else
67
+ @data[args.first.to_sym]
68
+ end
69
+ end
70
+ rescue
71
+ nil
72
+ end
73
+
74
+ def to_s(indent="")
75
+ printer = Proc.new do |h,prefix|
76
+ o = ""
77
+ h.sort{|l,r|l[0].to_s<=>r[0].to_s}.each do |k,v|
78
+ field = Linkscape::Constants::ResponseFields[k]
79
+ v = v.to_s
80
+ # v = ((field && field[:bitfield]) ? v.to_a.inspect : v).to_s
81
+ #desc = field ? field[:name] : '*'+k.inspect
82
+ #o += %Q[%s%-#{Linkscape::Constants::LongestNameLength+15}.#{Linkscape::Constants::LongestNameLength+15}s - %s\n] % [prefix, desc, v]
83
+ o += %Q[%s%-#{Linkscape::Constants::LongestKeyLength+5}.#{Linkscape::Constants::LongestKeyLength+5}s - %s\n] % [prefix, k, v]
84
+ end
85
+ o
86
+ end
87
+ if type == :array
88
+ o = ""
89
+ @data.each_with_index do |d,idx|
90
+ o += %Q[#{indent}[#{idx}]\n] + d.to_s("#{indent} ") + "\n"
91
+ end
92
+ o
93
+ elsif @subjects
94
+ o = ""
95
+ @subjects.each do |s|
96
+ o += %Q[#{indent}#{s}\n]
97
+ o += printer.call(@data[s], "#{indent} #{s}.")
98
+ end
99
+ o
100
+ else
101
+ printer.call(@data, indent)
102
+ end
103
+ end
104
+
105
+ def inspect
106
+ #<Linkscape::Response:0x10161d8a0 @response=#<Net::HTTPUnauthorized 401 Unauthorized readbody=true>>
107
+ %Q[#<#{self.class} @type=#{@type.inspect}] + (@subjects ? %Q[ @subjects=#{@subjects.inspect}] : "") + %Q[>]
108
+ end
109
+
110
+ end
111
+
112
+ require 'rubygems'
113
+ require 'json'
114
+
115
+ attr_accessor :request, :response, :data, :valid
116
+
117
+ def initialize(request, response)
118
+ @valid = false
119
+ @request = request
120
+ @response = response
121
+ if Net::HTTPSuccess === response
122
+ @data = ResponseData.new(JSON.parse(response.body))
123
+ @valid = true
124
+ end
125
+ end
126
+
127
+ def_delegators :@data, :[], :length, :each, :each_index, :map, :collect, :select
128
+
129
+ def valid?; valid; end # predicate form of the :valid accessor; initialize sets it to true only for a Net::HTTPSuccess response
130
+
131
+ def inspect
132
+ #<Linkscape::Response:0x10161d8a0 @response=#<Net::HTTPUnauthorized 401 Unauthorized readbody=true>>
133
+ %Q[#<#{self.class} @response=#{@response.class.inspect} @request="#{@request.requestURL}">]
134
+ end
135
+
136
+ end
137
+
138
+ end