web_analytics_discovery 2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +2 -0
- data/LICENSE +661 -0
- data/README.md +133 -0
- data/Rakefile +7 -0
- data/bin/web_analytics_discover +77 -0
- data/lib/web_analytics_discovery.rb +23 -0
- data/lib/web_analytics_discovery/grabber/alexa.rb +33 -0
- data/lib/web_analytics_discovery/grabber/googleanalytics.rb +29 -0
- data/lib/web_analytics_discovery/grabber/liveinternet.rb +61 -0
- data/lib/web_analytics_discovery/grabber/mailru.rb +89 -0
- data/lib/web_analytics_discovery/grabber/openstat.rb +44 -0
- data/lib/web_analytics_discovery/grabber/quantcast.rb +84 -0
- data/lib/web_analytics_discovery/grabber/rambler.rb +100 -0
- data/lib/web_analytics_discovery/grabber/tns.rb +117 -0
- data/lib/web_analytics_discovery/grabber/yandexmetrika.rb +54 -0
- data/lib/web_analytics_discovery/grabberutils.rb +54 -0
- data/lib/web_analytics_discovery/version.rb +3 -0
- data/spec/alexa_spec.rb +13 -0
- data/spec/liveinternet_spec.rb +15 -0
- data/spec/mailru_spec.rb +36 -0
- data/spec/openstat_spec.rb +24 -0
- data/spec/quantcast_spec.rb +59 -0
- data/spec/rambler_spec.rb +63 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/tns_spec.rb +21 -0
- data/web_analytics_discovery.gemspec +50 -0
- metadata +158 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'web_analytics_discovery/grabberutils'

module WebAnalyticsDiscovery
  # Grabber for Openstat (ex-SpyLog) counters: detects a counter ID on a
  # downloaded page and scrapes daily/monthly audience numbers from the
  # public Openstat rating pages.
  class Openstat
    include GrabberUtils

    BIG_SITE_ID = 601119

    # Downloads +url+, detects an Openstat counter on it and returns the
    # stats hash, or nil when no counter is found.
    def run(url)
      @page = download(url)
      run_id(find_id)
    end

    # Extracts the counter ID from the previously downloaded page.
    # NOTE(review): the two branches return different types — a String
    # for the modern <span> counter, an Integer for the legacy SpyLog
    # <img> counter. Kept as-is since :id is only used for URL
    # interpolation; confirm before normalizing.
    def find_id
      case @page
      when /<span id="(?:openstat|spylog)(\d+)"><\/span>/
        $1
      when /<img src=["']?http:\/\/u([0-9.]+)\.spylog\.com\/cnt/
        $1.gsub(/[.]/, '').to_i
      else
        nil
      end
    end

    # Fetches daily and monthly visitors/visits/page views for counter
    # +id+ from its rating.openstat.ru page. Returns nil for a nil id.
    def run_id(id)
      return nil unless id
      r = {:id => id}
      doc = download("http://rating.openstat.ru/site/#{id}")
      r[:visitors_day], r[:visits_day], r[:pv_day] = grab(doc, 'osb-rating_site-e-table-col-m-day')
      r[:visitors_mon], r[:visits_mon], r[:pv_mon] = grab(doc, 'osb-rating_site-e-table-col-m-month')
      return r
    end

    # Pulls the numbers out of the table cells whose class is
    # +classname+, in document order. Uses String#scan for iteration
    # (the original abused gsub and discarded its return value) and
    # strips the spaces used as thousands separators.
    def grab(doc, classname)
      a = []
      doc.scan(/#{classname}">([^<]+)<\/td>/) {
        a << $1.gsub(/ /, '').to_i
      }
      return a
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-

require 'cgi'
require 'uri'
require 'json'
require 'web_analytics_discovery/grabberutils'

module WebAnalyticsDiscovery
  # Grabber for quantcast.com audience estimates. Handles both
  # "quantified" (directly measured) sites and sites for which Quantcast
  # only models an estimate.
  class Quantcast
    include GrabberUtils

    # Looks up stats for the host part of +url+.
    def run(url)
      uri = URI.parse(url)
      run_id(uri.host)
    end

    # Fetches audience stats for +host+. Returns nil when Quantcast does
    # not know the site, a hash with only :id when it has no traffic
    # estimate, and a populated stats hash otherwise.
    def run_id(host)
      r = {}

      # Get auth cookies
      doc = download("https://www.quantcast.com/#{host}")
      if doc =~ /<td class="reach" id="reach-(.*?)">/
        r[:id] = id = $1
      else
        return nil
      end

      # Quantcast has no traffic info? We should stop here
      return r if doc =~ /content="We do not have enough information to provide a traffic estimate./

      return run_id_quantified(r, host) if doc =~ /<h4>Quantified<\/h4>/

      # Use auth cookies with API call
      d = traffic_api_call(host, id, 'US', 'DAY30')

      # points = d['reach']['US']['PEOPLE']['WEB']
      # points.each { |pt|
      #   puts Time.at(pt['timestamp']).to_s + "\t" + pt['reach'].to_s
      # }

      r[:visitors_mon] = d['summaries']['US']['PEOPLE']['WEB']['reach'].to_i

      return r
    end

    # Parse more precise, direct statistics on a quantified site
    def run_id_quantified(r, host)
      d = traffic_api_call(host, r[:id], 'GLOBAL', 'DAY1')
      r[:visitors_day] = avg_last_metric(d, 'UNIQUES', 7)
      r[:pv_day] = avg_last_metric(d, 'PAGE_VIEWS', 7)
      r[:visits_day] = avg_last_metric(d, 'VISITS', 7)

      d = traffic_api_call(host, r[:id], 'GLOBAL', 'DAY7')
      r[:visitors_week] = avg_last_metric(d, 'UNIQUES', 1)
      r[:pv_week] = avg_last_metric(d, 'PAGE_VIEWS', 1)
      r[:visits_week] = avg_last_metric(d, 'VISITS', 1)

      d = traffic_api_call(host, r[:id], 'GLOBAL', 'DAY30')['summaries']['GLOBAL']
      r[:visitors_mon] = d['UNIQUES']['WEB']['reach'].to_i
      r[:pv_mon] = d['PAGE_VIEWS']['WEB']['reach'].to_i
      r[:visits_mon] = d['VISITS']['WEB']['reach'].to_i

      return r
    end

    # Performs one traffic API request for +id+ and returns the first
    # element of the decoded JSON response (the API wraps the payload in
    # a one-element array).
    def traffic_api_call(host, id, country, period)
      id_encoded = CGI::escape(id)
      doc = download(
        "https://www.quantcast.com/api/profile/traffic/?&wUnit=#{id_encoded}&country=#{country}&period=#{period}&countType=",
        'UTF-8',
        'Referer' => "https://www.quantcast.com/#{host}?country=#{country}",
        'X-Requested-With' => 'XMLHttpRequest'
      )
      # JSON.parse, not JSON.load: plain data decoding without
      # create_additions object deserialization — this is untrusted
      # remote input.
      return JSON.parse(doc).first
    end

    # Averages the last +last_n+ data points of +metric+, summing the
    # online and mobile web channels.
    def avg_last_metric(d, metric, last_n)
      sum = 0
      %w[ONLINE_WEB MOBILE_WEB].each { |channel|
        d['reach']['GLOBAL'][metric][channel][-last_n..-1].each { |pt| sum += pt['reach'] }
      }
      return (sum / last_n).to_i
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# -*- coding: utf-8 -*-

require 'web_analytics_discovery/grabberutils'

module WebAnalyticsDiscovery
  # Grabber for the Rambler Top100 counter/rating service.
  class Rambler
    include GrabberUtils

    SEC_PER_DAY = 24 * 60 * 60

    # Downloads +url+, detects a Top100 counter ID on the page and
    # returns the stats hash, or nil when no counter is found.
    def run(url)
      @page = download(url)
      run_id(find_id)
    end

    # Extracts the Top100 counter ID from the downloaded page; several
    # generations of counter markup are recognized. Returns Integer or nil.
    def find_id
      case @page
      when /_top100q.push\(\["setAccount", "(\d+)"\]\)/,
           /<a href="http:\/\/top100\.rambler\.ru\/cgi-bin\/stats_top100\.cgi\?(\d+)"/,
           /<script.*src="http:\/\/counter\.rambler\.ru\/top100\.jcn\?(\d+)/,
           /<img src="http:\/\/counter\.rambler\.ru\/top100\.cnt\?(\d+)"/
        $1.to_i
      else
        nil
      end
    end

    # Fetches per-day/week/month visitors, visits and page views for
    # counter +id+ from the Top100 CSV export, falling back to scraping
    # the rating catalogue when the export yields nothing.
    def run_id(id)
      return nil unless id
      r = {:id => id}

      # doc = download("http://top100.rambler.ru/resStats/#{id}/")
      doc = download("http://top100.rambler.ru/resStats/#{id}/?_export=csv&_id=#{id}&_page=0", 'UTF-16LE')

      # Export columns: today, yesterday, last 7 days, previous week,
      # last 30 days, previous month. $2/$4/$6 below pick the complete
      # periods (yesterday / previous week / previous month) rather than
      # the still-running ones.

      # "уникальных" = unique visitors row
      if doc =~ /уникальных\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)/
        r[:visitors_day] = $2.to_i
        r[:visitors_week] = $4.to_i
        r[:visitors_mon] = $6.to_i
      end

      # "Визитов (сессий)" = visits (sessions) row
      if doc =~ /Визитов \(сессий\)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)/
        r[:visits_day] = $2.to_i
        r[:visits_week] = $4.to_i
        r[:visits_mon] = $6.to_i
      end

      # "Просмотров страниц / всего" = page views, total row
      if doc =~ /Просмотров страниц\n\s*всего\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)\t(\d+)/
        r[:pv_day] = $2.to_i
        r[:pv_week] = $4.to_i
        r[:pv_mon] = $6.to_i
      end

      # Plan B: if proper CSV export failed, we'll try to look up information in rating catalogue
      unless r[:visitors_day]
        now = Time.now
        r[:visitors_day], r[:pv_day] = parse_rating_table("http://top100.rambler.ru/?range=#{spec_yesterday(now)}&stat=1&statcol=1%2C2&query=#{id}", id)
        r[:visitors_week], r[:pv_week] = parse_rating_table("http://top100.rambler.ru/?range=#{spec_last_week(now)}&stat=1&statcol=1%2C2&query=#{id}", id)
        r[:visitors_mon], r[:pv_mon] = parse_rating_table("http://top100.rambler.ru/?range=#{spec_last_month(now)}&stat=1&statcol=1%2C2&query=#{id}", id)
      end

      return r
    end

    # Scrapes [visitors, page_views] for counter +id+ from a rating
    # catalogue page at +url+; returns [nil, nil] when the row or its
    # numeric cells cannot be found.
    def parse_rating_table(url, id)
      doc = download(url)
      if doc =~ /<tr>(\s*<td align="right">.*?<a href="\/resStats\/#{id}\/.*?)<\/tr>/m
        table_row = $1
        if table_row =~ /<td align="right">([0-9 ]+)<\/td>\s*<td class="last" align="right">([0-9 ]+)<\/td>/m
          v = $1
          pv = $2
          # Numbers use a space as thousands separator — strip it.
          # NOTE(review): the separator looks like a non-breaking space;
          # preserve the exact character in these literals.
          v = v.gsub(/ /, '').to_i
          pv = pv.gsub(/ /, '').to_i
          return [v, pv]
        end
      end
      return [nil, nil]
    end

    # Yesterday's date in the dd.mm.yyyy format used by Top100 URLs.
    def spec_yesterday(now)
      (now - SEC_PER_DAY).strftime('%d.%m.%Y')
    end

    # Previous calendar week (Mon-Sun) as a "start+-+end" range spec.
    def spec_last_week(now)
      wday = now.wday
      wday = 7 if wday == 0
      end_week = now - (wday * SEC_PER_DAY)
      start_week = end_week - 6 * SEC_PER_DAY
      "#{start_week.strftime('%d.%m.%Y')}+-+#{end_week.strftime('%d.%m.%Y')}"
    end

    # Previous calendar month as a "start+-+end" range spec.
    def spec_last_month(now)
      this_month_1 = Time.new(now.year, now.month, 1)
      last_month_end = this_month_1 - SEC_PER_DAY
      last_month_start = Time.new(last_month_end.year, last_month_end.month, 1)
      "#{last_month_start.strftime('%d.%m.%Y')}+-+#{last_month_end.strftime('%d.%m.%Y')}"
    end
  end
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# -*- coding: utf-8 -*-

require 'web_analytics_discovery/grabberutils'

require 'date'
require 'uri'  # find_id uses URI.parse; this require was missing

module WebAnalyticsDiscovery
  # Looks up monthly audience numbers in the published TNS Web Index
  # report (an .xlsx inside a .zip downloaded from tns-global.ru).
  class TNS
    include GrabberUtils

    MAX_TRIES = 5

    # Verifies that the external utilities this grabber shells out to
    # are available; raises a descriptive error otherwise.
    def initialize
      # This one requires xlsx2csv utility
      begin
        `xlsx2csv --version`
      rescue Errno::ENOENT
        raise 'xlsx2csv not available: unable to run TNS report discovery'
      end

      # And an unzip utility
      begin
        `unzip -v`
      rescue Errno::ENOENT
        raise 'unzip not available: unable to run TNS report discovery'
      end
    end

    # Parsing TNS report involves the following stages:
    #
    # 1. Download non-empty "directory" page from their web site
    # for a current year (keep requesting older years if we keep
    # getting empty output, bail out on HTTP error)
    #
    # 2. Download first (most recent) report listed on that "directory" page
    #
    # 3. Unpack (unzip) downloaded report file; it's a zip that
    # contains multiple files, including single .xlsx file with
    # raw data.
    #
    # 4. Convert .xlsx file into something more readable (CSV)
    # with external utility.
    #
    # 5. Parse resulting CSV report into memory (it's relatively
    # short - as of 2014-10, TNS lists only ~500 sites)
    def parse_report
      report_url = query_directory
      zipped = download_file(report_url)
      unzipped = ensure_unpack(zipped)
      converted = ensure_convert(unzipped)

      @report = {}
      File.open(converted).each_line { |l|
        c = l.chomp.split(/\t/)

        # Skip headers
        next if c.size < 5

        # Skip table column headers
        next if c[0].empty?

        # Skip generic audience info columns
        next if c[1].empty?

        # Downcase URL and calculate proper monthly visitors
        # (report figures are in thousands)
        visitors = (c[2].to_f * 1000).to_i
        url = c[1].downcase.gsub(/ \(сайт\)$/, '')

        @report[url] = visitors
      }
    end

    # Finds the URL of the most recent report, probing backwards from
    # the current year for at most MAX_TRIES years.
    def query_directory
      y = Date.today.year
      MAX_TRIES.times {
        dir = download("http://www.tns-global.ru/services/media/media-audience/internet/information/?arrFilter_pf%5BYEAR%5D=#{y}&set_filter=%D0%9F%D0%BE%D0%BA%D0%B0%D0%B7%D0%B0%D1%82%D1%8C&set_filter=Y")
        if dir =~ /<a href="(\/services\/media\/media-audience\/internet\/information\/\?download=\d+&date=.*?)">/
          return "http://www.tns-global.ru#{$1}"
        end
        y -= 1
      }
      raise 'Unable to query report directory - not a single report found'
    end

    # Extracts the .xlsx from the zipped report into the cache (once).
    def ensure_unpack(zipped)
      unzipped = "#{CACHE_DIR}/tns_#{File.basename(zipped)}.xlsx"
      # File.exist?: the exists? alias is removed in Ruby 3.2
      unless File.exist?(unzipped)
        system("unzip -pq '#{zipped}' *.xlsx >'#{unzipped}'")
        raise 'Unable to unpack TNS report' unless $?.exitstatus == 0
      end
      return unzipped
    end

    # Converts the .xlsx report to tab-separated values (once).
    def ensure_convert(unzipped)
      converted = "#{CACHE_DIR}/#{File.basename(unzipped)}.tsv"
      unless File.exist?(converted)
        system("xlsx2csv -d tab -s 1 '#{unzipped}' >'#{converted}'")
        raise 'Unable to convert TNS report to .tsv' unless $?.exitstatus == 0
      end
      return converted
    end

    # Looks up stats for the host part of +url+.
    def run(url)
      run_id(find_id(url))
    end

    # The TNS report is keyed by bare host name.
    def find_id(url)
      URI.parse(url).host
    end

    # Returns {:id, :visitors_mon} for a listed host, nil otherwise.
    # The report is parsed lazily on first use and cached in @report.
    def run_id(id)
      parse_report unless @report
      v = @report[id]
      return v ? {:id => id, :visitors_mon => v} : nil
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-

require 'json'
require 'web_analytics_discovery/grabberutils'

module WebAnalyticsDiscovery
  # Grabber for Yandex.Metrika counters: detects a counter on a page and
  # reads its public informer data.
  class YandexMetrika
    include GrabberUtils

    # Downloads +url+, detects a Metrika counter ID and returns the
    # stats hash, or nil when no counter is found.
    def run(url)
      @page = download(url)
      run_id(find_id)
    end

    # Extracts the counter ID (as a String) from the downloaded page,
    # or nil when no counter is present.
    def find_id
      case @page
      when /yaCounter(\d+) = new Ya\.Metrika\(\{id:(\d+)/
        $1
      else
        nil
      end
    end

    # Fetches the public informer JSON for counter +id+ and derives
    # average daily page views / visits / visitors, plus weekly and
    # monthly page-view approximations.
    def run_id(id)
      return nil unless id
      r = {:id => id}

      json = download("http://bs.yandex.ru/informer/#{id}/json")

      # Unfortunately, it's very weird JSON, so it's easier to parse it with regexp
      # {pageviews:[42917,576537,764371,843611,826967,1009246,990612],visits:[21298,278959,309217,335495,324285,420460,430497],uniques:[20511,240509,254201,275157,270031,356657,366913],

      r[:pv_day] = do_list($1) if json =~ /pageviews:\[([0-9,]+)\]/
      r[:visits_day] = do_list($1) if json =~ /visits:\[([0-9,]+)\]/
      r[:visitors_day] = do_list($1) if json =~ /uniques:\[([0-9,]+)\]/

      # Calculate approximations
      r[:pv_week] = r[:pv_day] * 7 if r[:pv_day]
      r[:pv_mon] = (r[:pv_day] * AVG_DAYS_IN_MONTH).to_i if r[:pv_day]

      return r
    end

    # Averages a comma-separated list of daily numbers, ignoring the
    # first (incomplete, current-day) element.
    def do_list(list)
      els = list.split(/,/).map { |x| x.to_i }

      # Throw out first element, it's current day, which is incomplete
      els.shift

      # Guard against a degenerate single-element informer list: inject
      # on an empty array returns nil and the division would raise.
      return 0 if els.empty?

      sum = els.inject { |a, b| a + b }
      return sum / els.size
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'fileutils'
require 'digest/md5'

# Shared helpers for all grabbers: cached wget-based downloading, URL to
# cache-filename mangling and a couple of common constants.
module GrabberUtils
  # Directory where downloaded pages/files are cached between runs.
  CACHE_DIR = 'cache'
  USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0'

  # Raised when wget exits with a non-zero status. Derives from
  # StandardError (not Exception) so that a plain `rescue` catches it
  # and it doesn't mask signals/SystemExit.
  class DownloadError < StandardError; end

  # Downloads +url+ (through the file cache) and returns its body as a
  # UTF-8 string; +encoding+ names the encoding of the downloaded
  # document. +options+ are passed through to download_file.
  def download(url, encoding = 'UTF-8', options = {})
    fn = download_file(url, options)

    # Truly horrible hack to work around Ruby 1.9.2+ strict handling of invalid UTF-8 characters
    s = File.read(fn)
    s.encode!('UTF-16', encoding, :invalid => :replace, :replace => '?')
    s.encode!('UTF-8', 'UTF-16', :invalid => :replace, :replace => '?')
  end

  # Downloads a file, returns filename in cache directory. Recognized
  # +options+: 'localfile' (override for the cache file name) and
  # 'Referer' (HTTP referer header passed to wget).
  def download_file(url, options = {})
    FileUtils.mkdir_p(CACHE_DIR)
    localfile = options['localfile'] || mangle_url(url)
    fn = CACHE_DIR + '/' + localfile
    # File.exist?: FileTest.exists?/File.exists? are removed in Ruby 3.2
    unless File.exist?(fn)
      opt = {
        'user-agent' => USER_AGENT,
        'load-cookies' => 'cookies.txt',
        'save-cookies' => 'cookies.txt',
      }
      if options['Referer']
        opt['referer'] = options['Referer']
      end
      opt = opt.map { |k, v| "--#{k}='#{v}'" }.join(' ')
      system("wget --append-output=wget.log --keep-session-cookies -O'#{fn}' #{opt} '#{url}'")
      if $?.exitstatus != 0
        # Don't leave a truncated file behind — the cache would keep
        # serving it forever.
        File.delete(fn)
        raise DownloadError.new
      end
    end

    return fn
  end

  # Converts a URL to a safe cache file name: short URLs keep a
  # readable mangled form, long ones fall back to an MD5 digest.
  def mangle_url(url)
    if url.length < 200
      url.gsub(/[:\/]/, '_')
    else
      Digest::MD5.hexdigest(url)
    end
  end

  # Average number of days per month
  AVG_DAYS_IN_MONTH = 365.25 / 12
end
|