skinny_jeans 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +9 -2
- data/VERSION +1 -1
- data/lib/skinny_jeans.rb +67 -18
- data/lib/skinny_jeans_string_parser.rb +59 -0
- data/skinny_jeans.gemspec +7 -2
- data/test/skinny_jeans_string_parser_test.rb +36 -0
- data/test/skinny_jeans_test.rb +53 -0
- metadata +10 -6
data/README.rdoc
CHANGED
@@ -9,10 +9,14 @@ http://img696.imageshack.us/img696/75/skinnys3.jpg
|
|
9
9
|
0.0.0.0 - - [01/Oct/2010:00:00:01 -0700] "GET /posts/my-first-post HTTP/1.1" 200 1337 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" "-"
|
10
10
|
0.0.0.0 - - [01/Oct/2010:00:00:03 -0700] "GET /posts/my-first-post HTTP/1.1" 200 1337 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" "-"
|
11
11
|
0.0.0.0 - - [02/Oct/2010:00:00:03 -0700] "GET /posts/my-first-post HTTP/1.1" 200 1337 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" "-"
|
12
|
+
0.0.0.0 - - [02/Oct/2010:00:00:04 -0700] "GET /posts/my-first-post HTTP/1.1" 200 1337 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/?q=some+stuff)" "-"
|
13
|
+
0.0.0.0 - - [02/Oct/2010:00:00:05 -0700] "GET /posts/my-first-post HTTP/1.1" 200 1337 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://search.yahoo.com/?p=some+stuff)" "-"
|
12
14
|
|
13
|
-
* then you get 2 SQL rows that look like:
|
15
|
+
* then you get 2 SQL rows in the pageviews table that look like:
|
14
16
|
2010-10-01, my-first-post, 3
|
15
17
|
2010-10-02, my-first-post, 1
|
18
|
+
* and 1 SQL row in the pageview_keywords table that looks like this (search referrals ONLY, but WITH keyword tracking):
|
19
|
+
2010-10-02, "my-first-post", 2, "some stuff"
|
16
20
|
* note the date columns truncate timestamp, so the days are in whatever timezone your log file reports in
|
17
21
|
|
18
22
|
|
@@ -39,4 +43,7 @@ http://img696.imageshack.us/img696/75/skinnys3.jpg
|
|
39
43
|
|
40
44
|
== NOTES
|
41
45
|
* supports gzipped files
|
42
|
-
* creates a temp copy of the log file before parsing
|
46
|
+
* creates a temp copy of the log file before parsing
|
47
|
+
* currently requires each line to be unique
|
48
|
+
* this could be a problem if a single client manages to hit the same page more than once in less than 1 second.
|
49
|
+
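* more precisely, this is only a problem if the last line parsed in a run is one of those duplicated lines (the same client hitting the same page more than once in less than one second), because that line is what the next run uses to find where to resume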
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.5.0
|
data/lib/skinny_jeans.rb
CHANGED
@@ -5,6 +5,9 @@ require 'sqlite3'
|
|
5
5
|
require 'active_record'
|
6
6
|
require 'zlib'
|
7
7
|
require 'fileutils'
|
8
|
+
require 'uri'
|
9
|
+
require 'cgi'
|
10
|
+
require 'skinny_jeans_string_parser'
|
8
11
|
# require 'home_run'
|
9
12
|
|
10
13
|
class SkinnyJeans
|
@@ -13,13 +16,14 @@ class SkinnyJeans
|
|
13
16
|
self.new(logfile_path, sqlite_db_path, path_regexp, date_regexp).execute
|
14
17
|
end
|
15
18
|
|
16
|
-
attr_accessor :hash_of_dates, :last_pageview_at
|
19
|
+
attr_accessor :hash_of_dates, :hash_of_dates_for_keywords, :last_pageview_at
|
17
20
|
|
18
21
|
def initialize(logfile_path, sqlite_db_path, path_regexp, date_regexp)
|
19
22
|
@logfile_path, @sqlite_db_path, @path_regexp, @date_regexp = [logfile_path, sqlite_db_path, path_regexp, date_regexp]
|
20
23
|
@is_gzipped = !logfile_path.to_s[/gz/].nil?
|
21
24
|
prepare_db
|
22
25
|
@hash_of_dates = {}
|
26
|
+
@hash_of_dates_for_keywords = {}
|
23
27
|
@last_datetime = nil
|
24
28
|
end
|
25
29
|
|
@@ -49,6 +53,19 @@ class SkinnyJeans
|
|
49
53
|
t.column :last_line_parsed, :string
|
50
54
|
end
|
51
55
|
end
|
56
|
+
|
57
|
+
# addition from 2010-12-06 to track search traffic specifically
|
58
|
+
if !PageviewKeyword.table_exists?
|
59
|
+
SkinnyJeanDb.connection.create_table(:pageview_keywords) do |t|
|
60
|
+
t.column :date, :date
|
61
|
+
t.column :path, :string
|
62
|
+
t.column :pageview_count, :integer
|
63
|
+
t.column :keyword, :string
|
64
|
+
end
|
65
|
+
SkinnyJeanDb.connection.add_index(:pageview_keywords, [:date, :path, :keyword], :name => "date_path_keyword_index")
|
66
|
+
# SkinnyJeanDb.connection.add_index(:pageview_keywords, [:date, :pageview_count], :name => "date_pageview_count_index")
|
67
|
+
end
|
68
|
+
|
52
69
|
end
|
53
70
|
|
54
71
|
def execute
|
@@ -63,7 +80,7 @@ class SkinnyJeans
|
|
63
80
|
if last_update
|
64
81
|
last_pageview_at, last_line_parsed = last_update.last_pageview_at, last_update.last_line_parsed
|
65
82
|
file_reader do |line, lineno|
|
66
|
-
if line == last_line_parsed
|
83
|
+
if line.to_s[0..254] == last_line_parsed.to_s[0..254]
|
67
84
|
lineno_of_last_line_parsed = lineno
|
68
85
|
break
|
69
86
|
end
|
@@ -77,18 +94,19 @@ class SkinnyJeans
|
|
77
94
|
|
78
95
|
file_reader do |line, index|
|
79
96
|
lineno += 1
|
80
|
-
next if lineno_of_last_line_parsed && lineno
|
97
|
+
next if lineno_of_last_line_parsed && lineno <= lineno_of_last_line_parsed
|
81
98
|
|
82
99
|
path_match = line[@path_regexp, 1]
|
83
100
|
next if path_match.nil?
|
84
101
|
date_match = line[@date_regexp, 1]
|
85
102
|
next if date_match.nil?
|
86
|
-
|
103
|
+
datetime_obj = parse_string_as_date(date_match)
|
87
104
|
|
88
|
-
next if lineno_of_last_line_parsed.nil? && !last_pageview_at.nil? &&
|
105
|
+
next if lineno_of_last_line_parsed.nil? && !last_pageview_at.nil? && datetime_obj < last_pageview_at
|
89
106
|
|
90
|
-
insert_or_increment(
|
91
|
-
|
107
|
+
insert_or_increment(datetime_obj, path_match, SkinnyJeansStringParser.extract_search_query(line))
|
108
|
+
@last_pageview_at = datetime_obj
|
109
|
+
last_line_parsed = line.to_s[0..254] # only 255 characters because we store it in the database
|
92
110
|
lines_parsed += 1
|
93
111
|
end
|
94
112
|
end
|
@@ -96,7 +114,9 @@ class SkinnyJeans
|
|
96
114
|
puts "completed parsing in #{realtime}"
|
97
115
|
|
98
116
|
persisted = 0
|
117
|
+
persisted_pageview_keywords = 0
|
99
118
|
realtime = Benchmark.realtime do
|
119
|
+
|
100
120
|
hash_of_dates.each do |date, hash_of_paths|
|
101
121
|
hash_of_paths.keys.each do |path|
|
102
122
|
pv = Pageview.find_or_create_by_date_and_path(date, path)
|
@@ -106,13 +126,30 @@ class SkinnyJeans
|
|
106
126
|
persisted += 1
|
107
127
|
end
|
108
128
|
end
|
129
|
+
|
130
|
+
hash_of_dates_for_keywords.each do |date, hash_of_paths|
|
131
|
+
hash_of_paths.keys.each do |path|
|
132
|
+
hash_of_paths[path].keys.each do |keyword|
|
133
|
+
pvk = PageviewKeyword.find_or_create_by_date_and_path_and_keyword(date, path, keyword)
|
134
|
+
pvk.keyword = keyword.to_s[0..254]
|
135
|
+
pvk.pageview_count ||= 0
|
136
|
+
pvk.pageview_count += hash_of_paths[path][keyword]
|
137
|
+
pvk.save!
|
138
|
+
persisted_pageview_keywords += 1
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
109
143
|
end
|
110
|
-
|
144
|
+
|
111
145
|
puts "completed persistence in #{realtime}"
|
112
146
|
|
113
|
-
Update.create!({:last_pageview_at => self.last_pageview_at, :lines_parsed => lines_parsed, :last_line_parsed => last_line_parsed})
|
147
|
+
Update.create!({:last_pageview_at => self.last_pageview_at, :lines_parsed => lines_parsed, :last_line_parsed => last_line_parsed.to_s[0..254]})
|
114
148
|
|
115
|
-
puts
|
149
|
+
puts("total records in DB: #{Pageview.count}
|
150
|
+
lines parsed this round: #{lines_parsed}
|
151
|
+
lines persisted this round:#{persisted}
|
152
|
+
total SkinnyJeans executions since inception: #{Update.count}")
|
116
153
|
|
117
154
|
return self
|
118
155
|
|
@@ -137,6 +174,7 @@ class SkinnyJeans
|
|
137
174
|
|
138
175
|
def pageview;get_ar_class(Pageview);end
|
139
176
|
def update;get_ar_class(Update);end
|
177
|
+
def pageview_keyword;get_ar_class(PageviewKeyword);end
|
140
178
|
|
141
179
|
def get_ar_class(klass)
|
142
180
|
begin;return(klass);rescue(ActiveRecord::ConnectionNotEstablished);prepare_db;end
|
@@ -150,21 +188,32 @@ class SkinnyJeans
|
|
150
188
|
Time.parse("#{year}-#{month}-#{day} #{hour}:#{minute}:#{seconds} #{zone}")
|
151
189
|
end
|
152
190
|
|
153
|
-
def insert_or_increment(
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
hash_of_dates[
|
159
|
-
|
191
|
+
def insert_or_increment(_datetime_obj, _path, _search_keyword = nil)
|
192
|
+
|
193
|
+
date_string = _datetime_obj.strftime(("%Y-%m-%d"))
|
194
|
+
|
195
|
+
# data for all pageviews
|
196
|
+
hash_of_dates[date_string] ||= {}
|
197
|
+
hash_of_dates[date_string][_path] ||= 0
|
198
|
+
hash_of_dates[date_string][_path] += 1
|
199
|
+
|
200
|
+
return if _search_keyword.nil?
|
201
|
+
|
202
|
+
# data for just pageviews coming from search
|
203
|
+
hash_of_dates_for_keywords[date_string] ||= {}
|
204
|
+
hash_of_dates_for_keywords[date_string][_path] ||= {}
|
205
|
+
hash_of_dates_for_keywords[date_string][_path][_search_keyword] ||= 0
|
206
|
+
hash_of_dates_for_keywords[date_string][_path][_search_keyword] += 1
|
207
|
+
|
160
208
|
end
|
161
209
|
|
162
210
|
class Pageview < SkinnyJeanDb
|
163
211
|
end
|
212
|
+
class PageviewKeyword < SkinnyJeanDb
|
213
|
+
end
|
164
214
|
class Update < SkinnyJeanDb
|
165
215
|
end
|
166
216
|
|
167
|
-
|
168
217
|
end
|
169
218
|
|
170
219
|
# SkinnyJeans::execute(ARGV.first) if "#{$0}".gsub(/.*\//,"") == "skinny_jeans.rb"
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# example
|
2
|
+
# SkinnyJeansStringParser.extract_search_query("http://search.aol.com/aol/search?enabled_terms=&s_it=comsearch50&q=cool+stuff")
|
3
|
+
# => "cool stuff"
|
4
|
+
|
5
|
+
class SkinnyJeansStringParser
|
6
|
+
|
7
|
+
def self.extract_search_query(_url)
|
8
|
+
self.new(_url).get_search_keyword
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_accessor :string_value
|
12
|
+
def initialize(string_value)
|
13
|
+
@string_value = string_value
|
14
|
+
end
|
15
|
+
|
16
|
+
# iterate through any URLs we find in a string and return a search query or nil
|
17
|
+
def get_search_keyword
|
18
|
+
!all_urls.nil? ? all_urls.collect { |_url| extract_search_query_from_url(_url) }[0] : nil
|
19
|
+
end
|
20
|
+
|
21
|
+
# pre: some referring URL from google, yahoo, AOL, bing, ask
|
22
|
+
# post: whatever the search query was, ASCII or GTFO
|
23
|
+
def extract_search_query_from_url(url)
|
24
|
+
val = nil
|
25
|
+
case url
|
26
|
+
when /google\.com/
|
27
|
+
val=return_param_from_url(url, "q")
|
28
|
+
when /search\.yahoo\.com/
|
29
|
+
val=return_param_from_url(url, "p")
|
30
|
+
when /search\.aol\.com/
|
31
|
+
val=return_param_from_url(url, "q")
|
32
|
+
when /ask\.com/
|
33
|
+
val=return_param_from_url(url, "q")
|
34
|
+
when /bing\.com/
|
35
|
+
val=return_param_from_url(url, "q")
|
36
|
+
end
|
37
|
+
# whitelist of acceptable characters
|
38
|
+
val = val.present? && val.gsub(/[^0-9A-Za-z\s"'!@#\$%\^&\*\(\)\?\<\>\[\]:;,\.+-_=]/, '') != val ? nil : val
|
39
|
+
return val
|
40
|
+
end
|
41
|
+
|
42
|
+
# pre: like http://example.org?q=cool&fun=no, "fun"
|
43
|
+
# post: "no"
|
44
|
+
def return_param_from_url(url, param_name)
|
45
|
+
_uri = URI.parse(url)
|
46
|
+
if _uri.query.present?
|
47
|
+
_cgi = CGI.parse(_uri.query)
|
48
|
+
return _cgi[param_name].to_s.strip if _cgi[param_name]
|
49
|
+
end
|
50
|
+
return nil
|
51
|
+
end
|
52
|
+
|
53
|
+
# find all URLs in a string that are at beginning or end of string or are tokenized by spaces
|
54
|
+
def all_urls
|
55
|
+
@all_urls ||= string_value.split(/\s+/).reject { |_string| !_string.match(/^['"]?https?:['"]?/) }.collect { |url| url.gsub(/["']/,'') }
|
56
|
+
@all_urls.empty? ? nil : @all_urls
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
data/skinny_jeans.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{skinny_jeans}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jonathan Otto"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-12-06}
|
13
13
|
s.email = %q{jonathan.otto@gmail.com}
|
14
14
|
s.extra_rdoc_files = [
|
15
15
|
"README.rdoc",
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
"TODO",
|
23
23
|
"VERSION",
|
24
24
|
"lib/skinny_jeans.rb",
|
25
|
+
"lib/skinny_jeans_string_parser.rb",
|
25
26
|
"skinny_jeans.gemspec"
|
26
27
|
]
|
27
28
|
s.homepage = %q{http://github.com/jotto/skinny_jeans}
|
@@ -29,6 +30,10 @@ Gem::Specification.new do |s|
|
|
29
30
|
s.require_paths = ["lib"]
|
30
31
|
s.rubygems_version = %q{1.3.7}
|
31
32
|
s.summary = %q{Fast webserver log parser for persisting daily pageviews per path to sqlite}
|
33
|
+
s.test_files = [
|
34
|
+
"test/skinny_jeans_string_parser_test.rb",
|
35
|
+
"test/skinny_jeans_test.rb"
|
36
|
+
]
|
32
37
|
|
33
38
|
if s.respond_to? :specification_version then
|
34
39
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../lib/skinny_jeans'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'pp'
|
4
|
+
require 'fileutils'
|
5
|
+
class SkinnyJeansStringParserTest < Test::Unit::TestCase
|
6
|
+
|
7
|
+
|
8
|
+
def test_can_get_all_urls_from_string
|
9
|
+
_string=<<-EOF
|
10
|
+
98.244.200.209 - - [01/Dec/2010:11:51:26 -0800] "GET /deals/apple-ipod-touch HTTP/1.1" 200 11448 "http://www.google.com/m/search?oe=UTF-8&client=safari&hl=en&q=best+deals+for+the+4th+generation+iPod+touch+32+gb&gws_link_params=spell:1&ei=aqb2TJDBLqGutgfp862NAg&ved=0CBEQBSgA" "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7" "-"
|
11
|
+
EOF
|
12
|
+
sjsp = SkinnyJeansStringParser.new(_string)
|
13
|
+
assert_equal 1, sjsp.all_urls.size
|
14
|
+
assert_equal "http://www.google.com/m/search?oe=UTF-8&client=safari&hl=en&q=best+deals+for+the+4th+generation+iPod+touch+32+gb&gws_link_params=spell:1&ei=aqb2TJDBLqGutgfp862NAg&ved=0CBEQBSgA",
|
15
|
+
sjsp.all_urls.first
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_return_nil_of_no_urls
|
19
|
+
_string=<<-EOF
|
20
|
+
207.46.12.204 - - [01/Dec/2010:11:48:00 -0800] "GET /deals/skullcandy-inkd-earbuds HTTP/1.1" 200 5732 "-" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; SLCC1; .NET CLR 1.1.4322; .NET CLR 2.0.40607; .NET CLR 3.0.04506.648)" "-"
|
21
|
+
EOF
|
22
|
+
sjsp = SkinnyJeansStringParser.new(_string)
|
23
|
+
assert_nil sjsp.all_urls
|
24
|
+
|
25
|
+
assert_nil sjsp.get_search_keyword
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_can_get_keyword_from_url
|
29
|
+
_string=<<-EOF
|
30
|
+
98.244.200.209 - - [01/Dec/2010:11:51:26 -0800] "GET /deals/apple-ipod-touch HTTP/1.1" 200 11448 "http://www.google.com/m/search?oe=UTF-8&client=safari&hl=en&q=best+deals+for+the+4th+generation+iPod+touch+32+gb&gws_link_params=spell:1&ei=aqb2TJDBLqGutgfp862NAg&ved=0CBEQBSgA" "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7" "-"
|
31
|
+
EOF
|
32
|
+
sjsp = SkinnyJeansStringParser.new(_string)
|
33
|
+
assert_equal "best deals for the 4th generation iPod touch 32 gb", sjsp.get_search_keyword
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../lib/skinny_jeans'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'pp'
|
4
|
+
require 'fileutils'
|
5
|
+
class SkinnyJeansTest < Test::Unit::TestCase
|
6
|
+
|
7
|
+
|
8
|
+
def test_parse_pick_up_where_left_off
|
9
|
+
db_path = "./skinny_jeans_test.db"
|
10
|
+
FileUtils.rm(db_path) if File.exists?(db_path)
|
11
|
+
sj=SkinnyJeans.new(logfile_path = "small_access_log.log", sqlite_skinny_jeans = db_path, path_regexp = /\s\/deals\/(.*)\sHTTP/, date_regexp = /\[(\d.*\d)\]/)
|
12
|
+
sj.execute
|
13
|
+
assert_equal 20, sj.pageview.count
|
14
|
+
assert_equal 2, sj.pageview.find_by_path("flip-video").pageview_count
|
15
|
+
assert_equal 2, sj.pageview.find_by_path("delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control").pageview_count
|
16
|
+
|
17
|
+
# puts "
|
18
|
+
#
|
19
|
+
# -----
|
20
|
+
#
|
21
|
+
# "
|
22
|
+
# the 2nd file is the same, but with 2 additional lines for flip-video and apple-ipod-touch
|
23
|
+
sj=SkinnyJeans.new(logfile_path = "small_access_log_part_2.log", sqlite_skinny_jeans = db_path, path_regexp = /\s\/deals\/(.*)\sHTTP/, date_regexp = /\[(\d.*\d)\]/)
|
24
|
+
sj.execute
|
25
|
+
assert_equal 3, sj.pageview.find_by_path("flip-video").pageview_count
|
26
|
+
assert_equal 2, sj.pageview.find_by_path("apple-ipod-touch").pageview_count
|
27
|
+
|
28
|
+
# the last line of small_access_log_part_2 is over 1000 chars, make sure we can handle it properly
|
29
|
+
assert_equal 255, sj.update.last.last_line_parsed.size
|
30
|
+
_last_line=<<-EOF
|
31
|
+
82.31.245.117 - - [01/Dec/2010:11:52:29 -0800] "GET /deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control HTTP/1.1" 200 6244 "http://www.google.co.uk/imgres?imgurl=http://dealzon.com/pictures/deals/6656/large/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control.jpg%3F1264075353&imgrefurl=http://dealzon.com/deals/delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control&usg=__Y-Tkbg0EefRA5uKrR0b2gs7HqSo=&h=300&w=287&sz=11&hl=en&start=49&zoom=1&tbnid=18tsbSh1DQmf2M:&tbnh=142&tbnw=133&prev=/images%3Fq%3Dblack%2Bradiator%2Bheater%26um%3D1%26hl%3Den%26biw%3D1020%26bih%3D624%26tbs%3Disch:10%2C1872&um=1&itbs=1&iact=hc&vpx=757&vpy=67&dur=140&hovh=230&hovw=220&tx=158&ty=118&ei=Tab2TMWkCMqL4AauyI2SBw&oei=Q6b2TLPJDoW3hQfDlri5BQ&esq=4&page=4&ndsp=15&ved=1t:429,r:14,s:49&biw=1020&bih=624" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Sky Broadband; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.3)" "-"
|
32
|
+
EOF
|
33
|
+
assert_equal _last_line.strip![0..254], sj.update.last.last_line_parsed
|
34
|
+
assert_equal 3, sj.pageview.find_by_path("delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control").pageview_count
|
35
|
+
|
36
|
+
# puts "
|
37
|
+
#
|
38
|
+
# -----
|
39
|
+
#
|
40
|
+
# "
|
41
|
+
|
42
|
+
# the 3rd has 1 additional line so we can ensure we can leave off on a line over 255 characters
|
43
|
+
sj=SkinnyJeans.new(logfile_path = "small_access_log_part_3.log", sqlite_skinny_jeans = db_path, path_regexp = /\s\/deals\/(.*)\sHTTP/, date_regexp = /\[(\d.*\d)\]/)
|
44
|
+
sj.execute
|
45
|
+
assert_equal 3, sj.pageview.find_by_path("delonghi-hhp1500-safeheat-mica-panel-radiator-heater-with-thermostat-control").pageview_count
|
46
|
+
assert_equal 3, sj.pageview.find_by_path("apple-ipod-touch").pageview_count
|
47
|
+
assert_equal 3, sj.pageview_keyword.find_by_path_and_keyword("apple-ipod-touch", "best deals for the 4th generation iPod touch 32 gb").pageview_count
|
48
|
+
# PP.pp sj.pageview_keyword.all
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skinny_jeans
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
8
|
+
- 5
|
9
9
|
- 0
|
10
|
-
version: 0.4.0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jonathan Otto
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-12-06 00:00:00 -06:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -66,7 +66,10 @@ files:
|
|
66
66
|
- TODO
|
67
67
|
- VERSION
|
68
68
|
- lib/skinny_jeans.rb
|
69
|
+
- lib/skinny_jeans_string_parser.rb
|
69
70
|
- skinny_jeans.gemspec
|
71
|
+
- test/skinny_jeans_string_parser_test.rb
|
72
|
+
- test/skinny_jeans_test.rb
|
70
73
|
has_rdoc: true
|
71
74
|
homepage: http://github.com/jotto/skinny_jeans
|
72
75
|
licenses: []
|
@@ -101,5 +104,6 @@ rubygems_version: 1.3.7
|
|
101
104
|
signing_key:
|
102
105
|
specification_version: 3
|
103
106
|
summary: Fast webserver log parser for persisting daily pageviews per path to sqlite
|
104
|
-
test_files:
|
105
|
-
|
107
|
+
test_files:
|
108
|
+
- test/skinny_jeans_string_parser_test.rb
|
109
|
+
- test/skinny_jeans_test.rb
|