skinny_jeans 0.8.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- = SKINNY JEANS LOG PARSING WITH RUBY & SQLITE FOR HIPSTERS
1
+ = SKINNY JEANS FAST LOG PARSING WITH RUBY & SQLITE FOR HIPSTERS
2
2
  http://img696.imageshack.us/img696/75/skinnys3.jpg
3
3
 
4
4
  == EXAMPLE
@@ -30,24 +30,22 @@ http://img696.imageshack.us/img696/75/skinnys3.jpg
30
30
  sj = SkinnyJeans::LogParser::execute(logfile_path = "access.log", sqlite_skinny_jeans = "sqlite_skinny_jeans.db", path_regexp = /\s\/posts\/(.*)\sHTTP/, date_regexp = /\[(\d.*\d)\]/)
31
31
  sj.pageview.where("date = '2010-10-01' and path = 'my-first-post'")
32
32
  => #<SkinnyJeans::Pageview id: 1, date: "2010-10-01", path: "my-first-post", pageview_count: 3>
33
- 1. NOTE: for now **you have to monkey patch the SkinnyJeans#parse_string_as_date**
33
+ 1. NOTE: for now **you may have to monkey patch the SkinnyJeans#parse_string_as_date**
34
34
  2. Parse oldest logs first, then run regularly against your main log, let logrotate handle the rest (skinny_jeans remembers where it left off)
35
35
  3. ASSUMES reading log files in ascending order, keeps track of last line read so you could put it on a scheduler or cron job
36
- 4. access the 2 activerecord classes, sj.pageview (returns Pageview class), and sj.update
36
+ 4. access the 2 activerecord classes, sj.pageview (returns Pageview class), and sj.update happily within your own app
37
37
  5. enjoy the skinny jeans
38
38
 
39
39
 
40
- == PERFORMANCE (without organic search tracking)
41
- * it parses 100,000 lines in < 2.5 seconds
42
- * persists 1,000 requests with 2 compound indexes in 15 seconds, or 10 seconds with home_run c extension
43
- * 25,000 rows == 4 megabyte sqlite database
40
+ == PERFORMANCE
41
+ * it parses 300,000 lines in < 9 seconds and
42
+ * persists 6,000 requests (in 2 different tables, one of the tables is for search engine referrals) with 2 compound indexes in 18 seconds
44
43
 
45
44
  == NOTES
46
45
  * supports gzipped files
47
46
  * creates a temp copy of the log file before parsing
48
47
  * currently requires each line to be unique
49
- * this could be a problem if a single client manages to hit the same page more than once in less than 1 second.
50
- * this is only a problem if the last line parsed is one of the pages that was hit by the same client more than once in less than one second
48
+ * line uniqueness is a potential problem if a single client manages to hit the same page more than once in less than 1 second AND it's the last line of a log file
51
49
 
52
50
  == LICENSE
53
51
 
data/Rakefile CHANGED
@@ -8,9 +8,10 @@ begin
8
8
  s.email = "jonathan.otto@gmail.com"
9
9
  s.homepage = "http://github.com/jotto/skinny_jeans"
10
10
  s.authors = ["Jonathan Otto"]
11
- s.add_dependency 'sqlite3-ruby', '>= 1.2.4'
12
- s.add_dependency 'activerecord', '>= 2.3.8'
11
+ s.add_dependency 'sqlite3-ruby', '>= 1.3.3'
12
+ s.add_dependency 'activerecord', '>= 3.0.0'
13
13
  s.add_dependency 'spinner', '>= 1.0.0'
14
+ s.add_dependency 'home_run', '>= 1.0.1'
14
15
  end
15
16
  rescue LoadError
16
17
  puts "Jeweler, or one of its dependencies, is not available. Install it with: sudo gem install jeweler"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.1
1
+ 0.9.0
@@ -29,7 +29,7 @@ module SkinnyJeans
29
29
  val=return_param_from_valid_url_or_path(url,"q")
30
30
  end
31
31
  # whitelist of acceptable characters
32
- val = val.present? && val.gsub(/[^0-9A-Za-z\s"'!@#\$%\^&\*\(\)\?\<\>\[\]:;,\.+-_=]/, '') != val ? nil : val
32
+ val = !!val && val.gsub(/[^0-9A-Za-z\s"'!@#\$%\^&\*\(\)\?\<\>\[\]:;,\.+-_=]/, '') != val ? nil : val
33
33
  return val
34
34
  end
35
35
 
@@ -40,18 +40,13 @@ module SkinnyJeans
40
40
  if _uri.query.present?
41
41
  _cgi = CGI.parse(_uri.query)
42
42
  if _cgi[param_name]
43
- val = unescape_string(_cgi[param_name].join).strip.downcase
43
+ val = URI.decode(_cgi[param_name].join).strip.downcase
44
44
  return (!val.nil? && val!='' ? val : nil)
45
45
  end
46
46
  end
47
47
  return nil
48
48
  end
49
49
 
50
- def unescape_string(_string)
51
- temp = _string.dup
52
- temp = CGI.unescape(temp) while CGI.unescape(temp) != temp
53
- temp
54
- end
55
50
  end
56
51
 
57
52
  attr_accessor :string_value
data/lib/skinny_jeans.rb CHANGED
@@ -8,9 +8,16 @@ require 'rubygems'
8
8
  require 'active_record'
9
9
  require 'sqlite3'
10
10
  require 'spinner'
11
+
12
+ # faster URI decoding (negligible savings)
13
+ # leaving comment for reference
14
+ # require 'escape_utils'
15
+
11
16
  require File.expand_path(File.dirname(__FILE__) + "/skinny_jeans/string_parser")
12
17
  require File.expand_path(File.dirname(__FILE__) + "/skinny_jeans/log_parser")
13
- # require 'home_run'
18
+
19
+ # faster date parsing (about a 17% speed boost)
20
+ require 'home_run'
14
21
 
15
22
 
16
23
  # SkinnyJeans::execute(ARGV.first) if "#{$0}".gsub(/.*\//,"") == "skinny_jeans.rb"
@@ -64,3 +71,25 @@ module SkinnyJeans
64
71
 
65
72
  end
66
73
 
74
+ # class CGI
75
+ # # @@accept_charset="UTF-8" unless defined?(@@accept_charset)
76
+ # # # URL-encode a string.
77
+ # # # url_encoded_string = CGI::escape("'Stop!' said Fred")
78
+ # # # # => "%27Stop%21%27+said+Fred"
79
+ # # def CGI::escape(string)
80
+ # # string.gsub(/([^ a-zA-Z0-9_.-]+)/) do
81
+ # # '%' + $1.unpack('H2' * $1.bytesize).join('%').upcase
82
+ # # end.tr(' ', '+')
83
+ # # end
84
+ #
85
+ #
86
+ # # URL-decode a string with encoding(optional).
87
+ # # string = CGI::unescape("%27Stop%21%27+said+Fred")
88
+ # # # => "'Stop!' said Fred"
89
+ # def CGI::unescape(string,encoding=@@accept_charset)
90
+ # str=string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT).gsub(/((?:%[0-9a-fA-F]{2})+)/u) do
91
+ # [$1.delete('%')].pack('H*')
92
+ # end.force_encoding(encoding)
93
+ # str.valid_encoding? ? str : str.force_encoding(string.encoding)
94
+ # end
95
+ # end
data/skinny_jeans.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{skinny_jeans}
8
- s.version = "0.8.1"
8
+ s.version = "0.9.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jonathan Otto"]
@@ -40,18 +40,21 @@ Gem::Specification.new do |s|
40
40
  s.specification_version = 3
41
41
 
42
42
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
- s.add_runtime_dependency(%q<sqlite3-ruby>, [">= 1.2.4"])
44
- s.add_runtime_dependency(%q<activerecord>, [">= 2.3.8"])
43
+ s.add_runtime_dependency(%q<sqlite3-ruby>, [">= 1.3.3"])
44
+ s.add_runtime_dependency(%q<activerecord>, [">= 3.0.0"])
45
45
  s.add_runtime_dependency(%q<spinner>, [">= 1.0.0"])
46
+ s.add_runtime_dependency(%q<home_run>, [">= 1.0.1"])
46
47
  else
47
- s.add_dependency(%q<sqlite3-ruby>, [">= 1.2.4"])
48
- s.add_dependency(%q<activerecord>, [">= 2.3.8"])
48
+ s.add_dependency(%q<sqlite3-ruby>, [">= 1.3.3"])
49
+ s.add_dependency(%q<activerecord>, [">= 3.0.0"])
49
50
  s.add_dependency(%q<spinner>, [">= 1.0.0"])
51
+ s.add_dependency(%q<home_run>, [">= 1.0.1"])
50
52
  end
51
53
  else
52
- s.add_dependency(%q<sqlite3-ruby>, [">= 1.2.4"])
53
- s.add_dependency(%q<activerecord>, [">= 2.3.8"])
54
+ s.add_dependency(%q<sqlite3-ruby>, [">= 1.3.3"])
55
+ s.add_dependency(%q<activerecord>, [">= 3.0.0"])
54
56
  s.add_dependency(%q<spinner>, [">= 1.0.0"])
57
+ s.add_dependency(%q<home_run>, [">= 1.0.1"])
55
58
  end
56
59
  end
57
60
 
@@ -65,6 +65,7 @@ class SkinnyJeans::StringParserTest < Test::Unit::TestCase
65
65
 
66
66
  assert_equal "\"dealzon\"", SkinnyJeans::StringParser.extract_search_query("http://www.google.com/search?client=safari&rls=en&q=%22dealzon%22&ie=UTF-8&oe=UTF-8")
67
67
  assert_equal "\'dealzon\'", SkinnyJeans::StringParser.extract_search_query("http://www.google.com/search?client=safari&rls=en&q='dealzon'&ie=UTF-8&oe=UTF-8")
68
+ assert_equal "legos on sale %80 off", SkinnyJeans::StringParser::extract_search_query("http://www.google.com/url?sa=t&source=web&cd=8&ved=0CF0QFjAH&url=http%3A%2F%2Fdealzon.com%2Fdeals%2Flego-mindstorms-nxt-robotics-micro-controller-brick-black&rct=j&q=legos%20on%20sale%20%2580%20off&ei=klp9TdyXEPC10QHMpujiAw&usg=AFQjCNFksmyHsu8gLq436-GpHmeu-OAcWQ")
68
69
  end
69
70
 
70
71
  end
@@ -9,6 +9,7 @@ class SkinnyJeansTest < Test::Unit::TestCase
9
9
  def test_will_work
10
10
  db_path = File.expand_path(File.dirname(__FILE__) + "/more_sample_data/skinny_jeans_deal_show.db")
11
11
  FileUtils.rm(db_path) if File.exists?(db_path)
12
+ # broken_access_big.log is 337,925 lines
12
13
  _logfile_path = File.expand_path(File.dirname(__FILE__) + "/more_sample_data/broken_access_big.log")
13
14
  puts db_path.inspect
14
15
  sj=SkinnyJeans::LogParser.execute(_logfile_path, sqlite_skinny_jeans = db_path, path_regexp = /\s\/deals\/(.*)\sHTTP/, date_regexp = /\[(\d.*\d)\]/)
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 8
8
- - 1
9
- version: 0.8.1
7
+ - 9
8
+ - 0
9
+ version: 0.9.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jonathan Otto
@@ -27,9 +27,9 @@ dependencies:
27
27
  - !ruby/object:Gem::Version
28
28
  segments:
29
29
  - 1
30
- - 2
31
- - 4
32
- version: 1.2.4
30
+ - 3
31
+ - 3
32
+ version: 1.3.3
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency
@@ -41,10 +41,10 @@ dependencies:
41
41
  - - ">="
42
42
  - !ruby/object:Gem::Version
43
43
  segments:
44
- - 2
45
44
  - 3
46
- - 8
47
- version: 2.3.8
45
+ - 0
46
+ - 0
47
+ version: 3.0.0
48
48
  type: :runtime
49
49
  version_requirements: *id002
50
50
  - !ruby/object:Gem::Dependency
@@ -62,6 +62,21 @@ dependencies:
62
62
  version: 1.0.0
63
63
  type: :runtime
64
64
  version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: home_run
67
+ prerelease: false
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ segments:
74
+ - 1
75
+ - 0
76
+ - 1
77
+ version: 1.0.1
78
+ type: :runtime
79
+ version_requirements: *id004
65
80
  description: Fast webserver log parser for persisting daily pageviews per path to sqlite
66
81
  email: jonathan.otto@gmail.com
67
82
  executables: []