log_analysis 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,29 +1,48 @@
1
1
  require 'log_analysis/version'
2
- require 'log_analysis/preprocess'
2
+ require 'log_analysis/loading_data'
3
3
  require 'log_analysis/user_identification'
4
4
  require 'log_analysis/session_identification'
5
+ require 'log_analysis/transformation'
6
+ require 'log_analysis/data_mining'
7
+ require 'log_analysis/intepretation'
8
+ require 'time'
5
9
 
6
10
  class LogAnalysis
7
11
  class Error < StandardError; end
8
12
  # Your code goes here...
9
13
 
10
- attr_reader :path, :origin_logs
14
+ attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
11
15
 
12
16
  def initialize(path, type = nil)
13
- @path = path
14
- @type = type
15
- @origin_logs = PreProcess.input(path, type)
17
+ @path = path
18
+ @type = type
19
+ @origin_data = LoadingData.input(path, type)
16
20
  end
17
21
 
18
- def cleaned_data
19
- PreProcess.data_cleaning(@origin_logs)
22
+ def selecting_data
23
+ return @origin_data if @match_uri.nil?
24
+
25
+ @origin_data.select { |record| record.uri.match?(@match_uri) }
26
+ end
27
+
28
+ def preprocessing_data
29
+ filter = selecting_data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
30
+ user = UserIdentification.execute(filter)
31
+ session = SessionIdentification.execute(user)
32
+ session
33
+ end
34
+
35
+ def transformation
36
+ Transformation.execute(preprocessing_data)
20
37
  end
21
38
 
22
- def identified_user
23
- UserIdentification.execute(cleaned_data)
39
+ def data_mining
40
+ @conf ||= 0.5
41
+ @sup ||= 60
42
+ DataMining.execute(transformation, @conf, @sup)
24
43
  end
25
44
 
26
- def identified_session
27
- SessionIdentification.execute(cleaned_data)
45
+ def intepretation
46
+ Intepretation.execute(data_mining)
28
47
  end
29
48
  end
@@ -0,0 +1,48 @@
1
+ require 'time'
2
+ require 'log_analysis/version'
3
+
4
# Mines frequent sequences with the bundled SPMF jar (SPADE algorithm) and
# derives rules between a mined sequence and its mined sub-sequences.
module DataMining
  class Error < StandardError; end

  # Writes +transform_data+ to LogAnalysis::TRANSFORM_DATA_PATH, runs SPADE on
  # it and turns the mined sequences into rules.
  #
  # @param transform_data [Hash] values are lists whose entries are either a
  #   single item or an Array of items (one itemset)
  # @param min_conf [Numeric] minimum confidence a rule must reach
  # @param min_sup [Numeric] minimum support, passed to SPADE as a percentage
  # @return [Array<Array>] see {rule_gen}
  # @raise [Error] when the java process cannot be started or exits non-zero
  def self.execute(transform_data, min_conf, min_sup)
    File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') do |f|
      transform_data.each_value do |items|
        # Itemsets are separated by " -1 " and every sequence ends with " -1 -2".
        f.puts(items.map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ') + ' -1 -2')
      end
    end

    # Argument-list form: no shell is involved, so paths with spaces or shell
    # metacharacters are passed through untouched.
    ok = system('java', '-jar', LogAnalysis::JAR_FILE_PATH, 'run', 'SPADE',
                LogAnalysis::TRANSFORM_DATA_PATH, LogAnalysis::RULE_FILE_PATH, "#{min_sup}%")
    raise Error, 'SPADE run failed (java missing or non-zero exit status)' unless ok

    rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
  end

  # Pairs every sequence with each of its proper sub-sequences found in +seqs+.
  #
  # @param seqs [Array<Array>] [itemsets, support] pairs as built by {get_seq}
  # @param min_conf [Numeric]
  # @return [Array<Array>] [sequence, sub_sequence, confidence] triples where
  #   confidence = support(sequence) / support(sub_sequence) >= min_conf
  def self.rule_gen(seqs, min_conf)
    seqs.each_with_object([]) do |seq, rules|
      seqs.each do |sub|
        next if sub[0] == seq[0] || sub[1].zero? || !sub_seq?(sub[0], seq[0])

        confidence = seq[1] / sub[1]
        rules.push([seq[0], sub[0], confidence]) if confidence >= min_conf
      end
    end
  end

  # True when every itemset of +first+ is contained, in order, in a distinct
  # itemset of +second+. An empty +first+ is a sub-sequence of anything.
  def self.sub_seq?(first, second)
    ptr = 0
    first.all? do |itemset|
      offset = second.drop(ptr).index { |candidate| sub?(candidate, itemset) }
      next false unless offset # this itemset has no match left: not a sub-sequence

      ptr += offset + 1
      true
    end
  end

  # True when every space-separated item of +sub+ is an item of +str+.
  # Items are compared as whole tokens, so "1" is not found inside "12".
  def self.sub?(str, sub)
    (sub.split - str.split).empty?
  end

  # Parses the mined-sequence file: itemsets terminated by "-1", followed by a
  # trailer that carries the support count (SPMF writes "#SUP: n").
  # Lines without a "-1" terminator (e.g. blank lines) are skipped.
  #
  # @param seq_str [String] whole content of the output file
  # @return [Array<Array>] [[itemset strings], support as Float] per line
  def self.get_seq(seq_str)
    seq_str.split("\n").each_with_object([]) do |line, arr|
      tokens = line.split
      last_separator = tokens.rindex('-1')
      next unless last_separator

      itemsets = tokens[0...last_separator].chunk { |token| token != '-1' }
                                           .select(&:first)
                                           .map { |_, items| items.join(' ') }
      # The whole number is read, not just its final digit.
      support = tokens[(last_separator + 1)..-1].join(' ')[/\d+/].to_f
      arr.push([itemsets, support])
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'log_analysis/version'
2
+
3
# Translates mined rules from URI indexes back to the URIs stored in the map
# file, and archives the day's working files.
module Intepretation
  # Loaded here because the file header only requires log_analysis/version.
  require 'fileutils'

  class Error < StandardError; end

  # @param data_mining [Array<Array>] [sequence, sub_sequence, confidence]
  #   triples whose sequences are lists of itemset strings (URI indexes)
  # @return [Array<Array>] the same triples with indexes replaced by URIs
  def self.execute(data_mining)
    # Read the map before move_data relocates it.
    map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
    move_data

    data_mining.map do |seq, sub, rea|
      [seq.map { |set| translate(set, map_uri) }, sub.map { |set| translate(set, map_uri) }, rea]
    end
  end

  # Moves today's "*_YYYYMMDD.txt" files from the working directory into
  # LogAnalysis::DATA_PATH. Does nothing when that directory does not exist.
  # The glob is expanded in Ruby: an argument-list `system('mv', '*…')` never
  # goes through a shell, so the pattern would reach mv unexpanded.
  def self.move_data
    return unless File.directory?(LogAnalysis::DATA_PATH)

    files = Dir.glob("*_#{Time.now.strftime('%Y%m%d')}.txt")
    FileUtils.mv(files, LogAnalysis::DATA_PATH) unless files.empty?
  end

  # Maps one itemset to its URI. An itemset holding several indexes becomes
  # the space-joined URIs of all of them rather than only the first one.
  def self.translate(itemset, map_uri)
    ids = itemset.to_s.split
    return map_uri[itemset.to_i] if ids.size <= 1

    ids.map { |id| map_uri[id.to_i] }.join(' ')
  end
  private_class_method :translate
end
@@ -0,0 +1,70 @@
1
+ require 'log_analysis/model/record'
2
+ require 'log_analysis/model/user_identity'
3
+ require 'json'
4
+
5
# Reads a log file and converts every line into a Record.
module LoadingData
  class Error < StandardError; end

  REGEX_KEYS = /(time:| host:| status:| size:| request_length:| req:| method:| uri:| referer:| ua:| reqtime:| runtime:| apptime:| cache:| vhost:| server:| user:| forwardedfor:| forwardedproto:)/.freeze
  REGEX_NGINX = /\A^(?<host>\S*) (?<identity>\S*) (?<user>\S*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?:\s+\S*)?)?" (?<code>\S*) (?<size>\S*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)"(?:\s+(?<http_x_forwarded_for>\S+))?)?$/.freeze
  REGEX_APACHE = %r{(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (.{0})- \[([^\]]+?)\] "(GET|POST|PUT|DELETE) ([^\s]+?) (HTTP\/1\.1)" (\d+) (\d+) "-" "(.*)"}.freeze

  # Log type => name of the converter method. 'default' points at to_record,
  # the method that actually exists in this module.
  CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_record' }.freeze

  # Converts every line of +file_path+ and prints a percentage while doing so.
  #
  # @param file_path [String]
  # @param type [String, nil] key of CONVERT_RECORD; nil means nginx, with
  #   tabs turned into spaces and the line ending removed first
  # @return [Array<Record>] one Record per line the converter accepted
  # @raise [Error] when +type+ has no converter implemented in this module
  def self.input(file_path, type)
    converter = CONVERT_RECORD[type.nil? ? 'nginx' : type]
    raise Error, "unsupported log type: #{type.inspect}" unless converter && respond_to?(converter)

    @users = {}
    lines = File.readlines(file_path)
    last_percent = nil

    lines.each_with_index.with_object([]) do |(line, i), records|
      # Non-bang chomp: chomp! returns nil for a last line without a newline.
      log = type.nil? ? line.tr("\t", ' ').chomp : line
      params = public_send(converter, log)
      records.push(Record.new(params)) if params

      # Refresh the display only when the percentage changes, instead of
      # spawning `clear` once per log line.
      percent = ((i.to_f / lines.size) * 100).round
      next if percent == last_percent

      last_percent = percent
      system('clear') if $stdout.tty?
      puts "#{percent}/100"
    end
  end

  # Parses a "key:value key:value" line using the keys listed in REGEX_KEYS.
  # Tabs (real ones and the two-character sequence backslash-t) count as
  # spaces. +log+ itself is left unmodified.
  #
  # @param log [String]
  # @return [Hash{String=>String}] key (without colons) => stripped value;
  #   a key without a value maps to ''
  def self.to_record(log)
    parts = log.gsub(/\t|\\t/, ' ').split(REGEX_KEYS).map(&:strip)
    # The first piece is whatever precedes the first key. Only that piece is
    # dropped, so an empty value in the middle cannot shift later pairs.
    parts.shift
    parts.each_slice(2).each_with_object({}) do |(key, value), record|
      record.merge!(to_json([key, value.to_s]))
    end
  end

  # Parses one line with REGEX_NGINX.
  #
  # @param log [String]
  # @return [Hash, false] record parameters, or false when the line does not
  #   match. 'user' holds the shared UserIdentity returned by save_user.
  def self.convert_nginx_logs(log)
    match = REGEX_NGINX.match(log)
    return false unless match

    # Captures keep their positions even when one of them is empty:
    # host, identity, user, time, method, path, code, size, referer, agent,
    # http_x_forwarded_for.
    o = match.captures

    {}.tap do |p|
      p['host'] = o[0]
      p['time'] = o[3]
      p['method'] = o[4]
      p['uri'] = o[5]
      p['status'] = o[6]
      p['size'] = o[7]
      p['referer'] = o[8]
      p['ua'] = o[9]
      p['forwarded'] = o[10]
      p['user'] = save_user(o)
    end
  end

  # Builds a one-entry hash from a [key, value] pair, removing ':' from the key.
  def self.to_json(pair)
    { pair.first.delete(':') => pair.last }
  end

  # Returns the UserIdentity for the host (log[0]) and user agent (log[9]) of
  # a parsed line, creating it on first sight. Identities are cached by the
  # raw host/agent strings, so every line of one visitor shares one object.
  def self.save_user(log)
    @users ||= {}
    @users[[log[0], log[9]]] ||= UserIdentity.new(host: IPAddr.new(log[0]), user_agent: UserAgent.parse(log[9]))
  end
end
@@ -25,7 +25,7 @@ class Record
25
25
 
26
26
  attr_reader :params
27
27
 
28
- DATA_TYPE = %w[.txt .json .js .css .jpg .jpeg .gif .woff2 .ico .png .bmp .mp3 .wav .avi .mpeg .vmw .mpg .map .pdf .doc .svg].freeze
28
+ DATA_TYPE = %w[.txt .json .js .css .jpg .jpeg .gif .woff2 .ico .png .bmp .mp3 .wav .avi .mpeg .vmw .mpg .map .pdf .doc .svg .otf].freeze
29
29
  REGEX_BOT = /facebookexternalhit|Mediapartners-Google|AWS|-|Crawler|spider|Detection/.freeze
30
30
 
31
31
  def initialize(params)
@@ -70,7 +70,7 @@ class Record
70
70
  p['apptime'] = @params['apptime'].to_f
71
71
  p['cache'] = validate_string(@params['cache'])
72
72
  p['vhost'] = validate_string(@params['vhost'])
73
- p['user'] = UserIdentity.new(host: IPAddr.new(@params['host']), user_agent: UserAgent.parse(@params['ua']))
73
+ p['user'] = @params['user'] || nil
74
74
  p['forwardedfor'] = validate_string(@params['forwardedfor'])
75
75
  p['forwardedproto'] = validate_string(@params['forwardedproto'])
76
76
  end
@@ -1,13 +1,15 @@
1
1
  require 'active_support/core_ext/module/delegation'
2
+ require 'log_analysis/model/record'
2
3
  require 'useragent'
3
4
 
4
5
  class UserIdentity
5
- attr_accessor :host, :user_agent
6
+ attr_accessor :host, :user_agent, :records
6
7
 
7
8
  delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
8
9
 
9
10
  def initialize(params)
10
11
  @host = params[:host]
11
12
  @user_agent = params[:user_agent]
13
+ @records = params[:records]
12
14
  end
13
15
  end
@@ -7,19 +7,13 @@ module SessionIdentification
7
7
  class Error < StandardError; end
8
8
  # Your code goes here...
9
9
 
10
# Splits the records of every identified user into sessions.
#
# A record joins the user's most recent session when validate_time_session
# accepts the gap to that session's last record; otherwise it starts a new
# session built from session_identity_params(record).
#
# @param identified_user [Array] users responding to #records
# @return [Array<SessionIdentity>] all sessions, in creation order
def self.execute(identified_user)
  identified_user.each_with_object([]) do |user, sessions|
    # Look the user's latest session up once per user and then track it,
    # instead of rescanning every collected session for every record.
    current = sessions.reverse_each.find { |session| session.user == user }

    user.records.each do |record|
      if current && validate_time_session(current.records.last.time, record.time)
        current.records << record
      else
        session = SessionIdentity.new(session_identity_params(record))
        sessions << session
        current = session if session.user == user
      end
    end
  end
end
24
18
 
25
19
  private
@@ -0,0 +1,24 @@
1
+ require 'log_analysis/model/session_identity'
2
+ require 'log_analysis/model/user_identity'
3
+ require 'log_analysis/version'
4
+
5
# Turns identified sessions into per-host sequences of URI indexes and stores
# the index => URI mapping for the interpretation step.
module Transformation
  class Error < StandardError; end

  # Numbers every distinct URI in order of first appearance and groups the
  # sessions by host:
  # * a host's first session is stored as a flat list of URI indexes;
  # * a later session with one record appends its index to that list;
  # * a later session with several records is appended as one nested Array.
  #
  # The URIs are written, space separated and in index order, to
  # LogAnalysis::MAP_URI_FILE_PATH.
  #
  # @param identified_session [Array] sessions responding to #records (each
  #   record responding to #uri) and #user (responding to #host)
  # @return [Hash{String=>Array}] host => sequence as described above
  def self.execute(identified_session)
    # URI => index. A hash makes numbering O(1) per record, and its insertion
    # order is the index order.
    uri_index = {}

    transform = identified_session.each_with_object({}) do |session, hash|
      indexes = session.records.map { |record| uri_index[record.uri] ||= uri_index.size }
      host = session.user.host.to_s

      if !hash.key?(host)
        hash[host] = indexes
      elsif indexes.size == 1
        hash[host].concat(indexes)
      else
        hash[host].push(indexes)
      end
    end

    File.write(LogAnalysis::MAP_URI_FILE_PATH, uri_index.keys.join(' '))
    transform
  end
end
@@ -6,10 +6,14 @@ module UserIdentification
6
6
  # Your code goes here...
7
7
 
8
8
  def self.execute(cleaned_data)
9
- data = cleaned_data.map { |record| [record.host, record.ua.to_s] }.uniq
10
- data.each_with_object([]) do |record, arrs|
11
- o = UserIdentity.new(host: record.first, user_agent: record.last)
12
- arrs << o
9
+ cleaned_data.each_with_object([]) do |record, arr|
10
+ user = arr.detect { |i| i == record.user }
11
+ if user
12
+ user.records.push(record)
13
+ else
14
+ record.user.records = [record]
15
+ arr << record.user
16
+ end
13
17
  end
14
18
  end
15
19
  end
@@ -1,3 +1,8 @@
1
1
  class LogAnalysis
2
- VERSION = '0.1.0'.freeze
2
+ VERSION = '0.1.5'.freeze
3
+ TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
4
+ RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
5
+ MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
6
+ JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
7
+ DATA_PATH = File.expand_path('data/log_analysis', '~')
3
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: log_analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Tran
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-27 00:00:00.000000000 Z
11
+ date: 2020-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: useragent
@@ -53,14 +53,19 @@ files:
53
53
  - LICENSE.txt
54
54
  - README.md
55
55
  - Rakefile
56
+ - access.log
56
57
  - bin/console
57
58
  - bin/setup
58
59
  - lib/log_analysis.rb
60
+ - lib/log_analysis/data_mining.rb
61
+ - lib/log_analysis/files/spmf.jar
62
+ - lib/log_analysis/intepretation.rb
63
+ - lib/log_analysis/loading_data.rb
59
64
  - lib/log_analysis/model/record.rb
60
65
  - lib/log_analysis/model/session_identity.rb
61
66
  - lib/log_analysis/model/user_identity.rb
62
- - lib/log_analysis/preprocess.rb
63
67
  - lib/log_analysis/session_identification.rb
68
+ - lib/log_analysis/transformation.rb
64
69
  - lib/log_analysis/user_identification.rb
65
70
  - lib/log_analysis/version.rb
66
71
  - log_analysis.gemspec
@@ -70,7 +75,7 @@ licenses:
70
75
  metadata:
71
76
  homepage_uri: https://github.com/michaelt0520/log_analysis_thesis
72
77
  source_code_uri: https://github.com/michaelt0520/log_analysis_thesis
73
- post_install_message:
78
+ post_install_message:
74
79
  rdoc_options: []
75
80
  require_paths:
76
81
  - lib
@@ -85,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
90
  - !ruby/object:Gem::Version
86
91
  version: '0'
87
92
  requirements: []
88
- rubygems_version: 3.1.3
89
- signing_key:
93
+ rubygems_version: 3.1.2
94
+ signing_key:
90
95
  specification_version: 4
91
96
  summary: Log Analysis for thesis Huflit
92
97
  test_files: []
@@ -1,75 +0,0 @@
1
- require 'log_analysis/model/record'
2
- require 'json'
3
- require 'pry'
4
-
5
- module PreProcess
6
- class Error < StandardError; end
7
- # Your code goes here...
8
-
9
- REGEX_KEYS = /(time:| host:| status:| size:| request_length:| req:| method:| uri:| referer:| ua:| reqtime:| runtime:| apptime:| cache:| vhost:| server:| user:| forwardedfor:| forwardedproto:)/.freeze
10
- REGEX_NGINX = /\A^(?<host>\S*) (?<identity>\S*) (?<user>\S*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?:\s+\S*)?)?" (?<code>\S*) (?<size>\S*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)"(?:\s+(?<http_x_forwarded_for>\S+))?)?$/.freeze
11
- REGEX_APACHE = %r{(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (.{0})- \[([^\]]+?)\] "(GET|POST|PUT|DELETE) ([^\s]+?) (HTTP\/1\.1)" (\d+) (\d+) "-" "(.*)"}.freeze
12
-
13
- CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze
14
-
15
- def self.input(file_path, type)
16
- arr_logs = File.readlines(file_path).each_with_object([]) do |line, arr|
17
- preprocess_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
18
- arr.push(preprocess_log)
19
- end
20
-
21
- send(CONVERT_RECORD[type.nil? ? 'nginx' : type], arr_logs)
22
- end
23
-
24
- def self.data_cleaning(logs)
25
- logs.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
26
- end
27
-
28
- def self.to_records(logs)
29
- logs.each_with_object([]).with_index do |(log, arrays), i|
30
- next if log.nil?
31
-
32
- o = log.split(REGEX_KEYS)
33
- o = o.map(&:strip)
34
- o.delete('')
35
-
36
- log = o.each_slice(2).to_a.each_with_object({}) do |pair, log_obj|
37
- log_obj.merge!(to_json(pair))
38
- end
39
-
40
- arrays << Record.new(log)
41
-
42
- puts "#{i}/#{logs.size}"
43
- end
44
- end
45
-
46
- def self.convert_nginx_logs(logs)
47
- logs.each_with_object([]).with_index do |(log, arrays), i|
48
- next if log.nil?
49
-
50
- o = log.split(REGEX_NGINX)
51
- o.delete('')
52
-
53
- obj = {}.tap do |p|
54
- p['host'] = o[0]
55
- p['user'] = o[2]
56
- p['time'] = o[3]
57
- p['method'] = o[4]
58
- p['uri'] = o[5]
59
- p['status'] = o[6]
60
- p['size'] = o[7]
61
- p['referer'] = o[8]
62
- p['ua'] = o[9]
63
- p['forwarded'] = o[10]
64
- end
65
-
66
- arrays << Record.new(obj)
67
-
68
- puts "#{i}/#{logs.size}"
69
- end
70
- end
71
-
72
- def self.to_json(pair)
73
- { pair.first.delete(':') => pair.last }
74
- end
75
- end