log_analysis 0.1.1 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/Gemfile.lock +3 -3
- data/README.md +14 -9
- data/access.log +10000 -7549
- data/lib/log_analysis.rb +30 -12
- data/lib/log_analysis/{rule_generation.rb → data_mining.rb} +5 -16
- data/{spmf.jar → lib/log_analysis/files/spmf.jar} +0 -0
- data/lib/log_analysis/intepretation.rb +22 -0
- data/lib/log_analysis/{preprocess.rb → loading_data.rb} +11 -7
- data/lib/log_analysis/model/user_identity.rb +3 -1
- data/lib/log_analysis/session_identification.rb +6 -4
- data/lib/log_analysis/transformation.rb +2 -4
- data/lib/log_analysis/user_identification.rb +9 -1
- data/lib/log_analysis/version.rb +6 -2
- metadata +10 -9
data/lib/log_analysis.rb
CHANGED
@@ -1,36 +1,54 @@
|
|
1
1
|
require 'log_analysis/version'
|
2
|
-
require 'log_analysis/
|
2
|
+
require 'log_analysis/loading_data'
|
3
3
|
require 'log_analysis/user_identification'
|
4
4
|
require 'log_analysis/session_identification'
|
5
5
|
require 'log_analysis/transformation'
|
6
|
-
require 'log_analysis/
|
6
|
+
require 'log_analysis/data_mining'
|
7
|
+
require 'log_analysis/intepretation'
|
8
|
+
require 'time'
|
7
9
|
|
8
10
|
class LogAnalysis
|
9
11
|
class Error < StandardError; end
|
10
12
|
# Your code goes here...
|
11
13
|
|
12
|
-
|
14
|
+
attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
|
13
15
|
|
14
16
|
def initialize(path, type = nil)
|
15
17
|
@path = path
|
16
18
|
@type = type
|
17
|
-
@
|
18
|
-
system('mkdir', '-p', LogAnalysis::DATA_PATH)
|
19
|
+
@origin_data = LoadingData.input(path, type)
|
19
20
|
end
|
20
21
|
|
21
|
-
def
|
22
|
-
|
22
|
+
def selecting_data
|
23
|
+
return @origin_data if @match_uri.nil?
|
24
|
+
|
25
|
+
@origin_data.select { |record| record.uri.match?(@match_uri) }
|
23
26
|
end
|
24
27
|
|
25
|
-
def
|
26
|
-
|
28
|
+
def preprocessing_data
|
29
|
+
data = selecting_data
|
30
|
+
return if data.nil? || data.empty?
|
31
|
+
|
32
|
+
filter = data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
|
33
|
+
user = UserIdentification.execute(filter)
|
34
|
+
session = SessionIdentification.execute(user)
|
35
|
+
session
|
27
36
|
end
|
28
37
|
|
29
38
|
def transformation
|
30
|
-
|
39
|
+
data = preprocessing_data
|
40
|
+
Transformation.execute(data) unless data.nil? || data.empty?
|
41
|
+
end
|
42
|
+
|
43
|
+
def data_mining
|
44
|
+
data = transformation
|
45
|
+
@conf ||= 0.5
|
46
|
+
@sup ||= 60
|
47
|
+
DataMining.execute(data, @conf, @sup) unless data.nil? || data.empty?
|
31
48
|
end
|
32
49
|
|
33
|
-
def
|
34
|
-
|
50
|
+
def intepretation
|
51
|
+
data = data_mining
|
52
|
+
Intepretation.execute(data) unless data.nil? || data.empty?
|
35
53
|
end
|
36
54
|
end
|
@@ -1,25 +1,14 @@
|
|
1
1
|
require 'time'
|
2
2
|
require 'log_analysis/version'
|
3
3
|
|
4
|
-
module
|
5
|
-
JAR_FILE_PATH = File.expand_path('spmf.jar')
|
6
|
-
TRANSFORM_DATA_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}transform_data_#{Time.now.strftime('%Y%m%d')}.txt")
|
7
|
-
RULE_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}output_#{Time.now.strftime('%Y%m%d')}.txt")
|
8
|
-
MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
|
9
|
-
|
4
|
+
module DataMining
|
10
5
|
class Error < StandardError; end
|
11
6
|
# Your code goes here...
|
12
7
|
|
13
|
-
def self.execute(transform_data)
|
14
|
-
File.open(TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
15
|
-
system("java -jar #{JAR_FILE_PATH} run SPADE #{TRANSFORM_DATA_PATH} #{RULE_FILE_PATH}
|
16
|
-
|
17
|
-
map_uri = File.read(MAP_URI_FILE_PATH).split(' ')
|
18
|
-
|
19
|
-
result.map do |rule|
|
20
|
-
seq, sub, rea = rule
|
21
|
-
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
22
|
-
end
|
8
|
+
def self.execute(transform_data, min_conf, min_sup)
|
9
|
+
File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
10
|
+
system("java -jar #{LogAnalysis::JAR_FILE_PATH} run SPADE #{LogAnalysis::TRANSFORM_DATA_PATH} #{LogAnalysis::RULE_FILE_PATH} #{min_sup}%")
|
11
|
+
rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
|
23
12
|
end
|
24
13
|
|
25
14
|
def self.rule_gen(seqs, min_conf)
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'log_analysis/version'
|
2
|
+
|
3
|
+
module Intepretation
|
4
|
+
class Error < StandardError; end
|
5
|
+
# Your code goes here...
|
6
|
+
|
7
|
+
def self.execute(data_mining)
|
8
|
+
map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
|
9
|
+
move_data
|
10
|
+
|
11
|
+
data_mining.map do |data|
|
12
|
+
seq, sub, rea = data
|
13
|
+
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.move_data
|
18
|
+
return unless File.directory?(LogAnalysis::DATA_PATH)
|
19
|
+
|
20
|
+
system('mv', "*_#{Time.now.strftime('%Y%m%d')}.txt", LogAnalysis::DATA_PATH)
|
21
|
+
end
|
22
|
+
end
|
@@ -2,7 +2,7 @@ require 'log_analysis/model/record'
|
|
2
2
|
require 'log_analysis/model/user_identity'
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
module
|
5
|
+
module LoadingData
|
6
6
|
class Error < StandardError; end
|
7
7
|
# Your code goes here...
|
8
8
|
|
@@ -13,15 +13,17 @@ module PreProcess
|
|
13
13
|
CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze
|
14
14
|
|
15
15
|
def self.input(file_path, type)
|
16
|
-
@users
|
16
|
+
@users = []
|
17
|
+
text_file = File.readlines(file_path)
|
17
18
|
|
18
|
-
|
19
|
+
text_file.each_with_object([]).with_index do |(line, arr), i|
|
19
20
|
preprocessed_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
|
20
|
-
|
21
|
+
record_params = send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)
|
22
|
+
record = Record.new(record_params) if record_params && preprocessed_log
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
system('clear')
|
25
|
+
puts "#{((i.to_f / text_file.size) * 100).round}/100"
|
26
|
+
arr.push(record) if record
|
25
27
|
end
|
26
28
|
end
|
27
29
|
|
@@ -35,6 +37,8 @@ module PreProcess
|
|
35
37
|
|
36
38
|
def self.convert_nginx_logs(log)
|
37
39
|
o = log.split(REGEX_NGINX)
|
40
|
+
|
41
|
+
return false if o.size <= 1
|
38
42
|
o.delete('')
|
39
43
|
|
40
44
|
{}.tap do |p|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
require 'active_support/core_ext/module/delegation'
|
2
|
+
require 'log_analysis/model/record'
|
2
3
|
require 'useragent'
|
3
4
|
|
4
5
|
class UserIdentity
|
5
|
-
attr_accessor :host, :user_agent
|
6
|
+
attr_accessor :host, :user_agent, :records
|
6
7
|
|
7
8
|
delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
|
8
9
|
|
9
10
|
def initialize(params)
|
10
11
|
@host = params[:host]
|
11
12
|
@user_agent = params[:user_agent]
|
13
|
+
@records = params[:records]
|
12
14
|
end
|
13
15
|
end
|
@@ -7,10 +7,12 @@ module SessionIdentification
|
|
7
7
|
class Error < StandardError; end
|
8
8
|
# Your code goes here...
|
9
9
|
|
10
|
-
def self.execute(
|
11
|
-
|
12
|
-
isession = arr.rindex { |s| s.user ==
|
13
|
-
|
10
|
+
def self.execute(identified_user)
|
11
|
+
identified_user.each_with_object([]) do |user, arr|
|
12
|
+
isession = arr.rindex { |s| s.user == user }
|
13
|
+
user.records.each do |record|
|
14
|
+
isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
|
15
|
+
end
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -3,13 +3,11 @@ require 'log_analysis/model/user_identity'
|
|
3
3
|
require 'log_analysis/version'
|
4
4
|
|
5
5
|
module Transformation
|
6
|
-
MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
|
7
|
-
|
8
6
|
class Error < StandardError; end
|
9
7
|
# Your code goes here...
|
10
8
|
|
11
9
|
def self.execute(identified_session)
|
12
|
-
map_uri = []
|
10
|
+
map_uri = ['-']
|
13
11
|
transform = identified_session.each_with_object({}) do |v, hash|
|
14
12
|
uries = v.records.map(&:uri)
|
15
13
|
uries.each { |i| map_uri.push(i) unless map_uri.include?(i) }
|
@@ -20,7 +18,7 @@ module Transformation
|
|
20
18
|
end
|
21
19
|
end
|
22
20
|
|
23
|
-
File.open(MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
21
|
+
File.open(LogAnalysis::MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
24
22
|
transform
|
25
23
|
end
|
26
24
|
end
|
@@ -6,6 +6,14 @@ module UserIdentification
|
|
6
6
|
# Your code goes here...
|
7
7
|
|
8
8
|
def self.execute(cleaned_data)
|
9
|
-
cleaned_data.
|
9
|
+
cleaned_data.each_with_object([]) do |record, arr|
|
10
|
+
user = arr.detect { |i| i == record.user }
|
11
|
+
if user
|
12
|
+
user.records.push(record)
|
13
|
+
else
|
14
|
+
record.user.records = [record]
|
15
|
+
arr << record.user
|
16
|
+
end
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
data/lib/log_analysis/version.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
class LogAnalysis
|
2
|
-
VERSION
|
3
|
-
|
2
|
+
VERSION = '0.1.6'.freeze
|
3
|
+
TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
4
|
+
RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
5
|
+
MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
6
|
+
JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
|
7
|
+
DATA_PATH = File.expand_path('data/log_analysis', '~')
|
4
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Tran
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: useragent
|
@@ -57,24 +57,25 @@ files:
|
|
57
57
|
- bin/console
|
58
58
|
- bin/setup
|
59
59
|
- lib/log_analysis.rb
|
60
|
+
- lib/log_analysis/data_mining.rb
|
61
|
+
- lib/log_analysis/files/spmf.jar
|
62
|
+
- lib/log_analysis/intepretation.rb
|
63
|
+
- lib/log_analysis/loading_data.rb
|
60
64
|
- lib/log_analysis/model/record.rb
|
61
65
|
- lib/log_analysis/model/session_identity.rb
|
62
66
|
- lib/log_analysis/model/user_identity.rb
|
63
|
-
- lib/log_analysis/preprocess.rb
|
64
|
-
- lib/log_analysis/rule_generation.rb
|
65
67
|
- lib/log_analysis/session_identification.rb
|
66
68
|
- lib/log_analysis/transformation.rb
|
67
69
|
- lib/log_analysis/user_identification.rb
|
68
70
|
- lib/log_analysis/version.rb
|
69
71
|
- log_analysis.gemspec
|
70
|
-
- spmf.jar
|
71
72
|
homepage: https://github.com/michaelt0520/log_analysis_thesis
|
72
73
|
licenses:
|
73
74
|
- MIT
|
74
75
|
metadata:
|
75
76
|
homepage_uri: https://github.com/michaelt0520/log_analysis_thesis
|
76
77
|
source_code_uri: https://github.com/michaelt0520/log_analysis_thesis
|
77
|
-
post_install_message:
|
78
|
+
post_install_message:
|
78
79
|
rdoc_options: []
|
79
80
|
require_paths:
|
80
81
|
- lib
|
@@ -89,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
90
|
- !ruby/object:Gem::Version
|
90
91
|
version: '0'
|
91
92
|
requirements: []
|
92
|
-
rubygems_version: 3.1.
|
93
|
-
signing_key:
|
93
|
+
rubygems_version: 3.1.2
|
94
|
+
signing_key:
|
94
95
|
specification_version: 4
|
95
96
|
summary: Log Analysis for thesis Huflit
|
96
97
|
test_files: []
|