log_analysis 0.1.1 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/Gemfile.lock +3 -3
- data/README.md +14 -9
- data/access.log +10000 -7549
- data/lib/log_analysis.rb +30 -12
- data/lib/log_analysis/{rule_generation.rb → data_mining.rb} +5 -16
- data/{spmf.jar → lib/log_analysis/files/spmf.jar} +0 -0
- data/lib/log_analysis/intepretation.rb +22 -0
- data/lib/log_analysis/{preprocess.rb → loading_data.rb} +11 -7
- data/lib/log_analysis/model/user_identity.rb +3 -1
- data/lib/log_analysis/session_identification.rb +6 -4
- data/lib/log_analysis/transformation.rb +2 -4
- data/lib/log_analysis/user_identification.rb +9 -1
- data/lib/log_analysis/version.rb +6 -2
- metadata +10 -9
data/lib/log_analysis.rb
CHANGED
@@ -1,36 +1,54 @@
|
|
1
1
|
require 'log_analysis/version'
|
2
|
-
require 'log_analysis/preprocess'
|
2
|
+
require 'log_analysis/loading_data'
|
3
3
|
require 'log_analysis/user_identification'
|
4
4
|
require 'log_analysis/session_identification'
|
5
5
|
require 'log_analysis/transformation'
|
6
|
-
require 'log_analysis/rule_generation'
|
6
|
+
require 'log_analysis/data_mining'
|
7
|
+
require 'log_analysis/intepretation'
|
8
|
+
require 'time'
|
7
9
|
|
8
10
|
class LogAnalysis
|
9
11
|
class Error < StandardError; end
|
10
12
|
# Your code goes here...
|
11
13
|
|
12
|
-
|
14
|
+
attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
|
13
15
|
|
14
16
|
def initialize(path, type = nil)
|
15
17
|
@path = path
|
16
18
|
@type = type
|
17
|
-
@
|
18
|
-
system('mkdir', '-p', LogAnalysis::DATA_PATH)
|
19
|
+
@origin_data = LoadingData.input(path, type)
|
19
20
|
end
|
20
21
|
|
21
|
-
def
|
22
|
-
|
22
|
+
def selecting_data
|
23
|
+
return @origin_data if @match_uri.nil?
|
24
|
+
|
25
|
+
@origin_data.select { |record| record.uri.match?(@match_uri) }
|
23
26
|
end
|
24
27
|
|
25
|
-
def
|
26
|
-
|
28
|
+
def preprocessing_data
|
29
|
+
data = selecting_data
|
30
|
+
return if data.nil? || data.empty?
|
31
|
+
|
32
|
+
filter = data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
|
33
|
+
user = UserIdentification.execute(filter)
|
34
|
+
session = SessionIdentification.execute(user)
|
35
|
+
session
|
27
36
|
end
|
28
37
|
|
29
38
|
def transformation
|
30
|
-
|
39
|
+
data = preprocessing_data
|
40
|
+
Transformation.execute(data) unless data.nil? || data.empty?
|
41
|
+
end
|
42
|
+
|
43
|
+
def data_mining
|
44
|
+
data = transformation
|
45
|
+
@conf ||= 0.5
|
46
|
+
@sup ||= 60
|
47
|
+
DataMining.execute(data, @conf, @sup) unless data.nil? || data.empty?
|
31
48
|
end
|
32
49
|
|
33
|
-
def
|
34
|
-
|
50
|
+
def intepretation
|
51
|
+
data = data_mining
|
52
|
+
Intepretation.execute(data) unless data.nil? || data.empty?
|
35
53
|
end
|
36
54
|
end
|
@@ -1,25 +1,14 @@
|
|
1
1
|
require 'time'
|
2
2
|
require 'log_analysis/version'
|
3
3
|
|
4
|
-
module
|
5
|
-
JAR_FILE_PATH = File.expand_path('spmf.jar')
|
6
|
-
TRANSFORM_DATA_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}transform_data_#{Time.now.strftime('%Y%m%d')}.txt")
|
7
|
-
RULE_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}output_#{Time.now.strftime('%Y%m%d')}.txt")
|
8
|
-
MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
|
9
|
-
|
4
|
+
module DataMining
|
10
5
|
class Error < StandardError; end
|
11
6
|
# Your code goes here...
|
12
7
|
|
13
|
-
def self.execute(transform_data)
|
14
|
-
File.open(TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
15
|
-
system("java -jar #{JAR_FILE_PATH} run SPADE #{TRANSFORM_DATA_PATH} #{RULE_FILE_PATH}
|
16
|
-
|
17
|
-
map_uri = File.read(MAP_URI_FILE_PATH).split(' ')
|
18
|
-
|
19
|
-
result.map do |rule|
|
20
|
-
seq, sub, rea = rule
|
21
|
-
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
22
|
-
end
|
8
|
+
def self.execute(transform_data, min_conf, min_sup)
|
9
|
+
File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
10
|
+
system("java -jar #{LogAnalysis::JAR_FILE_PATH} run SPADE #{LogAnalysis::TRANSFORM_DATA_PATH} #{LogAnalysis::RULE_FILE_PATH} #{min_sup}%")
|
11
|
+
rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
|
23
12
|
end
|
24
13
|
|
25
14
|
def self.rule_gen(seqs, min_conf)
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'log_analysis/version'
|
2
|
+
|
3
|
+
module Intepretation
|
4
|
+
class Error < StandardError; end
|
5
|
+
# Your code goes here...
|
6
|
+
|
7
|
+
def self.execute(data_mining)
|
8
|
+
map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
|
9
|
+
move_data
|
10
|
+
|
11
|
+
data_mining.map do |data|
|
12
|
+
seq, sub, rea = data
|
13
|
+
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.move_data
|
18
|
+
return unless File.directory?(LogAnalysis::DATA_PATH)
|
19
|
+
|
20
|
+
system('mv', "*_#{Time.now.strftime('%Y%m%d')}.txt", LogAnalysis::DATA_PATH)
|
21
|
+
end
|
22
|
+
end
|
@@ -2,7 +2,7 @@ require 'log_analysis/model/record'
|
|
2
2
|
require 'log_analysis/model/user_identity'
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
module PreProcess
|
5
|
+
module LoadingData
|
6
6
|
class Error < StandardError; end
|
7
7
|
# Your code goes here...
|
8
8
|
|
@@ -13,15 +13,17 @@ module PreProcess
|
|
13
13
|
CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze
|
14
14
|
|
15
15
|
def self.input(file_path, type)
|
16
|
-
@users
|
16
|
+
@users = []
|
17
|
+
text_file = File.readlines(file_path)
|
17
18
|
|
18
|
-
|
19
|
+
text_file.each_with_object([]).with_index do |(line, arr), i|
|
19
20
|
preprocessed_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
|
20
|
-
|
21
|
+
record_params = send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)
|
22
|
+
record = Record.new(record_params) if record_params && preprocessed_log
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
system('clear')
|
25
|
+
puts "#{((i.to_f / text_file.size) * 100).round}/100"
|
26
|
+
arr.push(record) if record
|
25
27
|
end
|
26
28
|
end
|
27
29
|
|
@@ -35,6 +37,8 @@ module PreProcess
|
|
35
37
|
|
36
38
|
def self.convert_nginx_logs(log)
|
37
39
|
o = log.split(REGEX_NGINX)
|
40
|
+
|
41
|
+
return false if o.size <= 1
|
38
42
|
o.delete('')
|
39
43
|
|
40
44
|
{}.tap do |p|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
require 'active_support/core_ext/module/delegation'
|
2
|
+
require 'log_analysis/model/record'
|
2
3
|
require 'useragent'
|
3
4
|
|
4
5
|
class UserIdentity
|
5
|
-
attr_accessor :host, :user_agent
|
6
|
+
attr_accessor :host, :user_agent, :records
|
6
7
|
|
7
8
|
delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
|
8
9
|
|
9
10
|
def initialize(params)
|
10
11
|
@host = params[:host]
|
11
12
|
@user_agent = params[:user_agent]
|
13
|
+
@records = params[:records]
|
12
14
|
end
|
13
15
|
end
|
@@ -7,10 +7,12 @@ module SessionIdentification
|
|
7
7
|
class Error < StandardError; end
|
8
8
|
# Your code goes here...
|
9
9
|
|
10
|
-
def self.execute(
|
11
|
-
|
12
|
-
isession = arr.rindex { |s| s.user ==
|
13
|
-
|
10
|
+
def self.execute(identified_user)
|
11
|
+
identified_user.each_with_object([]) do |user, arr|
|
12
|
+
isession = arr.rindex { |s| s.user == user }
|
13
|
+
user.records.each do |record|
|
14
|
+
isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
|
15
|
+
end
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -3,13 +3,11 @@ require 'log_analysis/model/user_identity'
|
|
3
3
|
require 'log_analysis/version'
|
4
4
|
|
5
5
|
module Transformation
|
6
|
-
MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
|
7
|
-
|
8
6
|
class Error < StandardError; end
|
9
7
|
# Your code goes here...
|
10
8
|
|
11
9
|
def self.execute(identified_session)
|
12
|
-
map_uri = []
|
10
|
+
map_uri = ['-']
|
13
11
|
transform = identified_session.each_with_object({}) do |v, hash|
|
14
12
|
uries = v.records.map(&:uri)
|
15
13
|
uries.each { |i| map_uri.push(i) unless map_uri.include?(i) }
|
@@ -20,7 +18,7 @@ module Transformation
|
|
20
18
|
end
|
21
19
|
end
|
22
20
|
|
23
|
-
File.open(MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
21
|
+
File.open(LogAnalysis::MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
24
22
|
transform
|
25
23
|
end
|
26
24
|
end
|
@@ -6,6 +6,14 @@ module UserIdentification
|
|
6
6
|
# Your code goes here...
|
7
7
|
|
8
8
|
def self.execute(cleaned_data)
|
9
|
-
cleaned_data.
|
9
|
+
cleaned_data.each_with_object([]) do |record, arr|
|
10
|
+
user = arr.detect { |i| i == record.user }
|
11
|
+
if user
|
12
|
+
user.records.push(record)
|
13
|
+
else
|
14
|
+
record.user.records = [record]
|
15
|
+
arr << record.user
|
16
|
+
end
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
data/lib/log_analysis/version.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
class LogAnalysis
|
2
|
-
VERSION
|
3
|
-
|
2
|
+
VERSION = '0.1.6'.freeze
|
3
|
+
TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
4
|
+
RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
5
|
+
MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
6
|
+
JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
|
7
|
+
DATA_PATH = File.expand_path('data/log_analysis', '~')
|
4
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Tran
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: useragent
|
@@ -57,24 +57,25 @@ files:
|
|
57
57
|
- bin/console
|
58
58
|
- bin/setup
|
59
59
|
- lib/log_analysis.rb
|
60
|
+
- lib/log_analysis/data_mining.rb
|
61
|
+
- lib/log_analysis/files/spmf.jar
|
62
|
+
- lib/log_analysis/intepretation.rb
|
63
|
+
- lib/log_analysis/loading_data.rb
|
60
64
|
- lib/log_analysis/model/record.rb
|
61
65
|
- lib/log_analysis/model/session_identity.rb
|
62
66
|
- lib/log_analysis/model/user_identity.rb
|
63
|
-
- lib/log_analysis/preprocess.rb
|
64
|
-
- lib/log_analysis/rule_generation.rb
|
65
67
|
- lib/log_analysis/session_identification.rb
|
66
68
|
- lib/log_analysis/transformation.rb
|
67
69
|
- lib/log_analysis/user_identification.rb
|
68
70
|
- lib/log_analysis/version.rb
|
69
71
|
- log_analysis.gemspec
|
70
|
-
- spmf.jar
|
71
72
|
homepage: https://github.com/michaelt0520/log_analysis_thesis
|
72
73
|
licenses:
|
73
74
|
- MIT
|
74
75
|
metadata:
|
75
76
|
homepage_uri: https://github.com/michaelt0520/log_analysis_thesis
|
76
77
|
source_code_uri: https://github.com/michaelt0520/log_analysis_thesis
|
77
|
-
post_install_message:
|
78
|
+
post_install_message:
|
78
79
|
rdoc_options: []
|
79
80
|
require_paths:
|
80
81
|
- lib
|
@@ -89,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
90
|
- !ruby/object:Gem::Version
|
90
91
|
version: '0'
|
91
92
|
requirements: []
|
92
|
-
rubygems_version: 3.1.
|
93
|
-
signing_key:
|
93
|
+
rubygems_version: 3.1.2
|
94
|
+
signing_key:
|
94
95
|
specification_version: 4
|
95
96
|
summary: Log Analysis for thesis Huflit
|
96
97
|
test_files: []
|