log_analysis 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/Gemfile.lock +2 -2
- data/README.md +14 -9
- data/access.log +10000 -7549
- data/lib/log_analysis.rb +21 -25
- data/lib/log_analysis/{rule_generation.rb → data_mining.rb} +4 -15
- data/lib/log_analysis/intepretation.rb +22 -0
- data/lib/log_analysis/{preprocess.rb → loading_data.rb} +2 -4
- data/lib/log_analysis/model/user_identity.rb +3 -1
- data/lib/log_analysis/session_identification.rb +6 -4
- data/lib/log_analysis/transformation.rb +1 -3
- data/lib/log_analysis/user_identification.rb +9 -1
- data/lib/log_analysis/version.rb +6 -1
- metadata +5 -4
data/lib/log_analysis.rb
CHANGED
@@ -1,52 +1,48 @@
|
|
1
1
|
require 'log_analysis/version'
|
2
|
-
require 'log_analysis/preprocess'
|
2
|
+
require 'log_analysis/loading_data'
|
3
3
|
require 'log_analysis/user_identification'
|
4
4
|
require 'log_analysis/session_identification'
|
5
5
|
require 'log_analysis/transformation'
|
6
|
-
require 'log_analysis/rule_generation'
|
6
|
+
require 'log_analysis/data_mining'
|
7
|
+
require 'log_analysis/intepretation'
|
7
8
|
require 'time'
|
8
9
|
|
9
10
|
class LogAnalysis
|
10
11
|
class Error < StandardError; end
|
11
12
|
# Your code goes here...
|
12
13
|
|
13
|
-
|
14
|
+
attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
|
14
15
|
|
15
16
|
def initialize(path, type = nil)
|
16
17
|
@path = path
|
17
18
|
@type = type
|
18
|
-
@
|
19
|
-
if block_given?
|
20
|
-
yield(record) ? record : nil
|
21
|
-
else
|
22
|
-
record
|
23
|
-
end
|
24
|
-
end
|
19
|
+
@origin_data = LoadingData.input(path, type)
|
25
20
|
end
|
26
21
|
|
27
|
-
def
|
28
|
-
|
22
|
+
def selecting_data
|
23
|
+
return @origin_data if @match_uri.nil?
|
24
|
+
|
25
|
+
@origin_data.select { |record| record.uri.match?(@match_uri) }
|
29
26
|
end
|
30
27
|
|
31
|
-
def
|
32
|
-
|
28
|
+
def preprocessing_data
|
29
|
+
filter = selecting_data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
|
30
|
+
user = UserIdentification.execute(filter)
|
31
|
+
session = SessionIdentification.execute(user)
|
32
|
+
session
|
33
33
|
end
|
34
34
|
|
35
35
|
def transformation
|
36
|
-
Transformation.execute(
|
36
|
+
Transformation.execute(preprocessing_data)
|
37
37
|
end
|
38
38
|
|
39
|
-
def
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
def data_mining
|
40
|
+
@conf ||= 0.5
|
41
|
+
@sup ||= 60
|
42
|
+
DataMining.execute(transformation, @conf, @sup)
|
43
43
|
end
|
44
44
|
|
45
|
-
def
|
46
|
-
|
47
|
-
|
48
|
-
system('mv', "transform_data_#{Time.now.strftime('%Y%m%d')}.txt", '/home/app/data/waazabag/')
|
49
|
-
system('mv', "output_#{Time.now.strftime('%Y%m%d')}.txt", '/home/app/data/waazabag/')
|
50
|
-
system('mv', "map_uri_#{Time.now.strftime('%Y%m%d')}.txt", '/home/app/data/waazabag/')
|
45
|
+
def intepretation
|
46
|
+
Intepretation.execute(data_mining)
|
51
47
|
end
|
52
48
|
end
|
@@ -1,25 +1,14 @@
|
|
1
1
|
require 'time'
|
2
2
|
require 'log_analysis/version'
|
3
3
|
|
4
|
-
module
|
5
|
-
JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
|
6
|
-
TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
7
|
-
RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
8
|
-
MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
9
|
-
|
4
|
+
module DataMining
|
10
5
|
class Error < StandardError; end
|
11
6
|
# Your code goes here...
|
12
7
|
|
13
8
|
def self.execute(transform_data, min_conf, min_sup)
|
14
|
-
File.open(TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
15
|
-
system("java -jar #{JAR_FILE_PATH} run SPADE #{TRANSFORM_DATA_PATH} #{RULE_FILE_PATH} #{min_sup}%")
|
16
|
-
|
17
|
-
map_uri = File.read(MAP_URI_FILE_PATH).split(' ')
|
18
|
-
|
19
|
-
result.map do |rule|
|
20
|
-
seq, sub, rea = rule
|
21
|
-
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
22
|
-
end
|
9
|
+
File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
|
10
|
+
system("java -jar #{LogAnalysis::JAR_FILE_PATH} run SPADE #{LogAnalysis::TRANSFORM_DATA_PATH} #{LogAnalysis::RULE_FILE_PATH} #{min_sup}%")
|
11
|
+
rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
|
23
12
|
end
|
24
13
|
|
25
14
|
def self.rule_gen(seqs, min_conf)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'log_analysis/version'
|
2
|
+
|
3
|
+
module Intepretation
|
4
|
+
class Error < StandardError; end
|
5
|
+
# Your code goes here...
|
6
|
+
|
7
|
+
def self.execute(data_mining)
|
8
|
+
map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
|
9
|
+
move_data
|
10
|
+
|
11
|
+
data_mining.map do |data|
|
12
|
+
seq, sub, rea = data
|
13
|
+
[seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.move_data
|
18
|
+
return unless File.directory?(LogAnalysis::DATA_PATH)
|
19
|
+
|
20
|
+
system('mv', "*_#{Time.now.strftime('%Y%m%d')}.txt", LogAnalysis::DATA_PATH)
|
21
|
+
end
|
22
|
+
end
|
@@ -2,7 +2,7 @@ require 'log_analysis/model/record'
|
|
2
2
|
require 'log_analysis/model/user_identity'
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
module PreProcess
|
5
|
+
module LoadingData
|
6
6
|
class Error < StandardError; end
|
7
7
|
# Your code goes here...
|
8
8
|
|
@@ -20,12 +20,10 @@ module PreProcess
|
|
20
20
|
preprocessed_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
|
21
21
|
record_params = send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)
|
22
22
|
record = Record.new(record_params) if record_params && preprocessed_log
|
23
|
-
record = yield(record) if block_given? && record
|
24
|
-
|
25
|
-
arr.push(record) if record && record.status_200? && record.method_get? && record.uri_without_data && !record.robot?
|
26
23
|
|
27
24
|
system('clear')
|
28
25
|
puts "#{((i.to_f / text_file.size) * 100).round}/100"
|
26
|
+
arr.push(record) if record
|
29
27
|
end
|
30
28
|
end
|
31
29
|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
require 'active_support/core_ext/module/delegation'
|
2
|
+
require 'log_analysis/model/record'
|
2
3
|
require 'useragent'
|
3
4
|
|
4
5
|
class UserIdentity
|
5
|
-
attr_accessor :host, :user_agent
|
6
|
+
attr_accessor :host, :user_agent, :records
|
6
7
|
|
7
8
|
delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
|
8
9
|
|
9
10
|
def initialize(params)
|
10
11
|
@host = params[:host]
|
11
12
|
@user_agent = params[:user_agent]
|
13
|
+
@records = params[:records]
|
12
14
|
end
|
13
15
|
end
|
@@ -7,10 +7,12 @@ module SessionIdentification
|
|
7
7
|
class Error < StandardError; end
|
8
8
|
# Your code goes here...
|
9
9
|
|
10
|
-
def self.execute(
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def self.execute(identified_user)
|
11
|
+
identified_user.each_with_object([]) do |user, arr|
|
12
|
+
user.records.each do |record|
|
13
|
+
isession = arr.rindex { |s| s.user == user }
|
14
|
+
isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
|
15
|
+
end
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -3,8 +3,6 @@ require 'log_analysis/model/user_identity'
|
|
3
3
|
require 'log_analysis/version'
|
4
4
|
|
5
5
|
module Transformation
|
6
|
-
MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
7
|
-
|
8
6
|
class Error < StandardError; end
|
9
7
|
# Your code goes here...
|
10
8
|
|
@@ -20,7 +18,7 @@ module Transformation
|
|
20
18
|
end
|
21
19
|
end
|
22
20
|
|
23
|
-
File.open(MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
21
|
+
File.open(LogAnalysis::MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
|
24
22
|
transform
|
25
23
|
end
|
26
24
|
end
|
@@ -6,6 +6,14 @@ module UserIdentification
|
|
6
6
|
# Your code goes here...
|
7
7
|
|
8
8
|
def self.execute(cleaned_data)
|
9
|
-
cleaned_data.
|
9
|
+
cleaned_data.each_with_object([]) do |record, arr|
|
10
|
+
user = arr.detect { |i| i == record.user }
|
11
|
+
if user
|
12
|
+
user.records.push(record)
|
13
|
+
else
|
14
|
+
record.user.records = [record]
|
15
|
+
arr << record.user
|
16
|
+
end
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
data/lib/log_analysis/version.rb
CHANGED
@@ -1,3 +1,8 @@
|
|
1
1
|
class LogAnalysis
|
2
|
-
VERSION = '0.1.4'.freeze
|
2
|
+
VERSION = '0.1.5'.freeze
|
3
|
+
TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
4
|
+
RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
5
|
+
MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
|
6
|
+
JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
|
7
|
+
DATA_PATH = File.expand_path('data/log_analysis', '~')
|
3
8
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: log_analysis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Tran
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: useragent
|
@@ -57,12 +57,13 @@ files:
|
|
57
57
|
- bin/console
|
58
58
|
- bin/setup
|
59
59
|
- lib/log_analysis.rb
|
60
|
+
- lib/log_analysis/data_mining.rb
|
60
61
|
- lib/log_analysis/files/spmf.jar
|
62
|
+
- lib/log_analysis/intepretation.rb
|
63
|
+
- lib/log_analysis/loading_data.rb
|
61
64
|
- lib/log_analysis/model/record.rb
|
62
65
|
- lib/log_analysis/model/session_identity.rb
|
63
66
|
- lib/log_analysis/model/user_identity.rb
|
64
|
-
- lib/log_analysis/preprocess.rb
|
65
|
-
- lib/log_analysis/rule_generation.rb
|
66
67
|
- lib/log_analysis/session_identification.rb
|
67
68
|
- lib/log_analysis/transformation.rb
|
68
69
|
- lib/log_analysis/user_identification.rb
|