log_analysis 0.1.1 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,36 +1,54 @@
1
1
  require 'log_analysis/version'
2
- require 'log_analysis/preprocess'
2
+ require 'log_analysis/loading_data'
3
3
  require 'log_analysis/user_identification'
4
4
  require 'log_analysis/session_identification'
5
5
  require 'log_analysis/transformation'
6
- require 'log_analysis/rule_generation'
6
+ require 'log_analysis/data_mining'
7
+ require 'log_analysis/intepretation'
8
+ require 'time'
7
9
 
8
10
  class LogAnalysis
9
11
  class Error < StandardError; end
10
12
  # Your code goes here...
11
13
 
12
- attr_reader :path, :type, :cleaned_data
14
+ attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
13
15
 
14
16
  def initialize(path, type = nil)
15
17
  @path = path
16
18
  @type = type
17
- @cleaned_data = PreProcess.input(path, type)
18
- system('mkdir', '-p', LogAnalysis::DATA_PATH)
19
+ @origin_data = LoadingData.input(path, type)
19
20
  end
20
21
 
21
- def identified_user
22
- UserIdentification.execute(@cleaned_data)
22
+ def selecting_data
23
+ return @origin_data if @match_uri.nil?
24
+
25
+ @origin_data.select { |record| record.uri.match?(@match_uri) }
23
26
  end
24
27
 
25
- def identified_session
26
- SessionIdentification.execute(@cleaned_data)
28
+ def preprocessing_data
29
+ data = selecting_data
30
+ return if data.nil? || data.empty?
31
+
32
+ filter = data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
33
+ user = UserIdentification.execute(filter)
34
+ session = SessionIdentification.execute(user)
35
+ session
27
36
  end
28
37
 
29
38
  def transformation
30
- Transformation.execute(identified_session)
39
+ data = preprocessing_data
40
+ Transformation.execute(data) unless data.nil? || data.empty?
41
+ end
42
+
43
+ def data_mining
44
+ data = transformation
45
+ @conf ||= 0.5
46
+ @sup ||= 60
47
+ DataMining.execute(data, @conf, @sup) unless data.nil? || data.empty?
31
48
  end
32
49
 
33
- def rule_generation
34
- RuleGeneration.execute(transformation)
50
+ def intepretation
51
+ data = data_mining
52
+ Intepretation.execute(data) unless data.nil? || data.empty?
35
53
  end
36
54
  end
@@ -1,25 +1,14 @@
1
1
  require 'time'
2
2
  require 'log_analysis/version'
3
3
 
4
- module RuleGeneration
5
- JAR_FILE_PATH = File.expand_path('spmf.jar')
6
- TRANSFORM_DATA_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}transform_data_#{Time.now.strftime('%Y%m%d')}.txt")
7
- RULE_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}output_#{Time.now.strftime('%Y%m%d')}.txt")
8
- MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
9
-
4
+ module DataMining
10
5
  class Error < StandardError; end
11
6
  # Your code goes here...
12
7
 
13
- def self.execute(transform_data)
14
- File.open(TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
15
- system("java -jar #{JAR_FILE_PATH} run SPADE #{TRANSFORM_DATA_PATH} #{RULE_FILE_PATH} 65%")
16
- result = rule_gen(get_seq(File.read(RULE_FILE_PATH)), 0.5)
17
- map_uri = File.read(MAP_URI_FILE_PATH).split(' ')
18
-
19
- result.map do |rule|
20
- seq, sub, rea = rule
21
- [seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
22
- end
8
+ def self.execute(transform_data, min_conf, min_sup)
9
+ File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
10
+ system("java -jar #{LogAnalysis::JAR_FILE_PATH} run SPADE #{LogAnalysis::TRANSFORM_DATA_PATH} #{LogAnalysis::RULE_FILE_PATH} #{min_sup}%")
11
+ rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
23
12
  end
24
13
 
25
14
  def self.rule_gen(seqs, min_conf)
@@ -0,0 +1,22 @@
1
+ require 'log_analysis/version'
2
+
3
+ module Intepretation
4
+ class Error < StandardError; end
5
+ # Your code goes here...
6
+
7
+ def self.execute(data_mining)
8
+ map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
9
+ move_data
10
+
11
+ data_mining.map do |data|
12
+ seq, sub, rea = data
13
+ [seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
14
+ end
15
+ end
16
+
17
+ def self.move_data
18
+ return unless File.directory?(LogAnalysis::DATA_PATH)
19
+
20
+ system('mv', "*_#{Time.now.strftime('%Y%m%d')}.txt", LogAnalysis::DATA_PATH)
21
+ end
22
+ end
@@ -2,7 +2,7 @@ require 'log_analysis/model/record'
2
2
  require 'log_analysis/model/user_identity'
3
3
  require 'json'
4
4
 
5
- module PreProcess
5
+ module LoadingData
6
6
  class Error < StandardError; end
7
7
  # Your code goes here...
8
8
 
@@ -13,15 +13,17 @@ module PreProcess
13
13
  CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze
14
14
 
15
15
  def self.input(file_path, type)
16
- @users = []
16
+ @users = []
17
+ text_file = File.readlines(file_path)
17
18
 
18
- File.readlines(file_path).each_with_object([]).with_index do |(line, arr), i|
19
+ text_file.each_with_object([]).with_index do |(line, arr), i|
19
20
  preprocessed_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
20
- record = Record.new(send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)) unless preprocessed_log.nil?
21
+ record_params = send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)
22
+ record = Record.new(record_params) if record_params && preprocessed_log
21
23
 
22
- arr.push(record) if record.status_200? && record.method_get? && record.uri_without_data && !record.robot?
23
-
24
- puts arr.size
24
+ system('clear')
25
+ puts "#{((i.to_f / text_file.size) * 100).round}/100"
26
+ arr.push(record) if record
25
27
  end
26
28
  end
27
29
 
@@ -35,6 +37,8 @@ module PreProcess
35
37
 
36
38
  def self.convert_nginx_logs(log)
37
39
  o = log.split(REGEX_NGINX)
40
+
41
+ return false if o.size <= 1
38
42
  o.delete('')
39
43
 
40
44
  {}.tap do |p|
@@ -1,13 +1,15 @@
1
1
  require 'active_support/core_ext/module/delegation'
2
+ require 'log_analysis/model/record'
2
3
  require 'useragent'
3
4
 
4
5
  class UserIdentity
5
- attr_accessor :host, :user_agent
6
+ attr_accessor :host, :user_agent, :records
6
7
 
7
8
  delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
8
9
 
9
10
  def initialize(params)
10
11
  @host = params[:host]
11
12
  @user_agent = params[:user_agent]
13
+ @records = params[:records]
12
14
  end
13
15
  end
@@ -7,10 +7,12 @@ module SessionIdentification
7
7
  class Error < StandardError; end
8
8
  # Your code goes here...
9
9
 
10
- def self.execute(cleaned_data)
11
- cleaned_data.each_with_object([]) do |record, arr|
12
- isession = arr.rindex { |s| s.user == record.user }
13
- isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
10
+ def self.execute(identified_user)
11
+ identified_user.each_with_object([]) do |user, arr|
12
+ isession = arr.rindex { |s| s.user == user }
13
+ user.records.each do |record|
14
+ isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
15
+ end
14
16
  end
15
17
  end
16
18
 
@@ -3,13 +3,11 @@ require 'log_analysis/model/user_identity'
3
3
  require 'log_analysis/version'
4
4
 
5
5
  module Transformation
6
- MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
7
-
8
6
  class Error < StandardError; end
9
7
  # Your code goes here...
10
8
 
11
9
  def self.execute(identified_session)
12
- map_uri = []
10
+ map_uri = ['-']
13
11
  transform = identified_session.each_with_object({}) do |v, hash|
14
12
  uries = v.records.map(&:uri)
15
13
  uries.each { |i| map_uri.push(i) unless map_uri.include?(i) }
@@ -20,7 +18,7 @@ module Transformation
20
18
  end
21
19
  end
22
20
 
23
- File.open(MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
21
+ File.open(LogAnalysis::MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
24
22
  transform
25
23
  end
26
24
  end
@@ -6,6 +6,14 @@ module UserIdentification
6
6
  # Your code goes here...
7
7
 
8
8
  def self.execute(cleaned_data)
9
- cleaned_data.map(&:user).uniq
9
+ cleaned_data.each_with_object([]) do |record, arr|
10
+ user = arr.detect { |i| i == record.user }
11
+ if user
12
+ user.records.push(record)
13
+ else
14
+ record.user.records = [record]
15
+ arr << record.user
16
+ end
17
+ end
10
18
  end
11
19
  end
@@ -1,4 +1,8 @@
1
1
  class LogAnalysis
2
- VERSION = '0.1.1'.freeze
3
- DATA_PATH = '~/data/waazabag/'.freeze
2
+ VERSION = '0.1.6'.freeze
3
+ TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
4
+ RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
5
+ MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
6
+ JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
7
+ DATA_PATH = File.expand_path('data/log_analysis', '~')
4
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: log_analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Tran
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-12 00:00:00.000000000 Z
11
+ date: 2020-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: useragent
@@ -57,24 +57,25 @@ files:
57
57
  - bin/console
58
58
  - bin/setup
59
59
  - lib/log_analysis.rb
60
+ - lib/log_analysis/data_mining.rb
61
+ - lib/log_analysis/files/spmf.jar
62
+ - lib/log_analysis/intepretation.rb
63
+ - lib/log_analysis/loading_data.rb
60
64
  - lib/log_analysis/model/record.rb
61
65
  - lib/log_analysis/model/session_identity.rb
62
66
  - lib/log_analysis/model/user_identity.rb
63
- - lib/log_analysis/preprocess.rb
64
- - lib/log_analysis/rule_generation.rb
65
67
  - lib/log_analysis/session_identification.rb
66
68
  - lib/log_analysis/transformation.rb
67
69
  - lib/log_analysis/user_identification.rb
68
70
  - lib/log_analysis/version.rb
69
71
  - log_analysis.gemspec
70
- - spmf.jar
71
72
  homepage: https://github.com/michaelt0520/log_analysis_thesis
72
73
  licenses:
73
74
  - MIT
74
75
  metadata:
75
76
  homepage_uri: https://github.com/michaelt0520/log_analysis_thesis
76
77
  source_code_uri: https://github.com/michaelt0520/log_analysis_thesis
77
- post_install_message:
78
+ post_install_message:
78
79
  rdoc_options: []
79
80
  require_paths:
80
81
  - lib
@@ -89,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
90
  - !ruby/object:Gem::Version
90
91
  version: '0'
91
92
  requirements: []
92
- rubygems_version: 3.1.3
93
- signing_key:
93
+ rubygems_version: 3.1.2
94
+ signing_key:
94
95
  specification_version: 4
95
96
  summary: Log Analysis for thesis Huflit
96
97
  test_files: []