log_analysis 0.1.1 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,36 +1,54 @@
1
1
  require 'log_analysis/version'
2
- require 'log_analysis/preprocess'
2
+ require 'log_analysis/loading_data'
3
3
  require 'log_analysis/user_identification'
4
4
  require 'log_analysis/session_identification'
5
5
  require 'log_analysis/transformation'
6
- require 'log_analysis/rule_generation'
6
+ require 'log_analysis/data_mining'
7
+ require 'log_analysis/intepretation'
8
+ require 'time'
7
9
 
8
10
  class LogAnalysis
9
11
  class Error < StandardError; end
10
12
  # Your code goes here...
11
13
 
12
- attr_reader :path, :type, :cleaned_data
14
+ attr_accessor :path, :type, :match_uri, :conf, :sup, :origin_data
13
15
 
14
16
  def initialize(path, type = nil)
15
17
  @path = path
16
18
  @type = type
17
- @cleaned_data = PreProcess.input(path, type)
18
- system('mkdir', '-p', LogAnalysis::DATA_PATH)
19
+ @origin_data = LoadingData.input(path, type)
19
20
  end
20
21
 
21
- def identified_user
22
- UserIdentification.execute(@cleaned_data)
22
+ def selecting_data
23
+ return @origin_data if @match_uri.nil?
24
+
25
+ @origin_data.select { |record| record.uri.match?(@match_uri) }
23
26
  end
24
27
 
25
- def identified_session
26
- SessionIdentification.execute(@cleaned_data)
28
+ def preprocessing_data
29
+ data = selecting_data
30
+ return if data.nil? || data.empty?
31
+
32
+ filter = data.select { |record| record.status_200? && record.method_get? && record.uri_without_data && !record.robot? }
33
+ user = UserIdentification.execute(filter)
34
+ session = SessionIdentification.execute(user)
35
+ session
27
36
  end
28
37
 
29
38
  def transformation
30
- Transformation.execute(identified_session)
39
+ data = preprocessing_data
40
+ Transformation.execute(data) unless data.nil? || data.empty?
41
+ end
42
+
43
+ def data_mining
44
+ data = transformation
45
+ @conf ||= 0.5
46
+ @sup ||= 60
47
+ DataMining.execute(data, @conf, @sup) unless data.nil? || data.empty?
31
48
  end
32
49
 
33
- def rule_generation
34
- RuleGeneration.execute(transformation)
50
+ def intepretation
51
+ data = data_mining
52
+ Intepretation.execute(data) unless data.nil? || data.empty?
35
53
  end
36
54
  end
@@ -1,25 +1,14 @@
1
1
  require 'time'
2
2
  require 'log_analysis/version'
3
3
 
4
- module RuleGeneration
5
- JAR_FILE_PATH = File.expand_path('spmf.jar')
6
- TRANSFORM_DATA_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}transform_data_#{Time.now.strftime('%Y%m%d')}.txt")
7
- RULE_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}output_#{Time.now.strftime('%Y%m%d')}.txt")
8
- MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
9
-
4
+ module DataMining
10
5
  class Error < StandardError; end
11
6
  # Your code goes here...
12
7
 
13
- def self.execute(transform_data)
14
- File.open(TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
15
- system("java -jar #{JAR_FILE_PATH} run SPADE #{TRANSFORM_DATA_PATH} #{RULE_FILE_PATH} 65%")
16
- result = rule_gen(get_seq(File.read(RULE_FILE_PATH)), 0.5)
17
- map_uri = File.read(MAP_URI_FILE_PATH).split(' ')
18
-
19
- result.map do |rule|
20
- seq, sub, rea = rule
21
- [seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
22
- end
8
+ def self.execute(transform_data, min_conf, min_sup)
9
+ File.open(LogAnalysis::TRANSFORM_DATA_PATH, 'w+') { |f| transform_data.keys.each { |e| f.puts(transform_data[e].map { |i| i.is_a?(Array) ? i.join(' ') : i }.join(' -1 ').concat(' -1 -2')) } }
10
+ system("java -jar #{LogAnalysis::JAR_FILE_PATH} run SPADE #{LogAnalysis::TRANSFORM_DATA_PATH} #{LogAnalysis::RULE_FILE_PATH} #{min_sup}%")
11
+ rule_gen(get_seq(File.read(LogAnalysis::RULE_FILE_PATH)), min_conf)
23
12
  end
24
13
 
25
14
  def self.rule_gen(seqs, min_conf)
@@ -0,0 +1,22 @@
1
+ require 'log_analysis/version'
2
+
3
+ module Intepretation
4
+ class Error < StandardError; end
5
+ # Your code goes here...
6
+
7
+ def self.execute(data_mining)
8
+ map_uri = File.read(LogAnalysis::MAP_URI_FILE_PATH).split(' ')
9
+ move_data
10
+
11
+ data_mining.map do |data|
12
+ seq, sub, rea = data
13
+ [seq.map { |i| map_uri[i.to_i] }, sub.map { |i| map_uri[i.to_i] }, rea]
14
+ end
15
+ end
16
+
17
+ def self.move_data
18
+ return unless File.directory?(LogAnalysis::DATA_PATH)
19
+
20
+ system('mv', "*_#{Time.now.strftime('%Y%m%d')}.txt", LogAnalysis::DATA_PATH)
21
+ end
22
+ end
@@ -2,7 +2,7 @@ require 'log_analysis/model/record'
2
2
  require 'log_analysis/model/user_identity'
3
3
  require 'json'
4
4
 
5
- module PreProcess
5
+ module LoadingData
6
6
  class Error < StandardError; end
7
7
  # Your code goes here...
8
8
 
@@ -13,15 +13,17 @@ module PreProcess
13
13
  CONVERT_RECORD = { 'nginx' => 'convert_nginx_logs', 'apache' => 'convert_apache_logs', 'default' => 'to_records' }.freeze
14
14
 
15
15
  def self.input(file_path, type)
16
- @users = []
16
+ @users = []
17
+ text_file = File.readlines(file_path)
17
18
 
18
- File.readlines(file_path).each_with_object([]).with_index do |(line, arr), i|
19
+ text_file.each_with_object([]).with_index do |(line, arr), i|
19
20
  preprocessed_log = type.nil? ? line.gsub(/[\t]/i, ' ').chomp! : line
20
- record = Record.new(send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)) unless preprocessed_log.nil?
21
+ record_params = send(CONVERT_RECORD[type.nil? ? 'nginx' : type], preprocessed_log)
22
+ record = Record.new(record_params) if record_params && preprocessed_log
21
23
 
22
- arr.push(record) if record.status_200? && record.method_get? && record.uri_without_data && !record.robot?
23
-
24
- puts arr.size
24
+ system('clear')
25
+ puts "#{((i.to_f / text_file.size) * 100).round}/100"
26
+ arr.push(record) if record
25
27
  end
26
28
  end
27
29
 
@@ -35,6 +37,8 @@ module PreProcess
35
37
 
36
38
  def self.convert_nginx_logs(log)
37
39
  o = log.split(REGEX_NGINX)
40
+
41
+ return false if o.size <= 1
38
42
  o.delete('')
39
43
 
40
44
  {}.tap do |p|
@@ -1,13 +1,15 @@
1
1
  require 'active_support/core_ext/module/delegation'
2
+ require 'log_analysis/model/record'
2
3
  require 'useragent'
3
4
 
4
5
  class UserIdentity
5
- attr_accessor :host, :user_agent
6
+ attr_accessor :host, :user_agent, :records
6
7
 
7
8
  delegate :browser, :version, :os, :platform, :mobile?, :application, :localization, to: :user_agent
8
9
 
9
10
  def initialize(params)
10
11
  @host = params[:host]
11
12
  @user_agent = params[:user_agent]
13
+ @records = params[:records]
12
14
  end
13
15
  end
@@ -7,10 +7,12 @@ module SessionIdentification
7
7
  class Error < StandardError; end
8
8
  # Your code goes here...
9
9
 
10
- def self.execute(cleaned_data)
11
- cleaned_data.each_with_object([]) do |record, arr|
12
- isession = arr.rindex { |s| s.user == record.user }
13
- isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
10
+ def self.execute(identified_user)
11
+ identified_user.each_with_object([]) do |user, arr|
12
+ isession = arr.rindex { |s| s.user == user }
13
+ user.records.each do |record|
14
+ isession.present? && validate_time_session(arr[isession].records.last.time, record.time) ? arr[isession].records << record : arr << SessionIdentity.new(session_identity_params(record))
15
+ end
14
16
  end
15
17
  end
16
18
 
@@ -3,13 +3,11 @@ require 'log_analysis/model/user_identity'
3
3
  require 'log_analysis/version'
4
4
 
5
5
  module Transformation
6
- MAP_URI_FILE_PATH = File.expand_path("#{LogAnalysis::DATA_PATH}map_uri_#{Time.now.strftime('%Y%m%d')}.txt")
7
-
8
6
  class Error < StandardError; end
9
7
  # Your code goes here...
10
8
 
11
9
  def self.execute(identified_session)
12
- map_uri = []
10
+ map_uri = ['-']
13
11
  transform = identified_session.each_with_object({}) do |v, hash|
14
12
  uries = v.records.map(&:uri)
15
13
  uries.each { |i| map_uri.push(i) unless map_uri.include?(i) }
@@ -20,7 +18,7 @@ module Transformation
20
18
  end
21
19
  end
22
20
 
23
- File.open(MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
21
+ File.open(LogAnalysis::MAP_URI_FILE_PATH, 'w+') { |f| f.write(map_uri.join(' ')) }
24
22
  transform
25
23
  end
26
24
  end
@@ -6,6 +6,14 @@ module UserIdentification
6
6
  # Your code goes here...
7
7
 
8
8
  def self.execute(cleaned_data)
9
- cleaned_data.map(&:user).uniq
9
+ cleaned_data.each_with_object([]) do |record, arr|
10
+ user = arr.detect { |i| i == record.user }
11
+ if user
12
+ user.records.push(record)
13
+ else
14
+ record.user.records = [record]
15
+ arr << record.user
16
+ end
17
+ end
10
18
  end
11
19
  end
@@ -1,4 +1,8 @@
1
1
  class LogAnalysis
2
- VERSION = '0.1.1'.freeze
3
- DATA_PATH = '~/data/waazabag/'.freeze
2
+ VERSION = '0.1.6'.freeze
3
+ TRANSFORM_DATA_PATH = "transform_data_#{Time.now.strftime('%Y%m%d')}.txt".freeze
4
+ RULE_FILE_PATH = "output_#{Time.now.strftime('%Y%m%d')}.txt".freeze
5
+ MAP_URI_FILE_PATH = "map_uri_#{Time.now.strftime('%Y%m%d')}.txt".freeze
6
+ JAR_FILE_PATH = File.join(File.dirname(__FILE__), './files/spmf.jar')
7
+ DATA_PATH = File.expand_path('data/log_analysis', '~')
4
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: log_analysis
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Tran
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-07-12 00:00:00.000000000 Z
11
+ date: 2020-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: useragent
@@ -57,24 +57,25 @@ files:
57
57
  - bin/console
58
58
  - bin/setup
59
59
  - lib/log_analysis.rb
60
+ - lib/log_analysis/data_mining.rb
61
+ - lib/log_analysis/files/spmf.jar
62
+ - lib/log_analysis/intepretation.rb
63
+ - lib/log_analysis/loading_data.rb
60
64
  - lib/log_analysis/model/record.rb
61
65
  - lib/log_analysis/model/session_identity.rb
62
66
  - lib/log_analysis/model/user_identity.rb
63
- - lib/log_analysis/preprocess.rb
64
- - lib/log_analysis/rule_generation.rb
65
67
  - lib/log_analysis/session_identification.rb
66
68
  - lib/log_analysis/transformation.rb
67
69
  - lib/log_analysis/user_identification.rb
68
70
  - lib/log_analysis/version.rb
69
71
  - log_analysis.gemspec
70
- - spmf.jar
71
72
  homepage: https://github.com/michaelt0520/log_analysis_thesis
72
73
  licenses:
73
74
  - MIT
74
75
  metadata:
75
76
  homepage_uri: https://github.com/michaelt0520/log_analysis_thesis
76
77
  source_code_uri: https://github.com/michaelt0520/log_analysis_thesis
77
- post_install_message:
78
+ post_install_message:
78
79
  rdoc_options: []
79
80
  require_paths:
80
81
  - lib
@@ -89,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
90
  - !ruby/object:Gem::Version
90
91
  version: '0'
91
92
  requirements: []
92
- rubygems_version: 3.1.3
93
- signing_key:
93
+ rubygems_version: 3.1.2
94
+ signing_key:
94
95
  specification_version: 4
95
96
  summary: Log Analysis for thesis Huflit
96
97
  test_files: []