fluent-diagtool 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +34 -0
- data/LICENSE +202 -0
- data/README.md +133 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/diagtool.rb +37 -0
- data/bin/setup +8 -0
- data/bin/word_list_sample +2 -0
- data/exe/diagtool +38 -0
- data/fluent-diagtool.gemspec +25 -0
- data/lib/fluent/diagtool/collectutils.rb +165 -0
- data/lib/fluent/diagtool/diagutils.rb +217 -0
- data/lib/fluent/diagtool/maskutils.rb +314 -0
- data/lib/fluent/diagtool/validutils.rb +88 -0
- data/lib/fluent/diagtool/version.rb +5 -0
- metadata +64 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
#
|
2
|
+
# Fluentd
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
#
|
16
|
+
|
17
|
+
require 'fileutils'
|
18
|
+
require 'open3'
|
19
|
+
require 'logger'
|
20
|
+
|
21
|
+
module Diagtool
|
22
|
+
class CollectUtils
|
23
|
+
# Builds a collector for one diagnostic run.
#
# conf      - Hash; this method reads conf[:time], conf[:output_dir] and
#             conf[:workdir].
# log_level - severity handed to Logger.new via level:.
#
# Calls get_tdenv and get_osenv (both write their raw command output under
# @workdir) and derives the td-agent config/log locations from the
# FLUENT_CONF and TD_AGENT_LOG_FILE entries of the td-agent environment.
#
# Raises RuntimeError when one of those two entries is missing.
def initialize(conf, log_level)
  @logger = Logger.new(STDOUT, level: log_level, formatter: proc { |severity, datetime, _progname, msg|
    "#{datetime}: [Diagutils] [#{severity}] #{msg}\n"
  })
  @time_format = conf[:time]
  @output_dir = conf[:output_dir]
  @workdir = conf[:workdir]

  # @workdir has to be assigned before this point: get_tdenv writes into it.
  @tdenv = get_tdenv()
  @tdconf_path, @tdconf = split_env_path('FLUENT_CONF')
  @tdlog_path, @tdlog = split_env_path('TD_AGENT_LOG_FILE')

  @osenv = get_osenv()
  @oslog_path = '/var/log/'
  @oslog = 'messages'
  @sysctl_path = '/etc/'
  @sysctl = 'sysctl.conf'

  @logger.info("Loading the environment parameters...")
  @logger.info(" operating system = #{@osenv['Operating System']}")
  @logger.info(" kernel version = #{@osenv['Kernel']}")
  @logger.info(" td-agent conf path = #{@tdconf_path}")
  @logger.info(" td-agent conf file = #{@tdconf}")
  @logger.info(" td-agent log path = #{@tdlog_path}")
  @logger.info(" td-agent log = #{@tdlog}")
end

# Splits the path stored under +key+ in @tdenv into
# [directory including its trailing '/', file name].
def split_env_path(key)
  full = @tdenv[key]
  raise "#{key} is not defined in the td-agent service environment" if full.nil?

  # Cut at the last '/' only. Removing the file name with gsub would also
  # delete the same text wherever it appears inside a directory name.
  slash = full.rindex('/')
  return ['', full] if slash.nil?

  [full[0..slash], full[(slash + 1)..-1]]
end
private :split_env_path
|
51
|
+
# Runs `hostnamectl`, stores its raw output in <@workdir>/os_env.output and
# returns the output parsed into a Hash of "label" => "value" strings.
#
# Each line is split at its FIRST colon only, so values that themselves
# contain ':' are kept whole. Lines without a colon (blank lines, notices)
# are skipped instead of raising NoMethodError.
def get_osenv
  stdout, _stderr, _status = Open3.capture3('hostnamectl')
  os_dict = {}
  stdout.each_line do |line|
    label, value = line.split(':', 2)
    next if value.nil?

    os_dict[label.strip] = value.strip
  end
  File.open(@workdir + '/os_env.output', 'w') { |f| f.puts(stdout) }
  os_dict
end
|
63
|
+
# Runs `systemctl cat td-agent`, stores the raw output in
# <@workdir>/td-agent_env.output and returns the unit's
# `Environment=NAME=VALUE` entries as a Hash of NAME => VALUE.
#
# Only whitespace-separated tokens that start with "Environment=" are used,
# so "EnvironmentFile=..." no longer produces a bogus key, and the token is
# split into at most three parts so a VALUE containing '=' stays intact.
def get_tdenv
  stdout, _stderr, _status = Open3.capture3('systemctl cat td-agent')
  File.open(@workdir + '/td-agent_env.output', 'w') { |f| f.puts(stdout) }

  env_dict = {}
  stdout.split.each do |token|
    next unless token.start_with?('Environment=')

    _directive, name, value = token.split('=', 3)
    env_dict[name] = value
  end
  env_dict
end
|
76
|
+
# Returns a Hash snapshot of the environment values gathered by the
# constructor: OS name and kernel (looked up in @osenv) plus the td-agent
# config/log file names and their directories.
def export_env
  os_name, kernel = @osenv.values_at('Operating System', 'Kernel')
  {
    os: os_name,
    kernel: kernel,
    tdconf: @tdconf,
    tdconf_path: @tdconf_path,
    tdlog: @tdlog,
    tdlog_path: @tdlog_path
  }
end
|
87
|
+
# Copies the td-agent config file into the work directory, recreating its
# original directory structure below @workdir, and returns the path of the
# copy.
def collect_tdconf
  target_dir = @workdir + @tdconf_path
  FileUtils.mkdir_p(target_dir)
  FileUtils.cp(@tdconf_path + @tdconf, target_dir)
  target_dir + @tdconf
end
|
92
|
+
# Copies the contents of the td-agent log directory into the work directory
# (mirrored at <@workdir><@tdlog_path>) and returns the copied files whose
# names start with the td-agent log file name (the live log plus rotated
# siblings).
#
# The copy target is derived from @tdlog_path. The previous code copied into
# <@workdir><@oslog_path>, which only lined up with the glob below when the
# td-agent log directory happened to sit directly under /var/log/.
def collect_tdlog
  target_dir = @workdir + @tdlog_path
  FileUtils.mkdir_p(target_dir)
  # "dir/." copies the directory's contents rather than the directory itself.
  FileUtils.cp_r(@tdlog_path + '.', target_dir)
  Dir.glob(target_dir + @tdlog + '*')
end
|
97
|
+
# Copies the sysctl configuration file (@sysctl_path + @sysctl) into the
# work directory under the same relative path and returns the path of the
# copy.
def collect_sysctl
  target_dir = @workdir + @sysctl_path
  FileUtils.mkdir_p(target_dir)
  FileUtils.cp(@sysctl_path + @sysctl, target_dir)
  target_dir + @sysctl
end
|
102
|
+
# Copies the OS log file (@oslog_path + @oslog) into the work directory
# under the same relative path and returns the path of the copy.
def collect_oslog
  target_dir = @workdir + @oslog_path
  FileUtils.mkdir_p(target_dir)
  FileUtils.cp(@oslog_path + @oslog, target_dir)
  target_dir + @oslog
end
|
107
|
+
# Writes the open-file limit (`ulimit -n`) to <@workdir>/ulimit_n.output
# and returns that path.
#
# `ulimit` is a shell builtin. A plain command string without shell
# metacharacters is executed directly, not through a shell, so
# capture3("ulimit -n") raises Errno::ENOENT on systems that do not ship a
# standalone `ulimit` executable. Running it through `sh -c` works on both.
def collect_ulimit
  output = @workdir + '/ulimit_n.output'
  stdout, _stderr, _status = Open3.capture3('sh', '-c', 'ulimit -n')
  File.open(output, 'w') { |f| f.puts(stdout) }
  output
end
|
115
|
+
# Captures the output of `cat /proc/meminfo` into
# <@workdir>/meminfo.output and returns that path.
def collect_meminfo
  report = @workdir + '/meminfo.output'
  captured = Open3.capture3("cat /proc/meminfo").first
  File.open(report, 'w') { |io| io.puts(captured) }
  report
end
|
123
|
+
# Captures the output of `netstat -n` into <@workdir>/netstat_n.output and
# returns that path.
def collect_netstat_n
  report = @workdir + '/netstat_n.output'
  captured = Open3.capture3("netstat -n").first
  File.open(report, 'w') { |io| io.puts(captured) }
  report
end
|
131
|
+
# Captures the output of `netstat -s` into <@workdir>/netstat_s.output and
# returns that path.
def collect_netstat_s
  report = @workdir + '/netstat_s.output'
  captured = Open3.capture3("netstat -s").first
  File.open(report, 'w') { |io| io.puts(captured) }
  report
end
|
139
|
+
# Writes the output of `date` followed by the time-sync peer listing to
# <@workdir>/ntp_info.output and returns that path.
#
# command - "chrony" runs `chronyc sources`, "ntp" runs `ntpq -p`; for any
#           other value no second command is run and an empty line is
#           written in its place.
def collect_ntp(command)
  report = @workdir + '/ntp_info.output'
  date_out = Open3.capture3("date").first
  peer_query = { "chrony" => "chronyc sources", "ntp" => "ntpq -p" }[command]
  peer_out = peer_query ? Open3.capture3(peer_query).first : nil
  File.open(report, 'w') do |io|
    io.puts(date_out)
    io.puts(peer_out)
  end
  report
end
|
150
|
+
# Captures the output of `td-agent-gem list | grep fluent` into
# <@workdir>/tdgem_list.output and returns that path.
def collect_tdgems
  report = @workdir + '/tdgem_list.output'
  captured = Open3.capture3("td-agent-gem list | grep fluent").first
  File.open(report, 'w') { |io| io.puts(captured) }
  report
end
|
158
|
+
# Changes the process working directory to @output_dir, runs `tar cvfz` to
# pack the run directory (named after @time_format) into
# diagout-<@time_format>.tar.gz, and returns the archive path.
# The result of the tar command is not inspected.
def compress_output
  Dir.chdir(@output_dir)
  archive = 'diagout-' + @time_format + '.tar.gz'
  Open3.capture3("tar cvfz #{archive} #{@time_format}")
  @output_dir + '/' + archive
end
|
164
|
+
end
|
165
|
+
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
#
|
2
|
+
# Fluentd
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
#
|
16
|
+
|
17
|
+
require 'logger'
|
18
|
+
require 'fileutils'
|
19
|
+
require 'fluent/diagtool/collectutils'
|
20
|
+
require 'fluent/diagtool/maskutils'
|
21
|
+
require 'fluent/diagtool/validutils'
|
22
|
+
include Diagtool
|
23
|
+
|
24
|
+
module Diagtool
|
25
|
+
class DiagUtils
|
26
|
+
# Prepares one diagnostic run: stamps the run with the current time,
# validates the command-line options via parse_diagconf, creates the
# per-run work directory below the output directory, and sets up two
# loggers with the same line format (STDOUT and
# <workdir>/diagtool.output).
#
# params - option Hash passed through to parse_diagconf.
def initialize(params)
  @time_format = Time.new.strftime("%Y%m%d%0k%M%0S")
  @conf = parse_diagconf(params)
  @conf[:time] = @time_format
  @conf[:workdir] = @conf[:output_dir] + '/' + @time_format
  FileUtils.mkdir_p(@conf[:workdir])
  @masklog = './mask_' + @time_format + '.json'

  line_format = proc { |severity, datetime, _progname, msg|
    "#{datetime}: [Diagtool] [#{severity}] #{msg}\n"
  }
  @logger = Logger.new(STDOUT, formatter: line_format)
  @logger_file = Logger.new(@conf[:workdir] + '/diagtool.output', formatter: line_format)

  diaglogger_info("Parsing command options...")
  diaglogger_info(" Option : Output directory = #{@conf[:output_dir]}")
  diaglogger_info(" Option : Mask = #{@conf[:mask]}")
  diaglogger_info(" Option : Word list = #{@conf[:words]}")
  diaglogger_info(" Option : Hash Seed = #{@conf[:seed]}")
end
|
47
|
+
# Runs the complete diagnostic workflow: collects td-agent logs, config and
# gem list, the OS log, memory, time-sync, netstat, sysctl and ulimit
# information; validates sysctl and ulimit; optionally masks the collected
# files (when @conf[:mask] == 'yes') and exports the mask log; finally
# packs everything into a tar file.
#
# Changes from the previous revision:
# * the per-file branch for non-.gz logs used a bare `elsif` whose
#   "condition" was the mask call itself; it is now a plain `else`;
# * the "date/time information is stored in ..." message is only logged
#   when something was actually collected;
# * log messages that named the wrong artifact or were misspelled are
#   corrected.
def diagtool
  loglevel = 'WARN'
  masking = @conf[:mask] == 'yes'

  diaglogger_info("Initializing parameters...")
  c = CollectUtils.new(@conf, loglevel)
  c_env = c.export_env()
  diaglogger_info("[Collect] Loading the environment parameters...")
  diaglogger_info("[Collect] operating system = #{c_env[:os]}")
  diaglogger_info("[Collect] kernel version = #{c_env[:kernel]}")
  diaglogger_info("[Collect] td-agent conf path = #{c_env[:tdconf_path]}")
  diaglogger_info("[Collect] td-agent conf file = #{c_env[:tdconf]}")
  diaglogger_info("[Collect] td-agent log path = #{c_env[:tdlog_path]}")
  diaglogger_info("[Collect] td-agent log = #{c_env[:tdlog]}")
  m = MaskUtils.new(@conf, loglevel)
  v = ValidUtils.new(loglevel)

  diaglogger_info("[Collect] Collecting log files of td-agent...")
  tdlog = c.collect_tdlog()
  diaglogger_info("[Collect] log files of td-agent are stored in #{tdlog}")

  diaglogger_info("[Collect] Collecting config file of td-agent...")
  tdconf = c.collect_tdconf()
  diaglogger_info("[Collect] config file is stored in #{tdconf}")

  diaglogger_info("[Collect] Collecting td-agent gem information...")
  tdgem = c.collect_tdgems()
  diaglogger_info("[Collect] td-agent gem information is stored in #{tdgem}")

  diaglogger_info("[Collect] Collecting OS log file...")
  oslog = c.collect_oslog()
  if masking
    diaglogger_info("[Mask] Masking OS log file : #{oslog}...")
    oslog = m.mask_tdlog(oslog, true)
  end
  diaglogger_info("[Collect] OS log file is stored in #{oslog}")

  diaglogger_info("[Collect] Collecting OS memory information...")
  meminfo = c.collect_meminfo()
  diaglogger_info("[Collect] OS memory information is stored in #{meminfo}")

  diaglogger_info("[Collect] Collecting date/time information...")
  ntp_command =
    if system('which chronyc > /dev/null 2>&1')
      "chrony"
    elsif system('which ntpq > /dev/null 2>&1')
      "ntp"
    end
  if ntp_command
    ntp = c.collect_ntp(ntp_command)
    diaglogger_info("[Collect] date/time information is stored in #{ntp}")
  else
    diaglogger_warn("[Collect] chrony/ntp does not exist. skip collecting date/time information")
  end

  diaglogger_info("[Collect] Collecting netstat information...")
  if system('which netstat > /dev/null 2>&1')
    netstat_n = c.collect_netstat_n()
    netstat_s = c.collect_netstat_s()
    if masking
      diaglogger_info("[Mask] Masking netstat file : #{netstat_n}...")
      netstat_n = m.mask_tdlog(netstat_n, true)
    end
    diaglogger_info("[Collect] netstat information is stored in #{netstat_n} and #{netstat_s}")
  else
    diaglogger_warn("[Collect] netstat does not exist. skip collecting netstat")
  end

  diaglogger_info("[Collect] Collecting sysctl information...")
  sysctl = c.collect_sysctl()
  diaglogger_info("[Collect] sysctl information is stored in #{sysctl}")

  diaglogger_info("[Valid] Validating sysctl information...")
  _ret, sysctl = v.valid_sysctl(sysctl)
  sysctl.keys.each do |k|
    if sysctl[k]['result'] == 'correct'
      diaglogger_info("[Valid] Sysctl: #{k} => #{sysctl[k]['value']} is correct (recommendation is #{sysctl[k]['recommend']})")
    elsif sysctl[k]['result'] == 'incorrect'
      diaglogger_warn("[Valid] Sysctl: #{k} => #{sysctl[k]['value']} is incorrect (recommendation is #{sysctl[k]['recommend']})")
    end
  end

  diaglogger_info("[Collect] Collecting ulimit information...")
  ulimit = c.collect_ulimit()
  diaglogger_info("[Collect] ulimit information is stored in #{ulimit}")

  diaglogger_info("[Valid] Validating ulimit information...")
  ret, rec, val = v.valid_ulimit(ulimit)
  if ret == true
    diaglogger_info("[Valid] ulimit => #{val} is correct (recommendation is >#{rec})")
  else
    diaglogger_warn("[Valid] ulimit => #{val} is incorrect (recommendation is >#{rec})")
  end

  if masking
    diaglogger_info("[Mask] Masking td-agent config file : #{tdconf}...")
    m.mask_tdlog(tdconf, true)
    tdlog.each do |file|
      diaglogger_info("[Mask] Masking td-agent log file : #{file}...")
      filename = file.split("/")[-1]
      if filename.include?(".gz")
        m.mask_tdlog_gz(file, true)
      else
        m.mask_tdlog(file, true)
      end
    end

    diaglogger_info("[Mask] Export mask log file : #{@masklog}")
    m.export_masklog(@masklog)
  end

  tar_file = c.compress_output()
  diaglogger_info("[Collect] Generate tar file #{tar_file}")
end
|
158
|
+
|
159
|
+
# Validates the command-line options and returns the run configuration.
#
# params - Hash with the keys :output (required, existing directory),
#          :mask ('yes' / 'no', optional), :"word-list" (Array, optional),
#          :"word-file" (path to a file with one word per line, optional)
#          and :"hash-seed" (optional).
#
# Returns a Hash with :output_dir, :mask, :words, :wfile and :seed.
# Raises RuntimeError when the output directory is missing or does not
# exist, when :mask is neither 'yes' nor 'no', or when the word file does
# not exist.
def parse_diagconf(params)
  options = {
    :output_dir => '',
    :mask => 'no',
    :words => [],
    :wfile => '',
    :seed => ''
  }

  output = params[:output]
  raise "output directory '-o' must be specified" if output.nil?
  # The message used to interpolate an undefined local (`output_dir`), which
  # turned this error into a NameError.
  raise "output directory '#{output}' does not exist" unless Dir.exist?(output)
  options[:output_dir] = output

  mask = params[:mask]
  unless mask.nil?
    unless mask == 'yes' || mask == 'no'
      raise "invalid arguments '#{mask}' : input of '-m|--mask' should be 'yes' or 'no'"
    end
    options[:mask] = mask
  end

  # Work on a copy so that words read from the file are not appended to the
  # caller's array.
  options[:words] = params[:"word-list"].dup unless params[:"word-list"].nil?

  word_file = params[:"word-file"]
  unless word_file.nil?
    raise "#{word_file} : No such file or directory" unless File.exist?(word_file)

    File.readlines(word_file).each do |line|
      word = line.chomp
      # A blank line would register "" as a word to mask, and every empty
      # token of every log line would then be replaced.
      options[:words] << word unless word.empty?
    end
  end
  options[:words] = options[:words].uniq

  options[:seed] = params[:"hash-seed"] unless params[:"hash-seed"].nil?
  options
end
|
200
|
+
# Sends +str+ at DEBUG severity to the console logger and then to the file
# logger; returns the result of the file logger call.
def diaglogger_debug(str)
  [@logger, @logger_file].map { |sink| sink.debug(str) }.last
end
|
204
|
+
# Sends +str+ at INFO severity to the console logger and then to the file
# logger; returns the result of the file logger call.
def diaglogger_info(str)
  [@logger, @logger_file].map { |sink| sink.info(str) }.last
end
|
208
|
+
# Sends +str+ at WARN severity to the console logger and then to the file
# logger; returns the result of the file logger call.
def diaglogger_warn(str)
  [@logger, @logger_file].map { |sink| sink.warn(str) }.last
end
|
212
|
+
# Sends +str+ at ERROR severity to the console logger and then to the file
# logger; returns the result of the file logger call.
def diaglogger_error(str)
  [@logger, @logger_file].map { |sink| sink.error(str) }.last
end
|
216
|
+
end
|
217
|
+
end
|
@@ -0,0 +1,314 @@
|
|
1
|
+
#
|
2
|
+
# Fluentd
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
#
|
16
|
+
|
17
|
+
require 'digest'
|
18
|
+
require 'fileutils'
|
19
|
+
require 'logger'
|
20
|
+
require 'open3'
|
21
|
+
require 'json'
|
22
|
+
|
23
|
+
module Diagtool
|
24
|
+
class MaskUtils
|
25
|
+
# Sets up the masking state.
#
# conf      - Hash; reads conf[:words] (words to mask) and conf[:seed]
#             (string prepended to a value before it is hashed).
# log_level - severity handed to Logger.new via level:.
#
# @id holds the position currently being processed (file, line, column);
# @masklog is a Hash that creates nested Hashes on first access at any depth.
def initialize(conf, log_level)
  @words = conf[:words]
  line_format = proc { |severity, datetime, _progname, msg|
    "#{datetime}: [Maskutils] [#{severity}] #{msg}\n"
  }
  @logger = Logger.new(STDOUT, level: log_level, formatter: line_format)
  @logger.debug("Initialize Maskutils: sanitized word = #{conf[:words]}")
  @hash_seed = conf[:seed]
  @id = { :fid => '', :lid => '', :cid => '' }
  @masklog = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
end
|
39
|
+
# Masks a plain-text file line by line and writes the result to
# "<input_file>.mask".
#
# input_file - path of the file to mask.
# clean      - when exactly true, the input file is deleted afterwards.
#
# Returns the path of the masked file.
#
# The output file is opened in block form so the handle is closed even when
# masking a line raises, and the input is streamed with File.foreach
# instead of being loaded into memory in one piece.
def mask_tdlog(input_file, clean)
  output_file = input_file + '.mask'
  File.open(output_file, 'w') do |out|
    File.foreach(input_file).with_index do |line, line_id|
      # Re-encode from binary so that bytes with no UTF-8 mapping are
      # dropped instead of raising while the line is split (marked
      # "temporary" by the original author).
      line = line.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
      @id[:fid] = input_file
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end
  end
  FileUtils.rm(input_file) if clean == true
  output_file
end
|
54
|
+
# Masks a gzip-compressed file line by line and writes the (uncompressed)
# result to "<input_file>.mask".
#
# input_file - path of the .gz file to mask.
# clean      - when exactly true, the input file is deleted afterwards.
#
# Returns the path of the masked file.
#
# gunzip is invoked with an argument list rather than an interpolated shell
# string, so file names containing spaces or shell metacharacters are
# handled, and its output is read directly instead of going through a
# temporary file. The output handle is closed even when masking raises.
def mask_tdlog_gz(input_file, clean)
  output_file = input_file + '.mask'
  plain, _stderr, status = Open3.capture3('gunzip', '-c', input_file)
  @logger.warn("gunzip failed for #{input_file}") unless status.success?

  File.open(output_file, 'w') do |out|
    plain.each_line.with_index do |line, line_id|
      # Drop invalid byte sequences so splitting the line cannot raise.
      line = line.scrub('')
      @id[:fid] = input_file
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end
  end
  FileUtils.rm(input_file) if clean == true
  output_file
end
|
71
|
+
# Masks one line of text and returns the masked line (without its trailing
# newline).
#
# The line is split on single whitespace characters; every token is passed
# to the matching mask_*_pattern method (see mask_token). A token that
# contains commas is first split on ',' and each piece is masked on its
# own. @id[:cid] is updated to the token index ("i", or "i-j" for pieces of
# a comma token) before each piece is processed.
#
# Fixes over the previous revision:
# * the loop bound was computed from a different split (commas AND
#   whitespace) than the one used for indexing (whitespace only), which
#   appended spurious empty tokens or silently dropped trailing ones;
# * comma tokens are split with limit -1, so trailing commas are preserved
#   and a token consisting only of ',' no longer raises NoMethodError.
def mask_tdlog_inspector(line)
  @logger.debug("Input Line: #{line.chomp}")
  tokens = line.split(/\s/)
  @logger.debug("Splitted Line: #{tokens}")

  masked_tokens = tokens.each_with_index.map do |token, i|
    @logger.debug("Splitted Line #{i}: #{token}")
    @id[:cid] = i.to_s
    if token.include?(',')
      pieces = token.split(',', -1).each_with_index.map do |piece, cnt|
        @id[:cid] = i.to_s + '-' + cnt.to_s
        mask_token(piece)
      end
      pieces.join(',')
    else
      mask_token(token)
    end
  end

  line_masked = masked_tokens.join(' ')
  @logger.debug("Masked Line: #{line_masked}")
  line_masked
end

# Dispatches one token to the pattern-specific masker chosen by the first
# delimiter found, in this order: '://', '=', ':', '/', none.
# Returns the masked text when the masker reports a hit, else the token.
def mask_token(token)
  label, result =
    if token.include?('://') # <http/dRuby>://<address:ip/hostname>:<port>
      ['URL', mask_url_pattern(token)]
    elsif token.include?('=')
      ['Equal', mask_equal_pattern(token)]
    elsif token.include?(':') # <address:ip/hostname>:<port>
      ['Colon', mask_colon_pattern(token)]
    elsif token.include?('/')
      ['Slash', mask_slash_pattern(token)]
    else
      ['Direct', mask_direct_pattern(token)]
    end
  is_mask, masked_contents = result
  return token unless is_mask

  @logger.debug(" #{label} Pattern Detected: #{token} -> #{masked_contents}")
  masked_contents
end
private :mask_token
|
160
|
+
# Masks a token that contains none of the delimiters handled by the other
# mask_*_pattern methods.
#
# When the token contains '>' (checked first) or ']', every occurrence of
# that character is removed before the lookup and a single one is appended
# to the masked value.
#
# Returns [is_mask, text]: the masked text on a hit, the unchanged input
# otherwise.
#
# The '>' and ']' branches used to assign the result to a differently named
# local, so the method returned [true, nil] and the token was erased.
def mask_direct_pattern(str)
  closer = ['>', ']'].find { |c| str.include?(c) }
  bare = closer ? str.gsub(closer, '') : str
  is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(bare)
  return [is_mask, str] unless is_mask

  [is_mask, chunk_mask + closer.to_s]
end
|
176
|
+
# Masks the first maskable component of a "<scheme>://<host>:<port>" style
# token.
#
# The token is split on '://' and each segment on ':'; the pieces are tried
# in order and processing stops after the first piece that gets masked.
#
# Returns [is_mask, text], where text is the token with that piece
# replaced (or the unchanged token when nothing was masked).
#
# Both splits use limit -1 so empty leading/trailing pieces survive the
# round trip. This removes the NoMethodError on tokens such as "://" or
# "x://:" (whose split used to yield an empty array) and the need to
# re-append a trailing ':', which was doubled whenever an earlier segment
# had already been masked.
def mask_url_pattern(str)
  is_mask = false
  segments = str.split('://', -1).map do |segment|
    next segment if is_mask

    pieces = segment.split(':', -1).map do |piece|
      next piece if is_mask

      is_mask, replacement = mask_url_piece(piece)
      replacement
    end
    pieces.join(':')
  end
  [is_mask, segments.join('://')]
end

# Masks one piece of a URL token. A ']' (checked first) or '>' inside the
# piece is stripped before the lookup and appended once to the masked value.
# Returns [is_mask, text], with the unchanged piece when nothing was masked.
def mask_url_piece(piece)
  closer = [']', '>'].find { |c| piece.include?(c) }
  is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(closer ? piece.gsub(closer, '') : piece)
  [is_mask, is_mask ? chunk_mask + closer.to_s : piece]
end
private :mask_url_piece
|
218
|
+
# Masks the first maskable field of a "key=value" style token, e.g.
# host=<ip/hostname> or bind=<ip/hostname>.
#
# The token is split on '='; fields are tried in order and processing stops
# after the first one that gets masked. Returns [is_mask, text], where text
# is the fields joined with '=' again.
def mask_equal_pattern(str)
  fields = str.split('=')
  is_mask = false
  # At least one lookup is performed, even when the split yields nothing.
  [fields.length, 1].max.times do |idx|
    is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(fields[idx])
    next unless is_mask

    fields[idx] = chunk_mask
    break
  end
  [is_mask, fields.join('=')]
end
|
231
|
+
# Masks the first maskable field of a "<ip/hostname>:<port>" style token.
#
# The token is split on ':'; fields are tried in order and processing stops
# after the first one that gets masked. Because the split drops trailing
# empty fields, one ':' is appended again when the input ended with ':'.
# Returns [is_mask, text].
def mask_colon_pattern(str)
  fields = str.split(':')
  is_mask = false
  # At least one lookup is performed, even when the split yields nothing.
  [fields.length, 1].max.times do |idx|
    is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(fields[idx])
    next unless is_mask

    fields[idx] = chunk_mask
    break
  end
  rebuilt = fields.join(':')
  rebuilt << ":" if str.end_with?(':')
  [is_mask, rebuilt]
end
|
245
|
+
# Masks the first maskable field of a '/'-separated token (a path, or
# "<address>/<something>").
#
# The token is split on '/'; fields are tried in order and processing stops
# after the first one that gets masked. Returns [is_mask, text], where text
# is the fields joined with '/' again.
#
# The split uses limit -1 so trailing '/' characters survive the round
# trip. The previous code restored a trailing ':' here (copied from the
# colon variant), which did nothing for '/' tokens, so "host/" lost its
# slash when masked.
def mask_slash_pattern(str)
  fields = str.split('/', -1)
  is_mask = false
  fields.each_index do |idx|
    is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(fields[idx])
    next unless is_mask

    fields[idx] = chunk_mask
    break
  end
  [is_mask, fields.join('/')]
end
|
259
|
+
# True when +str+ looks like a dotted quad: four groups of one to three
# digits separated by dots. The numeric range of each group is not checked.
def is_ipv4?(str)
  matched_at = str =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
  !matched_at.nil?
end
|
262
|
+
# True when +str+ looks like a fully qualified domain name: at least two
# dot-terminated labels (letters, digits, inner hyphens) followed by a
# final part made of letters/hyphens that is at least two characters long.
# A name with a single dot such as "example.com" therefore does not match.
#
# Alternative expressions kept from the original for reference:
#!!(str =~ /^\b((?=[a-z0-9-]{1,63}\.)[a-z0-9]+(-[a-z0-9]+)*\.)+([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$/)
#!!(str =~ /^\b(?=^.{1,254}$)(^(?:(?!\d+\.)[a-zA-Z0-9_\-]{1,63}\.?)+(?:[a-zA-Z]{2,})$)/)
def is_fqdn?(str)
  matched_at = str =~ /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/
  !matched_at.nil?
end
|
267
|
+
# True when +str+ is exactly equal to one of the entries of @words.
def is_words?(str)
  @words.any? { |word| str == word }
end
|
277
|
+
# Decides whether a piece of text must be masked and, if so, builds its
# replacement.
#
# Quote characters (', ", and their backslash-escaped forms) are removed
# before the checks. The text is classified as 'IPv4', 'FQDN' or 'Word'
# (tested in that order); a match is replaced by
# "<type>_<MD5 hex digest of @hash_seed + unquoted text>" and recorded via
# put_masklog.
#
# Returns [is_mask, text, masked_text]. On a hit, text is the unquoted
# input; otherwise both text and masked_text are the input converted with
# to_s.
def mask_ipv4_fqdn_words(str)
  str = str.to_s
  unquoted = str.gsub(/\\\"|\'|\"|\\\'/,'')
  mtype =
    if is_ipv4?(unquoted)
      'IPv4'
    elsif is_fqdn?(unquoted)
      'FQDN'
    elsif is_words?(unquoted)
      'Word'
    end
  return [false, str, str] if mtype.nil?

  str_mask = mtype + '_' + Digest::MD5.hexdigest(@hash_seed + unquoted)
  put_masklog(unquoted, str_mask)
  [true, unquoted, str_mask]
end
|
302
|
+
# Records one replacement in @masklog under the current file (@id[:fid])
# and the key "Line<lid>-<cid>" built from the current line and column ids.
#
# str      - the original text.
# str_mask - the text it was replaced with.
def put_masklog(str, str_mask)
  entry = @masklog[@id[:fid]]["Line#{@id[:lid]}-#{@id[:cid]}"]
  entry['original'] = str
  entry['mask'] = str_mask
end
|
307
|
+
# Writes @masklog to +output_file+ as pretty-printed JSON.
def export_masklog(output_file)
  payload = JSON.pretty_generate(@masklog)
  File.open(output_file, 'w') { |io| io.puts(payload) }
end
|
313
|
+
end
|
314
|
+
end
|