fluent-diagtool 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,165 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'fileutils'
18
+ require 'open3'
19
+ require 'logger'
20
+
21
module Diagtool
  # Collects td-agent and operating-system diagnostics into the working
  # directory given by conf[:workdir].
  #
  # Paths held in @tdconf_path, @tdlog_path, @oslog_path and @sysctl_path
  # always end with '/', so they can be concatenated directly with a file name.
  class CollectUtils
    # Reads the td-agent environment (via `systemctl cat td-agent`) and the OS
    # environment (via `hostnamectl`) and derives the config/log locations.
    #
    # @param conf [Hash] uses :time, :output_dir and :workdir; :workdir must
    #   already exist because the raw command outputs are written into it
    # @param log_level [Object] level handed to Logger.new
    # @raise [RuntimeError] when FLUENT_CONF or TD_AGENT_LOG_FILE is not
    #   defined in the output of `systemctl cat td-agent`
    def initialize(conf, log_level)
      @logger = Logger.new(STDOUT, level: log_level, formatter: proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Diagutils] [#{severity}] #{msg}\n"
      })
      @time_format = conf[:time]
      @output_dir = conf[:output_dir]
      @workdir = conf[:workdir]

      @tdenv = get_tdenv()
      @tdconf_path, @tdconf = split_env_path('FLUENT_CONF')
      @tdlog_path, @tdlog = split_env_path('TD_AGENT_LOG_FILE')

      @osenv = get_osenv()
      @oslog_path = '/var/log/'
      @oslog = 'messages'
      @sysctl_path = '/etc/'
      @sysctl = 'sysctl.conf'

      @logger.info("Loading the environment parameters...")
      @logger.info(" operating system = #{@osenv['Operating System']}")
      @logger.info(" kernel version = #{@osenv['Kernel']}")
      @logger.info(" td-agent conf path = #{@tdconf_path}")
      @logger.info(" td-agent conf file = #{@tdconf}")
      @logger.info(" td-agent log path = #{@tdlog_path}")
      @logger.info(" td-agent log = #{@tdlog}")
    end

    # Runs `hostnamectl`, stores its raw output in <workdir>/os_env.output and
    # returns the "Key: value" lines as a Hash.
    #
    # Only the first ':' separates key and value, so values that contain
    # colons themselves are kept whole; lines without ':' are ignored.
    #
    # @return [Hash{String=>String}]
    def get_osenv()
      stdout, _stderr, _status = Open3.capture3('hostnamectl')
      os_dict = {}
      stdout.each_line do |l|
        key, separator, value = l.partition(':')
        next if separator.empty?
        os_dict[key.strip] = value.strip
      end
      File.open(@workdir + '/os_env.output', 'w') do |f|
        f.puts(stdout)
      end
      os_dict
    end

    # Runs `systemctl cat td-agent`, stores its raw output in
    # <workdir>/td-agent_env.output and returns the variables declared by
    # whitespace-separated `Environment=NAME=VALUE` tokens.
    #
    # The value is everything after the second '=', so values containing '='
    # are preserved. Other directives that merely contain the word
    # "Environment" (for example EnvironmentFile=) are not treated as
    # variables.
    #
    # @return [Hash{String=>String}]
    def get_tdenv()
      stdout, _stderr, _status = Open3.capture3('systemctl cat td-agent')
      File.open(@workdir + '/td-agent_env.output', 'w') do |f|
        f.puts(stdout)
      end
      env_dict = {}
      stdout.split.each do |token|
        next unless token.start_with?('Environment=')
        _directive, name, value = token.split('=', 3)
        env_dict[name] = value
      end
      env_dict
    end

    # @return [Hash] the detected environment under the keys
    #   :os, :kernel, :tdconf, :tdconf_path, :tdlog and :tdlog_path
    def export_env()
      {
        :os => @osenv['Operating System'],
        :kernel => @osenv['Kernel'],
        :tdconf => @tdconf,
        :tdconf_path => @tdconf_path,
        :tdlog => @tdlog,
        :tdlog_path => @tdlog_path
      }
    end

    # Copies the td-agent config file below the working directory.
    # @return [String] path of the copy
    def collect_tdconf()
      copy_into_workdir(@tdconf_path, @tdconf)
    end

    # Copies the contents of the td-agent log directory to the same path below
    # the working directory.
    #
    # @return [Array<String>] copied files whose name starts with the td-agent
    #   log file name
    def collect_tdlog()
      target = @workdir + @tdlog_path
      FileUtils.mkdir_p(target)
      # 'src/.' makes cp_r copy the directory contents into target rather than
      # nesting the source directory itself, so the glob below finds the files
      # wherever the log directory is located.
      FileUtils.cp_r(File.join(@tdlog_path, '.'), target)
      Dir.glob(target + @tdlog + '*')
    end

    # Copies /etc/sysctl.conf below the working directory.
    # @return [String] path of the copy
    def collect_sysctl()
      copy_into_workdir(@sysctl_path, @sysctl)
    end

    # Copies /var/log/messages below the working directory.
    # @return [String] path of the copy
    def collect_oslog()
      copy_into_workdir(@oslog_path, @oslog)
    end

    # @return [String] path of the file holding the output of `ulimit -n`
    def collect_ulimit()
      write_command_output('/ulimit_n.output', "ulimit -n")
    end

    # @return [String] path of the file holding the contents of /proc/meminfo
    def collect_meminfo()
      write_command_output('/meminfo.output', "cat /proc/meminfo")
    end

    # @return [String] path of the file holding the output of `netstat -n`
    def collect_netstat_n()
      write_command_output('/netstat_n.output', "netstat -n")
    end

    # @return [String] path of the file holding the output of `netstat -s`
    def collect_netstat_s()
      write_command_output('/netstat_s.output', "netstat -s")
    end

    # Stores the output of `date` followed by the time-source listing.
    #
    # @param command [String] "chrony" runs `chronyc sources`, "ntp" runs
    #   `ntpq -p`; for any other value only the date (and an empty line) is
    #   written
    # @return [String] path of the written file
    def collect_ntp(command)
      output = @workdir + '/ntp_info.output'
      stdout_date, = Open3.capture3("date")
      ntp_command = { "chrony" => "chronyc sources", "ntp" => "ntpq -p" }[command]
      stdout_ntp = ntp_command ? Open3.capture3(ntp_command).first : nil
      File.open(output, 'w') do |f|
        f.puts(stdout_date)
        f.puts(stdout_ntp)
      end
      output
    end

    # @return [String] path of the file listing the td-agent gems whose line
    #   contains "fluent"
    def collect_tdgems()
      write_command_output('/tdgem_list.output', "td-agent-gem list | grep fluent")
    end

    # Packs the <time> directory inside the output directory into
    # diagout-<time>.tar.gz in that same output directory.
    #
    # tar runs with its own working directory (chdir:), so the working
    # directory of the calling process is left untouched, and the arguments
    # are passed as a list so no shell interprets them.
    #
    # @return [String] path of the tar file
    def compress_output()
      tar_file = 'diagout-' + @time_format + '.tar.gz'
      Open3.capture3('tar', 'cvfz', tar_file, @time_format, chdir: @output_dir)
      @output_dir + '/' + tar_file
    end

    private

    # Splits the path stored in @tdenv[name] into [directory_with_slash, file].
    def split_env_path(name)
      path = @tdenv[name]
      raise "#{name} is not defined in the output of 'systemctl cat td-agent'" if path.nil?
      directory, slash, file = path.rpartition('/')
      [directory + slash, file]
    end

    # Copies dir+file to <workdir>+dir and returns the path of the copy.
    def copy_into_workdir(dir, file)
      target = @workdir + dir
      FileUtils.mkdir_p(target)
      FileUtils.cp(dir + file, target)
      target + file
    end

    # Runs command, writes its stdout to <workdir>+name and returns that path.
    def write_command_output(name, command)
      output = @workdir + name
      stdout, _stderr, _status = Open3.capture3(command)
      File.open(output, 'w') do |f|
        f.puts(stdout)
      end
      output
    end
  end
end
@@ -0,0 +1,217 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'logger'
18
+ require 'fileutils'
19
+ require 'fluent/diagtool/collectutils'
20
+ require 'fluent/diagtool/maskutils'
21
+ require 'fluent/diagtool/validutils'
22
+ include Diagtool
23
+
24
module Diagtool
  # Command driver: validates the options, then collects, validates, masks and
  # finally archives the diagnostic data.
  class DiagUtils
    # Validates the options, creates <output_dir>/<timestamp> as the working
    # directory and opens two loggers (STDOUT and <workdir>/diagtool.output).
    #
    # @param params [Hash] raw command options, see #parse_diagconf
    # @raise [RuntimeError] when the options are invalid
    def initialize(params)
      time = Time.new
      @time_format = time.strftime("%Y%m%d%0k%M%0S")
      @conf = parse_diagconf(params)
      @conf[:time] = @time_format
      @conf[:workdir] = @conf[:output_dir] + '/' + @time_format
      FileUtils.mkdir_p(@conf[:workdir])
      diaglog = @conf[:workdir] + '/diagtool.output'
      @masklog = './mask_' + @time_format + '.json'
      formatter = proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Diagtool] [#{severity}] #{msg}\n"
      }
      @logger = Logger.new(STDOUT, formatter: formatter)
      @logger_file = Logger.new(diaglog, formatter: formatter)
      diaglogger_info("Parsing command options...")
      diaglogger_info(" Option : Output directory = #{@conf[:output_dir]}")
      diaglogger_info(" Option : Mask = #{@conf[:mask]}")
      diaglogger_info(" Option : Word list = #{@conf[:words]}")
      diaglogger_info(" Option : Hash Seed = #{@conf[:seed]}")
    end

    # Runs the whole diagnostic sequence: collect td-agent logs/config/gems,
    # the OS log, memory, time-source, netstat, sysctl and ulimit data,
    # validate sysctl and ulimit, mask the collected files when
    # conf[:mask] == 'yes', and archive the working directory.
    def diagtool()
      loglevel = 'WARN'
      masking = @conf[:mask] == 'yes'
      diaglogger_info("Initializing parameters...")
      c = CollectUtils.new(@conf, loglevel)
      c_env = c.export_env()
      diaglogger_info("[Collect] Loading the environment parameters...")
      diaglogger_info("[Collect] operating system = #{c_env[:os]}")
      diaglogger_info("[Collect] kernel version = #{c_env[:kernel]}")
      diaglogger_info("[Collect] td-agent conf path = #{c_env[:tdconf_path]}")
      diaglogger_info("[Collect] td-agent conf file = #{c_env[:tdconf]}")
      diaglogger_info("[Collect] td-agent log path = #{c_env[:tdlog_path]}")
      diaglogger_info("[Collect] td-agent log = #{c_env[:tdlog]}")
      m = MaskUtils.new(@conf, loglevel)
      v = ValidUtils.new(loglevel)

      diaglogger_info("[Collect] Collecting log files of td-agent...")
      tdlog = c.collect_tdlog()
      diaglogger_info("[Collect] log files of td-agent are stored in #{tdlog}")

      diaglogger_info("[Collect] Collecting config file of td-agent...")
      tdconf = c.collect_tdconf()
      diaglogger_info("[Collect] config file is stored in #{tdconf}")

      diaglogger_info("[Collect] Collecting td-agent gem information...")
      tdgem = c.collect_tdgems()
      diaglogger_info("[Collect] td-agent gem information is stored in #{tdgem}")

      diaglogger_info("[Collect] Collecting OS log file...")
      oslog = c.collect_oslog()
      if masking
        diaglogger_info("[Mask] Masking OS log file : #{oslog}...")
        oslog = m.mask_tdlog(oslog, true)
      end
      diaglogger_info("[Collect] OS log file is stored in #{oslog}")

      diaglogger_info("[Collect] Collecting OS memory information...")
      meminfo = c.collect_meminfo()
      diaglogger_info("[Collect] OS memory information is stored in #{meminfo}")

      diaglogger_info("[Collect] Collecting date/time information...")
      ntp = nil
      if system('which chronyc > /dev/null 2>&1')
        ntp = c.collect_ntp("chrony")
      elsif system('which ntpq > /dev/null 2>&1')
        ntp = c.collect_ntp("ntp")
      else
        diaglogger_warn("[Collect] chrony/ntp does not exist. skip collecting date/time information")
      end
      # Only report a location when something was actually collected.
      diaglogger_info("[Collect] date/time information is stored in #{ntp}") if ntp

      diaglogger_info("[Collect] Collecting netstat information...")
      if system('which netstat > /dev/null 2>&1')
        netstat_n = c.collect_netstat_n()
        netstat_s = c.collect_netstat_s()
        if masking
          diaglogger_info("[Mask] Masking netstat file : #{netstat_n}...")
          netstat_n = m.mask_tdlog(netstat_n, true)
        end
        diaglogger_info("[Collect] netstat information is stored in #{netstat_n} and #{netstat_s}")
      else
        diaglogger_warn("[Collect] netstat does not exist. skip collecting netstat")
      end

      diaglogger_info("[Collect] Collecting sysctl information...")
      sysctl = c.collect_sysctl()
      diaglogger_info("[Collect] sysctl information is stored in #{sysctl}")

      diaglogger_info("[Valid] Validating sysctl information...")
      _ret, sysctl = v.valid_sysctl(sysctl)
      sysctl.each do |k, check|
        if check['result'] == 'correct'
          diaglogger_info("[Valid] Sysctl: #{k} => #{check['value']} is correct (recommendation is #{check['recommend']})")
        elsif check['result'] == 'incorrect'
          diaglogger_warn("[Valid] Sysctl: #{k} => #{check['value']} is incorrect (recommendation is #{check['recommend']})")
        end
      end

      diaglogger_info("[Collect] Collecting ulimit information...")
      ulimit = c.collect_ulimit()
      diaglogger_info("[Collect] ulimit information is stored in #{ulimit}")

      diaglogger_info("[Valid] Validating ulimit information...")
      ret, rec, val = v.valid_ulimit(ulimit)
      if ret == true
        diaglogger_info("[Valid] ulimit => #{val} is correct (recommendation is >#{rec})")
      else
        diaglogger_warn("[Valid] ulimit => #{val} is incorrect (recommendation is >#{rec})")
      end

      if masking
        diaglogger_info("[Mask] Masking td-agent config file : #{tdconf}...")
        m.mask_tdlog(tdconf, true)
        tdlog.each do |file|
          diaglogger_info("[Mask] Masking td-agent log file : #{file}...")
          filename = file.split("/")[-1]
          if filename.include?(".gz")
            m.mask_tdlog_gz(file, true)
          else
            m.mask_tdlog(file, true)
          end
        end

        diaglogger_info("[Mask] Export mask log file : #{@masklog}")
        m.export_masklog(@masklog)
      end

      tar_file = c.compress_output()
      diaglogger_info("[Collect] Generate tar file #{tar_file}")
    end

    # Validates the raw command options and normalises them.
    #
    # @param params [Hash] :output (existing directory, required),
    #   :mask ('yes' or 'no'), :"word-list" (Array of words),
    #   :"word-file" (file with one word per line), :"hash-seed"
    # @return [Hash] with :output_dir, :mask, :words (unique), :wfile, :seed
    # @raise [RuntimeError] when :output is missing or not a directory, :mask
    #   is neither 'yes' nor 'no', or the word file does not exist
    def parse_diagconf(params)
      options = {
        :output_dir => '',
        :mask => 'no',
        :words => [],
        :wfile => '',
        :seed => ''
      }

      output = params[:output]
      raise "output directory '-o' must be specified" if output.nil?
      raise "output directory '#{output}' does not exist" unless Dir.exist?(output)
      options[:output_dir] = output

      mask = params[:mask]
      unless mask.nil?
        unless mask == 'yes' || mask == 'no'
          raise "invalid arguments '#{mask}' : input of '-m|--mask' should be 'yes' or 'no'"
        end
        options[:mask] = mask
      end

      # Work on a copy so the words read from the file below are not appended
      # to the caller's array.
      word_list = params[:"word-list"]
      options[:words] = word_list.dup unless word_list.nil?

      word_file = params[:"word-file"]
      unless word_file.nil?
        raise "#{word_file} : No such file or directory" unless File.exist?(word_file)
        File.readlines(word_file).each do |l|
          options[:words].append(l.delete("\n"))
        end
      end
      options[:words] = options[:words].uniq

      seed = params[:"hash-seed"]
      options[:seed] = seed unless seed.nil?
      options
    end

    # The diaglogger_* helpers write the message to both STDOUT and the
    # diagtool.output file inside the working directory.
    def diaglogger_debug(str)
      @logger.debug(str)
      @logger_file.debug(str)
    end

    def diaglogger_info(str)
      @logger.info(str)
      @logger_file.info(str)
    end

    def diaglogger_warn(str)
      @logger.warn(str)
      @logger_file.warn(str)
    end

    def diaglogger_error(str)
      @logger.error(str)
      @logger_file.error(str)
    end
  end
end
@@ -0,0 +1,314 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'digest'
18
+ require 'fileutils'
19
+ require 'logger'
20
+ require 'open3'
21
+ require 'json'
22
+
23
module Diagtool
  # Replaces IPv4 addresses, FQDNs and user-supplied words found in text files
  # by "<type>_<md5(seed + value)>" and records every replacement so it can be
  # exported as JSON.
  #
  # Within a single token only the first maskable part is replaced (the same
  # policy for every pattern below); the mask log keeps one entry per token.
  class MaskUtils
    # Quote characters (optionally backslash-escaped) stripped from a chunk
    # before it is classified.
    QUOTE_PATTERN = /\\\"|\'|\"|\\\'/

    # Substring that selects a pattern, its label in debug logs, and the
    # method handling it; the first matching entry wins.
    TOKEN_PATTERNS = [
      ['://', 'URL', :mask_url_pattern],
      ['=', 'Equal', :mask_equal_pattern],
      [':', 'Colon', :mask_colon_pattern],
      ['/', 'Slash', :mask_slash_pattern]
    ].freeze

    # @param conf [Hash] uses :words (exact words to mask) and :seed (string
    #   prepended to a value before hashing)
    # @param log_level [Object] level handed to Logger.new
    def initialize(conf, log_level)
      @words = conf[:words] || []
      @logger = Logger.new(STDOUT, level: log_level, formatter: proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Maskutils] [#{severity}] #{msg}\n"
      })
      @logger.debug("Initialize Maskutils: sanitized word = #{conf[:words]}")
      @hash_seed = conf[:seed].to_s
      # Position of the chunk being processed: file, line index, token index.
      @id = {
        :fid => '',
        :lid => '',
        :cid => ''
      }
      # Auto-creating nested hash: masklog[file][uid]['original' | 'mask'].
      @masklog = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
    end

    # Writes a masked copy of input_file to "<input_file>.mask".
    #
    # @param input_file [String] path of a plain-text file
    # @param clean [Boolean] when exactly true, input_file is removed afterwards
    # @return [String] path of the masked copy
    def mask_tdlog(input_file, clean)
      masked_file = input_file + '.mask'
      # Block form closes the output even if a line raises.
      File.open(masked_file, 'w') do |out|
        File.foreach(input_file).with_index do |line, line_id|
          write_masked_line(out, input_file, line_id, line)
        end
      end
      FileUtils.rm(input_file) if clean == true
      masked_file
    end

    # Same as #mask_tdlog for a gzip-compressed file; the masked copy is plain
    # text. The file is decompressed with `gunzip -c`, called without a shell
    # so the file name is never interpreted. When decompression fails a
    # warning is logged and whatever output gunzip produced is masked.
    #
    # @param input_file [String] path of a gzip file
    # @param clean [Boolean] when exactly true, input_file is removed afterwards
    # @return [String] path of the masked copy
    def mask_tdlog_gz(input_file, clean)
      masked_file = input_file + '.mask'
      text = gunzip(input_file)
      File.open(masked_file, 'w') do |out|
        text.each_line.with_index do |line, line_id|
          write_masked_line(out, input_file, line_id, line)
        end
      end
      FileUtils.rm(input_file) if clean == true
      masked_file
    end

    # Masks one line. The line is split on single whitespace characters, each
    # token (or each comma-separated part of a token) is masked independently,
    # and the tokens are joined again with single spaces. Trailing whitespace,
    # including the newline, is not part of the result.
    #
    # @param line [String]
    # @return [String] masked line without trailing newline
    def mask_tdlog_inspector(line)
      @logger.debug("Input Line: #{line.chomp}")
      tokens = line.split(/\s/)
      @logger.debug("Splitted Line: #{tokens}")
      contents = tokens.each_with_index.map do |token, i|
        @logger.debug("Splitted Line #{i}: #{token}")
        @id[:cid] = i.to_s
        if token.include?(',')
          # Limit -1 keeps empty parts, so lone or trailing commas survive.
          parts = token.split(',', -1).each_with_index.map do |part, cnt|
            @id[:cid] = i.to_s + '-' + cnt.to_s
            mask_token(part)
          end
          parts.join(',')
        else
          mask_token(token)
        end
      end
      line_masked = contents.join(' ')
      @logger.debug("Masked Line: #{line_masked}")
      line_masked
    end

    # Masks a bare chunk; a '>' or ']' contained in it is removed before
    # classification and appended again after the mask.
    #
    # @return [Array(Boolean, String)] [masked?, resulting string]
    def mask_direct_pattern(str)
      mask_bracketed_chunk(str, ['>', ']'])
    end

    # Masks <scheme>://<address>:<port> style strings: the string is split on
    # '://', parts containing ':' are split again, and the first maskable
    # chunk is replaced.
    #
    # @return [Array(Boolean, String)] [masked?, resulting string]
    def mask_url_pattern(str)
      is_mask = false
      url = str.split('://', -1)
      url.each_with_index do |part, idx|
        if part.include?(':')
          address = part.split(':', -1)
          address.each_with_index do |chunk, j|
            is_mask, address[j] = mask_bracketed_chunk(chunk, [']', '>'])
            break if is_mask
          end
          url[idx] = address.join(':')
        else
          is_mask, url[idx] = mask_bracketed_chunk(part, [']', '>'])
        end
        break if is_mask
      end
      [is_mask, url.join('://')]
    end

    # Masks key=value style strings such as host=<address>.
    # @return [Array(Boolean, String)] [masked?, resulting string]
    def mask_equal_pattern(str)
      mask_first_part(str, '=')
    end

    # Masks <address>:<port> style strings.
    # @return [Array(Boolean, String)] [masked?, resulting string]
    def mask_colon_pattern(str)
      mask_first_part(str, ':')
    end

    # Masks strings whose parts are separated by '/'.
    # @return [Array(Boolean, String)] [masked?, resulting string]
    def mask_slash_pattern(str)
      mask_first_part(str, '/')
    end

    # @return [Boolean] true when str looks like a dotted-quad IPv4 address
    def is_ipv4?(str)
      !!(str =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/)
    end

    # @return [Boolean] true when str consists of at least two dot-terminated
    #   labels followed by an alphabetic final label
    def is_fqdn?(str)
      !!(str =~ /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/)
    end

    # @return [Boolean] true when str equals one of the configured words
    def is_words?(str)
      @words.include?(str)
    end

    # Classifies a chunk (after removing quote characters) as IPv4, FQDN or
    # configured word and, on a match, computes its mask and records it in
    # the mask log. Chunks that are empty once quotes are removed are never
    # masked.
    #
    # @return [Array(Boolean, String, String)] [masked?, value, mask]; value is
    #   the unquoted chunk when masked, otherwise the input, and mask equals
    #   value when nothing was masked
    def mask_ipv4_fqdn_words(str)
      str = str.to_s
      bare = str.gsub(QUOTE_PATTERN, '')
      mtype =
        if bare.empty? then nil
        elsif is_ipv4?(bare) then 'IPv4'
        elsif is_fqdn?(bare) then 'FQDN'
        elsif is_words?(bare) then 'Word'
        end
      return [false, str, str] if mtype.nil?

      str_mask = mtype + '_' + Digest::MD5.hexdigest(@hash_seed + bare)
      put_masklog(bare, str_mask)
      [true, bare, str_mask]
    end

    # Records a replacement under the current file / line / token position.
    def put_masklog(str, str_mask)
      uid = "Line#{@id[:lid]}-#{@id[:cid]}"
      @masklog[@id[:fid]][uid]['original'] = str
      @masklog[@id[:fid]][uid]['mask'] = str_mask
    end

    # Writes all recorded replacements to output_file as pretty-printed JSON.
    def export_masklog(output_file)
      masklog_json = JSON.pretty_generate(@masklog)
      File.open(output_file, 'w') do |f|
        f.puts(masklog_json)
      end
    end

    private

    # Sanitises one raw line, remembers its position for the mask log and
    # writes the masked line to out.
    def write_masked_line(out, input_file, line_id, line)
      # Treats the bytes as binary and drops everything that has no UTF-8
      # mapping, so the splitting below cannot fail on broken byte sequences.
      line = line.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '') # temporary
      @id[:fid] = input_file
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end

    # Returns the decompressed content of input_file ('' when gunzip cannot be
    # started).
    def gunzip(input_file)
      stdout, stderr, status = Open3.capture3('gunzip', '-c', input_file)
      @logger.warn("gunzip failed for #{input_file}: #{stderr.strip}") unless status.success?
      stdout
    rescue SystemCallError => e
      @logger.warn("gunzip could not be run for #{input_file}: #{e.message}")
      ''
    end

    # Picks the pattern for a token and returns the masked token, or the
    # token itself when nothing in it was masked.
    def mask_token(token)
      _needle, label, handler =
        TOKEN_PATTERNS.find { |needle, _label, _handler| token.include?(needle) } ||
        [nil, 'Direct', :mask_direct_pattern]
      is_mask, masked = send(handler, token)
      return token unless is_mask
      @logger.debug(" #{label} Pattern Detected: #{token} -> #{masked}")
      masked
    end

    # Masks chunk after removing the first character of closers it contains,
    # then appends that character again. Returns [masked?, string]; string is
    # the unchanged chunk when nothing was masked.
    def mask_bracketed_chunk(chunk, closers)
      closer = closers.find { |c| chunk.include?(c) }
      candidate = closer ? chunk.gsub(closer, '') : chunk
      is_mask, _value, masked = mask_ipv4_fqdn_words(candidate)
      return [false, chunk] unless is_mask
      [true, closer ? masked + closer : masked]
    end

    # Splits str on delimiter (keeping empty parts so the string can be
    # rebuilt exactly), masks the first maskable part and rejoins.
    def mask_first_part(str, delimiter)
      is_mask = false
      parts = str.split(delimiter, -1)
      parts.each_with_index do |part, idx|
        is_mask, _value, masked = mask_ipv4_fqdn_words(part)
        next unless is_mask
        parts[idx] = masked
        break
      end
      [is_mask, parts.join(delimiter)]
    end
  end
end