fluent-diagtool 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'fileutils'
18
+ require 'open3'
19
+ require 'logger'
20
+
21
module Diagtool
  # Collects td-agent and operating-system diagnostics into a working directory.
  #
  # On construction it reads the td-agent systemd unit (`systemctl cat td-agent`)
  # and `hostnamectl`, stores their raw output under the working directory and
  # derives the td-agent config/log locations from the unit's environment.
  class CollectUtils
    # @param conf [Hash] reads :time (timestamp string used for naming),
    #   :output_dir (directory receiving the archive) and :workdir (directory
    #   receiving the collected files; it must already exist).
    # @param log_level [String, Integer] level passed to Logger.
    # @raise [RuntimeError] when FLUENT_CONF or TD_AGENT_LOG_FILE is not present
    #   in the td-agent unit environment.
    def initialize(conf, log_level)
      @logger = Logger.new(STDOUT, level: log_level, formatter: proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Diagutils] [#{severity}] #{msg}\n"
      })
      @time_format = conf[:time]
      @output_dir = conf[:output_dir]
      @workdir = conf[:workdir]

      @tdenv = get_tdenv()
      @tdconf_path, @tdconf = split_dir_and_name(fetch_tdenv('FLUENT_CONF'))
      @tdlog_path, @tdlog = split_dir_and_name(fetch_tdenv('TD_AGENT_LOG_FILE'))

      @osenv = get_osenv()
      @oslog_path = '/var/log/'
      @oslog = 'messages'
      @sysctl_path = '/etc/'
      @sysctl = 'sysctl.conf'

      @logger.info("Loading the environment parameters...")
      @logger.info("    operating system    = #{@osenv['Operating System']}")
      @logger.info("    kernel version      = #{@osenv['Kernel']}")
      @logger.info("    td-agent conf path  = #{@tdconf_path}")
      @logger.info("    td-agent conf file  = #{@tdconf}")
      @logger.info("    td-agent log path   = #{@tdlog_path}")
      @logger.info("    td-agent log        = #{@tdlog}")
    end

    # Runs `hostnamectl`, saves its raw output to <workdir>/os_env.output and
    # parses "key: value" lines.
    #
    # @return [Hash{String=>String}] stripped keys and values.
    def get_osenv()
      stdout, _stderr, _status = Open3.capture3('hostnamectl')
      os_dict = {}
      stdout.each_line do |l|
        # Split on the first colon only so values such as "cpe:/o:centos:7" stay intact.
        key, value = l.split(':', 2)
        next if value.nil? # line without a colon carries no key/value pair

        os_dict[key.strip] = value.strip
      end
      write_output('os_env.output', stdout)
      os_dict
    end

    # Runs `systemctl cat td-agent`, saves its raw output to
    # <workdir>/td-agent_env.output and extracts the variables declared on
    # `Environment=` lines.
    #
    # @return [Hash{String=>String}] variable name => value.
    def get_tdenv()
      stdout, _stderr, _status = Open3.capture3('systemctl cat td-agent')
      write_output('td-agent_env.output', stdout)
      env_dict = {}
      stdout.each_line do |l|
        l = l.strip
        # Only real Environment= directives; EnvironmentFile= is a different directive.
        next unless l.start_with?('Environment=')

        l.sub('Environment=', '').split.each do |assignment|
          # Split on the first '=' only so values containing '=' are preserved.
          key, value = assignment.gsub(/\A["']|["']\z/, '').split('=', 2)
          env_dict[key] = value unless value.nil?
        end
      end
      env_dict
    end

    # @return [Hash{Symbol=>String}] OS name, kernel and td-agent conf/log locations.
    def export_env()
      {
        :os => @osenv['Operating System'],
        :kernel => @osenv['Kernel'],
        :tdconf => @tdconf,
        :tdconf_path => @tdconf_path,
        :tdlog => @tdlog,
        :tdlog_path => @tdlog_path
      }
    end

    # Copies the td-agent config file under the working directory.
    # @return [String] path of the copy.
    def collect_tdconf()
      copy_file_to_workdir(@tdconf_path, @tdconf)
    end

    # Copies the content of the td-agent log directory under the working
    # directory, mirroring its absolute path.
    #
    # @return [Array<String>] copied files whose name starts with the td-agent
    #   log file name (current log plus rotated ones).
    def collect_tdlog()
      target = @workdir + @tdlog_path
      FileUtils.mkdir_p(target)
      # 'src/.' copies the directory content into target, so the result lands
      # where the glob below looks regardless of where the log directory lives.
      FileUtils.cp_r(File.join(@tdlog_path, '.'), target)
      Dir.glob(target + @tdlog + '*')
    end

    # Copies /etc/sysctl.conf under the working directory.
    # @return [String] path of the copy.
    def collect_sysctl()
      copy_file_to_workdir(@sysctl_path, @sysctl)
    end

    # Copies /var/log/messages under the working directory.
    # @return [String] path of the copy.
    def collect_oslog()
      copy_file_to_workdir(@oslog_path, @oslog)
    end

    # Saves the open-file limit to <workdir>/ulimit_n.output.
    # @return [String] path of the output file.
    def collect_ulimit()
      # ulimit is a shell builtin, so it has to be run through a shell.
      stdout, _stderr, _status = Open3.capture3('sh', '-c', 'ulimit -n')
      write_output('ulimit_n.output', stdout)
    end

    # Saves /proc/meminfo to <workdir>/meminfo.output.
    # @return [String] path of the output file.
    def collect_meminfo()
      capture_to_output('meminfo.output', "cat /proc/meminfo")
    end

    # Saves `netstat -n` to <workdir>/netstat_n.output.
    # @return [String] path of the output file.
    def collect_netstat_n()
      capture_to_output('netstat_n.output', "netstat -n")
    end

    # Saves `netstat -s` to <workdir>/netstat_s.output.
    # @return [String] path of the output file.
    def collect_netstat_s()
      capture_to_output('netstat_s.output', "netstat -s")
    end

    # Saves `date` followed by the time-sync source list to
    # <workdir>/ntp_info.output.
    #
    # @param command [String] "chrony" runs `chronyc sources`, "ntp" runs
    #   `ntpq -p`; any other value records only the date.
    # @return [String] path of the output file.
    def collect_ntp(command)
      output = @workdir + '/ntp_info.output'
      stdout_date, _stderr_date, _status_date = Open3.capture3("date")
      ntp_command = { 'chrony' => "chronyc sources", 'ntp' => "ntpq -p" }[command]
      stdout_ntp = ntp_command ? Open3.capture3(ntp_command).first : nil
      File.open(output, 'w') do |f|
        f.puts(stdout_date)
        f.puts(stdout_ntp)
      end
      output
    end

    # Saves the fluent-related gems reported by td-agent-gem to
    # <workdir>/tdgem_list.output.
    # @return [String] path of the output file.
    def collect_tdgems()
      capture_to_output('tdgem_list.output', "td-agent-gem list | grep fluent")
    end

    # Archives the <time> directory inside the output directory as
    # diagout-<time>.tar.gz.
    #
    # tar runs with the output directory as its working directory; the working
    # directory of the calling process is left untouched.
    #
    # @return [String] path of the archive.
    def compress_output()
      tar_file = 'diagout-' + @time_format + '.tar.gz'
      Open3.capture3('tar', 'cvfz', tar_file, @time_format, chdir: @output_dir)
      @output_dir + '/' + tar_file
    end

    private

    # Looks up a variable of the td-agent unit, failing with a readable message.
    def fetch_tdenv(name)
      value = @tdenv[name]
      raise "'#{name}' is not defined in the td-agent service environment" if value.nil?

      value
    end

    # Splits "/dir/sub/file" into ["/dir/sub/", "file"] at the last slash.
    def split_dir_and_name(path)
      idx = path.rindex('/')
      return ['', path] if idx.nil?

      [path[0..idx], path[(idx + 1)..-1]]
    end

    # Writes content to <workdir>/<name> and returns that path.
    def write_output(name, content)
      path = @workdir + '/' + name
      File.open(path, 'w') { |f| f.puts(content) }
      path
    end

    # Runs command and stores its standard output in <workdir>/<name>.
    def capture_to_output(name, command)
      stdout, _stderr, _status = Open3.capture3(command)
      write_output(name, stdout)
    end

    # Copies <dir><name> to <workdir><dir><name>, creating the directory first.
    def copy_file_to_workdir(dir, name)
      FileUtils.mkdir_p(@workdir + dir)
      FileUtils.cp(dir + name, @workdir + dir)
      @workdir + dir + name
    end
  end
end
@@ -0,0 +1,217 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'logger'
18
+ require 'fileutils'
19
+ require 'fluent/diagtool/collectutils'
20
+ require 'fluent/diagtool/maskutils'
21
+ require 'fluent/diagtool/validutils'
22
+ include Diagtool
23
+
24
module Diagtool
  # Drives a diagnostic run: parses the command options, collects td-agent and
  # OS information through CollectUtils, validates sysctl/ulimit through
  # ValidUtils, optionally masks the collected files through MaskUtils and
  # finally archives the result.
  class DiagUtils
    # Creates <output_dir>/<timestamp> as the working directory and opens a
    # console logger plus a file logger writing to <workdir>/diagtool.output.
    #
    # @param params [Hash] command options, see #parse_diagconf.
    # @raise [RuntimeError] when the options are invalid.
    def initialize(params)
      time = Time.new
      @time_format = time.strftime("%Y%m%d%0k%M%0S")
      @conf = parse_diagconf(params)
      @conf[:time] = @time_format
      @conf[:workdir] = @conf[:output_dir] + '/' + @time_format
      FileUtils.mkdir_p(@conf[:workdir])
      diaglog = @conf[:workdir] + '/diagtool.output'
      @masklog = './mask_' + @time_format + '.json'
      formatter = proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Diagtool] [#{severity}] #{msg}\n"
      }
      @logger = Logger.new(STDOUT, formatter: formatter)
      @logger_file = Logger.new(diaglog, formatter: formatter)
      diaglogger_info("Parsing command options...")
      diaglogger_info("   Option : Output directory = #{@conf[:output_dir]}")
      diaglogger_info("   Option : Mask = #{@conf[:mask]}")
      diaglogger_info("   Option : Word list = #{@conf[:words]}")
      diaglogger_info("   Option : Hash Seed = #{@conf[:seed]}")
    end

    # Runs the whole collection: td-agent logs, config and gems, OS log,
    # memory, time-sync, netstat, sysctl and ulimit information, then the
    # optional masking step and the final tar archive.
    def diagtool()
      loglevel = 'WARN'
      masking = @conf[:mask] == 'yes'
      diaglogger_info("Initializing parameters...")
      c = CollectUtils.new(@conf, loglevel)
      c_env = c.export_env()
      diaglogger_info("[Collect] Loading the environment parameters...")
      diaglogger_info("[Collect]    operating system    = #{c_env[:os]}")
      diaglogger_info("[Collect]    kernel version      = #{c_env[:kernel]}")
      diaglogger_info("[Collect]    td-agent conf path  = #{c_env[:tdconf_path]}")
      diaglogger_info("[Collect]    td-agent conf file  = #{c_env[:tdconf]}")
      diaglogger_info("[Collect]    td-agent log path   = #{c_env[:tdlog_path]}")
      diaglogger_info("[Collect]    td-agent log        = #{c_env[:tdlog]}")
      m = MaskUtils.new(@conf, loglevel)
      v = ValidUtils.new(loglevel)

      diaglogger_info("[Collect] Collecting log files of td-agent...")
      tdlog = c.collect_tdlog()
      diaglogger_info("[Collect] log files of td-agent are stored in #{tdlog}")

      diaglogger_info("[Collect] Collecting config file of td-agent...")
      tdconf = c.collect_tdconf()
      diaglogger_info("[Collect] config file is stored in #{tdconf}")

      diaglogger_info("[Collect] Collecting td-agent gem information...")
      tdgem = c.collect_tdgems()
      diaglogger_info("[Collect] td-agent gem information is stored in #{tdgem}")

      diaglogger_info("[Collect] Collecting OS log file...")
      oslog = c.collect_oslog()
      if masking
        diaglogger_info("[Mask] Masking OS log file : #{oslog}...")
        oslog = m.mask_tdlog(oslog, true)
      end
      diaglogger_info("[Collect] OS log file is stored in #{oslog}")

      diaglogger_info("[Collect] Collecting OS memory information...")
      meminfo = c.collect_meminfo()
      diaglogger_info("[Collect] OS memory information is stored in #{meminfo}")

      diaglogger_info("[Collect] Collecting date/time information...")
      ntp = nil
      if system('which chronyc > /dev/null 2>&1')
        ntp = c.collect_ntp("chrony")
      elsif system('which ntpq > /dev/null 2>&1')
        ntp = c.collect_ntp("ntp")
      end
      # Report the output file only when something was actually collected.
      if ntp
        diaglogger_info("[Collect] date/time information is stored in #{ntp}")
      else
        diaglogger_warn("[Collect] chrony/ntp does not exist. skip collecting date/time information")
      end

      diaglogger_info("[Collect] Collecting netstat information...")
      if system('which netstat > /dev/null 2>&1')
        netstat_n = c.collect_netstat_n()
        netstat_s = c.collect_netstat_s()
        if masking
          diaglogger_info("[Mask] Masking netstat file : #{netstat_n}...")
          netstat_n = m.mask_tdlog(netstat_n, true)
        end
        diaglogger_info("[Collect] netstat information is stored in #{netstat_n} and #{netstat_s}")
      else
        diaglogger_warn("[Collect] netstat does not exist. skip collecting netstat")
      end

      diaglogger_info("[Collect] Collecting sysctl information...")
      sysctl = c.collect_sysctl()
      diaglogger_info("[Collect] sysctl information is stored in #{sysctl}")

      diaglogger_info("[Valid] Validating sysctl information...")
      _ret, sysctl = v.valid_sysctl(sysctl)
      sysctl.each do |k, entry|
        summary = "#{k} => #{entry['value']}"
        if entry['result'] == 'correct'
          diaglogger_info("[Valid]    Sysctl: #{summary} is correct (recommendation is #{entry['recommend']})")
        elsif entry['result'] == 'incorrect'
          diaglogger_warn("[Valid]    Sysctl: #{summary} is incorrect (recommendation is #{entry['recommend']})")
        end
      end

      diaglogger_info("[Collect] Collecting ulimit information...")
      ulimit = c.collect_ulimit()
      diaglogger_info("[Collect] ulimit information is stored in #{ulimit}")

      diaglogger_info("[Valid] Validating ulimit information...")
      ret, rec, val = v.valid_ulimit(ulimit)
      if ret == true
        diaglogger_info("[Valid]    ulimit => #{val} is correct (recommendation is >#{rec})")
      else
        diaglogger_warn("[Valid]    ulimit => #{val} is incorrect (recommendation is >#{rec})")
      end

      if masking
        diaglogger_info("[Mask] Masking td-agent config file : #{tdconf}...")
        m.mask_tdlog(tdconf, true)
        tdlog.each do |file|
          diaglogger_info("[Mask] Masking td-agent log file : #{file}...")
          # Rotated logs are gzip-compressed and need to be decompressed first.
          if File.basename(file).include?(".gz")
            m.mask_tdlog_gz(file, true)
          else
            m.mask_tdlog(file, true)
          end
        end

        diaglogger_info("[Mask] Export mask log file : #{@masklog}")
        m.export_masklog(@masklog)
      end

      tar_file = c.compress_output()
      diaglogger_info("[Collect] Generate tar file #{tar_file}")
    end

    # Validates the command options and turns them into the run configuration.
    #
    # @param params [Hash] reads :output (existing directory, required),
    #   :mask ('yes' or 'no'), :"word-list" (Array of words to mask),
    #   :"word-file" (file with one word per line) and :"hash-seed".
    # @return [Hash] :output_dir, :mask, :words (de-duplicated), :wfile, :seed.
    # @raise [RuntimeError] when :output is missing or not a directory, :mask
    #   has another value, or :"word-file" does not exist.
    def parse_diagconf(params)
      options = {
        :output_dir => '',
        :mask => 'no',
        :words => [],
        :wfile => '',
        :seed => ''
      }

      output = params[:output]
      raise "output directory '-o' must be specified" if output.nil?
      raise "output directory '#{output}' does not exist" unless Dir.exist?(output)

      options[:output_dir] = output

      mask = params[:mask]
      unless mask.nil?
        unless mask == 'yes' || mask == 'no'
          raise "invalid arguments '#{mask}' : input of '-m|--mask' should be 'yes' or 'no'"
        end

        options[:mask] = mask
      end

      # Work on a copy so the words read from the file are not appended to the
      # caller's list.
      word_list = params[:"word-list"]
      options[:words] = word_list.dup unless word_list.nil?

      word_file = params[:"word-file"]
      unless word_file.nil?
        raise "#{word_file} : No such file or directory" unless File.exist?(word_file)

        # chomp drops "\n" as well as "\r\n" line endings.
        File.foreach(word_file) { |l| options[:words] << l.chomp }
      end
      options[:words] = options[:words].uniq

      seed = params[:"hash-seed"]
      options[:seed] = seed unless seed.nil?
      options
    end

    # Logs a debug message to both the console and the diagtool output file.
    def diaglogger_debug(str)
      @logger.debug(str)
      @logger_file.debug(str)
    end

    # Logs an info message to both the console and the diagtool output file.
    def diaglogger_info(str)
      @logger.info(str)
      @logger_file.info(str)
    end

    # Logs a warning to both the console and the diagtool output file.
    def diaglogger_warn(str)
      @logger.warn(str)
      @logger_file.warn(str)
    end

    # Logs an error to both the console and the diagtool output file.
    def diaglogger_error(str)
      @logger.error(str)
      @logger_file.error(str)
    end
  end
end
@@ -0,0 +1,314 @@
1
+ #
2
+ # Fluentd
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'digest'
18
+ require 'fileutils'
19
+ require 'logger'
20
+ require 'open3'
21
+ require 'json'
22
+
23
module Diagtool
  # Replaces IPv4 addresses, FQDNs and user supplied words found in text files
  # with "<type>_<md5>" tokens and records every replacement so that it can be
  # exported as a JSON mask log.
  class MaskUtils
    # Quote characters (optionally backslash-escaped) removed from a chunk
    # before it is classified.
    QUOTE_PATTERN = /\\\"|\'|\"|\\\'/.freeze
    IPV4_PATTERN = /\A\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/.freeze
    # At least two dot-terminated labels followed by an alphabetic suffix.
    FQDN_PATTERN = /\A\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}\z/.freeze

    # @param conf [Hash] reads :words (exact words to mask) and :seed (string
    #   prepended to a value before it is hashed).
    # @param log_level [String, Integer] level passed to Logger.
    def initialize(conf, log_level)
      @words = conf[:words]
      @logger = Logger.new(STDOUT, level: log_level, formatter: proc { |severity, datetime, _progname, msg|
        "#{datetime}: [Maskutils] [#{severity}] #{msg}\n"
      })
      @logger.debug("Initialize Maskutils: sanitized word = #{conf[:words]}")
      @hash_seed = conf[:seed]
      # Position of the chunk currently inspected: file, line and chunk ids.
      @id = {
        :fid => '',
        :lid => '',
        :cid => ''
      }
      # Auto-vivifying nested hash: file => "Line<l>-<c>" => original/mask.
      @masklog = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
    end

    # Writes a masked copy of input_file to "<input_file>.mask".
    #
    # Every line is re-encoded from binary to UTF-8 with unconvertible bytes
    # dropped before it is inspected.
    #
    # @param input_file [String] path of a plain text file.
    # @param clean [Boolean] when exactly true, input_file is removed afterwards.
    # @return [String] path of the masked file.
    def mask_tdlog(input_file, clean)
      output_file = input_file + '.mask'
      File.open(output_file, 'w') do |out|
        File.foreach(input_file).with_index do |line, line_id|
          line = line.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '') # temporary
          write_masked_line(out, input_file, line_id, line)
        end
      end
      FileUtils.rm(input_file) if clean == true
      output_file
    end

    # Writes a masked, uncompressed copy of a gzip file to "<input_file>.mask".
    #
    # The file is decompressed with `gunzip -c`; a failure is logged as a
    # warning and whatever was decompressed is still processed.
    #
    # @param input_file [String] path of a gzip-compressed text file.
    # @param clean [Boolean] when exactly true, input_file is removed afterwards.
    # @return [String] path of the masked file.
    def mask_tdlog_gz(input_file, clean)
      output_file = input_file + '.mask'
      begin
        # Argument-vector form: the file name never goes through a shell.
        text, stderr, status = Open3.capture3('gunzip', '-c', input_file)
        @logger.warn("gunzip failed for #{input_file}: #{stderr.to_s.chomp}") unless status.success?
      rescue SystemCallError => e
        @logger.warn("gunzip could not be executed for #{input_file}: #{e.message}")
        text = ''
      end
      File.open(output_file, 'w') do |out|
        text.each_line.with_index do |line, line_id|
          # Drop invalid byte sequences so that splitting cannot raise.
          write_masked_line(out, input_file, line_id, line.scrub(''))
        end
      end
      FileUtils.rm(input_file) if clean == true
      output_file
    end

    # Masks one line. The line is split on whitespace; a token containing
    # commas is further split on commas and each part is masked on its own.
    #
    # @param line [String]
    # @return [String] masked tokens joined with single spaces, without the
    #   trailing newline.
    def mask_tdlog_inspector(line)
      @logger.debug("Input Line: #{line.chomp}")
      tokens = line.split(/\s/)
      @logger.debug("Splitted Line: #{tokens}")
      contents = tokens.each_with_index.map do |token, i|
        @logger.debug("Splitted Line #{i}: #{token}")
        if token.include?(',')
          # limit -1 keeps empty parts, so leading/trailing commas survive.
          parts = token.split(',', -1).each_with_index.map do |part, cnt|
            @id[:cid] = i.to_s + '-' + cnt.to_s
            mask_token(part)
          end
          parts.join(',')
        else
          @id[:cid] = i.to_s
          mask_token(token)
        end
      end
      line_masked = contents.join(' ')
      @logger.debug("Masked Line: #{line_masked}")
      line_masked
    end

    # Masks a bare token, tolerating a closing ">" or "]" which is removed
    # before classification and appended again after masking.
    #
    # @return [Array(Boolean, String)] whether the token was masked and the
    #   resulting token (the input itself when nothing was masked).
    def mask_direct_pattern(str)
      mask_with_closer(str, ['>', ']'])
    end

    # Masks the first maskable component of "<scheme>://<host>[:<port>...]".
    #
    # @return [Array(Boolean, String)] masked flag and resulting token.
    def mask_url_pattern(str)
      is_mask = false
      url = str.split('://', -1)
      url.each_with_index do |segment, idx|
        if segment.include?(':')
          address = segment.split(':', -1)
          address.each_with_index do |part, pos|
            is_mask, masked = mask_with_closer(part, [']', '>'])
            next unless is_mask

            address[pos] = masked
            break
          end
          url[idx] = address.join(':')
        else
          is_mask, masked = mask_with_closer(segment, [']', '>'])
          url[idx] = masked if is_mask
        end
        break if is_mask
      end
      [is_mask, url.join('://')]
    end

    # Masks the first maskable component of "key=value" (e.g. host=<address>).
    #
    # @return [Array(Boolean, String)] masked flag and resulting token.
    def mask_equal_pattern(str)
      mask_first_component(str, '=')
    end

    # Masks the first maskable component of "<address>:<port>".
    #
    # @return [Array(Boolean, String)] masked flag and resulting token.
    def mask_colon_pattern(str)
      mask_first_component(str, ':')
    end

    # Masks the first maskable component of a "/"-separated token.
    #
    # @return [Array(Boolean, String)] masked flag and resulting token.
    def mask_slash_pattern(str)
      mask_first_component(str, '/')
    end

    # @return [Boolean] true when str is four dot-separated groups of 1-3 digits.
    def is_ipv4?(str)
      IPV4_PATTERN.match?(str)
    end

    # @return [Boolean] true when str has at least two dot-terminated labels
    #   followed by an alphabetic suffix (e.g. "host.example.com").
    def is_fqdn?(str)
      FQDN_PATTERN.match?(str)
    end

    # @return [Boolean] true when str equals one of the configured words.
    def is_words?(str)
      Array(@words).include?(str)
    end

    # Classifies a chunk (after removing quote characters) as IPv4, FQDN or
    # configured word and, on a match, replaces it with
    # "<type>_<md5(seed + chunk)>" and records the replacement.
    #
    # @param str [#to_s]
    # @return [Array(Boolean, String, String)] masked flag, the chunk (without
    #   quotes when masked) and the mask (the chunk itself when not masked).
    def mask_ipv4_fqdn_words(str)
      str = str.to_s
      bare = str.gsub(QUOTE_PATTERN, '')
      mtype =
        if is_ipv4?(bare)
          'IPv4'
        elsif is_fqdn?(bare)
          'FQDN'
        elsif is_words?(bare)
          'Word'
        end
      return false, str, str unless mtype

      str_mask = mtype + '_' + Digest::MD5.hexdigest(@hash_seed.to_s + bare)
      put_masklog(bare, str_mask)
      [true, bare, str_mask]
    end

    # Records a replacement under the current file/line/chunk position.
    def put_masklog(str, str_mask)
      uid = "Line#{@id[:lid]}-#{@id[:cid]}"
      @masklog[@id[:fid]][uid]['original'] = str
      @masklog[@id[:fid]][uid]['mask'] = str_mask
    end

    # Writes all recorded replacements to output_file as pretty-printed JSON.
    def export_masklog(output_file)
      masklog_json = JSON.pretty_generate(@masklog)
      File.open(output_file, 'w') do |f|
        f.puts(masklog_json)
      end
    end

    private

    # Updates the current position and writes the masked form of line to out.
    def write_masked_line(out, file_id, line_id, line)
      @id[:fid] = file_id
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end

    # Picks the pattern handler for a token by the separators it contains and
    # returns the masked token, or the token itself when nothing matched.
    def mask_token(token)
      label, handler =
        if token.include?('://') ## Mask <http/dRuby>://<address:ip/hostname>:<port>
          ['URL', :mask_url_pattern]
        elsif token.include?('=')
          ['Equal', :mask_equal_pattern]
        elsif token.include?(':') ## Mask <address:ip/hostname>:<port>
          ['Colon', :mask_colon_pattern]
        elsif token.include?('/')
          ['Slash', :mask_slash_pattern]
        else
          ['Direct', :mask_direct_pattern]
        end
      is_mask, masked = send(handler, token)
      return token unless is_mask

      @logger.debug("    #{label} Pattern Detected: #{token} -> #{masked}")
      masked
    end

    # Masks chunk after removing every occurrence of the first character of
    # closers it contains; that character is appended once to the mask.
    def mask_with_closer(chunk, closers)
      closer = closers.find { |c| chunk.include?(c) }
      candidate = closer ? chunk.gsub(closer, '') : chunk
      is_mask, _bare, masked = mask_ipv4_fqdn_words(candidate)
      return [false, chunk] unless is_mask

      [true, closer ? masked + closer : masked]
    end

    # Splits str on sep (keeping empty components so the token can be rebuilt
    # exactly), masks the first maskable component and joins the parts again.
    def mask_first_component(str, sep)
      is_mask = false
      parts = str.split(sep, -1)
      parts.each_with_index do |part, idx|
        is_mask, _bare, masked = mask_ipv4_fqdn_words(part)
        next unless is_mask

        parts[idx] = masked
        break
      end
      [is_mask, parts.join(sep)]
    end
  end
end