logstash-input-file 4.0.5 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -3
- data/JAR_VERSION +1 -0
- data/docs/index.asciidoc +195 -37
- data/lib/filewatch/bootstrap.rb +74 -0
- data/lib/filewatch/discoverer.rb +94 -0
- data/lib/filewatch/helper.rb +65 -0
- data/lib/filewatch/observing_base.rb +97 -0
- data/lib/filewatch/observing_read.rb +23 -0
- data/lib/filewatch/observing_tail.rb +22 -0
- data/lib/filewatch/read_mode/handlers/base.rb +81 -0
- data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
- data/lib/filewatch/read_mode/processor.rb +117 -0
- data/lib/filewatch/settings.rb +67 -0
- data/lib/filewatch/sincedb_collection.rb +215 -0
- data/lib/filewatch/sincedb_record_serializer.rb +70 -0
- data/lib/filewatch/sincedb_value.rb +87 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
- data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
- data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
- data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
- data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
- data/lib/filewatch/tail_mode/processor.rb +209 -0
- data/lib/filewatch/watch.rb +107 -0
- data/lib/filewatch/watched_file.rb +226 -0
- data/lib/filewatch/watched_files_collection.rb +84 -0
- data/lib/filewatch/winhelper.rb +65 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
- data/lib/logstash/inputs/file.rb +162 -107
- data/lib/logstash/inputs/file_listener.rb +61 -0
- data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
- data/logstash-input-file.gemspec +5 -4
- data/spec/filewatch/buftok_spec.rb +24 -0
- data/spec/filewatch/reading_spec.rb +128 -0
- data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
- data/spec/filewatch/spec_helper.rb +120 -0
- data/spec/filewatch/tailing_spec.rb +440 -0
- data/spec/filewatch/watched_file_spec.rb +38 -0
- data/spec/filewatch/watched_files_collection_spec.rb +73 -0
- data/spec/filewatch/winhelper_spec.rb +22 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/no-final-newline.log +2 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
- data/spec/inputs/file_read_spec.rb +155 -0
- data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
- metadata +96 -28
@@ -0,0 +1,94 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"

module FileWatch
  # Expands each registered path/glob and decides, per matched file, whether
  # it should enter the watched_files collection (and be paired with a
  # sincedb record) or be skipped because it is excluded or ignorable.
  class Discoverer
    include LogStash::Util::Loggable

    def initialize(watched_files_collection, sincedb_collection, settings)
      @watching = []
      @exclude = []
      @watched_files_collection = watched_files_collection
      @sincedb_collection = sincedb_collection
      @settings = settings
      @settings.exclude.each { |pattern| @exclude.push(pattern) }
    end

    # Register a path (or glob) and run an immediate discovery pass for it.
    # Registering the same path twice is a no-op. Returns self.
    def add_path(path)
      unless @watching.include?(path)
        @watching << path
        discover_files(path)
      end
      self
    end

    # Re-scan every registered path for newly appeared files.
    def discover
      @watching.each { |path| discover_files(path) }
    end

    private

    # True when the file matches any exclude pattern; a match also flips the
    # watched_file into the unwatched state so it stays skipped.
    def can_exclude?(watched_file, new_discovery)
      pattern = @exclude.find { |p| watched_file.pathname.fnmatch?(p) }
      return false if pattern.nil?
      if new_discovery
        logger.debug("Discoverer can_exclude?: #{watched_file.path}: skipping " +
          "because it matches exclude #{pattern}")
      end
      watched_file.unwatch
      true
    end

    # Glob the path (falling back to the literal path when the glob matches
    # nothing), wrap each regular non-symlink file in a WatchedFile, and add
    # newly discovered ones to the collection plus the sincedb association.
    def discover_files(path)
      matches = Dir.glob(path)
      matches = [path] if matches.empty?
      logger.debug("Discoverer found files, count: #{matches.size}")
      matches.each do |file|
        logger.debug("Discoverer found file, path: #{file}")
        pathname = Pathname.new(file)
        next if !pathname.file? || pathname.symlink?
        new_discovery = false
        watched_file = @watched_files_collection.watched_file_by_path(file)
        if watched_file.nil?
          logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
          new_discovery = true
          watched_file = WatchedFile.new(pathname, pathname.stat, @settings)
        end
        # already-unwatched or excluded files are skipped outright
        next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery)

        if new_discovery
          if watched_file.file_ignorable?
            logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
            # on discovery, ignorable files get the ignored state, which
            # records the current size from the internal stat so the existing
            # contents are never read (a normal new discovery starts at size
            # zero); they are still added below so the next periodic
            # discovery knows they are present
            watched_file.ignore
          end
          # track the discovered file and adjust the sincedb collection
          @watched_files_collection.add(watched_file)
          # sincedb records loaded from the persistence file start out
          # unassociated; associate pairs this watched_file with its record
          # when one exists (a value can be unassociated, associated with
          # this watched_file, or associated with a different one)
          @sincedb_collection.associate(watched_file)
        end
        # at this point the file is tracked and in the db, but not yet opened
        # or being processed
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
# code downloaded from Ruby on Rails 4.2.1
# https://raw.githubusercontent.com/rails/rails/v4.2.1/activesupport/lib/active_support/core_ext/file/atomic.rb
# change method name to avoid borking active_support and vice versa
require 'fileutils'

module FileHelper
  extend self
  # Write to a file atomically. Useful for situations where you don't
  # want other processes or threads to see half-written files.
  #
  #   FileHelper.write_atomically('important.file') do |file|
  #     file.write('hello')
  #   end
  #
  # The content is first written to a uniquely named temp file in the same
  # directory (so the final rename is atomic on POSIX filesystems) and only
  # then renamed over +file_name+. If the block raises, the temp file is
  # removed, the target file is left untouched, and the error propagates.
  # Returns the block's return value.
  def write_atomically(file_name)

    if File.exist?(file_name)
      # Get original file permissions
      old_stat = File.stat(file_name)
    else
      # If not possible, probe which are the default permissions in the
      # destination directory.
      old_stat = probe_stat_in(File.dirname(file_name))
    end

    mode = old_stat ? old_stat.mode : nil

    # Create temporary file with identical permissions
    temp_file = File.new(rand_filename(file_name), "w", mode)
    temp_file.binmode
    begin
      return_val = yield temp_file
    rescue Exception
      # clean up the half-written temp file before re-raising, so a failed
      # write never leaves stray ".<pid>.<rand>" files next to the target
      temp_file.close unless temp_file.closed?
      File.unlink(temp_file.path) if File.exist?(temp_file.path)
      raise
    end
    temp_file.close

    # Overwrite original file with temp file
    File.rename(temp_file.path, file_name)

    # Unable to get permissions of the original file => return
    return return_val if old_stat.nil?

    # Set correct uid/gid on new file (old_stat is non-nil past the guard)
    File.chown(old_stat.uid, old_stat.gid, file_name)

    return_val
  end

  # True for character or block device files (which cannot be replaced
  # atomically via rename).
  def device?(file_name)
    File.chardev?(file_name) || File.blockdev?(file_name)
  end

  # Private utility method.
  # Determine the default permissions of +dir+ by touching (and removing) a
  # probe file there; returns nil when the directory is not writable.
  def probe_stat_in(dir) #:nodoc:
    basename = rand_filename(".permissions_check")
    file_name = File.join(dir, basename)
    FileUtils.touch(file_name)
    File.stat(file_name)
  rescue
    # swallow errors (e.g. read-only dir); caller falls back to default mode
  ensure
    FileUtils.rm_f(file_name) if File.exist?(file_name)
  end

  # Build a collision-resistant temp file name next to +prefix+ using the
  # current thread id, process id and a random suffix.
  def rand_filename(prefix)
    [ prefix, Thread.current.object_id, Process.pid, rand(1000000) ].join('.')
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# encoding: utf-8

## Interface API topology
# ObservingBase (this module)
#   mixin supplying the shared constructor and external API used by the File
#   Input Plugin; calls build_specific_processor, implemented by the
#   including class (ObservingRead or ObservingTail).
# ObservingRead and ObservingTail
#   expose subscribe(observer = NullObserver.new) and supply a
#   ReadMode::Processor or TailMode::Processor via
#   build_specific_processor(settings). A processor implements:
#     initialize_handlers(sincedb_collection, observer) - called when the
#       observer subscribes; builds mode-specific handler instances that
#       hold references to the observer
#     process_closed(watched_files)  - mode-specific handling, closed state
#     process_ignored(watched_files) - mode-specific handling, ignored state
#     process_watched(watched_files) - mode-specific handling, watched state
#     process_active(watched_files)  - mode-specific handling, active state
#   These methods can call "handler" methods that delegate to the specific
#   Handler classes.
# TailMode::Handlers module namespace
#   Handler classes for Tail mode file lifecycle "events".
#   TailMode::Handlers::Base defines:
#     handle(watched_file) - calls handle_specifically from a subclass
#     handle_specifically(watched_file) - noop here
#     update_existing_specifically(watched_file, sincedb_value) - noop here
#   Each handler extends Base with specific versions of those two methods.
# ReadMode::Handlers module namespace
#   Handler classes for Read mode file lifecycle "events".
#   ReadMode::Handlers::Base defines:
#     handle(watched_file) - calls handle_specifically from a subclass
#     handle_specifically(watched_file) - noop here
#   Each handler extends Base with a specific handle_specifically.

module FileWatch
  module ObservingBase
    attr_reader :watch, :sincedb_collection, :settings

    def initialize(opts={})
      defaults = {
        :sincedb_write_interval => 10,
        :stat_interval => 1,
        :discover_interval => 5,
        :exclude => [],
        :start_new_files_at => :end,
        :delimiter => "\n",
        :file_chunk_count => FIXNUM_MAX,
        :file_sort_by => "last_modified",
        :file_sort_direction => "asc",
      }
      options = defaults.merge(opts)
      # the plugin layer is responsible for always supplying a sincedb path
      raise NoSinceDBPathGiven.new("No sincedb_path set in options. This should have been added in the main LogStash::Inputs::File class") unless options.include?(:sincedb_path)
      @settings = Settings.from_options(options)
      build_watch_and_dependencies
    end

    # Wire up the collaborators: file collection, sincedb (opened now),
    # discoverer, the Watch, and the mode-specific processor.
    def build_watch_and_dependencies
      logger.info("START, creating Discoverer, Watch with file and sincedb collections")
      files = WatchedFilesCollection.new(@settings)
      @sincedb_collection = SincedbCollection.new(@settings)
      @sincedb_collection.open
      @watch = Watch.new(Discoverer.new(files, @sincedb_collection, @settings), files, @settings)
      @watch.add_processor build_specific_processor(@settings)
    end

    # Begin watching a path or glob.
    def watch_this(path)
      @watch.watch(path)
    end

    # Persist the sincedb; can be invoked from the file input.
    def sincedb_write(reason=nil)
      @sincedb_collection.write(reason)
    end

    # quit is a sort-of finalizer: call it for clean up before the instance
    # is disposed of.
    def quit
      logger.info("QUIT - closing all files and shutting down.")
      @watch.quit # <-- should close all the files
      # sincedb_write("shutting down")
    end

    # External code calls close_file(path) once it is completely done with a
    # file; other files or folders may still be watched. Caution: once
    # unwatched, a file can't be watched again unless a new instance of this
    # class begins watching — the sysadmin should rename, move or delete it.
    def close_file(path)
      @watch.unwatch(path)
      sincedb_write
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"
require_relative "read_mode/processor"

module FileWatch
  # Read-mode realization of the ObservingBase contract: subscribing drives
  # each discovered file through to EOF and then flushes the sincedb.
  class ObservingRead
    include LogStash::Util::Loggable
    include ObservingBase

    # observer here is the file input
    def subscribe(observer)
      watch.subscribe(observer, sincedb_collection)
      sincedb_collection.write("read mode subscribe complete - shutting down")
    end

    private

    # supplies the mode-specific processor required by ObservingBase
    def build_specific_processor(settings)
      ReadMode::Processor.new(settings)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"
require_relative 'tail_mode/processor'

module FileWatch
  # Tail-mode realization of the ObservingBase contract: subscribing keeps
  # following file growth, then flushes the sincedb on shutdown.
  class ObservingTail
    include LogStash::Util::Loggable
    include ObservingBase

    # observer here is the file input
    def subscribe(observer)
      watch.subscribe(observer, sincedb_collection)
      sincedb_collection.write("tail mode subscribe complete - shutting down")
    end

    private

    # supplies the mode-specific processor required by ObservingBase
    def build_specific_processor(settings)
      TailMode::Processor.new(settings)
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"

module FileWatch module ReadMode module Handlers
  # Common machinery for read-mode lifecycle handlers: attaches the
  # observer's listener to a watched_file, opens files with throttled
  # failure warnings, and keeps the sincedb collection in step with the
  # file being read. Subclasses override handle_specifically.
  class Base
    include LogStash::Util::Loggable

    attr_reader :sincedb_collection

    def initialize(sincedb_collection, observer, settings)
      @settings = settings
      @sincedb_collection = sincedb_collection
      @observer = observer
    end

    # Entry point called by the processor: ensures the watched_file has a
    # listener from the observer, then delegates to the subclass.
    def handle(watched_file)
      logger.debug("handling: #{watched_file.path}")
      unless watched_file.has_listener?
        watched_file.set_listener(@observer)
      end
      handle_specifically(watched_file)
    end

    def handle_specifically(watched_file)
      # some handlers don't need to define this method
    end

    private

    # Try to open the file. On failure, warn at most once per
    # OPEN_WARN_INTERVAL seconds per file and put the file back into the
    # watched state for a later retry. Returns true when the file is open
    # (and notifies the listener that it opened).
    def open_file(watched_file)
      return true if watched_file.file_open?
      logger.debug("opening #{watched_file.path}")
      begin
        watched_file.open
      rescue
        # don't emit this message too often. if a file that we can't
        # read is changing a lot, we'll try to open it more often, and spam the logs.
        now = Time.now.to_i
        # demoted from warn: an unconditional warn here fired on EVERY failed
        # open, defeating the OPEN_WARN_INTERVAL throttling implemented below
        logger.debug("opening OPEN_WARN_INTERVAL is '#{OPEN_WARN_INTERVAL}'")
        if watched_file.last_open_warning_at.nil? || now - watched_file.last_open_warning_at > OPEN_WARN_INTERVAL
          logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
          watched_file.last_open_warning_at = now
        else
          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
        end
        watched_file.watch # set it back to watch so we can try it again
      end
      if watched_file.file_open?
        watched_file.listener.opened
        true
      else
        false
      end
    end

    # Find or create the sincedb record for this watched_file and sync the
    # file's read position from it, then mark initial processing complete.
    def add_or_update_sincedb_collection(watched_file)
      sincedb_value = @sincedb_collection.find(watched_file)
      if sincedb_value.nil?
        add_new_value_sincedb_collection(watched_file)
      elsif sincedb_value.watched_file == watched_file
        update_existing_sincedb_collection_value(watched_file, sincedb_value)
      else
        logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer")
      end
      watched_file.initial_completed
    end

    # Resume from the persisted position recorded for this file.
    def update_existing_sincedb_collection_value(watched_file, sincedb_value)
      logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
      # sincedb_value is the source of truth
      watched_file.update_bytes_read(sincedb_value.position)
    end

    # First sighting: start a fresh record at position 0 keyed by the
    # file's sincedb key.
    def add_new_value_sincedb_collection(watched_file)
      sincedb_value = SincedbValue.new(0)
      sincedb_value.set_watched_file(watched_file)
      logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
      sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
    end
  end
end end end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8

module FileWatch module ReadMode module Handlers
  # Reads a plain (uncompressed) file in delimiter-bounded chunks, emitting
  # each extracted line to the file's listener and advancing the sincedb
  # position as lines are delivered. On EOF the file is closed, the listener
  # is told the file is done, and the file leaves the watched set.
  class ReadFile < Base
    def handle_specifically(watched_file)
      if open_file(watched_file)
        # only create/sync a sincedb record when none is registered yet
        add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
        # if the `file_chunk_count` * `file_chunk_size` is less than the file size
        # then this method will be executed multiple times
        # and the seek is moved to just after a line boundary as recorded in the sincedb
        # for each run - so we reset the buffer
        watched_file.reset_buffer
        watched_file.file_seek(watched_file.bytes_read)
        changed = false
        # bounded loop: at most file_chunk_count chunks per invocation, so one
        # huge file cannot starve the other watched files
        @settings.file_chunk_count.times do
          begin
            lines = watched_file.buffer_extract(watched_file.file_read(@settings.file_chunk_size))
            logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
            changed = true
            lines.each do |line|
              watched_file.listener.accept(line)
              # position advances by the line plus its (stripped) delimiter
              sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
            end
          rescue EOFError
            # flush the buffer now in case there is no final delimiter
            line = watched_file.buffer.flush
            watched_file.listener.accept(line) unless line.empty?
            # end-of-file lifecycle: notify, close, drop the sincedb
            # association and stop watching this file
            watched_file.listener.eof
            watched_file.file_close
            sincedb_collection.unset_watched_file(watched_file)
            watched_file.listener.deleted
            watched_file.unwatch
            break
          rescue Errno::EWOULDBLOCK, Errno::EINTR
            # transient read interruption: report and retry on a later pass
            watched_file.listener.error
            break
          rescue => e
            logger.error("read_to_eof: general error reading #{watched_file.path} - error: #{e.inspect}")
            watched_file.listener.error
            break
          end
        end
        # ask for a sincedb persist only if at least one chunk was processed
        sincedb_collection.request_disk_flush if changed
      end
    end
  end
end end end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
require 'java'
java_import java.io.InputStream
java_import java.io.InputStreamReader
java_import java.io.FileInputStream
java_import java.io.BufferedReader
java_import java.util.zip.GZIPInputStream
java_import java.util.zip.ZipException

module FileWatch module ReadMode module Handlers
  # Reads a gzip-compressed file in a single pass (JRuby: streams through
  # java.util.zip.GZIPInputStream) and emits each decompressed line, decoded
  # as UTF-8, to the listener. Unlike ReadFile there is no chunked/resumable
  # reading — the whole file is consumed or nothing is recorded.
  class ReadZipFile < Base
    def handle_specifically(watched_file)
      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
      # can't really stripe read a zip file, its all or nothing.
      watched_file.listener.opened
      # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
      # should we track lines read in the sincedb and
      # fast forward through the lines until we reach unseen content?
      # meaning that we can quit in the middle of a zip file
      begin
        # stream stack: file -> gunzip -> UTF-8 decode -> line buffering
        file_stream = FileInputStream.new(watched_file.path)
        gzip_stream = GZIPInputStream.new(file_stream)
        decoder = InputStreamReader.new(gzip_stream, "UTF-8")
        buffered = BufferedReader.new(decoder)
        while (line = buffered.readLine(false))
          watched_file.listener.accept(line)
        end
        watched_file.listener.eof
      rescue ZipException => e
        logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
        watched_file.listener.error
      else
        # success only: record the whole file as read, persist the sincedb,
        # then retire the file from watching
        sincedb_collection.store_last_read(watched_file.sincedb_key, watched_file.last_stat_size)
        sincedb_collection.request_disk_flush
        watched_file.listener.deleted
        watched_file.unwatch
      ensure
        # rescue each close individually so all close attempts are tried
        # (streams are closed innermost-first)
        close_and_ignore_ioexception(buffered) unless buffered.nil?
        close_and_ignore_ioexception(decoder) unless decoder.nil?
        close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil?
        close_and_ignore_ioexception(file_stream) unless file_stream.nil?
      end
      sincedb_collection.unset_watched_file(watched_file)
    end

    private

    # Close a Java Closeable, logging (never raising) any failure so the
    # remaining streams in the ensure block still get their close attempt.
    def close_and_ignore_ioexception(closeable)
      begin
        closeable.close
      rescue Exception => e # IOException can be thrown by any of the Java classes that implement the Closable interface.
        logger.warn("Ignoring an IOException when closing an instance of #{closeable.class.name}", "exception" => e)
      end
    end
  end
end end end
|