logstash-input-file 4.0.5 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -3
- data/JAR_VERSION +1 -0
- data/docs/index.asciidoc +195 -37
- data/lib/filewatch/bootstrap.rb +74 -0
- data/lib/filewatch/discoverer.rb +94 -0
- data/lib/filewatch/helper.rb +65 -0
- data/lib/filewatch/observing_base.rb +97 -0
- data/lib/filewatch/observing_read.rb +23 -0
- data/lib/filewatch/observing_tail.rb +22 -0
- data/lib/filewatch/read_mode/handlers/base.rb +81 -0
- data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
- data/lib/filewatch/read_mode/processor.rb +117 -0
- data/lib/filewatch/settings.rb +67 -0
- data/lib/filewatch/sincedb_collection.rb +215 -0
- data/lib/filewatch/sincedb_record_serializer.rb +70 -0
- data/lib/filewatch/sincedb_value.rb +87 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
- data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
- data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
- data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
- data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
- data/lib/filewatch/tail_mode/processor.rb +209 -0
- data/lib/filewatch/watch.rb +107 -0
- data/lib/filewatch/watched_file.rb +226 -0
- data/lib/filewatch/watched_files_collection.rb +84 -0
- data/lib/filewatch/winhelper.rb +65 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
- data/lib/logstash/inputs/file.rb +162 -107
- data/lib/logstash/inputs/file_listener.rb +61 -0
- data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
- data/logstash-input-file.gemspec +5 -4
- data/spec/filewatch/buftok_spec.rb +24 -0
- data/spec/filewatch/reading_spec.rb +128 -0
- data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
- data/spec/filewatch/spec_helper.rb +120 -0
- data/spec/filewatch/tailing_spec.rb +440 -0
- data/spec/filewatch/watched_file_spec.rb +38 -0
- data/spec/filewatch/watched_files_collection_spec.rb +73 -0
- data/spec/filewatch/winhelper_spec.rb +22 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/no-final-newline.log +2 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
- data/spec/inputs/file_read_spec.rb +155 -0
- data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
- metadata +96 -28
@@ -0,0 +1,94 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"

module FileWatch
  # Given a path or glob, prepares for and discovers files to watch.
  # Files that are not excluded or ignorable are added to the
  # watched_files collection and associated with a sincedb entry when
  # one can be found.
  class Discoverer
    include LogStash::Util::Loggable

    def initialize(watched_files_collection, sincedb_collection, settings)
      @watching = []
      @exclude = []
      @watched_files_collection = watched_files_collection
      @sincedb_collection = sincedb_collection
      @settings = settings
      @settings.exclude.each { |p| @exclude << p }
    end

    # Registers a path (or glob) and immediately discovers its files.
    def add_path(path)
      return if @watching.member?(path)
      @watching << path
      discover_files(path)
      self
    end

    # Re-runs discovery across every registered path.
    def discover
      @watching.each { |path| discover_files(path) }
    end

    private

    # True when the file matches one of the exclude patterns; an excluded
    # file is also transitioned to the unwatched state.
    def can_exclude?(watched_file, new_discovery)
      @exclude.each do |pattern|
        next unless watched_file.pathname.fnmatch?(pattern)
        if new_discovery
          logger.debug("Discoverer can_exclude?: #{watched_file.path}: skipping " +
            "because it matches exclude #{pattern}")
        end
        watched_file.unwatch
        return true
      end
      false
    end

    def discover_files(path)
      globbed = Dir.glob(path)
      globbed = [path] if globbed.empty?
      logger.debug("Discoverer found files, count: #{globbed.size}")
      globbed.each do |file|
        logger.debug("Discoverer found file, path: #{file}")
        pathname = Pathname.new(file)
        # only plain, non-symlinked files are candidates
        next unless pathname.file?
        next if pathname.symlink?

        new_discovery = false
        watched_file = @watched_files_collection.watched_file_by_path(file)
        if watched_file.nil?
          logger.debug("Discoverer discover_files: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
          new_discovery = true
          watched_file = WatchedFile.new(pathname, pathname.stat, @settings)
        end
        # if it is already unwatched or it is excluded then we can skip
        next if watched_file.unwatched? || can_exclude?(watched_file, new_discovery)

        if new_discovery
          if watched_file.file_ignorable?
            logger.debug("Discoverer discover_files: #{file}: skipping because it was last modified more than #{@settings.ignore_older} seconds ago")
            # On discovery, ignorable watched_files are put into the ignored
            # state, which updates the size from the internal stat so the
            # existing contents are not read (normally a newly discovered
            # file has a watched_file size of zero). They are still added to
            # the collection so the next periodic discovery knows about them.
            watched_file.ignore
          end
          # add the discovered file to the watched_files collection and
          # adjust the sincedb collection
          @watched_files_collection.add(watched_file)
          # When the sincedb collection is first filled with records from the
          # persistence file, each value is not yet associated with a watched
          # file. A sincedb_value can be: unassociated, associated with this
          # watched_file, or associated with a different watched_file.
          @sincedb_collection.associate(watched_file)
        end
        # at this point the watched file is created and is in the db,
        # but not yet opened or being processed
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
# code downloaded from Ruby on Rails 4.2.1
# https://raw.githubusercontent.com/rails/rails/v4.2.1/activesupport/lib/active_support/core_ext/file/atomic.rb
# change method name to avoid borking active_support and vice versa
require 'fileutils'

module FileHelper
  extend self
  # Write to a file atomically. Useful for situations where you don't
  # want other processes or threads to see half-written files.
  #
  #   FileHelper.write_atomically('important.file') do |file|
  #     file.write('hello')
  #   end
  #
  # The content is written to a uniquely named temp file in the same
  # directory and then renamed over the target, so readers always see
  # either the old or the new content, never a partial write.
  # If the block raises, the temp file is removed and the original file
  # is left untouched. Returns the block's return value.
  def write_atomically(file_name)
    if File.exist?(file_name)
      # Get original file permissions
      old_stat = File.stat(file_name)
    else
      # If not possible, probe which are the default permissions in the
      # destination directory.
      old_stat = probe_stat_in(File.dirname(file_name))
    end

    mode = old_stat ? old_stat.mode : nil

    # Create temporary file with identical permissions
    temp_file = File.new(rand_filename(file_name), "w", mode)
    committed = false
    begin
      temp_file.binmode
      return_val = yield temp_file
      temp_file.close
      # Overwrite original file with temp file
      File.rename(temp_file.path, file_name)
      committed = true
    ensure
      unless committed
        # The block (or the rename) failed: clean up so that failed writes
        # do not leak orphaned temp files onto the disk.
        temp_file.close unless temp_file.closed?
        File.unlink(temp_file.path) if File.exist?(temp_file.path)
      end
    end

    # Unable to get permissions of the original file => return
    return return_val if old_stat.nil?

    # Set correct uid/gid on new file
    File.chown(old_stat.uid, old_stat.gid, file_name)

    return_val
  end

  # True when the path refers to a character or block device.
  def device?(file_name)
    File.chardev?(file_name) || File.blockdev?(file_name)
  end

  # Private utility method.
  # Creates and stats a throw-away file in +dir+ to learn the default
  # permissions there; returns nil when the probe fails (e.g. read-only dir).
  def probe_stat_in(dir) #:nodoc:
    basename = rand_filename(".permissions_check")
    file_name = File.join(dir, basename)
    FileUtils.touch(file_name)
    File.stat(file_name)
  rescue
    # a failed probe is not fatal; the caller treats nil as "unknown perms"
  ensure
    FileUtils.rm_f(file_name) if File.exist?(file_name)
  end

  # Builds a unique-enough sibling filename for the given prefix.
  def rand_filename(prefix)
    [ prefix, Thread.current.object_id, Process.pid, rand(1000000) ].join('.')
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# encoding: utf-8

## Interface API topology
#  ObservingBase module (this file)
#    is a module mixin providing the common constructor and the external API
#    for File Input Plugin interaction; calls build_specific_processor on
#    ObservingRead or ObservingTail.
#  ObservingRead and ObservingTail
#    provide the external API method subscribe(observer) and
#    build_specific_processor(settings), which supplies a
#    TailMode::Processor or ReadMode::Processor.
#  The processors build mode specific handler instances (TailMode::Handlers /
#  ReadMode::Handlers namespaces) when the observer subscribes; each handler
#  extends its Base class and overrides handle_specifically(watched_file)
#  (and, in tail mode, update_existing_specifically(watched_file, sincedb_value)).

module FileWatch
  module ObservingBase
    attr_reader :watch, :sincedb_collection, :settings

    def initialize(opts = {})
      defaults = {
        :sincedb_write_interval => 10,
        :stat_interval => 1,
        :discover_interval => 5,
        :exclude => [],
        :start_new_files_at => :end,
        :delimiter => "\n",
        :file_chunk_count => FIXNUM_MAX,
        :file_sort_by => "last_modified",
        :file_sort_direction => "asc",
      }
      options = defaults.merge(opts)
      unless options.include?(:sincedb_path)
        raise NoSinceDBPathGiven.new("No sincedb_path set in options. This should have been added in the main LogStash::Inputs::File class")
      end
      @settings = Settings.from_options(options)
      build_watch_and_dependencies
    end

    # Wires up the collections, discoverer, watch and the mode-specific
    # processor supplied by the including class.
    def build_watch_and_dependencies
      logger.info("START, creating Discoverer, Watch with file and sincedb collections")
      watched_files_collection = WatchedFilesCollection.new(@settings)
      @sincedb_collection = SincedbCollection.new(@settings)
      @sincedb_collection.open
      discoverer = Discoverer.new(watched_files_collection, @sincedb_collection, @settings)
      @watch = Watch.new(discoverer, watched_files_collection, @settings)
      @watch.add_processor build_specific_processor(@settings)
    end

    def watch_this(path)
      @watch.watch(path)
    end

    # Can be invoked from the file input to force a sincedb flush.
    def sincedb_write(reason = nil)
      @sincedb_collection.write(reason)
    end

    # quit is a sort-of finalizer; it should be called for clean up
    # before the instance is disposed of.
    def quit
      logger.info("QUIT - closing all files and shutting down.")
      @watch.quit # <-- should close all the files
      # sincedb_write("shutting down")
    end

    # close_file(path) is for external code that knows it is completely
    # done with a file; other files or folders may still be being watched.
    # Caution: once unwatched, a file can't be watched again unless a new
    # instance of this class begins watching again. The sysadmin should
    # rename, move or delete the file.
    def close_file(path)
      @watch.unwatch(path)
      sincedb_write
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"
require_relative "read_mode/processor"

module FileWatch
  # Observer-facing entry point for "read" mode; mixes in the common
  # constructor and external API from ObservingBase.
  class ObservingRead
    include LogStash::Util::Loggable
    include ObservingBase

    # The observer here is the file input plugin itself.
    def subscribe(observer)
      watch.subscribe(observer, sincedb_collection)
      sincedb_collection.write("read mode subscribe complete - shutting down")
    end

    private

    # Supplies the read-mode processor required by ObservingBase.
    def build_specific_processor(settings)
      ReadMode::Processor.new(settings)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"
require_relative 'tail_mode/processor'

module FileWatch
  # Observer-facing entry point for "tail" mode; mixes in the common
  # constructor and external API from ObservingBase.
  class ObservingTail
    include LogStash::Util::Loggable
    include ObservingBase

    # The observer here is the file input plugin itself.
    def subscribe(observer)
      watch.subscribe(observer, sincedb_collection)
      sincedb_collection.write("tail mode subscribe complete - shutting down")
    end

    private

    # Supplies the tail-mode processor required by ObservingBase.
    def build_specific_processor(settings)
      TailMode::Processor.new(settings)
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
require "logstash/util/loggable"

module FileWatch module ReadMode module Handlers
  # Base class for read-mode file lifecycle handlers.
  # Subclasses override handle_specifically(watched_file) to implement the
  # actual reading; this class provides listener wiring, guarded file
  # opening and sincedb bookkeeping.
  class Base
    include LogStash::Util::Loggable

    attr_reader :sincedb_collection

    def initialize(sincedb_collection, observer, settings)
      @settings = settings
      @sincedb_collection = sincedb_collection
      @observer = observer
    end

    # Entry point called by the processor: ensures the watched file has a
    # listener from the observer, then delegates to handle_specifically.
    def handle(watched_file)
      logger.debug("handling: #{watched_file.path}")
      unless watched_file.has_listener?
        watched_file.set_listener(@observer)
      end
      handle_specifically(watched_file)
    end

    def handle_specifically(watched_file)
      # some handlers don't need to define this method
    end

    private

    # Attempts to open the file; returns true when it is open afterwards.
    # Open failures are logged with throttling (at most one warning per
    # OPEN_WARN_INTERVAL seconds per file) and the file is put back into the
    # watched state so the open can be retried.
    def open_file(watched_file)
      return true if watched_file.file_open?
      logger.debug("opening #{watched_file.path}")
      begin
        watched_file.open
      rescue
        # don't emit this message too often. if a file that we can't
        # read is changing a lot, we'll try to open it more often, and spam the logs.
        now = Time.now.to_i
        # NOTE(fix): removed an unconditional
        #   logger.warn("opening OPEN_WARN_INTERVAL is '...'")
        # left over from debugging — it fired at warn level on EVERY failed
        # open, defeating the throttling implemented just below.
        if watched_file.last_open_warning_at.nil? || now - watched_file.last_open_warning_at > OPEN_WARN_INTERVAL
          logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
          watched_file.last_open_warning_at = now
        else
          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
        end
        watched_file.watch # set it back to watch so we can try it again
      end
      if watched_file.file_open?
        watched_file.listener.opened
        true
      else
        false
      end
    end

    # Associates the watched file with a sincedb value, creating a fresh
    # zero-position value when none exists yet, then marks the initial
    # handling of this file as completed.
    def add_or_update_sincedb_collection(watched_file)
      sincedb_value = @sincedb_collection.find(watched_file)
      if sincedb_value.nil?
        add_new_value_sincedb_collection(watched_file)
      elsif sincedb_value.watched_file == watched_file
        update_existing_sincedb_collection_value(watched_file, sincedb_value)
      else
        logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer")
      end
      watched_file.initial_completed
    end

    def update_existing_sincedb_collection_value(watched_file, sincedb_value)
      logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
      # sincedb_value is the source of truth
      watched_file.update_bytes_read(sincedb_value.position)
    end

    def add_new_value_sincedb_collection(watched_file)
      sincedb_value = SincedbValue.new(0)
      sincedb_value.set_watched_file(watched_file)
      logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
      sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
    end
  end
end end end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8

module FileWatch module ReadMode module Handlers
  # Reads a plain (uncompressed) file from the last recorded position to
  # EOF, emitting complete lines to the listener and tracking progress in
  # the sincedb.
  class ReadFile < Base
    def handle_specifically(watched_file)
      return unless open_file(watched_file)
      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
      # When `file_chunk_count` * `file_chunk_size` is less than the file
      # size, this method is executed multiple times and the seek is moved
      # to just after a line boundary as recorded in the sincedb for each
      # run — so the buffer is reset first.
      watched_file.reset_buffer
      watched_file.file_seek(watched_file.bytes_read)
      dirty = false
      @settings.file_chunk_count.times do
        begin
          lines = watched_file.buffer_extract(watched_file.file_read(@settings.file_chunk_size))
          logger.warn("read_to_eof: no delimiter found in current chunk") if lines.empty?
          dirty = true
          lines.each do |line|
            watched_file.listener.accept(line)
            sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
          end
        rescue EOFError
          # flush the buffer now in case there is no final delimiter
          last_line = watched_file.buffer.flush
          watched_file.listener.accept(last_line) unless last_line.empty?
          watched_file.listener.eof
          watched_file.file_close
          sincedb_collection.unset_watched_file(watched_file)
          watched_file.listener.deleted
          watched_file.unwatch
          break
        rescue Errno::EWOULDBLOCK, Errno::EINTR
          watched_file.listener.error
          break
        rescue => e
          logger.error("read_to_eof: general error reading #{watched_file.path} - error: #{e.inspect}")
          watched_file.listener.error
          break
        end
      end
      sincedb_collection.request_disk_flush if dirty
    end
  end
end end end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
require 'java'
java_import java.io.InputStream
java_import java.io.InputStreamReader
java_import java.io.FileInputStream
java_import java.io.BufferedReader
java_import java.util.zip.GZIPInputStream
java_import java.util.zip.ZipException

module FileWatch module ReadMode module Handlers
  # Streams a gzipped file line by line through the Java zip machinery.
  # A zip file can't really be stripe-read: it is processed all or nothing.
  class ReadZipFile < Base
    def handle_specifically(watched_file)
      add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
      watched_file.listener.opened
      # Open question: what do we do about quit when we have just begun
      # reading the zipped file (e.g. pipeline reloading)? Should we track
      # lines read in the sincedb and fast forward through the lines until
      # we reach unseen content — meaning that we could quit in the middle
      # of a zip file?
      file_stream = gzip_stream = decoder = buffered = nil
      begin
        file_stream = FileInputStream.new(watched_file.path)
        gzip_stream = GZIPInputStream.new(file_stream)
        decoder = InputStreamReader.new(gzip_stream, "UTF-8")
        buffered = BufferedReader.new(decoder)
        while (line = buffered.readLine(false))
          watched_file.listener.accept(line)
        end
        watched_file.listener.eof
      rescue ZipException
        logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
        watched_file.listener.error
      else
        sincedb_collection.store_last_read(watched_file.sincedb_key, watched_file.last_stat_size)
        sincedb_collection.request_disk_flush
        watched_file.listener.deleted
        watched_file.unwatch
      ensure
        # rescue each close individually so all close attempts are tried
        close_and_ignore_ioexception(buffered) unless buffered.nil?
        close_and_ignore_ioexception(decoder) unless decoder.nil?
        close_and_ignore_ioexception(gzip_stream) unless gzip_stream.nil?
        close_and_ignore_ioexception(file_stream) unless file_stream.nil?
      end
      sincedb_collection.unset_watched_file(watched_file)
    end

    private

    def close_and_ignore_ioexception(closeable)
      closeable.close
    rescue Exception => e # IOException can be thrown by any of the Java classes that implement the Closable interface.
      logger.warn("Ignoring an IOException when closing an instance of #{closeable.class.name}", "exception" => e)
    end
  end
end end end
|