fileminer 1.0.0RC1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7237a574207f6cb4913c5be6f2ef70dda7c735ffbf786fba9bfaf3941366fd88
4
+ data.tar.gz: 9e2a23b7b2a49fb78af7ae1ba969f562df855196b5d1aa066b84063e11611afe
5
+ SHA512:
6
+ metadata.gz: a47639237446113e7dcbf9dd5f79b7008ea102cb3993597743555714fe1ff92e6bb268deee20b8e0ef63c15618e4d745191e6a116333105a743f8ab0da5b32d9
7
+ data.tar.gz: e60806ee50da1f42d619c9fd01febb239790cb1ebc05938fa7d8634ec52e4da4d17ac3e3d316b8e2f184d45676cfc4a7bea8079e49e5cf5f215e33c9dcfd479a
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2018 fmjsjx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # fileminer
2
+ A simple file/log transfer tool written in Ruby.
data/bin/fileminer ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'logger'
5
+ require 'yaml'
6
+ require 'fileminer'
7
+
8
+
9
+ logger = Logger.new STDERR
10
+
11
+ begin
12
+ # Usage:
13
+ # ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
14
+ yml = File.open(ARGV[0]) { |io| io.read }
15
+ conf = YAML.load yml
16
+ # create fileminer instance
17
+ fileminer = FileMiner.new conf
18
+
19
+ # trap INT signal to stop mining
20
+ trap(:INT) { fileminer.stop_mining }
21
+
22
+ # start mining
23
+ fileminer.start_mining
24
+ rescue => e
25
+ logger.error e
26
+ usage = <<-EOS
27
+ Usage:
28
+ ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
29
+ EOS
30
+ puts usage
31
+ end
@@ -0,0 +1,146 @@
1
+ # general settings
2
+ #fileminer.settings:
3
+
4
+ # default value is 30s
5
+ #refresh_files_time_trigger: 30s
6
+
7
+ # default value is 5s
8
+ #max_time_of_each_mining: 5s
9
+
10
+ # default value is -1
11
+ #max_lines_of_each_mining: -1
12
+
13
+ # default value is -1
14
+ #max_lines_of_each_file: -1
15
+
16
+ # TODO
17
+
18
+ # fileminer inputs
19
+ fileminer.inputs:
20
+
21
+ # setup the path of the registry file
22
+ # default value is /var/lib/fileminer/registry
23
+ #registry_path: /var/lib/fileminer/registry
24
+
25
+ # file paths
26
+ paths:
27
+ - /path/to/*.log
28
+
29
+ # other path
30
+ #- /other/path/*.log
31
+
32
+ # EOF seconds
33
+ # default value is 86400(1 day)
34
+ #eof_seconds: 86400
35
+
36
+ # Batch lines to read
37
+ # default value is 200
38
+ #batch_lines: 200
39
+
40
+ # Host
41
+ # default value is `Socket.gethostname`
42
+ #host: 127.0.0.1
43
+
44
+ # -- output --
45
+
46
+ # output to redis
47
+ # based on redis-rb, please make sure that redis has already been installed
48
+ # install redis: gem install redis
49
+ output.redis:
50
+ # the URI of the redis server
51
+ uri: redis://localhost:6379/0
52
+
53
+ # host of the redis server
54
+   # effective only if uri is not set
55
+ # default value is localhost
56
+ #host: localhost
57
+
58
+ # port of the redis server
59
+ # default value is 6379
60
+   # effective only if uri is not set
61
+ #port: 6379
62
+
63
+ # db of the redis server
64
+ # default value is 0
65
+   # effective only if uri is not set
66
+ #db: 0
67
+
68
+ # password of the redis server
69
+   # effective only if uri is not set
70
+ #password:
71
+
72
+ # key of the redis server
73
+ # required
74
+ key: fileminer
75
+
76
+ # output to kafka
77
+ # based on ruby-kafka, please make sure that ruby-kafka has already been installed
78
+ # install ruby-kafka: gem install ruby-kafka
79
+ #output.kafka:
80
+ # kafka brokers
81
+ # default value is ['localhost:9092']
82
+ #brokers: ['localhost:9092']
83
+
84
+ # producer client_id
85
+ # default value is fileminer
86
+ #client_id: fileminer
87
+
88
+ # kafka topic
89
+ # default value is fileminer
90
+ #topic: fileminer
91
+
92
+ # producer mode
93
+ # sync or async
94
+ # default is sync
95
+ #mode: sync
96
+
97
+ # auto delivery
98
+ # enabled or disabled
99
+ # default value is disabled
100
+ # effective only if mode is async
101
+ # when effective, at least one of delivery_threshold and delivery_interval should be set
102
+ #auto_delivery: disabled
103
+
104
+ # delivery threshold, trigger a delivery once x messages have been buffered
105
+   # effective only if mode is async and auto_delivery is enabled
106
+ #delivery_threshold: 100
107
+
108
+ # delivery interval, trigger a delivery every x seconds
109
+   # effective only if mode is async and auto_delivery is enabled
110
+ #delivery_interval: 30
111
+
112
+ # output to mysql
113
+ # based on mysql2, please make sure that mysql2 has already been installed
114
+ # install mysql2: gem install mysql2
115
+ #output.mysql:
116
+ # host
117
+ # default value is localhost
118
+ #host: localhost
119
+
120
+ # port
121
+ # default value is 3306
122
+ #port: 3306
123
+
124
+ # username
125
+ #username: someuser
126
+
127
+ # password
128
+ #password: somepwd
129
+
130
+ # database name
131
+ #database: somedb
132
+
133
+ # encoding
134
+ # default value is utf8mb4
135
+ #encoding: utf8mb4
136
+
137
+ # SSL mode
138
+ # enabled or disabled
139
+ # default value is disabled
140
+ #ssl_mode: disabled
141
+
142
+ # table name
143
+ #table: sometable
144
+
145
+ # other outputs
146
+ # TODO
@@ -0,0 +1,128 @@
1
+ require 'set'
2
+ require 'json'
3
+
4
+
5
+ class Dir
6
+
7
+ class << self
8
+
9
+ # Creates the directory with the path given, including anynecessary but nonexistent parent directories.
10
+ #
11
+ # @param [String] path
12
+ def mkdirs(path)
13
+ parent = File.dirname path
14
+ mkdirs parent unless Dir.exist? parent
15
+ Dir.mkdir path
16
+ end
17
+
18
+ end
19
+
20
+ end
21
+
22
+
23
+ class Miner
24
+
25
+ DEFAULTS = {
26
+ registry_path: '/var/lib/fileminer/registry',
27
+ eof_seconds: 86400,
28
+ batch_lines: 200,
29
+ }
30
+
31
+ attr_reader :registry_path, :paths, :eof_seconds, :batch_lines, :files, :active_files
32
+
33
+ # Create a new file miner instance
34
+ #
35
+ # @param [Hash] options
36
+ # @option options [String] :registry_path (/var/lib/fileminer/registry)
37
+ # @option options [Array] :paths
38
+ # @option options [Integer] :eof_seconds (86400)
39
+   # @option options [Integer] :batch_lines (200)
40
+ # @option options [String] :host (Socket.gethostname)
41
+ def initialize(options = {})
42
+ # fix options by DEFAULTS
43
+ DEFAULTS.each { |k, v| options[k] = v unless options.key? k }
44
+ @registry_path = options[:registry_path]
45
+ @paths = options[:paths]
46
+ @eof_seconds = options[:eof_seconds]
47
+ @batch_lines = options[:batch_lines]
48
+ @host = options[:host]
49
+ if @host.nil?
50
+ require 'socket'
51
+ @host = Socket.gethostname
52
+ end
53
+ @files = []
54
+ @active_files = []
55
+ if File.exist? @registry_path
56
+ File.open(@registry_path) { |io| @files = JSON.parse(io.read, {symbolize_names: true}) }
57
+ @active_files = @files.select { |record| !record[:eof] }
58
+ else
59
+ parent_dir = File.dirname @registry_path
60
+ Dir.mkdirs parent_dir unless Dir.exist? parent_dir
61
+ end
62
+ end
63
+
64
+ # Save registry
65
+ def save_registry
66
+ File.open(@registry_path, 'w') { |io| io.write @files.to_json }
67
+ end
68
+
69
+ # Refresh
70
+ def refresh_files
71
+ now = Time.now
72
+ file_paths = Set.new
73
+ file_paths.merge Dir[*@paths].select { |path| File.file? path }
74
+ @active_file = @files.select do |record|
75
+ path = record[:path]
76
+ file_exists = file_paths.delete? path
77
+ unless record[:eof]
78
+ if file_exists
79
+ # check if EOF
80
+ if record[:pos] == File.size(path) && now - File.mtime(path) > @eof_seconds
81
+ record[:eof] = true
82
+ end
83
+ else
84
+ # missing file, set :eof to true
85
+ record[:eof] = true
86
+ end
87
+ end
88
+ !record[:eof]
89
+ end
90
+ file_paths.each do |path|
91
+ record = {path: path, pos: 0, eof: false}
92
+ @files << record
93
+ @active_files << record
94
+ end
95
+ @files_refresh_time = now
96
+ end
97
+
98
+ # Read lines
99
+ def read_lines(record)
100
+ file_path = record[:path]
101
+ File.open file_path do |io|
102
+ lines = []
103
+ io.pos = record[:pos]
104
+ while lines.size < @batch_lines
105
+ line = {host: @host, path: file_path, pos: io.pos}
106
+ begin
107
+ data = io.readline
108
+ break if data.nil?
109
+ if data[-1] != "\n"
110
+ io.pos = line[:pos]
111
+ break
112
+ end
113
+ rescue EOFError
114
+ break
115
+ end
116
+ line[:end] = io.pos
117
+ line[:data] = data
118
+ lines << line
119
+ end
120
+ lines
121
+ end
122
+ end
123
+
124
+ def files_need_refresh?(refresh_files_time_trigger)
125
+ Time.now - @files_refresh_time >= refresh_files_time_trigger
126
+ end
127
+
128
+ end
@@ -0,0 +1,62 @@
1
+ require 'kafka'
2
+ require 'json'
3
+ require_relative '../output'
4
+
5
+
6
+ module Output
7
+
8
+ class KafkaPlugin < OutputPlugin
9
+
10
+ # Create a kafka output plugin instance
11
+ #
12
+ # @param [Hash] options
13
+ # @option options [Array] :brokers (['localhost:9092'])
14
+ # @option options [String] :client_id ('fileminer')
15
+ # @option options [String] :topic ('fileminer')
16
+ # @option options [Symbol] :mode (:sync) :sync or :async
17
+ # @option options [Symbol] :auto_delivery (:disabled) :disabled or :enabled
18
+ # @option options [Hash] :delivery_conf
19
+ def initialize(options)
20
+ brokers = options[:brokers] || ['localhost:9092']
21
+ client_id = options[:client_id] || 'fileminer'
22
+ @topic = options[:topic] || 'fileminer'
23
+ @kafka = Kafka.new(brokers, client_id: client_id)
24
+ case @mode = options[:mode]
25
+ when :sync
26
+ @producer = @kafka.producer
27
+ when :async
28
+ case @auto_delivery = options[:auto_delivery]
29
+ when :disabled
30
+ @producer = @kafka.async_producer
31
+ when :enabled
32
+ @producer = @kafka.async_producer options[:delivery_conf]
33
+ else
34
+ raise "invalid value #@auto_delivery of auto_delivery"
35
+ end
36
+ else
37
+ raise "unsupported mode #@mode"
38
+ end
39
+ end
40
+
41
+ # Send all lines to kafka using producer API
42
+ #
43
+ # @param [Array] lines
44
+     # @yield a listener to be called after all lines have been delivered
45
+ def send_all(lines, &listener)
46
+ lines.each do |line|
47
+ message = line.to_json
48
+ @producer.produce(message, topic: @topic)
49
+ end
50
+ @producer.deliver_messages unless @mode == :async and @auto_delivery == :enabled
51
+ listener.call
52
+ end
53
+
54
+ # close the kafka producer
55
+ def close
56
+ @producer.shutdown
57
+ @kafka.close
58
+ end
59
+
60
+ end
61
+
62
+ end
@@ -0,0 +1,101 @@
1
+ require 'mysql2'
2
+ require_relative '../output'
3
+
4
+
5
+ module Output
6
+
7
+ class MysqlPlugin < OutputPlugin
8
+
9
+ DEFAULT_MYSQL = {
10
+ host: 'localhost',
11
+ port: 3306,
12
+ password: '',
13
+ encoding: 'utf8mb4',
14
+ ssl_mode: :disabled
15
+ }
16
+
17
+ # Create a mysql output plugin instance
18
+ #
19
+ # @param [Hash] options
20
+ # @option options [String] :host ('localhost')
21
+ # @option options [Integer] :port (3306)
22
+ # @option options [String] :username
23
+ # @option options [String] :password ('')
24
+ # @option options [String] :database
25
+ # @option options [String] :encoding ('utf8mb4')
26
+ # @option options [Symbol] :ssl_mode (:disabled)
27
+ # @option options [String] :table
28
+ def initialize(options)
29
+ raise 'Missing config username on output.mysql' unless options.key? :username
30
+ raise 'Missing config database on output.mysql' unless options.key? :database
31
+ raise 'Missing config table on output.mysql' unless options.key? :table
32
+ conf = DEFAULT_MYSQL.merge options
33
+ @table = conf.delete :table
34
+ conf[:port] = conf[:port].to_i
35
+ conf[:password] = conf[:password].to_s
36
+ @encoding = conf[:encoding]
37
+ conf[:ssl_mode] = :disabled if conf[:ssl_mode] != :enabled
38
+ @mysql = Mysql2::Client.new conf
39
+ create_table_if_not_exists
40
+ @sqls = Hash.new { |hash, key| hash[key] = generate_batch_sql key }
41
+ end
42
+
43
+ private
44
+ def create_table_if_not_exists
45
+ rs = @mysql.query 'SHOW TABLES'
46
+ tables = rs.map { |row| row.values[0] }
47
+ unless tables.include? @table
48
+ sql = create_table_sql
49
+ @mysql.query sql
50
+ end
51
+ end
52
+
53
+ def create_table_sql
54
+ <<-EOS
55
+ CREATE TABLE `#@table` (
56
+ `id` bigint(20) PRIMARY KEY AUTO_INCREMENT,
57
+ `host` varchar(255) NOT NULL,
58
+ `path` varchar(255) NOT NULL,
59
+ `pos` bigint(20) NOT NULL,
60
+ `end` bigint(20) NOT NULL,
61
+ `data` text NOT NULL,
62
+ UNIQUE KEY `UNIQUE_host_path_pos` (`host`,`path`,`pos`)
63
+ ) ENGINE=InnoDB DEFAULT CHARSET=#@encoding
64
+ EOS
65
+ end
66
+
67
+ def generate_batch_sql(size)
68
+ "INSERT IGNORE INTO `#@table`(`host`,`path`,`pos`,`end`,`data`) VALUES " << (['(?,?,?,?,?)'] * size).join(',')
69
+ end
70
+
71
+ def get_batch_sql(size)
72
+ if @sqls.key? size
73
+ @sqls[size]
74
+ else
75
+ @sqls[size] = generate_batch_sql size
76
+ end
77
+ end
78
+
79
+ # Send all lines to mysql
80
+ #
81
+ # @param [Array] lines
82
+     # @yield a listener to be called after all lines have been sent
83
+ public
84
+ def send_all(lines, &listener)
85
+ values = lines.flat_map { |line| [line[:host], line[:path], line[:pos], line[:end], line[:data]] }
86
+ sql = get_batch_sql lines.size
87
+ @mysql.query 'BEGIN'
88
+ begin
89
+ stat = @mysql.prepare sql
90
+ stat.execute *values
91
+ @mysql.query 'COMMIT'
92
+ listener.call
93
+ rescue => err
94
+ @mysql.query 'ROLLBACK'
95
+ raise err
96
+ end
97
+ end
98
+
99
+ end
100
+
101
+ end
@@ -0,0 +1,63 @@
1
+ require 'redis'
2
+ require 'json'
3
+ require_relative '../output'
4
+
5
+
6
+ module Output
7
+
8
+ class RedisPlugin < OutputPlugin
9
+
10
+ # Create a redis output plugin instance
11
+ #
12
+ # @param [Hash] options
13
+ # @option options [String] :uri redis URI string
14
+ # @option options [String] :host
15
+ # @option options [Integer] :port
16
+ # @option options [Integer] :db
17
+ # @option options [String] :password
18
+ # @option options [String] :key redis key
19
+ def initialize(options)
20
+ uri = options[:uri]
21
+ if uri.nil?
22
+ uri = parse_uri options
23
+ end
24
+ @key = options[:key]
25
+ raise 'Missing key config on output.redis' if @key.nil?
26
+ driver = require_lib?('hiredis') ? :hiredis : :ruby
27
+ @redis = Redis.new url: uri, driver: driver
28
+ end
29
+
30
+ private
31
+ def parse_uri(options)
32
+ host = options[:host] || 'localhost'
33
+ port = options[:port] || 6379
34
+ db = options[:db] || 0
35
+ password = options[:password]
36
+ if password.nil?
37
+ "redis://#{host}:#{port}/#{db}"
38
+ else
39
+ "redis://:#{password}@#{host}:#{port}/#{db}"
40
+ end
41
+ end
42
+
43
+ def require_lib?(name)
44
+ require name
45
+ rescue LoadError
46
+ false
47
+ end
48
+
49
+
50
+ # Send all lines to redis using LPUSH @key
51
+ #
52
+ # @param [Array] lines
53
+     # @yield a listener to be called after all lines have been sent
54
+ public
55
+ def send_all(lines, &listener)
56
+ messages = lines.map { |line| line.to_json }
57
+ @redis.lpush @key, messages
58
+ listener.call
59
+ end
60
+
61
+ end
62
+
63
+ end
@@ -0,0 +1,17 @@
1
+ # Output module
2
+ module Output
3
+ # abstract class OutputPlugin
4
+ class OutputPlugin
5
+
6
+ # If plugin is in batch mode
7
+ def batch?
8
+ true
9
+ end
10
+
11
+ def close
12
+ # do nothing default
13
+ end
14
+
15
+ end
16
+
17
+ end
@@ -0,0 +1 @@
1
+ require_relative 'output'
@@ -0,0 +1,5 @@
1
+ class FileMiner
2
+
3
+ VERSION = '1.0.0RC1'
4
+
5
+ end
data/lib/fileminer.rb ADDED
@@ -0,0 +1,243 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'logger'
5
+ require_relative 'fileminer/miner'
6
+ require_relative 'fileminer/plugins'
7
+
8
+ FILEMINER_SETTINGS = 'fileminer.settings'
9
+ FILEMINER_INPUTS = 'fileminer.inputs'
10
+
11
+
12
+ class Hash
13
+
14
+ def keys_to_sym
15
+ map { |k, v| [k.to_sym, v] }.to_h
16
+ end
17
+
18
+ def keys_to_sym!
19
+ new_hash = keys_to_sym
20
+ clear
21
+ merge! new_hash
22
+ end
23
+
24
+ end
25
+
26
+
27
+ class FileMiner
28
+
29
+ DEFAULT_SETTINGS = {
30
+ refresh_files_time_trigger: '30s',
31
+ max_time_of_each_mining: '5s',
32
+ max_lines_of_each_mining: -1,
33
+ max_lines_of_each_file: -1,
34
+ }
35
+
36
+ attr_reader :miner, :output, :running
37
+
38
+ # Create a new FileMiner instance
39
+ #
40
+ # @param [Hash] conf
41
+ def initialize(conf)
42
+ init_settings conf['fileminer.settings']
43
+ @output = init_output conf
44
+ raise 'Missing config fileminer.inputs' unless conf.key? 'fileminer.inputs'
45
+ @miner = Miner.new conf['fileminer.inputs'].keys_to_sym
46
+ @miner.refresh_files
47
+ @miner.save_registry
48
+ @running = false
49
+ end
50
+
51
+ private
52
+ def init_settings(conf)
53
+ if conf.nil?
54
+ conf = DEFAULT_SETTINGS.clone
55
+ else
56
+ conf = DEFAULT_SETTINGS.merge conf.keys_to_sym
57
+ end
58
+ # default logger to stderr
59
+ # TODO make logger configurable in future
60
+ @logger = Logger.new STDERR
61
+ @logger.level = Logger::WARN
62
+ # mining break trigger
63
+ max_time_of_each_mining = parse_time conf[:max_time_of_each_mining], 'max_time_of_each_mining on fileminer.settings'
64
+ max_lines_of_each_mining = conf[:max_lines_of_each_mining]
65
+ if max_lines_of_each_mining >= 0
66
+ @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining || lines >= max_lines_of_each_mining }
67
+ else
68
+ @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining }
69
+ end
70
+ # file break trigger
71
+ max_lines_of_each_file = conf[:max_lines_of_each_file]
72
+ if max_lines_of_each_file >= 0
73
+ @file_break_trigger = lambda { |lines| lines < @miner.batch_lines || lines >= max_lines_of_each_file }
74
+ else
75
+ @file_break_trigger = lambda { |lines| lines < @miner.batch_lines }
76
+ end
77
+ # refresh_files_time_trigger
78
+ @refresh_files_time_trigger = parse_time conf[:refresh_files_time_trigger], 'refresh_files_time_trigger on fileminer.settings'
79
+ end
80
+
81
+ def parse_time(value, conf_name)
82
+ if /^(\d+)(\w+)$/ =~ value
83
+ num = $1.to_i
84
+ unit = $2
85
+ case unit
86
+ when 'd'
87
+ num * 86400
88
+ when 'h'
89
+ num * 3600
90
+ when 'min'
91
+ num * 60
92
+ when 's'
93
+ num
94
+ when 'ms'
95
+ num.to_f / 1000
96
+ else
97
+ raise "Unsupported time unit '#{unit}' of #{conf_name}"
98
+ end
99
+ else
100
+ raise "Error format '#{value}' of #{conf_name}"
101
+ end
102
+ end
103
+
104
+ def mining_break?(start_time, lines)
105
+ @mining_break_trigger.call start_time, lines
106
+ end
107
+
108
+ def file_break?(lines)
109
+ @file_break_trigger.call lines
110
+ end
111
+
112
+ def init_output(conf)
113
+ case
114
+ when conf.key?('output.redis')
115
+ redis_conf = conf['output.redis'].keys_to_sym
116
+ init_output_redis redis_conf
117
+ when conf.key?('output.kafka')
118
+ kafka_conf = conf['output.kafka'].keys_to_sym
119
+ init_output_kafka kafka_conf
120
+ when conf.key?('output.mysql')
121
+ mysql_conf = conf['output.mysql'].keys_to_sym
122
+ init_output_mysql mysql_conf
123
+ else
124
+ raise 'Missing config for output'
125
+ end
126
+ end
127
+
128
+ def init_output_redis(redis_conf)
129
+ require_relative 'fileminer/output/redis'
130
+ Output::RedisPlugin.new redis_conf
131
+ end
132
+
133
+ def init_output_kafka(kafka_conf)
134
+ require_relative 'fileminer/output/kafka'
135
+ kafka_conf[:mode] = kafka_conf[:mode] == 'async' ? :async : :sync
136
+ if kafka_conf[:mode] == :async
137
+ kafka_conf[:auto_delivery] = kafka_conf[:auto_delivery] == 'enabled' ? :enabled : :disabled
138
+ if kafka_conf[:auto_delivery] == :enabled
139
+ delivery_threshold = kafka_conf.delete :delivery_threshold
140
+ delivery_interval = kafka_conf.delete :delivery_interval
141
+ raise 'Missing conf delivery_threshold or delivery_interval' if delivery_threshold.nil? && delivery_interval.nil?
142
+ kafka_conf[:delivery_conf] = delivery_conf = Hash.new
143
+ delivery_conf[:delivery_threshold] = delivery_threshold unless delivery_threshold.nil?
144
+ delivery_conf[:delivery_interval] = delivery_interval unless delivery_interval.nil?
145
+ end
146
+ end
147
+ Output::KafkaPlugin.new kafka_conf
148
+ end
149
+
150
+ def init_output_mysql(mysql_conf)
151
+ require_relative 'fileminer/output/mysql'
152
+ mysql_conf[:ssl_mode] = mysql_conf[:ssl_mode] == 'enabled' ? :enabled : :disabled
153
+ Output::MysqlPlugin.new mysql_conf
154
+ end
155
+
156
+ def send_lines(record, lines)
157
+ if @output.batch?
158
+ @output.send_all lines do
159
+ record[:pos] = lines[-1][:end]
160
+ @miner.save_registry
161
+ end
162
+ else
163
+ lines.each do |line|
164
+ @output.send line do
165
+ record[:pos] = line[:end]
166
+ @miner.save_registry
167
+ end
168
+ end
169
+ end
170
+ end
171
+
172
+ public
173
+ def mine_once
174
+ start_time = Time.now
175
+ full_lines = 0
176
+ @miner.active_files.all? do |record|
177
+ mining_next = true
178
+ if record[:pos] < File.size(record[:path])
179
+ file_lines = 0
180
+ loop do
181
+ lines = @miner.read_lines record
182
+ break if lines.empty?
183
+ send_lines record, lines
184
+ file_lines += lines.size
185
+ full_lines += lines.size
186
+ if mining_break? start_time, full_lines
187
+ mining_next = false
188
+ break
189
+ end
190
+ break if file_break? file_lines
191
+ end
192
+ end
193
+ mining_next
194
+ end
195
+ full_lines
196
+ end
197
+
198
+ def start_mining
199
+ unless @running
200
+ @running = true
201
+ while @running
202
+ begin
203
+ files_refreshed = check_files
204
+ sent_lines = mine_once
205
+ # sleep 5 seconds if no more data
206
+ # TODO using settings instead in future
207
+ if sent_lines == 0
208
+ @miner.save_registry if files_refreshed
209
+ sleep 5
210
+ end
211
+ rescue => e
212
+ @logger.error e
213
+ # sleep for a little while to wait output recover
214
+ sleep 5 if @running
215
+ end
216
+ end
217
+ @miner.save_registry
218
+ end
219
+ end
220
+
221
+ def check_files
222
+ if @miner.files_need_refresh? @refresh_files_time_trigger
223
+ @miner.refresh_files
224
+ end
225
+ end
226
+
227
+ def stop_mining
228
+ @running = false if @running
229
+ end
230
+
231
+ end
232
+
233
+
234
+ if __FILE__ == $0
235
+ # Usage:
236
+ # ruby fileminer.rb /etc/fileminer/fileminer.yml
237
+ require 'yaml'
238
+ yml = File.open(ARGV[0]) { |io| io.read }
239
+ conf = YAML.load yml
240
+ fileminer = FileMiner.new conf
241
+ trap(:INT) { fileminer.stop_mining }
242
+ fileminer.start_mining
243
+ end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fileminer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0RC1
5
+ platform: ruby
6
+ authors:
7
+ - Fang MinJie
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple file/log transfer tool coding by ruby.
14
+ email:
15
+ - fmjsjx@163.com
16
+ executables:
17
+ - fileminer
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - LICENSE
22
+ - README.md
23
+ - bin/fileminer
24
+ - conf/fileminer_default.yml
25
+ - lib/fileminer.rb
26
+ - lib/fileminer/miner.rb
27
+ - lib/fileminer/output.rb
28
+ - lib/fileminer/output/kafka.rb
29
+ - lib/fileminer/output/mysql.rb
30
+ - lib/fileminer/output/redis.rb
31
+ - lib/fileminer/plugins.rb
32
+ - lib/fileminer/version.rb
33
+ homepage: https://github.com/fmjsjx/fileminer
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">"
49
+ - !ruby/object:Gem::Version
50
+ version: 1.3.1
51
+ requirements: []
52
+ rubygems_version: 3.0.1
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: A simple file/log transfer tool coding by ruby.
56
+ test_files: []