fileminer 1.0.0RC1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7237a574207f6cb4913c5be6f2ef70dda7c735ffbf786fba9bfaf3941366fd88
4
+ data.tar.gz: 9e2a23b7b2a49fb78af7ae1ba969f562df855196b5d1aa066b84063e11611afe
5
+ SHA512:
6
+ metadata.gz: a47639237446113e7dcbf9dd5f79b7008ea102cb3993597743555714fe1ff92e6bb268deee20b8e0ef63c15618e4d745191e6a116333105a743f8ab0da5b32d9
7
+ data.tar.gz: e60806ee50da1f42d619c9fd01febb239790cb1ebc05938fa7d8634ec52e4da4d17ac3e3d316b8e2f184d45676cfc4a7bea8079e49e5cf5f215e33c9dcfd479a
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2018 fmjsjx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # fileminer
2
+ A simple file/log transfer tool written in Ruby.
data/bin/fileminer ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'logger'
5
+ require 'yaml'
6
+ require 'fileminer'
7
+
8
+
9
logger = Logger.new STDERR

# Help text shown when the program is started incorrectly or fails to start.
usage = <<-EOS
Usage:
  ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
EOS

# The configuration file path is the only (mandatory) command line argument.
conf_path = ARGV[0]
if conf_path.nil?
  logger.error 'Missing path of the configuration file'
  puts usage
  exit 1
end

begin
  # NOTE(review): YAML.load may instantiate arbitrary objects on older Psych
  # versions; the configuration file is assumed to be trusted operator input.
  conf = YAML.load File.read(conf_path)
  # create fileminer instance
  fileminer = FileMiner.new conf

  # trap INT signal to stop mining
  trap(:INT) { fileminer.stop_mining }

  # start mining (blocks until stop_mining is called)
  fileminer.start_mining
rescue => e
  logger.error e
  puts usage
  # report the failure to the calling shell / service manager
  exit 1
end
@@ -0,0 +1,146 @@
1
+ # general settings
2
+ #fileminer.settings:
3
+
4
+ # default value is 30s
5
+ #refresh_files_time_trigger: 30s
6
+
7
+ # default value is 5s
8
+ #max_time_of_each_mining: 5s
9
+
10
+ # default value is -1
11
+ #max_lines_of_each_mining: -1
12
+
13
+ # default value is -1
14
+ #max_lines_of_each_file: -1
15
+
16
+ # TODO
17
+
18
+ # fileminer inputs
19
+ fileminer.inputs:
20
+
21
+ # setup the path of the registry file
22
+ # default value is /var/lib/fileminer/registry
23
+ #registry_path: /var/lib/fileminer/registry
24
+
25
+ # file paths
26
+ paths:
27
+ - /path/to/*.log
28
+
29
+ # other path
30
+ #- /other/path/*.log
31
+
32
+ # EOF seconds
33
+ # default value is 86400(1 day)
34
+ #eof_seconds: 86400
35
+
36
+ # Batch lines to read
37
+ # default value is 200
38
+ #batch_lines: 200
39
+
40
+ # Host
41
+ # default value is `Socket.gethostname`
42
+ #host: 127.0.0.1
43
+
44
+ # -- output --
45
+
46
+ # output to redis
47
+ # based on redis-rb, please make sure that redis has already been installed
48
+ # install redis: gem install redis
49
+ output.redis:
50
+ # the URI of the redis server
51
+ uri: redis://localhost:6379/0
52
+
53
+ # host of the redis server
54
+ # effective only if uri is not be set
55
+ # default value is localhost
56
+ #host: localhost
57
+
58
+ # port of the redis server
59
+ # default value is 6379
60
+ # effective only if uri is not set
61
+ #port: 6379
62
+
63
+ # db of the redis server
64
+ # default value is 0
65
+ # effective only if uri is not set
66
+ #db: 0
67
+
68
+ # password of the redis server
69
+ # effective only if uri is not set
70
+ #password:
71
+
72
+ # key of the redis server
73
+ # required
74
+ key: fileminer
75
+
76
+ # output to kafka
77
+ # based on ruby-kafka, please make sure that ruby-kafka has already been installed
78
+ # install ruby-kafka: gem install ruby-kafka
79
+ #output.kafka:
80
+ # kafka brokers
81
+ # default value is ['localhost:9092']
82
+ #brokers: ['localhost:9092']
83
+
84
+ # producer client_id
85
+ # default value is fileminer
86
+ #client_id: fileminer
87
+
88
+ # kafka topic
89
+ # default value is fileminer
90
+ #topic: fileminer
91
+
92
+ # producer mode
93
+ # sync or async
94
+ # default is sync
95
+ #mode: sync
96
+
97
+ # auto delivery
98
+ # enabled or disabled
99
+ # default value is disabled
100
+ # effective only if mode is async
101
+ # when effective, at least one of delivery_threshold and delivery_interval should be set
102
+ #auto_delivery: disabled
103
+
104
+ # delivery threshold, trigger a delivery once x messages have been buffered
105
+ # effective only if mode is async and auto_delivery is enabled
106
+ #delivery_threshold: 100
107
+
108
+ # delivery interval, trigger a delivery every x seconds
109
+ # effective only if mode is async and auto_delivery is enabled
110
+ #delivery_interval: 30
111
+
112
+ # output to mysql
113
+ # based on mysql2, please make sure that mysql2 has already been installed
114
+ # install mysql2: gem install mysql2
115
+ #output.mysql:
116
+ # host
117
+ # default value is localhost
118
+ #host: localhost
119
+
120
+ # port
121
+ # default value is 3306
122
+ #port: 3306
123
+
124
+ # username
125
+ #username: someuser
126
+
127
+ # password
128
+ #password: somepwd
129
+
130
+ # database name
131
+ #database: somedb
132
+
133
+ # encoding
134
+ # default value is utf8mb4
135
+ #encoding: utf8mb4
136
+
137
+ # SSL mode
138
+ # enabled or disabled
139
+ # default value is disabled
140
+ #ssl_mode: disabled
141
+
142
+ # table name
143
+ #table: sometable
144
+
145
+ # other outputs
146
+ # TODO
@@ -0,0 +1,128 @@
1
+ require 'set'
2
+ require 'json'
3
+
4
+
5
class Dir

  class << self

    # Creates the directory with the path given, including any necessary but
    # nonexistent parent directories. Calling it for a directory that already
    # exists is a no-op.
    #
    # @param [String] path
    # @return [Integer] 0
    # @raise [SystemCallError] if a directory cannot be created, e.g. because
    #   a regular file with the same name is in the way
    def mkdirs(path)
      return 0 if Dir.exist? path
      parent = File.dirname path
      mkdirs parent unless Dir.exist? parent
      Dir.mkdir path
    rescue Errno::EEXIST
      # Somebody else may have created the directory between the existence
      # check and the mkdir call; only a non-directory entry is a real error.
      raise unless Dir.exist? path
      0
    end

  end

end
21
+
22
+
23
# Keeps track of the files matched by the configured glob patterns, remembers
# how far each file has been read (the "registry") and reads complete lines
# from them in batches.
#
# Every registry record is a Hash with the keys +:path+, +:pos+ (byte offset
# of the next unread line) and +:eof+ (true once the file is considered done).
class Miner

  DEFAULTS = {
    registry_path: '/var/lib/fileminer/registry',
    eof_seconds: 86400,
    batch_lines: 200,
  }

  attr_reader :registry_path, :paths, :eof_seconds, :batch_lines, :files, :active_files

  # Create a new file miner instance
  #
  # Loads the registry file when it exists; otherwise makes sure that the
  # directory which will contain it exists.
  #
  # @param [Hash] options
  # @option options [String] :registry_path (/var/lib/fileminer/registry)
  # @option options [Array] :paths glob patterns of the files to mine
  # @option options [Integer] :eof_seconds (86400)
  # @option options [Integer] :batch_lines (200)
  # @option options [String] :host (Socket.gethostname)
  def initialize(options = {})
    # complete the options with DEFAULTS without touching the caller's hash
    options = DEFAULTS.merge options
    @registry_path = options[:registry_path]
    @paths = options[:paths]
    @eof_seconds = options[:eof_seconds]
    @batch_lines = options[:batch_lines]
    @host = options[:host]
    if @host.nil?
      require 'socket'
      @host = Socket.gethostname
    end
    @files = []
    @active_files = []
    # nil until the first refresh_files call
    @files_refresh_time = nil
    if File.exist? @registry_path
      @files = JSON.parse(File.read(@registry_path), symbolize_names: true)
      @active_files = @files.reject { |record| record[:eof] }
    else
      parent_dir = File.dirname @registry_path
      Dir.mkdirs parent_dir unless Dir.exist? parent_dir
    end
  end

  # Save registry: writes all records as JSON to the registry file.
  def save_registry
    File.open(@registry_path, 'w') { |io| io.write @files.to_json }
  end

  # Refresh the list of known and active files.
  #
  # * a known record whose file no longer matches the patterns is flagged EOF
  # * a known record that was read completely and whose file was not modified
  #   for more than +eof_seconds+ is flagged EOF
  # * every matching file without a record gets a new record starting at 0
  #
  # Afterwards +active_files+ holds exactly the records not flagged EOF.
  def refresh_files
    now = Time.now
    file_paths = Set.new
    file_paths.merge Dir[*@paths].select { |path| File.file? path }
    @active_files = @files.select do |record|
      path = record[:path]
      # delete? also removes the path, so only unknown files remain in the set
      file_exists = file_paths.delete? path
      unless record[:eof]
        if file_exists
          # check if EOF
          if record[:pos] == File.size(path) && now - File.mtime(path) > @eof_seconds
            record[:eof] = true
          end
        else
          # missing file, set :eof to true
          record[:eof] = true
        end
      end
      !record[:eof]
    end
    file_paths.each do |path|
      record = {path: path, pos: 0, eof: false}
      @files << record
      @active_files << record
    end
    @files_refresh_time = now
  end

  # Read up to +batch_lines+ complete lines, starting at the record's position.
  #
  # A trailing line without a terminating newline is treated as not yet fully
  # written and is left for a later call. The record itself is not modified.
  #
  # @param [Hash] record a registry record (+:path+ and +:pos+ are read)
  # @return [Array<Hash>] one Hash per line with the keys +:host+, +:path+,
  #   +:pos+ (start offset), +:end+ (offset after the line) and +:data+
  def read_lines(record)
    file_path = record[:path]
    File.open file_path do |io|
      lines = []
      io.pos = record[:pos]
      while lines.size < @batch_lines
        start = io.pos
        data = io.gets
        # nil: end of file, no newline: incomplete last line
        break if data.nil? || !data.end_with?("\n")
        lines << {host: @host, path: file_path, pos: start, end: io.pos, data: data}
      end
      lines
    end
  end

  # Tell whether the file list is older than the given number of seconds.
  # Always true as long as refresh_files has never been called.
  #
  # @param [Numeric] refresh_files_time_trigger seconds
  def files_need_refresh?(refresh_files_time_trigger)
    return true if @files_refresh_time.nil?
    Time.now - @files_refresh_time >= refresh_files_time_trigger
  end

end
@@ -0,0 +1,62 @@
1
+ require 'kafka'
2
+ require 'json'
3
+ require_relative '../output'
4
+
5
+
6
module Output

  # Output plugin that publishes every line as a JSON message to a kafka topic.
  class KafkaPlugin < OutputPlugin

    # Create a kafka output plugin instance
    #
    # @param [Hash] options
    # @option options [Array] :brokers (['localhost:9092'])
    # @option options [String] :client_id ('fileminer')
    # @option options [String] :topic ('fileminer')
    # @option options [Symbol] :mode (:sync) :sync or :async
    # @option options [Symbol] :auto_delivery (:disabled) :disabled or :enabled,
    #   only evaluated when mode is :async
    # @option options [Hash] :delivery_conf symbol-keyed options handed to the
    #   async producer when auto_delivery is :enabled
    # @raise [RuntimeError] if mode or auto_delivery has an unsupported value
    def initialize(options)
      brokers = options[:brokers] || ['localhost:9092']
      client_id = options[:client_id] || 'fileminer'
      @topic = options[:topic] || 'fileminer'
      # validate everything before a kafka client is created
      @mode = options[:mode] || :sync
      raise "unsupported mode #@mode" unless @mode == :sync || @mode == :async
      if @mode == :async
        @auto_delivery = options[:auto_delivery] || :disabled
        unless @auto_delivery == :disabled || @auto_delivery == :enabled
          raise "invalid value #@auto_delivery of auto_delivery"
        end
      end
      @kafka = Kafka.new(brokers, client_id: client_id)
      @producer = create_producer options[:delivery_conf]
    end

    # Send all lines to kafka using producer API
    #
    # Messages are delivered explicitly unless the producer is asynchronous
    # with automatic delivery enabled.
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be delivered
    def send_all(lines, &listener)
      lines.each do |line|
        message = line.to_json
        @producer.produce(message, topic: @topic)
      end
      @producer.deliver_messages unless @mode == :async && @auto_delivery == :enabled
      listener.call if listener
    end

    # close the kafka producer
    def close
      @producer.shutdown
      @kafka.close
    end

    private

    # Build the producer matching @mode and @auto_delivery.
    #
    # The delivery settings are splatted because ruby-kafka is expected to
    # declare them as keyword arguments; a positional Hash is not converted
    # to keywords on Ruby 3 (NOTE(review): confirm against the installed
    # ruby-kafka version).
    def create_producer(delivery_conf)
      return @kafka.producer if @mode == :sync
      return @kafka.async_producer unless @auto_delivery == :enabled
      @kafka.async_producer(**(delivery_conf || {}))
    end

  end

end
@@ -0,0 +1,101 @@
1
+ require 'mysql2'
2
+ require_relative '../output'
3
+
4
+
5
module Output

  # Output plugin that stores every line as one row of a mysql table.
  class MysqlPlugin < OutputPlugin

    DEFAULT_MYSQL = {
      host: 'localhost',
      port: 3306,
      password: '',
      encoding: 'utf8mb4',
      ssl_mode: :disabled
    }

    # Create a mysql output plugin instance
    #
    # Connects to the server and creates the target table when it is missing.
    #
    # @param [Hash] options
    # @option options [String] :host ('localhost')
    # @option options [Integer] :port (3306)
    # @option options [String] :username
    # @option options [String] :password ('')
    # @option options [String] :database
    # @option options [String] :encoding ('utf8mb4')
    # @option options [Symbol] :ssl_mode (:disabled)
    # @option options [String] :table
    # @raise [RuntimeError] if username, database or table is missing
    def initialize(options)
      raise 'Missing config username on output.mysql' unless options.key? :username
      raise 'Missing config database on output.mysql' unless options.key? :database
      raise 'Missing config table on output.mysql' unless options.key? :table
      conf = DEFAULT_MYSQL.merge options
      @table = conf.delete :table
      conf[:port] = conf[:port].to_i
      conf[:password] = conf[:password].to_s
      @encoding = conf[:encoding]
      conf[:ssl_mode] = :disabled if conf[:ssl_mode] != :enabled
      # one INSERT statement text per batch size, generated on first use
      @sqls = Hash.new { |hash, key| hash[key] = generate_batch_sql key }
      @mysql = Mysql2::Client.new conf
      create_table_if_not_exists
    end

    # Send all lines to mysql
    #
    # All rows of one call are inserted by a single statement inside a
    # transaction, which is rolled back when the insert fails. An empty batch
    # does not touch the database.
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be sent
    def send_all(lines, &listener)
      unless lines.empty?
        values = lines.flat_map { |line| [line[:host], line[:path], line[:pos], line[:end], line[:data]] }
        insert_batch get_batch_sql(lines.size), values
      end
      listener.call if listener
    end

    private

    def create_table_if_not_exists
      rs = @mysql.query 'SHOW TABLES'
      tables = rs.map { |row| row.values[0] }
      unless tables.include? @table
        sql = create_table_sql
        @mysql.query sql
      end
    end

    def create_table_sql
      <<-EOS
      CREATE TABLE `#@table` (
        `id` bigint(20) PRIMARY KEY AUTO_INCREMENT,
        `host` varchar(255) NOT NULL,
        `path` varchar(255) NOT NULL,
        `pos` bigint(20) NOT NULL,
        `end` bigint(20) NOT NULL,
        `data` text NOT NULL,
        UNIQUE KEY `UNIQUE_host_path_pos` (`host`,`path`,`pos`)
      ) ENGINE=InnoDB DEFAULT CHARSET=#@encoding
      EOS
    end

    # Build the INSERT statement for +size+ rows.
    def generate_batch_sql(size)
      "INSERT IGNORE INTO `#@table`(`host`,`path`,`pos`,`end`,`data`) VALUES " << (['(?,?,?,?,?)'] * size).join(',')
    end

    # Cached INSERT statement for +size+ rows (filled by the @sqls default block).
    def get_batch_sql(size)
      @sqls[size]
    end

    # Execute the prepared INSERT with the given values in its own transaction.
    def insert_batch(sql, values)
      @mysql.query 'BEGIN'
      begin
        stat = @mysql.prepare sql
        begin
          stat.execute(*values)
        ensure
          # release the prepared statement instead of waiting for the GC
          stat.close
        end
        @mysql.query 'COMMIT'
      rescue
        @mysql.query 'ROLLBACK'
        raise
      end
    end

  end

end
@@ -0,0 +1,63 @@
1
+ require 'redis'
2
+ require 'json'
3
+ require_relative '../output'
4
+
5
+
6
module Output

  # Output plugin that pushes every line as a JSON message onto a redis list.
  class RedisPlugin < OutputPlugin

    # Create a redis output plugin instance
    #
    # The hiredis driver is used when the hiredis library can be loaded,
    # the pure ruby driver otherwise.
    #
    # @param [Hash] options
    # @option options [String] :uri redis URI string, takes precedence over
    #   host, port, db and password
    # @option options [String] :host ('localhost')
    # @option options [Integer] :port (6379)
    # @option options [Integer] :db (0)
    # @option options [String] :password
    # @option options [String] :key redis key
    # @raise [RuntimeError] if key is missing
    def initialize(options)
      @key = options[:key]
      raise 'Missing key config on output.redis' if @key.nil?
      driver = require_lib?('hiredis') ? :hiredis : :ruby
      @redis = Redis.new connection_options(options).merge(driver: driver)
    end

    # Send all lines to redis using LPUSH @key
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be sent
    def send_all(lines, &listener)
      messages = lines.map { |line| line.to_json }
      @redis.lpush @key, messages
      listener.call if listener
    end

    private

    # Connection settings for Redis.new.
    #
    # Without a URI the single settings are passed on as they are instead of
    # being assembled into a URI string, so a password containing characters
    # such as '@' or '/' cannot corrupt the address.
    def connection_options(options)
      uri = options[:uri]
      return {url: uri} unless uri.nil?
      settings = {
        host: options[:host] || 'localhost',
        port: options[:port] || 6379,
        db: options[:db] || 0,
      }
      password = options[:password]
      settings[:password] = password.to_s unless password.nil?
      settings
    end

    # Tell whether the library is available.
    # `require` returns false for an already loaded library, which must still
    # count as available.
    def require_lib?(name)
      require name
      true
    rescue LoadError
      false
    end

  end

end
@@ -0,0 +1,17 @@
1
# Namespace of all output plugins
module Output

  # Common base class of the output plugins, providing default answers that
  # concrete plugins may override.
  class OutputPlugin

    # Whether the plugin accepts whole batches of lines.
    #
    # @return [true] by default
    def batch?; true; end

    # Release the resources held by the plugin; nothing to release by default.
    #
    # @return [nil]
    def close; end

  end

end
@@ -0,0 +1 @@
1
+ require_relative 'output'
@@ -0,0 +1,5 @@
1
class FileMiner

  # Version of the fileminer gem; frozen so that it cannot be modified by accident.
  VERSION = '1.0.0RC1'.freeze

end
data/lib/fileminer.rb ADDED
@@ -0,0 +1,243 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ require 'logger'
5
+ require_relative 'fileminer/miner'
6
+ require_relative 'fileminer/plugins'
7
+
8
+ FILEMINER_SETTINGS = 'fileminer.settings'
9
+ FILEMINER_INPUTS = 'fileminer.inputs'
10
+
11
+
12
class Hash

  # Build a new hash holding the same values under keys converted by +to_sym+.
  # The receiver is left untouched.
  #
  # @return [Hash]
  def keys_to_sym
    each_with_object({}) { |(key, value), converted| converted[key.to_sym] = value }
  end

  # Convert the keys of this very hash by +to_sym+.
  #
  # @return [Hash] self
  def keys_to_sym!
    converted = keys_to_sym
    clear
    update converted
  end

end
25
+
26
+
27
# Ties a Miner (file discovery and line reading) to one output plugin and runs
# the mining loop: read new lines from the active files, hand them to the
# output and record the new read position in the registry.
class FileMiner

  DEFAULT_SETTINGS = {
    refresh_files_time_trigger: '30s',
    max_time_of_each_mining: '5s',
    max_lines_of_each_mining: -1,
    max_lines_of_each_file: -1,
  }

  attr_reader :miner, :output, :running

  # Create a new FileMiner instance
  #
  # @param [Hash] conf the parsed configuration with string keys:
  #   'fileminer.settings' (optional), 'fileminer.inputs' (required) and one
  #   of 'output.redis', 'output.kafka' or 'output.mysql'
  # @raise [RuntimeError] if the inputs or the output configuration is missing
  #   or a time setting cannot be parsed
  def initialize(conf)
    init_settings conf['fileminer.settings']
    # validate the inputs before an output plugin is created
    raise 'Missing config fileminer.inputs' unless conf.key? 'fileminer.inputs'
    @output = init_output conf
    @miner = Miner.new conf['fileminer.inputs'].keys_to_sym
    @miner.refresh_files
    @miner.save_registry
    @running = false
  end

  # Run one mining pass over all active files.
  #
  # Files are read batch by batch until they have no more complete lines, the
  # per-file line limit is reached or the pass as a whole hits its time or
  # line limit. Files that have disappeared or are empty are skipped.
  #
  # @return [Integer] the number of lines sent during this pass
  def mine_once
    start_time = Time.now
    full_lines = 0
    @miner.active_files.all? do |record|
      mining_next = true
      # File.size? is nil for a missing file, so a file removed since the
      # last refresh does not abort the pass for the remaining files
      size = File.size? record[:path]
      if size && record[:pos] < size
        file_lines = 0
        loop do
          lines = @miner.read_lines record
          break if lines.empty?
          send_lines record, lines
          file_lines += lines.size
          full_lines += lines.size
          if mining_break? start_time, full_lines
            mining_next = false
            break
          end
          # a short batch means there was nothing more to read right now
          break if lines.size < @miner.batch_lines || file_break?(file_lines)
        end
      end
      mining_next
    end
    full_lines
  end

  # Run mining passes until stop_mining is called; does nothing when the loop
  # is already running. Errors of a pass are logged and followed by a pause.
  def start_mining
    unless @running
      @running = true
      while @running
        begin
          files_refreshed = check_files
          sent_lines = mine_once
          # sleep 5 seconds if no more data
          # TODO using settings instead in future
          if sent_lines == 0
            @miner.save_registry if files_refreshed
            sleep 5
          end
        rescue => e
          @logger.error e
          # sleep for a little while to wait output recover
          sleep 5 if @running
        end
      end
      @miner.save_registry
    end
  end

  # Refresh the file list of the miner when it is older than the configured
  # refresh_files_time_trigger.
  #
  # @return a truthy value if the files were refreshed, nil otherwise
  def check_files
    if @miner.files_need_refresh? @refresh_files_time_trigger
      @miner.refresh_files
    end
  end

  # Ask the mining loop to end after the current pass.
  def stop_mining
    @running = false if @running
  end

  private

  # Set up the logger and the limits from 'fileminer.settings'.
  def init_settings(conf)
    conf = conf.nil? ? DEFAULT_SETTINGS.clone : DEFAULT_SETTINGS.merge(conf.keys_to_sym)
    # default logger to stderr
    # TODO make logger configurable in future
    @logger = Logger.new STDERR
    @logger.level = Logger::WARN
    # mining break trigger, a negative line limit means unlimited
    max_time_of_each_mining = parse_time conf[:max_time_of_each_mining], 'max_time_of_each_mining on fileminer.settings'
    max_lines_of_each_mining = conf[:max_lines_of_each_mining]
    if max_lines_of_each_mining >= 0
      @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining || lines >= max_lines_of_each_mining }
    else
      @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining }
    end
    # file break trigger, a negative line limit means unlimited
    max_lines_of_each_file = conf[:max_lines_of_each_file]
    if max_lines_of_each_file >= 0
      @file_break_trigger = lambda { |lines| lines >= max_lines_of_each_file }
    else
      @file_break_trigger = lambda { |lines| false }
    end
    # refresh_files_time_trigger
    @refresh_files_time_trigger = parse_time conf[:refresh_files_time_trigger], 'refresh_files_time_trigger on fileminer.settings'
  end

  # Convert a time setting to seconds.
  #
  # @param [String, Numeric] value a number followed by one of the units d, h,
  #   min, s or ms (e.g. '30s'), or a plain number of seconds
  # @param [String] conf_name name of the setting, used in error messages
  # @return [Numeric] seconds
  # @raise [RuntimeError] on an unknown unit or a malformed value
  def parse_time(value, conf_name)
    # YAML delivers an unquoted number as Integer/Float: take it as seconds
    return value if value.is_a? Numeric
    match = /\A(\d+)(\w+)\z/.match value.to_s
    raise "Error format '#{value}' of #{conf_name}" if match.nil?
    num = match[1].to_i
    unit = match[2]
    case unit
    when 'd'
      num * 86400
    when 'h'
      num * 3600
    when 'min'
      num * 60
    when 's'
      num
    when 'ms'
      num.to_f / 1000
    else
      raise "Unsupported time unit '#{unit}' of #{conf_name}"
    end
  end

  # Whether the current pass must end, given its start time and line count.
  def mining_break?(start_time, lines)
    @mining_break_trigger.call start_time, lines
  end

  # Whether reading the current file must end, given the lines read from it
  # during this pass.
  def file_break?(lines)
    @file_break_trigger.call lines
  end

  # Create the output plugin of the first configured output section, checked
  # in the order redis, kafka, mysql.
  def init_output(conf)
    case
    when conf.key?('output.redis')
      redis_conf = conf['output.redis'].keys_to_sym
      init_output_redis redis_conf
    when conf.key?('output.kafka')
      kafka_conf = conf['output.kafka'].keys_to_sym
      init_output_kafka kafka_conf
    when conf.key?('output.mysql')
      mysql_conf = conf['output.mysql'].keys_to_sym
      init_output_mysql mysql_conf
    else
      raise 'Missing config for output'
    end
  end

  def init_output_redis(redis_conf)
    require_relative 'fileminer/output/redis'
    Output::RedisPlugin.new redis_conf
  end

  # Translate the string settings of the kafka section into the symbols and
  # the delivery_conf Hash expected by the plugin.
  def init_output_kafka(kafka_conf)
    require_relative 'fileminer/output/kafka'
    kafka_conf[:mode] = kafka_conf[:mode] == 'async' ? :async : :sync
    if kafka_conf[:mode] == :async
      kafka_conf[:auto_delivery] = kafka_conf[:auto_delivery] == 'enabled' ? :enabled : :disabled
      if kafka_conf[:auto_delivery] == :enabled
        delivery_threshold = kafka_conf.delete :delivery_threshold
        delivery_interval = kafka_conf.delete :delivery_interval
        raise 'Missing conf delivery_threshold or delivery_interval' if delivery_threshold.nil? && delivery_interval.nil?
        kafka_conf[:delivery_conf] = delivery_conf = Hash.new
        delivery_conf[:delivery_threshold] = delivery_threshold unless delivery_threshold.nil?
        delivery_conf[:delivery_interval] = delivery_interval unless delivery_interval.nil?
      end
    end
    Output::KafkaPlugin.new kafka_conf
  end

  def init_output_mysql(mysql_conf)
    require_relative 'fileminer/output/mysql'
    mysql_conf[:ssl_mode] = mysql_conf[:ssl_mode] == 'enabled' ? :enabled : :disabled
    Output::MysqlPlugin.new mysql_conf
  end

  # Hand the lines to the output; once the output reports success the read
  # position of the record is advanced and the registry is saved.
  def send_lines(record, lines)
    if @output.batch?
      @output.send_all lines do
        record[:pos] = lines[-1][:end]
        @miner.save_registry
      end
    else
      # NOTE(review): unless the plugin defines its own #send this resolves to
      # Object#send; no plugin visible here returns false from batch?.
      lines.each do |line|
        @output.send line do
          record[:pos] = line[:end]
          @miner.save_registry
        end
      end
    end
  end

end
232
+
233
+
234
if $PROGRAM_NAME == __FILE__
  # Executed only when this file is started directly:
  #   ruby fileminer.rb /etc/fileminer/fileminer.yml
  require 'yaml'
  # the first command line argument is the path of the YAML configuration
  settings = YAML.load File.read(ARGV[0])
  app = FileMiner.new settings
  # Ctrl-C asks the mining loop to stop
  Signal.trap(:INT) { app.stop_mining }
  app.start_mining
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fileminer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0RC1
5
+ platform: ruby
6
+ authors:
7
+ - Fang MinJie
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-01-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple file/log transfer tool coding by ruby.
14
+ email:
15
+ - fmjsjx@163.com
16
+ executables:
17
+ - fileminer
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - LICENSE
22
+ - README.md
23
+ - bin/fileminer
24
+ - conf/fileminer_default.yml
25
+ - lib/fileminer.rb
26
+ - lib/fileminer/miner.rb
27
+ - lib/fileminer/output.rb
28
+ - lib/fileminer/output/kafka.rb
29
+ - lib/fileminer/output/mysql.rb
30
+ - lib/fileminer/output/redis.rb
31
+ - lib/fileminer/plugins.rb
32
+ - lib/fileminer/version.rb
33
+ homepage: https://github.com/fmjsjx/fileminer
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">"
49
+ - !ruby/object:Gem::Version
50
+ version: 1.3.1
51
+ requirements: []
52
+ rubygems_version: 3.0.1
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: A simple file/log transfer tool coding by ruby.
56
+ test_files: []