fileminer 1.0.0RC1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +2 -0
- data/bin/fileminer +31 -0
- data/conf/fileminer_default.yml +146 -0
- data/lib/fileminer/miner.rb +128 -0
- data/lib/fileminer/output/kafka.rb +62 -0
- data/lib/fileminer/output/mysql.rb +101 -0
- data/lib/fileminer/output/redis.rb +63 -0
- data/lib/fileminer/output.rb +17 -0
- data/lib/fileminer/plugins.rb +1 -0
- data/lib/fileminer/version.rb +5 -0
- data/lib/fileminer.rb +243 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7237a574207f6cb4913c5be6f2ef70dda7c735ffbf786fba9bfaf3941366fd88
|
4
|
+
data.tar.gz: 9e2a23b7b2a49fb78af7ae1ba969f562df855196b5d1aa066b84063e11611afe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a47639237446113e7dcbf9dd5f79b7008ea102cb3993597743555714fe1ff92e6bb268deee20b8e0ef63c15618e4d745191e6a116333105a743f8ab0da5b32d9
|
7
|
+
data.tar.gz: e60806ee50da1f42d619c9fd01febb239790cb1ebc05938fa7d8634ec52e4da4d17ac3e3d316b8e2f184d45676cfc4a7bea8079e49e5cf5f215e33c9dcfd479a
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2018 fmjsjx
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
data/bin/fileminer
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby


require 'logger'
require 'yaml'
require 'fileminer'


logger = Logger.new STDERR

begin
  # Usage:
  #     ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
  conf = YAML.load File.read(ARGV[0])
  # create fileminer instance
  fileminer = FileMiner.new conf
  # trap INT signal to stop mining
  trap(:INT) { fileminer.stop_mining }
  # start mining
  fileminer.start_mining
rescue => e
  logger.error e
  # FIX: squiggly heredoc (<<~) strips the source indentation, so the
  # usage text is no longer printed with leading whitespace.
  puts <<~EOS
    Usage:
        ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
  EOS
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# general settings
|
2
|
+
#fileminer.settings:
|
3
|
+
|
4
|
+
# default value is 30s
|
5
|
+
#refresh_files_time_trigger: 30s
|
6
|
+
|
7
|
+
# default value is 5s
|
8
|
+
#max_time_of_each_mining: 5s
|
9
|
+
|
10
|
+
# default value is -1
|
11
|
+
#max_lines_of_each_mining: -1
|
12
|
+
|
13
|
+
# default value is -1
|
14
|
+
#max_lines_of_each_file: -1
|
15
|
+
|
16
|
+
# TODO
|
17
|
+
|
18
|
+
# fileminer inputs
|
19
|
+
fileminer.inputs:
|
20
|
+
|
21
|
+
# setup the path of the registry file
|
22
|
+
# default value is /var/lib/fileminer/registry
|
23
|
+
#registry_path: /var/lib/fileminer/registry
|
24
|
+
|
25
|
+
# file paths
|
26
|
+
paths:
|
27
|
+
- /path/to/*.log
|
28
|
+
|
29
|
+
# other path
|
30
|
+
#- /other/path/*.log
|
31
|
+
|
32
|
+
# EOF seconds
|
33
|
+
# default value is 86400(1 day)
|
34
|
+
#eof_seconds: 86400
|
35
|
+
|
36
|
+
# Batch lines to read
|
37
|
+
# default value is 200
|
38
|
+
#batch_lines: 200
|
39
|
+
|
40
|
+
# Host
|
41
|
+
# default value is `Socket.gethostname`
|
42
|
+
#host: 127.0.0.1
|
43
|
+
|
44
|
+
# -- output --
|
45
|
+
|
46
|
+
# output to redis
|
47
|
+
# based on redis-rb, please make sure that redis has already been installed
|
48
|
+
# install redis: gem install redis
|
49
|
+
output.redis:
|
50
|
+
# the URI of the redis server
|
51
|
+
uri: redis://localhost:6379/0
|
52
|
+
|
53
|
+
# host of the redis server
|
54
|
+
# effective only if uri is not be set
|
55
|
+
# default value is localhost
|
56
|
+
#host: localhost
|
57
|
+
|
58
|
+
# port of the redis server
|
59
|
+
# default value is 6379
|
60
|
+
# effective only if uri is not be set
|
61
|
+
#port: 6379
|
62
|
+
|
63
|
+
# db of the redis server
|
64
|
+
# default value is 0
|
65
|
+
# effective only if uri is not be set
|
66
|
+
#db: 0
|
67
|
+
|
68
|
+
# password of the redis server
|
69
|
+
# effective only if uri is not be set
|
70
|
+
#password:
|
71
|
+
|
72
|
+
# key of the redis server
|
73
|
+
# required
|
74
|
+
key: fileminer
|
75
|
+
|
76
|
+
# output to kafka
|
77
|
+
# based on ruby-kafka, please make sure that ruby-kafka has already been installed
|
78
|
+
# install ruby-kafka: gem install ruby-kafka
|
79
|
+
#output.kafka:
|
80
|
+
# kafka brokers
|
81
|
+
# default value is ['localhost:9092']
|
82
|
+
#brokers: ['localhost:9092']
|
83
|
+
|
84
|
+
# producer client_id
|
85
|
+
# default value is fileminer
|
86
|
+
#client_id: fileminer
|
87
|
+
|
88
|
+
# kafka topic
|
89
|
+
# default value is fileminer
|
90
|
+
#topic: fileminer
|
91
|
+
|
92
|
+
# producer mode
|
93
|
+
# sync or async
|
94
|
+
# default is sync
|
95
|
+
#mode: sync
|
96
|
+
|
97
|
+
# auto delivery
|
98
|
+
# enabled or disabled
|
99
|
+
# default value is disabled
|
100
|
+
# effective only if mode is async
|
101
|
+
# when effective, at least one of delivery_threshold and delivery_interval should be set
|
102
|
+
#auto_delivery: disabled
|
103
|
+
|
104
|
+
# delivery threshold, trigger a delivery once x messages have been buffered
|
105
|
+
# effective only if mode is async and auto_deliver is enabled
|
106
|
+
#delivery_threshold: 100
|
107
|
+
|
108
|
+
# delivery interval, trigger a delivery every x seconds
|
109
|
+
# effective only if mode is async and auto_deliver is enabled
|
110
|
+
#delivery_interval: 30
|
111
|
+
|
112
|
+
# output to mysql
|
113
|
+
# based on mysql2, please make sure that mysql2 has already been installed
|
114
|
+
# install ruby-kafka: gem install mysql2
|
115
|
+
#output.mysql:
|
116
|
+
# host
|
117
|
+
# default value is localhost
|
118
|
+
#host: localhost
|
119
|
+
|
120
|
+
# port
|
121
|
+
# default value is 3306
|
122
|
+
#port: 3306
|
123
|
+
|
124
|
+
# username
|
125
|
+
#username: someuser
|
126
|
+
|
127
|
+
# password
|
128
|
+
#password: somepwd
|
129
|
+
|
130
|
+
# database name
|
131
|
+
#database: somedb
|
132
|
+
|
133
|
+
# encoding
|
134
|
+
# default value is utf8mb4
|
135
|
+
#encoding: utf8mb4
|
136
|
+
|
137
|
+
# SSL mode
|
138
|
+
# enabled or disabled
|
139
|
+
# default value is disabled
|
140
|
+
#ssl_mode: disabled
|
141
|
+
|
142
|
+
# table name
|
143
|
+
#table: sometable
|
144
|
+
|
145
|
+
# other outputs
|
146
|
+
# TODO
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
|
5
|
+
class Dir
|
6
|
+
|
7
|
+
class << self
|
8
|
+
|
9
|
+
# Creates the directory with the path given, including anynecessary but nonexistent parent directories.
|
10
|
+
#
|
11
|
+
# @param [String] path
|
12
|
+
def mkdirs(path)
|
13
|
+
parent = File.dirname path
|
14
|
+
mkdirs parent unless Dir.exist? parent
|
15
|
+
Dir.mkdir path
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
class Miner

  # Built-in defaults for the registry location and read behaviour.
  DEFAULTS = {
    registry_path: '/var/lib/fileminer/registry',
    eof_seconds: 86400,
    batch_lines: 200,
  }

  attr_reader :registry_path, :paths, :eof_seconds, :batch_lines, :files, :active_files

  # Create a new file miner instance
  #
  # @param [Hash] options
  # @option options [String] :registry_path (/var/lib/fileminer/registry)
  # @option options [Array] :paths glob patterns of the files to mine
  # @option options [Integer] :eof_seconds (86400) idle seconds after which a fully-read file is marked EOF
  # @option options [Integer] :batch_lines (200) max lines returned per read_lines call
  # @option options [String] :host (Socket.gethostname)
  def initialize(options = {})
    # fill missing options from DEFAULTS
    DEFAULTS.each { |k, v| options[k] = v unless options.key? k }
    @registry_path = options[:registry_path]
    @paths = options[:paths]
    @eof_seconds = options[:eof_seconds]
    @batch_lines = options[:batch_lines]
    @host = options[:host]
    if @host.nil?
      require 'socket'
      @host = Socket.gethostname
    end
    @files = []
    @active_files = []
    if File.exist? @registry_path
      File.open(@registry_path) { |io| @files = JSON.parse(io.read, {symbolize_names: true}) }
      @active_files = @files.select { |record| !record[:eof] }
    else
      parent_dir = File.dirname @registry_path
      Dir.mkdirs parent_dir unless Dir.exist? parent_dir
    end
  end

  # Persist all known records to the registry file as JSON.
  def save_registry
    File.open(@registry_path, 'w') { |io| io.write @files.to_json }
  end

  # Rescan the glob paths: mark records EOF when their file disappeared or
  # has been idle past eof_seconds, and register newly appeared files.
  def refresh_files
    now = Time.now
    file_paths = Set.new
    file_paths.merge Dir[*@paths].select { |path| File.file? path }
    # BUGFIX: this previously assigned to @active_file (typo), so records
    # that reached EOF or disappeared were never dropped from the active
    # list exposed via attr_reader :active_files.
    @active_files = @files.select do |record|
      path = record[:path]
      file_exists = file_paths.delete? path
      unless record[:eof]
        if file_exists
          # fully read and idle for longer than eof_seconds => EOF
          if record[:pos] == File.size(path) && now - File.mtime(path) > @eof_seconds
            record[:eof] = true
          end
        else
          # missing file, set :eof to true
          record[:eof] = true
        end
      end
      !record[:eof]
    end
    # any paths left over in the set are newly appeared files
    file_paths.each do |path|
      record = {path: path, pos: 0, eof: false}
      @files << record
      @active_files << record
    end
    @files_refresh_time = now
  end

  # Read up to batch_lines complete lines from the file of the given
  # record, starting at record[:pos]. The record itself is not mutated;
  # each returned line carries :host, :path, :pos, :end and :data.
  # (Dead `break if data.nil?` removed — IO#readline raises EOFError at
  # end of file, it never returns nil.)
  #
  # @param [Hash] record a registry record with :path and :pos
  # @return [Array<Hash>] the lines read
  def read_lines(record)
    file_path = record[:path]
    File.open file_path do |io|
      lines = []
      io.pos = record[:pos]
      while lines.size < @batch_lines
        line = {host: @host, path: file_path, pos: io.pos}
        begin
          data = io.readline
          # an incomplete trailing line is rewound and left for next round
          if data[-1] != "\n"
            io.pos = line[:pos]
            break
          end
        rescue EOFError
          break
        end
        line[:end] = io.pos
        line[:data] = data
        lines << line
      end
      lines
    end
  end

  # True when the last refresh happened at least the given number of
  # seconds ago.
  def files_need_refresh?(refresh_files_time_trigger)
    Time.now - @files_refresh_time >= refresh_files_time_trigger
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'kafka'
|
2
|
+
require 'json'
|
3
|
+
require_relative '../output'
|
4
|
+
|
5
|
+
|
6
|
+
module Output

  # Kafka output plugin based on the ruby-kafka gem.
  class KafkaPlugin < OutputPlugin

    # Create a kafka output plugin instance
    #
    # @param [Hash] options
    # @option options [Array] :brokers (['localhost:9092'])
    # @option options [String] :client_id ('fileminer')
    # @option options [String] :topic ('fileminer')
    # @option options [Symbol] :mode (:sync) :sync or :async
    # @option options [Symbol] :auto_delivery (:disabled) :disabled or :enabled
    # @option options [Hash] :delivery_conf delivery_threshold/delivery_interval for async auto delivery
    def initialize(options)
      brokers = options[:brokers] || ['localhost:9092']
      client_id = options[:client_id] || 'fileminer'
      @topic = options[:topic] || 'fileminer'
      @kafka = Kafka.new(brokers, client_id: client_id)
      case @mode = options[:mode]
      when :sync
        @producer = @kafka.producer
      when :async
        case @auto_delivery = options[:auto_delivery]
        when :disabled
          @producer = @kafka.async_producer
        when :enabled
          @producer = @kafka.async_producer options[:delivery_conf]
        else
          raise "invalid value #@auto_delivery of auto_delivery"
        end
      else
        raise "unsupported mode #@mode"
      end
    end

    # Send all lines to kafka using producer API
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be delivered
    def send_all(lines, &listener)
      lines.each do |line|
        message = line.to_json
        @producer.produce(message, topic: @topic)
      end
      # IDIOM FIX: use && instead of the low-precedence keyword `and` in a
      # boolean expression. Delivery is explicit unless the async producer
      # auto-delivers on its own.
      @producer.deliver_messages unless @mode == :async && @auto_delivery == :enabled
      listener.call
    end

    # close the kafka producer and release the client
    def close
      @producer.shutdown
      @kafka.close
    end

  end

end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require_relative '../output'
|
3
|
+
|
4
|
+
|
5
|
+
module Output

  # MySQL output plugin based on the mysql2 gem.
  class MysqlPlugin < OutputPlugin

    DEFAULT_MYSQL = {
      host: 'localhost',
      port: 3306,
      password: '',
      encoding: 'utf8mb4',
      ssl_mode: :disabled
    }

    # Create a mysql output plugin instance
    #
    # @param [Hash] options
    # @option options [String] :host ('localhost')
    # @option options [Integer] :port (3306)
    # @option options [String] :username required
    # @option options [String] :password ('')
    # @option options [String] :database required
    # @option options [String] :encoding ('utf8mb4')
    # @option options [Symbol] :ssl_mode (:disabled)
    # @option options [String] :table required
    def initialize(options)
      raise 'Missing config username on output.mysql' unless options.key? :username
      raise 'Missing config database on output.mysql' unless options.key? :database
      raise 'Missing config table on output.mysql' unless options.key? :table
      conf = DEFAULT_MYSQL.merge options
      @table = conf.delete :table
      conf[:port] = conf[:port].to_i
      conf[:password] = conf[:password].to_s
      @encoding = conf[:encoding]
      conf[:ssl_mode] = :disabled if conf[:ssl_mode] != :enabled
      @mysql = Mysql2::Client.new conf
      create_table_if_not_exists
      # batch INSERT statements keyed by batch size; the default block
      # memoizes, so a plain lookup is all that callers ever need
      @sqls = Hash.new { |hash, key| hash[key] = generate_batch_sql key }
    end

    private
    # Create the target table on first use (checked via SHOW TABLES).
    def create_table_if_not_exists
      rs = @mysql.query 'SHOW TABLES'
      tables = rs.map { |row| row.values[0] }
      unless tables.include? @table
        sql = create_table_sql
        @mysql.query sql
      end
    end

    # DDL for the line table; unique key makes re-sent lines idempotent
    # together with INSERT IGNORE.
    def create_table_sql
      <<-EOS
      CREATE TABLE `#@table` (
        `id` bigint(20) PRIMARY KEY AUTO_INCREMENT,
        `host` varchar(255) NOT NULL,
        `path` varchar(255) NOT NULL,
        `pos` bigint(20) NOT NULL,
        `end` bigint(20) NOT NULL,
        `data` text NOT NULL,
        UNIQUE KEY `UNIQUE_host_path_pos` (`host`,`path`,`pos`)
      ) ENGINE=InnoDB DEFAULT CHARSET=#@encoding
      EOS
    end

    # Build a multi-row INSERT IGNORE statement with `size` placeholders.
    def generate_batch_sql(size)
      "INSERT IGNORE INTO `#@table`(`host`,`path`,`pos`,`end`,`data`) VALUES " << (['(?,?,?,?,?)'] * size).join(',')
    end

    # SIMPLIFICATION: the @sqls default block already memoizes, so the
    # explicit key?/store dance was redundant.
    def get_batch_sql(size)
      @sqls[size]
    end

    # Send all lines to mysql
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be sent
    public
    def send_all(lines, &listener)
      values = lines.flat_map { |line| [line[:host], line[:path], line[:pos], line[:end], line[:data]] }
      sql = get_batch_sql lines.size
      @mysql.query 'BEGIN'
      begin
        stat = @mysql.prepare sql
        begin
          stat.execute(*values)
        ensure
          # LEAK FIX: close the server-side prepared statement; the
          # original left one open per batch
          stat.close if stat.respond_to? :close
        end
        @mysql.query 'COMMIT'
        listener.call
      rescue => err
        @mysql.query 'ROLLBACK'
        raise err
      end
    end

  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'redis'
|
2
|
+
require 'json'
|
3
|
+
require_relative '../output'
|
4
|
+
|
5
|
+
|
6
|
+
module Output

  # Redis output plugin based on the redis-rb gem.
  class RedisPlugin < OutputPlugin

    # Create a redis output plugin instance
    #
    # @param [Hash] options
    # @option options [String] :uri redis URI string
    # @option options [String] :host
    # @option options [Integer] :port
    # @option options [Integer] :db
    # @option options [String] :password
    # @option options [String] :key redis key
    def initialize(options)
      uri = options[:uri]
      if uri.nil?
        uri = parse_uri options
      end
      @key = options[:key]
      raise 'Missing key config on output.redis' if @key.nil?
      # prefer the native hiredis driver when it is installed
      driver = require_lib?('hiredis') ? :hiredis : :ruby
      @redis = Redis.new url: uri, driver: driver
    end

    private
    # Assemble a redis:// URI from the discrete host/port/db/password
    # options (used only when :uri is not given).
    def parse_uri(options)
      host = options[:host] || 'localhost'
      port = options[:port] || 6379
      db = options[:db] || 0
      password = options[:password]
      if password.nil?
        "redis://#{host}:#{port}/#{db}"
      else
        "redis://:#{password}@#{host}:#{port}/#{db}"
      end
    end

    # True when the named library can be loaded.
    #
    # BUGFIX: Kernel#require returns false when the feature was ALREADY
    # loaded, so returning its value directly wrongly reported an
    # available hiredis as missing. Return true explicitly on success.
    def require_lib?(name)
      require name
      true
    rescue LoadError
      false
    end


    # Send all lines to redis using LPUSH @key
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines just be sent
    public
    def send_all(lines, &listener)
      messages = lines.map { |line| line.to_json }
      @redis.lpush @key, messages
      listener.call
    end

  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative 'output'
|
data/lib/fileminer.rb
ADDED
@@ -0,0 +1,243 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
require 'logger'
|
5
|
+
require_relative 'fileminer/miner'
|
6
|
+
require_relative 'fileminer/plugins'
|
7
|
+
|
8
|
+
FILEMINER_SETTINGS = 'fileminer.settings'
|
9
|
+
FILEMINER_INPUTS = 'fileminer.inputs'
|
10
|
+
|
11
|
+
|
12
|
+
class Hash

  # Returns a copy of this hash with every key converted to a Symbol;
  # the receiver is left untouched.
  def keys_to_sym
    each_with_object({}) { |(key, value), acc| acc[key.to_sym] = value }
  end

  # Converts the keys of this hash to Symbols in place and returns self.
  def keys_to_sym!
    symbolized = keys_to_sym
    clear
    merge! symbolized
  end

end
|
25
|
+
|
26
|
+
|
27
|
+
class FileMiner

  DEFAULT_SETTINGS = {
    refresh_files_time_trigger: '30s',
    max_time_of_each_mining: '5s',
    max_lines_of_each_mining: -1,
    max_lines_of_each_file: -1,
  }

  attr_reader :miner, :output, :running

  # Create a new FileMiner instance
  #
  # @param [Hash] conf full configuration hash parsed from the YAML file
  def initialize(conf)
    # CONSISTENCY: use the file-level FILEMINER_* key constants instead of
    # repeating the string literals (messages are unchanged).
    init_settings conf[FILEMINER_SETTINGS]
    @output = init_output conf
    raise "Missing config #{FILEMINER_INPUTS}" unless conf.key? FILEMINER_INPUTS
    @miner = Miner.new conf[FILEMINER_INPUTS].keys_to_sym
    @miner.refresh_files
    @miner.save_registry
    @running = false
  end

  private
  # Merge user settings over DEFAULT_SETTINGS and precompute the break
  # trigger lambdas used by the mining loop.
  def init_settings(conf)
    if conf.nil?
      conf = DEFAULT_SETTINGS.clone
    else
      conf = DEFAULT_SETTINGS.merge conf.keys_to_sym
    end
    # default logger to stderr
    # TODO make logger configurable in future
    @logger = Logger.new STDERR
    @logger.level = Logger::WARN
    # mining break trigger: stop a round on elapsed time and, optionally,
    # on a total line budget
    max_time_of_each_mining = parse_time conf[:max_time_of_each_mining], 'max_time_of_each_mining on fileminer.settings'
    max_lines_of_each_mining = conf[:max_lines_of_each_mining]
    if max_lines_of_each_mining >= 0
      @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining || lines >= max_lines_of_each_mining }
    else
      @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining }
    end
    # file break trigger: leave a file when a short batch was read and,
    # optionally, when a per-file line budget was reached
    max_lines_of_each_file = conf[:max_lines_of_each_file]
    if max_lines_of_each_file >= 0
      @file_break_trigger = lambda { |lines| lines < @miner.batch_lines || lines >= max_lines_of_each_file }
    else
      @file_break_trigger = lambda { |lines| lines < @miner.batch_lines }
    end
    # minimum interval between directory rescans
    @refresh_files_time_trigger = parse_time conf[:refresh_files_time_trigger], 'refresh_files_time_trigger on fileminer.settings'
  end

  # Parse a duration string such as "30s", "5min" or "1d" into seconds.
  #
  # @param [String] value e.g. '30s', '500ms'
  # @param [String] conf_name setting name, used in error messages
  # @return [Numeric] seconds (a Float for 'ms' values)
  # @raise [RuntimeError] on an unknown unit or malformed value
  def parse_time(value, conf_name)
    if /^(\d+)(\w+)$/ =~ value
      num = $1.to_i
      unit = $2
      case unit
      when 'd'
        num * 86400
      when 'h'
        num * 3600
      when 'min'
        num * 60
      when 's'
        num
      when 'ms'
        num.to_f / 1000
      else
        raise "Unsupported time unit '#{unit}' of #{conf_name}"
      end
    else
      raise "Error format '#{value}' of #{conf_name}"
    end
  end

  def mining_break?(start_time, lines)
    @mining_break_trigger.call start_time, lines
  end

  def file_break?(lines)
    @file_break_trigger.call lines
  end

  # Pick the configured output section and build the matching plugin.
  def init_output(conf)
    case
    when conf.key?('output.redis')
      redis_conf = conf['output.redis'].keys_to_sym
      init_output_redis redis_conf
    when conf.key?('output.kafka')
      kafka_conf = conf['output.kafka'].keys_to_sym
      init_output_kafka kafka_conf
    when conf.key?('output.mysql')
      mysql_conf = conf['output.mysql'].keys_to_sym
      init_output_mysql mysql_conf
    else
      raise 'Missing config for output'
    end
  end

  def init_output_redis(redis_conf)
    require_relative 'fileminer/output/redis'
    Output::RedisPlugin.new redis_conf
  end

  # Normalize kafka string settings to the symbols the plugin expects and
  # collect the async auto-delivery sub-config.
  def init_output_kafka(kafka_conf)
    require_relative 'fileminer/output/kafka'
    kafka_conf[:mode] = kafka_conf[:mode] == 'async' ? :async : :sync
    if kafka_conf[:mode] == :async
      kafka_conf[:auto_delivery] = kafka_conf[:auto_delivery] == 'enabled' ? :enabled : :disabled
      if kafka_conf[:auto_delivery] == :enabled
        delivery_threshold = kafka_conf.delete :delivery_threshold
        delivery_interval = kafka_conf.delete :delivery_interval
        raise 'Missing conf delivery_threshold or delivery_interval' if delivery_threshold.nil? && delivery_interval.nil?
        kafka_conf[:delivery_conf] = delivery_conf = Hash.new
        delivery_conf[:delivery_threshold] = delivery_threshold unless delivery_threshold.nil?
        delivery_conf[:delivery_interval] = delivery_interval unless delivery_interval.nil?
      end
    end
    Output::KafkaPlugin.new kafka_conf
  end

  def init_output_mysql(mysql_conf)
    require_relative 'fileminer/output/mysql'
    mysql_conf[:ssl_mode] = mysql_conf[:ssl_mode] == 'enabled' ? :enabled : :disabled
    Output::MysqlPlugin.new mysql_conf
  end

  # Hand a batch of lines to the output plugin; advance and persist the
  # registry position once the plugin confirms delivery via the listener.
  def send_lines(record, lines)
    if @output.batch?
      @output.send_all lines do
        record[:pos] = lines[-1][:end]
        @miner.save_registry
      end
    else
      lines.each do |line|
        @output.send line do
          record[:pos] = line[:end]
          @miner.save_registry
        end
      end
    end
  end

  public
  # Run one mining round over all active files.
  #
  # @return [Integer] total number of lines sent in this round
  def mine_once
    start_time = Time.now
    full_lines = 0
    @miner.active_files.all? do |record|
      mining_next = true
      if record[:pos] < File.size(record[:path])
        file_lines = 0
        loop do
          lines = @miner.read_lines record
          break if lines.empty?
          send_lines record, lines
          file_lines += lines.size
          full_lines += lines.size
          if mining_break? start_time, full_lines
            mining_next = false
            break
          end
          break if file_break? file_lines
        end
      end
      mining_next
    end
    full_lines
  end

  # Mining loop: refresh inputs, mine, sleep when idle, until stop_mining.
  def start_mining
    unless @running
      @running = true
      while @running
        begin
          files_refreshed = check_files
          sent_lines = mine_once
          # sleep 5 seconds if no more data
          # TODO using settings instead in future
          if sent_lines == 0
            @miner.save_registry if files_refreshed
            sleep 5
          end
        rescue => e
          @logger.error e
          # sleep for a little while to wait output recover
          sleep 5 if @running
        end
      end
      @miner.save_registry
    end
  end

  # Rescan the input paths when the refresh interval has elapsed.
  def check_files
    if @miner.files_need_refresh? @refresh_files_time_trigger
      @miner.refresh_files
    end
  end

  # Ask the mining loop to stop after the current round.
  def stop_mining
    @running = false if @running
  end

end
|
232
|
+
|
233
|
+
|
234
|
+
if __FILE__ == $0
  # Usage:
  #   ruby fileminer.rb /etc/fileminer/fileminer.yml
  require 'yaml'
  conf = YAML.load File.open(ARGV[0]) { |io| io.read }
  fileminer = FileMiner.new conf
  # stop gracefully on Ctrl-C
  trap(:INT) { fileminer.stop_mining }
  fileminer.start_mining
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fileminer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0RC1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fang MinJie
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-01-17 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple file/log transfer tool coding by ruby.
|
14
|
+
email:
|
15
|
+
- fmjsjx@163.com
|
16
|
+
executables:
|
17
|
+
- fileminer
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- bin/fileminer
|
24
|
+
- conf/fileminer_default.yml
|
25
|
+
- lib/fileminer.rb
|
26
|
+
- lib/fileminer/miner.rb
|
27
|
+
- lib/fileminer/output.rb
|
28
|
+
- lib/fileminer/output/kafka.rb
|
29
|
+
- lib/fileminer/output/mysql.rb
|
30
|
+
- lib/fileminer/output/redis.rb
|
31
|
+
- lib/fileminer/plugins.rb
|
32
|
+
- lib/fileminer/version.rb
|
33
|
+
homepage: https://github.com/fmjsjx/fileminer
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.3.1
|
51
|
+
requirements: []
|
52
|
+
rubygems_version: 3.0.1
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: A simple file/log transfer tool coding by ruby.
|
56
|
+
test_files: []
|