fileminer 1.0.0RC1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +2 -0
- data/bin/fileminer +31 -0
- data/conf/fileminer_default.yml +146 -0
- data/lib/fileminer/miner.rb +128 -0
- data/lib/fileminer/output/kafka.rb +62 -0
- data/lib/fileminer/output/mysql.rb +101 -0
- data/lib/fileminer/output/redis.rb +63 -0
- data/lib/fileminer/output.rb +17 -0
- data/lib/fileminer/plugins.rb +1 -0
- data/lib/fileminer/version.rb +5 -0
- data/lib/fileminer.rb +243 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7237a574207f6cb4913c5be6f2ef70dda7c735ffbf786fba9bfaf3941366fd88
|
4
|
+
data.tar.gz: 9e2a23b7b2a49fb78af7ae1ba969f562df855196b5d1aa066b84063e11611afe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a47639237446113e7dcbf9dd5f79b7008ea102cb3993597743555714fe1ff92e6bb268deee20b8e0ef63c15618e4d745191e6a116333105a743f8ab0da5b32d9
|
7
|
+
data.tar.gz: e60806ee50da1f42d619c9fd01febb239790cb1ebc05938fa7d8634ec52e4da4d17ac3e3d316b8e2f184d45676cfc4a7bea8079e49e5cf5f215e33c9dcfd479a
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2018 fmjsjx
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
data/bin/fileminer
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
require 'logger'
|
5
|
+
require 'yaml'
|
6
|
+
require 'fileminer'
|
7
|
+
|
8
|
+
|
9
|
+
logger = Logger.new STDERR

# Usage:
#   ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
usage = <<-EOS
Usage:
    ruby -Ilib ./bin/fileminer /path/to/fileminer.yml
EOS

# The configuration file path is the only argument; without it there is
# nothing to load, so show the usage and report failure to the shell.
if ARGV.empty?
  puts usage
  exit 1
end

begin
  conf = YAML.load File.read(ARGV[0])
  # create fileminer instance
  fileminer = FileMiner.new conf

  # trap INT signal to stop mining
  trap(:INT) { fileminer.stop_mining }

  # start mining
  fileminer.start_mining
rescue => e
  logger.error e
  puts usage
  # exit non-zero so that callers can detect the failure
  exit 1
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# general settings
|
2
|
+
#fileminer.settings:
|
3
|
+
|
4
|
+
# default value is 30s
|
5
|
+
#refresh_files_time_trigger: 30s
|
6
|
+
|
7
|
+
# default value is 5s
|
8
|
+
#max_time_of_each_mining: 5s
|
9
|
+
|
10
|
+
# default value is -1
|
11
|
+
#max_lines_of_each_mining: -1
|
12
|
+
|
13
|
+
# default value is -1
|
14
|
+
#max_lines_of_each_file: -1
|
15
|
+
|
16
|
+
# TODO
|
17
|
+
|
18
|
+
# fileminer inputs
|
19
|
+
fileminer.inputs:
|
20
|
+
|
21
|
+
# setup the path of the registry file
|
22
|
+
# default value is /var/lib/fileminer/registry
|
23
|
+
#registry_path: /var/lib/fileminer/registry
|
24
|
+
|
25
|
+
# file paths
|
26
|
+
paths:
|
27
|
+
- /path/to/*.log
|
28
|
+
|
29
|
+
# other path
|
30
|
+
#- /other/path/*.log
|
31
|
+
|
32
|
+
# EOF seconds
|
33
|
+
# default value is 86400(1 day)
|
34
|
+
#eof_seconds: 86400
|
35
|
+
|
36
|
+
# Batch lines to read
|
37
|
+
# default value is 200
|
38
|
+
#batch_lines: 200
|
39
|
+
|
40
|
+
# Host
|
41
|
+
# default value is `Socket.gethostname`
|
42
|
+
#host: 127.0.0.1
|
43
|
+
|
44
|
+
# -- output --
|
45
|
+
|
46
|
+
# output to redis
|
47
|
+
# based on redis-rb, please make sure that redis has already been installed
|
48
|
+
# install redis: gem install redis
|
49
|
+
output.redis:
|
50
|
+
# the URI of the redis server
|
51
|
+
uri: redis://localhost:6379/0
|
52
|
+
|
53
|
+
# host of the redis server
|
54
|
+
# effective only if uri is not set
|
55
|
+
# default value is localhost
|
56
|
+
#host: localhost
|
57
|
+
|
58
|
+
# port of the redis server
|
59
|
+
# default value is 6379
|
60
|
+
# effective only if uri is not set
|
61
|
+
#port: 6379
|
62
|
+
|
63
|
+
# db of the redis server
|
64
|
+
# default value is 0
|
65
|
+
# effective only if uri is not set
|
66
|
+
#db: 0
|
67
|
+
|
68
|
+
# password of the redis server
|
69
|
+
# effective only if uri is not set
|
70
|
+
#password:
|
71
|
+
|
72
|
+
# key of the redis server
|
73
|
+
# required
|
74
|
+
key: fileminer
|
75
|
+
|
76
|
+
# output to kafka
|
77
|
+
# based on ruby-kafka, please make sure that ruby-kafka has already been installed
|
78
|
+
# install ruby-kafka: gem install ruby-kafka
|
79
|
+
#output.kafka:
|
80
|
+
# kafka brokers
|
81
|
+
# default value is ['localhost:9092']
|
82
|
+
#brokers: ['localhost:9092']
|
83
|
+
|
84
|
+
# producer client_id
|
85
|
+
# default value is fileminer
|
86
|
+
#client_id: fileminer
|
87
|
+
|
88
|
+
# kafka topic
|
89
|
+
# default value is fileminer
|
90
|
+
#topic: fileminer
|
91
|
+
|
92
|
+
# producer mode
|
93
|
+
# sync or async
|
94
|
+
# default is sync
|
95
|
+
#mode: sync
|
96
|
+
|
97
|
+
# auto delivery
|
98
|
+
# enabled or disabled
|
99
|
+
# default value is disabled
|
100
|
+
# effective only if mode is async
|
101
|
+
# when effective, at least one of delivery_threshold and delivery_interval should be set
|
102
|
+
#auto_delivery: disabled
|
103
|
+
|
104
|
+
# delivery threshold, trigger a delivery once x messages have been buffered
|
105
|
+
# effective only if mode is async and auto_delivery is enabled
|
106
|
+
#delivery_threshold: 100
|
107
|
+
|
108
|
+
# delivery interval, trigger a delivery every x seconds
|
109
|
+
# effective only if mode is async and auto_delivery is enabled
|
110
|
+
#delivery_interval: 30
|
111
|
+
|
112
|
+
# output to mysql
|
113
|
+
# based on mysql2, please make sure that mysql2 has already been installed
|
114
|
+
# install mysql2: gem install mysql2
|
115
|
+
#output.mysql:
|
116
|
+
# host
|
117
|
+
# default value is localhost
|
118
|
+
#host: localhost
|
119
|
+
|
120
|
+
# port
|
121
|
+
# default value is 3306
|
122
|
+
#port: 3306
|
123
|
+
|
124
|
+
# username
|
125
|
+
#username: someuser
|
126
|
+
|
127
|
+
# password
|
128
|
+
#password: somepwd
|
129
|
+
|
130
|
+
# database name
|
131
|
+
#database: somedb
|
132
|
+
|
133
|
+
# encoding
|
134
|
+
# default value is utf8mb4
|
135
|
+
#encoding: utf8mb4
|
136
|
+
|
137
|
+
# SSL mode
|
138
|
+
# enabled or disabled
|
139
|
+
# default value is disabled
|
140
|
+
#ssl_mode: disabled
|
141
|
+
|
142
|
+
# table name
|
143
|
+
#table: sometable
|
144
|
+
|
145
|
+
# other outputs
|
146
|
+
# TODO
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
|
5
|
+
class Dir

  class << self

    # Creates the directory with the path given, including any necessary but
    # nonexistent parent directories.
    #
    # Does nothing when the directory already exists, so the call can be
    # repeated safely.
    #
    # @param [String] path
    # @return [Integer] 0, the same value Dir.mkdir returns
    # @raise [SystemCallError] if a directory cannot be created
    def mkdirs(path)
      return 0 if Dir.exist? path
      parent = File.dirname path
      mkdirs parent unless Dir.exist? parent
      begin
        Dir.mkdir path
      rescue Errno::EEXIST
        # The path appeared between the existence check and mkdir; that is
        # only acceptable when it really is a directory.
        raise unless Dir.exist? path
        0
      end
    end

  end

end
|
21
|
+
|
22
|
+
|
23
|
+
# Tracks a set of files matched by glob patterns, remembers how far each one
# has been read in a JSON registry file, and reads new complete lines from them.
class Miner

  DEFAULTS = {
    registry_path: '/var/lib/fileminer/registry',
    eof_seconds: 86400,
    batch_lines: 200,
  }

  attr_reader :registry_path, :paths, :eof_seconds, :batch_lines, :files, :active_files

  # Create a new file miner instance
  #
  # Loads the registry when the registry file exists; otherwise makes sure
  # the directory that will hold it exists.
  #
  # @param [Hash] options
  # @option options [String] :registry_path (/var/lib/fileminer/registry)
  # @option options [Array] :paths glob patterns of the files to mine
  # @option options [Integer] :eof_seconds (86400)
  # @option options [Integer] :batch_lines (200)
  # @option options [String] :host (Socket.gethostname)
  def initialize(options = {})
    # fill missing options from DEFAULTS without touching the caller's hash
    options = DEFAULTS.merge options
    @registry_path = options[:registry_path]
    @paths = options[:paths]
    @eof_seconds = options[:eof_seconds]
    @batch_lines = options[:batch_lines]
    @host = options[:host]
    if @host.nil?
      require 'socket'
      @host = Socket.gethostname
    end
    @files = []
    @active_files = []
    # No refresh has happened yet; the epoch makes files_need_refresh?
    # answer true instead of failing on nil.
    @files_refresh_time = Time.at(0)
    if File.exist? @registry_path
      @files = JSON.parse(File.read(@registry_path), symbolize_names: true)
      @active_files = @files.reject { |record| record[:eof] }
    else
      parent_dir = File.dirname @registry_path
      Dir.mkdirs parent_dir unless Dir.exist? parent_dir
    end
  end

  # Save registry
  #
  # Writes all file records as JSON to the registry path.
  def save_registry
    File.write @registry_path, @files.to_json
  end

  # Refresh
  #
  # Re-evaluates the glob patterns, marks records as EOF when their file is
  # missing or has been fully read and not modified for more than
  # eof_seconds, rebuilds the list of active records and registers newly
  # found files starting at position 0.
  def refresh_files
    now = Time.now
    file_paths = Set.new(Dir[*@paths].select { |path| File.file? path })
    @active_files = @files.select do |record|
      path = record[:path]
      # delete? removes known paths, so only new files remain afterwards
      file_exists = file_paths.delete? path
      unless record[:eof]
        if file_exists
          # check if EOF
          fully_read = record[:pos] == File.size(path)
          record[:eof] = true if fully_read && now - File.mtime(path) > @eof_seconds
        else
          # missing file, set :eof to true
          record[:eof] = true
        end
      end
      !record[:eof]
    end
    file_paths.each do |path|
      record = {path: path, pos: 0, eof: false}
      @files << record
      @active_files << record
    end
    @files_refresh_time = now
  end

  # Read lines
  #
  # Reads up to batch_lines complete lines starting at the record's
  # position. A trailing line without a newline terminator is not returned.
  # The record itself is not modified.
  #
  # @param [Hash] record a file record with :path and :pos
  # @return [Array<Hash>] lines with :host, :path, :pos, :end and :data
  def read_lines(record)
    file_path = record[:path]
    File.open file_path do |io|
      lines = []
      io.pos = record[:pos]
      while lines.size < @batch_lines
        pos = io.pos
        data = io.gets
        # stop at end of file or at a line that is still being written
        break if data.nil? || !data.end_with?("\n")
        lines << {host: @host, path: file_path, pos: pos, end: io.pos, data: data}
      end
      lines
    end
  end

  # Tells whether at least refresh_files_time_trigger seconds have passed
  # since the last refresh.
  #
  # @param [Numeric] refresh_files_time_trigger seconds
  def files_need_refresh?(refresh_files_time_trigger)
    Time.now - @files_refresh_time >= refresh_files_time_trigger
  end

end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'kafka'
|
2
|
+
require 'json'
|
3
|
+
require_relative '../output'
|
4
|
+
|
5
|
+
|
6
|
+
module Output

  # Output plugin that publishes every line as a JSON message to a kafka topic.
  class KafkaPlugin < OutputPlugin

    # Create a kafka output plugin instance
    #
    # @param [Hash] options
    # @option options [Array] :brokers (['localhost:9092'])
    # @option options [String] :client_id ('fileminer')
    # @option options [String] :topic ('fileminer')
    # @option options [Symbol] :mode (:sync) :sync or :async
    # @option options [Symbol] :auto_delivery (:disabled) :disabled or :enabled,
    #   only used when mode is :async
    # @option options [Hash] :delivery_conf options passed to the async
    #   producer when auto delivery is enabled
    # @raise [RuntimeError] if mode or auto_delivery has an unsupported value
    def initialize(options)
      brokers = options[:brokers] || ['localhost:9092']
      client_id = options[:client_id] || 'fileminer'
      @topic = options[:topic] || 'fileminer'
      @kafka = Kafka.new(brokers, client_id: client_id)
      # apply the documented defaults when the options are absent
      case @mode = options[:mode] || :sync
      when :sync
        @producer = @kafka.producer
      when :async
        case @auto_delivery = options[:auto_delivery] || :disabled
        when :disabled
          @producer = @kafka.async_producer
        when :enabled
          # expand the hash into keyword arguments; passing it positionally
          # is rejected by keyword-only methods on Ruby 3
          @producer = @kafka.async_producer(**(options[:delivery_conf] || {}))
        else
          raise "invalid value #@auto_delivery of auto_delivery"
        end
      else
        raise "unsupported mode #@mode"
      end
    end

    # Send all lines to kafka using producer API
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines have been handed to the producer
    def send_all(lines, &listener)
      lines.each do |line|
        @producer.produce(line.to_json, topic: @topic)
      end
      # with auto delivery the async producer decides when to deliver
      auto_delivered = @mode == :async && @auto_delivery == :enabled
      @producer.deliver_messages unless auto_delivered
      listener.call if listener
    end

    # close the kafka producer
    def close
      @producer.shutdown
      @kafka.close
    end

  end

end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
require_relative '../output'
|
3
|
+
|
4
|
+
|
5
|
+
module Output

  # Output plugin that inserts lines in batches into a mysql table.
  class MysqlPlugin < OutputPlugin

    DEFAULT_MYSQL = {
      host: 'localhost',
      port: 3306,
      password: '',
      encoding: 'utf8mb4',
      ssl_mode: :disabled
    }

    # Create a mysql output plugin instance
    #
    # Connects to the server and creates the target table when it does not
    # exist yet.
    #
    # @param [Hash] options
    # @option options [String] :host ('localhost')
    # @option options [Integer] :port (3306)
    # @option options [String] :username
    # @option options [String] :password ('')
    # @option options [String] :database
    # @option options [String] :encoding ('utf8mb4')
    # @option options [Symbol] :ssl_mode (:disabled)
    # @option options [String] :table
    # @raise [RuntimeError] if username, database or table is missing
    def initialize(options)
      raise 'Missing config username on output.mysql' unless options.key? :username
      raise 'Missing config database on output.mysql' unless options.key? :database
      raise 'Missing config table on output.mysql' unless options.key? :table
      conf = DEFAULT_MYSQL.merge options
      # :table is not a connection option, keep it out of the client config
      @table = conf.delete :table
      conf[:port] = conf[:port].to_i
      conf[:password] = conf[:password].to_s
      @encoding = conf[:encoding]
      conf[:ssl_mode] = :disabled if conf[:ssl_mode] != :enabled
      @mysql = Mysql2::Client.new conf
      create_table_if_not_exists
      # INSERT statements memoized by batch size
      @sqls = Hash.new { |hash, key| hash[key] = generate_batch_sql key }
    end

    # Send all lines to mysql
    #
    # Inserts the whole batch inside one transaction; the transaction is
    # rolled back and the error re-raised when anything fails.
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines have been committed
    def send_all(lines, &listener)
      values = lines.flat_map { |line| [line[:host], line[:path], line[:pos], line[:end], line[:data]] }
      sql = get_batch_sql lines.size
      @mysql.query 'BEGIN'
      begin
        stat = @mysql.prepare sql
        begin
          stat.execute(*values)
        ensure
          # release the prepared statement, a new one is created per batch
          stat.close if stat.respond_to? :close
        end
        @mysql.query 'COMMIT'
        listener.call if listener
      rescue
        @mysql.query 'ROLLBACK'
        raise
      end
    end

    private

    # Creates the target table unless SHOW TABLES already lists it.
    def create_table_if_not_exists
      rs = @mysql.query 'SHOW TABLES'
      tables = rs.map { |row| row.values[0] }
      @mysql.query create_table_sql unless tables.include? @table
    end

    # @return [String] the CREATE TABLE statement for the target table
    def create_table_sql
      <<-EOS
      CREATE TABLE `#@table` (
      `id` bigint(20) PRIMARY KEY AUTO_INCREMENT,
      `host` varchar(255) NOT NULL,
      `path` varchar(255) NOT NULL,
      `pos` bigint(20) NOT NULL,
      `end` bigint(20) NOT NULL,
      `data` text NOT NULL,
      UNIQUE KEY `UNIQUE_host_path_pos` (`host`,`path`,`pos`)
      ) ENGINE=InnoDB DEFAULT CHARSET=#@encoding
      EOS
    end

    # @param [Integer] size number of rows in the batch
    # @return [String] an INSERT IGNORE statement with one placeholder group per row
    def generate_batch_sql(size)
      "INSERT IGNORE INTO `#@table`(`host`,`path`,`pos`,`end`,`data`) VALUES " << (['(?,?,?,?,?)'] * size).join(',')
    end

    # @param [Integer] size number of rows in the batch
    # @return [String] the memoized INSERT statement for that batch size
    def get_batch_sql(size)
      # the default block of @sqls generates and stores missing entries
      @sqls[size]
    end

  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'redis'
|
2
|
+
require 'json'
|
3
|
+
require_relative '../output'
|
4
|
+
|
5
|
+
|
6
|
+
module Output

  # Output plugin that pushes every line as a JSON message onto a redis list.
  class RedisPlugin < OutputPlugin

    # Create a redis output plugin instance
    #
    # The connection URI is taken from :uri, or built from :host, :port,
    # :db and :password when :uri is absent. The hiredis driver is used
    # when the hiredis library can be loaded.
    #
    # @param [Hash] options
    # @option options [String] :uri redis URI string
    # @option options [String] :host ('localhost')
    # @option options [Integer] :port (6379)
    # @option options [Integer] :db (0)
    # @option options [String] :password
    # @option options [String] :key redis key
    # @raise [RuntimeError] if :key is missing
    def initialize(options)
      uri = options[:uri]
      uri = parse_uri options if uri.nil?
      @key = options[:key]
      raise 'Missing key config on output.redis' if @key.nil?
      driver = require_lib?('hiredis') ? :hiredis : :ruby
      @redis = Redis.new url: uri, driver: driver
    end

    # Send all lines to redis using LPUSH @key
    #
    # @param [Array] lines
    # @yield a listener to be called after all lines have been sent
    def send_all(lines, &listener)
      messages = lines.map { |line| line.to_json }
      @redis.lpush @key, messages
      listener.call if listener
    end

    private

    # Builds a redis URI from the separate connection options.
    #
    # @param [Hash] options
    # @return [String]
    def parse_uri(options)
      host = options[:host] || 'localhost'
      port = options[:port] || 6379
      db = options[:db] || 0
      password = options[:password]
      if password.nil?
        "redis://#{host}:#{port}/#{db}"
      else
        "redis://:#{password}@#{host}:#{port}/#{db}"
      end
    end

    # Tells whether the library can be loaded.
    #
    # @param [String] name
    # @return [Boolean]
    def require_lib?(name)
      require name
      # require returns false when the library is already loaded, which
      # still means it is available
      true
    rescue LoadError
      false
    end

  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative 'output'
|
data/lib/fileminer.rb
ADDED
@@ -0,0 +1,243 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
require 'logger'
|
5
|
+
require_relative 'fileminer/miner'
|
6
|
+
require_relative 'fileminer/plugins'
|
7
|
+
|
8
|
+
FILEMINER_SETTINGS = 'fileminer.settings'
|
9
|
+
FILEMINER_INPUTS = 'fileminer.inputs'
|
10
|
+
|
11
|
+
|
12
|
+
class Hash

  # Returns a new hash whose keys are converted to symbols.
  #
  # Keys that cannot be converted (they do not respond to to_sym, e.g.
  # integers) are kept as they are.
  #
  # @return [Hash]
  def keys_to_sym
    map { |k, v| [k.respond_to?(:to_sym) ? k.to_sym : k, v] }.to_h
  end

  # Converts the keys of this hash to symbols in place.
  #
  # @return [Hash] self
  def keys_to_sym!
    new_hash = keys_to_sym
    clear
    merge! new_hash
  end

end
|
25
|
+
|
26
|
+
|
27
|
+
# Reads new lines from the configured input files and forwards them to the
# configured output plugin.
class FileMiner

  DEFAULT_SETTINGS = {
    refresh_files_time_trigger: '30s',
    max_time_of_each_mining: '5s',
    max_lines_of_each_mining: -1,
    max_lines_of_each_file: -1,
  }

  attr_reader :miner, :output, :running

  # Create a new FileMiner instance
  #
  # @param [Hash] conf configuration with string keys 'fileminer.settings'
  #   (optional), 'fileminer.inputs' and one of 'output.redis',
  #   'output.kafka' or 'output.mysql'
  # @raise [RuntimeError] if the inputs or the output configuration is missing
  def initialize(conf)
    init_settings conf['fileminer.settings']
    @output = init_output conf
    raise 'Missing config fileminer.inputs' unless conf.key? 'fileminer.inputs'
    @miner = Miner.new conf['fileminer.inputs'].keys_to_sym
    @miner.refresh_files
    @miner.save_registry
    @running = false
  end

  # Goes once through the active files and sends their new lines to the output.
  #
  # @return [Integer] the number of lines sent
  def mine_once
    start_time = Time.now
    full_lines = 0
    # all? stops at the first file for which the block returns false,
    # which happens once the mining break trigger fires
    @miner.active_files.all? do |record|
      mining_next = true
      if record[:pos] < File.size(record[:path])
        file_lines = 0
        loop do
          lines = @miner.read_lines record
          break if lines.empty?
          send_lines record, lines
          file_lines += lines.size
          full_lines += lines.size
          if mining_break? start_time, full_lines
            mining_next = false
            break
          end
          break if file_break? file_lines
        end
      end
      mining_next
    end
    full_lines
  end

  # Mines in a loop until stop_mining is called, then saves the registry.
  # Does nothing when mining is already running.
  def start_mining
    return if @running
    @running = true
    while @running
      begin
        files_refreshed = check_files
        sent_lines = mine_once
        # sleep 5 seconds if no more data
        # TODO using settings instead in future
        if sent_lines == 0
          @miner.save_registry if files_refreshed
          sleep 5
        end
      rescue => e
        @logger.error e
        # sleep for a little while to wait output recover
        sleep 5 if @running
      end
    end
    @miner.save_registry
  end

  # Refreshes the file list when the refresh interval has elapsed.
  #
  # @return [Time, nil] a truthy value when the files were refreshed
  def check_files
    @miner.refresh_files if @miner.files_need_refresh? @refresh_files_time_trigger
  end

  # Asks the mining loop to stop after the current iteration.
  def stop_mining
    @running = false if @running
  end

  private

  # Sets up the logger and the break triggers from the general settings.
  #
  # @param [Hash, nil] conf the 'fileminer.settings' section
  def init_settings(conf)
    conf = conf.nil? ? DEFAULT_SETTINGS.clone : DEFAULT_SETTINGS.merge(conf.keys_to_sym)
    # default logger to stderr
    # TODO make logger configurable in future
    @logger = Logger.new STDERR
    @logger.level = Logger::WARN
    # mining break trigger
    max_time_of_each_mining = parse_time conf[:max_time_of_each_mining], 'max_time_of_each_mining on fileminer.settings'
    max_lines_of_each_mining = conf[:max_lines_of_each_mining]
    if max_lines_of_each_mining >= 0
      @mining_break_trigger = lambda { |start_time, lines| Time.now - start_time > max_time_of_each_mining || lines >= max_lines_of_each_mining }
    else
      # a negative value disables the line limit
      @mining_break_trigger = lambda { |start_time, _lines| Time.now - start_time > max_time_of_each_mining }
    end
    # file break trigger
    max_lines_of_each_file = conf[:max_lines_of_each_file]
    if max_lines_of_each_file >= 0
      @file_break_trigger = lambda { |lines| lines < @miner.batch_lines || lines >= max_lines_of_each_file }
    else
      # a negative value disables the line limit
      @file_break_trigger = lambda { |lines| lines < @miner.batch_lines }
    end
    # refresh_files_time_trigger
    @refresh_files_time_trigger = parse_time conf[:refresh_files_time_trigger], 'refresh_files_time_trigger on fileminer.settings'
  end

  # Converts a duration setting into seconds.
  #
  # Accepts a number followed by one of the units d, h, min, s or ms
  # (e.g. '30s'); a plain number is taken as seconds.
  #
  # @param [String, Numeric] value
  # @param [String] conf_name name of the setting, used in error messages
  # @return [Numeric] seconds
  # @raise [RuntimeError] if the format or the unit is not supported
  def parse_time(value, conf_name)
    # YAML yields numbers for unquoted numeric values
    return value if value.is_a? Numeric
    match = /\A(\d+)([A-Za-z]+)\z/.match value.to_s.strip
    raise "Error format '#{value}' of #{conf_name}" if match.nil?
    num = match[1].to_i
    unit = match[2]
    case unit
    when 'd'
      num * 86400
    when 'h'
      num * 3600
    when 'min'
      num * 60
    when 's'
      num
    when 'ms'
      num.to_f / 1000
    else
      raise "Unsupported time unit '#{unit}' of #{conf_name}"
    end
  end

  # @return [Boolean] whether the current mining round should end
  def mining_break?(start_time, lines)
    @mining_break_trigger.call start_time, lines
  end

  # @return [Boolean] whether mining of the current file should end
  def file_break?(lines)
    @file_break_trigger.call lines
  end

  # Creates the output plugin for the first configured output, checked in
  # the order redis, kafka, mysql.
  #
  # @param [Hash] conf the whole configuration
  # @raise [RuntimeError] if no output is configured
  def init_output(conf)
    if conf.key?('output.redis')
      init_output_redis conf['output.redis'].keys_to_sym
    elsif conf.key?('output.kafka')
      init_output_kafka conf['output.kafka'].keys_to_sym
    elsif conf.key?('output.mysql')
      init_output_mysql conf['output.mysql'].keys_to_sym
    else
      raise 'Missing config for output'
    end
  end

  def init_output_redis(redis_conf)
    require_relative 'fileminer/output/redis'
    Output::RedisPlugin.new redis_conf
  end

  # Normalizes mode and auto_delivery to symbols and collects the delivery
  # settings into :delivery_conf before creating the plugin.
  def init_output_kafka(kafka_conf)
    require_relative 'fileminer/output/kafka'
    kafka_conf[:mode] = kafka_conf[:mode] == 'async' ? :async : :sync
    if kafka_conf[:mode] == :async
      kafka_conf[:auto_delivery] = kafka_conf[:auto_delivery] == 'enabled' ? :enabled : :disabled
      if kafka_conf[:auto_delivery] == :enabled
        delivery_threshold = kafka_conf.delete :delivery_threshold
        delivery_interval = kafka_conf.delete :delivery_interval
        raise 'Missing conf delivery_threshold or delivery_interval' if delivery_threshold.nil? && delivery_interval.nil?
        delivery_conf = {}
        delivery_conf[:delivery_threshold] = delivery_threshold unless delivery_threshold.nil?
        delivery_conf[:delivery_interval] = delivery_interval unless delivery_interval.nil?
        kafka_conf[:delivery_conf] = delivery_conf
      end
    end
    Output::KafkaPlugin.new kafka_conf
  end

  def init_output_mysql(mysql_conf)
    require_relative 'fileminer/output/mysql'
    mysql_conf[:ssl_mode] = mysql_conf[:ssl_mode] == 'enabled' ? :enabled : :disabled
    Output::MysqlPlugin.new mysql_conf
  end

  # Sends the lines to the output and, once the output confirms them,
  # advances the record position and saves the registry.
  def send_lines(record, lines)
    if @output.batch?
      @output.send_all lines do
        record[:pos] = lines[-1][:end]
        @miner.save_registry
      end
    else
      lines.each do |line|
        @output.send line do
          record[:pos] = line[:end]
          @miner.save_registry
        end
      end
    end
  end

end
|
232
|
+
|
233
|
+
|
234
|
+
if __FILE__ == $0
  # Usage:
  #   ruby fileminer.rb /etc/fileminer/fileminer.yml
  require 'yaml'
  # fail with a readable message instead of a TypeError from File when the
  # configuration path is missing
  abort 'Usage: ruby fileminer.rb /etc/fileminer/fileminer.yml' if ARGV.empty?
  conf = YAML.load File.read(ARGV[0])
  fileminer = FileMiner.new conf
  # trap INT signal to stop mining
  trap(:INT) { fileminer.stop_mining }
  fileminer.start_mining
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fileminer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0RC1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fang MinJie
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-01-17 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple file/log transfer tool coding by ruby.
|
14
|
+
email:
|
15
|
+
- fmjsjx@163.com
|
16
|
+
executables:
|
17
|
+
- fileminer
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- bin/fileminer
|
24
|
+
- conf/fileminer_default.yml
|
25
|
+
- lib/fileminer.rb
|
26
|
+
- lib/fileminer/miner.rb
|
27
|
+
- lib/fileminer/output.rb
|
28
|
+
- lib/fileminer/output/kafka.rb
|
29
|
+
- lib/fileminer/output/mysql.rb
|
30
|
+
- lib/fileminer/output/redis.rb
|
31
|
+
- lib/fileminer/plugins.rb
|
32
|
+
- lib/fileminer/version.rb
|
33
|
+
homepage: https://github.com/fmjsjx/fileminer
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.3.1
|
51
|
+
requirements: []
|
52
|
+
rubygems_version: 3.0.1
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: A simple file/log transfer tool coding by ruby.
|
56
|
+
test_files: []
|