anschel 0.0.2 → 0.1.0
- checksums.yaml +4 -4
- data/Readme.md +141 -2
- data/VERSION +1 -1
- data/lib/anschel/filter/convert.rb +2 -0
- data/lib/anschel/filter/gsub.rb +2 -0
- data/lib/anschel/filter/index.rb +26 -0
- data/lib/anschel/filter/parse.rb +2 -0
- data/lib/anschel/filter/scan.rb +3 -0
- data/lib/anschel/filter/stamp.rb +7 -4
- data/lib/anschel/filter.rb +8 -4
- data/lib/anschel/input.rb +5 -2
- data/lib/anschel/main.rb +20 -13
- data/lib/anschel/metadata.rb +2 -3
- data/lib/anschel/mjolnir.rb +1 -0
- data/lib/anschel/output.rb +27 -7
- data/lib/anschel.rb +2 -0
- metadata +19 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4e693593d8211b7f7a7ee60d06536d463f74cb2f
+  data.tar.gz: f67b0087ae0d56c73beed719263feb46e78ec301
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0fe919474cdc8d4e31f7ed8703600d960610876613ffca815d79267cf00fdc02e1dfdc5af2e692ae72d0005e67e9372f24da289c4abb1806f09abb06ad8c6f80
+  data.tar.gz: 590ccdd14141ef0feeb4482a8063d634067f9b3ad73d239a2afe0782be6857fa99bc1751a7c2508957e3cf53782c320942eee535899ca6d24a2e69e6ab86ca7f
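These digests can be re-checked locally with Ruby's standard Digest library. A minimal sketch, assuming the gem's data.tar.gz sits in the working directory:

    require 'digest'

    # Recompute the payload's SHA1 and compare it to the checksums.yaml
    # entry above; Digest::SHA512 works the same way for the longer digest.
    expected = 'f67b0087ae0d56c73beed719263feb46e78ec301'
    actual   = Digest::SHA1.file('data.tar.gz').hexdigest
    abort 'data.tar.gz: SHA1 mismatch!' unless actual == expected
    puts 'data.tar.gz: SHA1 OK'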
data/Readme.md
CHANGED
@@ -1,3 +1,142 @@
-# Anschel
+# Anschel ![Version](https://img.shields.io/gem/v/anschel.svg?style=flat-square)
 
-
+Logstash-like for moving events from Kafka into Elasticsearch.
+
+
+
+## Usage, Configuration &c.
+
+### Installation
+
+Download the jarfile from the [GitHub releases page](https://github.com/sczizzo/anschel/releases)
+and run like so:
+
+    $ java -jar anschel-1.2.3.jar
+
+### Usage
+
+Just call for help!
+
+    $ java -jar anschel-1.2.3.jar help
+    Commands:
+      anschel agent           # Run the Anschel agent
+      anschel art             # View the application art
+      anschel help [COMMAND]  # Describe available commands or one specific command
+      anschel version         # Echo the application version
+
+Probably you're most interested in the `agent` command:
+
+    $ java -jar anschel-1.2.3.jar help agent
+    Usage:
+      anschel agent
+
+    Options:
+      -c, [--config=CONFIG]        # Main configuration file
+                                   # Default: /etc/anschel.json
+      -L, [--log=LOG]              # Log to file instead of STDOUT
+      -V, [--debug], [--no-debug]  # Enable DEBUG-level logging
+
+    Run the Anschel agent
+
+
+
+### Configuration
+
+It's kinda like a JSON version of the Logstash config language:
+
+    {
+      "log4j": {
+        "path": "/path/to/anschel4j.log",
+        "pattern": "[%d] %p %m (%c)%n"
+      },
+      "kafka": {
+        "queue_size": 2000,
+        "zk_connect": "localhost:2181",
+        "zk_connect_timeout": 6000,
+        "zk_session_timeout": 6000,
+        "group_id": "anschel",
+        "topic_id": "franz",
+        "reset_beginning": null,
+        "auto_offset_reset": "smallest",
+        "consumer_restart_on_error": true,
+        "auto_commit_interval": 1000,
+        "rebalance_max_retries": 4,
+        "rebalance_backoff_ms": 2000,
+        "socket_timeout_ms": 30000,
+        "socket_receive_buffer_bytes": 65536,
+        "fetch_message_max_bytes": 1048576,
+        "auto_commit_enable": true,
+        "queued_max_message_chunks": 10,
+        "fetch_min_bytes": 1,
+        "fetch_wait_max_ms": 100,
+        "refresh_leader_backoff_ms": 200,
+        "consumer_timeout_ms": -1,
+        "consumer_restart_sleep_ms": 0
+      },
+      "elasticsearch": {
+        "queue_size": 2000,
+        "bulk_size": 200,
+        "hosts": [ "localhost:9200" ],
+        "randomize_hosts": true,
+        "reload_connections": true,
+        "reload_on_failure": true,
+        "sniffer_timeout": 5
+      },
+      "filter": {
+        "type": [
+          {
+            "scan": {
+              "field": "message",
+              "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
+              "target": "guids"
+            }
+          },
+          {
+            "scan": {
+              "field": "path",
+              "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
+              "target": "guids"
+            }
+          }
+        ],
+        "_after": [
+          {
+            "gsub": {
+              "field": "type",
+              "match": "-.*",
+              "replace": ""
+            }
+          },
+          {
+            "index": {}
+          }
+        ]
+      }
+    }
+
+
+
+### Operation
+
+You might deploy Anschel with Upstart. Here's a minimal config:
+
+    #!upstart
+    description "anschel"
+
+    console log
+
+    start on startup
+    stop on shutdown
+    respawn
+
+    exec java -jar anschel-1.2.3.jar \
+      --config /etc/anschel.json --log /var/log/anschel.log
+
+
+
+### Changelog
+
+#### v1.0 (develop)
+
+- Initial implementation of the Kafka-to-Elasticsearch pipeline
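To make the filter section of that sample config concrete: each `scan` entry collects every GUID-shaped substring of a field into a `guids` array. A standalone illustration of that behavior, with the event modeled as a plain Ruby hash (sample values are hypothetical):

    # Same pattern as the README config above; scan returns every match,
    # and flatten/uniq mirrors what the scan filter does internally.
    pattern = /[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}/
    event   = { type: 'franz', message: 'request 3b9f2a71-1c2d-4e3f-9a8b-0c1d2e3f4a5b failed' }

    event[:guids] = event[:message].scan(pattern).flatten.uniq
    # => ["3b9f2a71-1c2d-4e3f-9a8b-0c1d2e3f4a5b"]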
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.2
+0.1.0
data/lib/anschel/filter/gsub.rb
CHANGED
@@ -16,6 +16,8 @@ module Anschel
       raise 'Missing required "match" for "gsub" filter' if match.nil?
       raise 'Missing required "replace" for "gsub" filter' if replace.nil?
 
+      field = field.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         event[field].gsub! match, replace
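The new `to_sym` matters because events are now parsed with `symbolize_keys` (see main.rb below), so a filter configured with the string "message" has to look up `:message`. A tiny illustration with a hypothetical event:

    # String config key, symbol event key: the to_sym bridges the two.
    event = { message: 'franz-web' }
    field = 'message'.to_sym
    event[field].gsub!(/-.*/, '')
    event  # => { message: "franz" }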
data/lib/anschel/filter/index.rb
ADDED
@@ -0,0 +1,26 @@
+module Anschel
+  class Filter
+    def index conf
+      stamp = conf.delete(:stamp) || '@timestamp'
+      prefix = conf.delete(:prefix) || 'logs-%{type}-'
+      suffix = conf.delete(:suffix) || '%Y.%m.%d'
+
+      stamp = stamp.to_sym
+
+      iso8601 = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"
+
+      joda = org.joda.time.format.DateTimeFormat.forPattern iso8601
+      joda = joda.withDefaultYear(Time.new.year)
+      joda = joda.withOffsetParsed
+
+      lambda do |event|
+        return event unless event.has_key? stamp
+        millis = joda.parseMillis event[stamp]
+        idx_prefix = prefix % event
+        idx_suffix = Time.at(0.001 * millis).strftime(suffix)
+        event[:_index] = idx_prefix + idx_suffix
+        filtered event, conf
+      end
+    end
+  end
+end
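In effect the new filter computes the Elasticsearch index name from the event itself: the prefix is interpolated against the event's fields, and the suffix is strftime'd from its timestamp. A worked example under the defaults above (values hypothetical):

    # Prefix interpolation uses Ruby's %{} hash formatting against the event.
    event  = { type: 'franz', :'@timestamp' => '2015-06-22T10:15:30.123+0000' }
    prefix = 'logs-%{type}-' % event                     # => "logs-franz-"
    suffix = Time.utc(2015, 6, 22).strftime('%Y.%m.%d')  # => "2015.06.22"
    prefix + suffix                                      # => "logs-franz-2015.06.22"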
data/lib/anschel/filter/parse.rb
CHANGED
@@ -13,6 +13,8 @@ module Anschel
       raise 'Missing required "field" for "parse" filter' if field.nil?
       raise 'Missing required "pattern" for "parse" filter' if pattern.nil?
 
+      field = field.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         mdata = pattern.match event[field]
data/lib/anschel/filter/scan.rb
CHANGED
@@ -16,6 +16,9 @@ module Anschel
       raise 'Missing required "pattern" for "scan" filter' if pattern.nil?
       raise 'Missing required "target" for "scan" filter' if target.nil?
 
+      field = field.to_sym
+      target = target.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         results = event[field].scan(pattern).flatten.uniq
data/lib/anschel/filter/stamp.rb
CHANGED
@@ -20,9 +20,12 @@ module Anschel
       precision = (precision || 3).to_i
       target ||= '@timestamp'
 
+      field = field.to_sym
+      target = target.to_sym
+
       parsers = patterns.map do |p|
-        joda = org.joda.time.format.DateTimeFormat.forPattern
-        joda = joda.withDefaultYear
+        joda = org.joda.time.format.DateTimeFormat.forPattern p
+        joda = joda.withDefaultYear Time.new.year
         joda = joda.withOffsetParsed
       end
 
@@ -38,8 +41,8 @@ module Anschel
       end
 
       if error_tag
-        event[
-        event[
+        event[:tags] ||= []
+        event[:tags] << error_tag
       end
       event
     end
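The rewritten error branch keeps unparseable events and marks them rather than dropping them. The idiom in isolation (tag name hypothetical):

    # ||= initializes the tags array only on the first error for this event.
    event = { message: 'not a timestamp' }
    error_tag = '_stamp_error'
    event[:tags] ||= []
    event[:tags] << error_tag
    event  # => { message: "not a timestamp", tags: ["_stamp_error"] }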
data/lib/anschel/filter.rb
CHANGED
@@ -1,5 +1,6 @@
 require_relative 'filter/convert'
 require_relative 'filter/gsub'
+require_relative 'filter/index'
 require_relative 'filter/parse'
 require_relative 'filter/scan'
 require_relative 'filter/stamp'
@@ -10,7 +11,7 @@ module Anschel
 
     attr_reader :filters
 
-    def initialize config
+    def initialize config, log
       @filters = Hash.new { |h,k| h[k] = [] }
       config.each do |event_type, filter_defns|
         filter_defns.each do |filter_defn|
@@ -19,13 +20,16 @@ module Anschel
           @filters[event_type] << self.send(filter_type, filter_conf)
         end
       end
+      log.info event: 'filter-loaded'
     end
 
 
     def apply event
-      raise 'Event does not have a "type" field' unless event[
-
-      filters[
+      raise 'Event does not have a "type" field' unless event[:type]
+      type = event[:type].dup # In case of modification
+      filters[:_before].each { |f| f.call event }
+      filters[type].each { |f| f.call event }
+      filters[:_after].each { |f| f.call event }
       event
     end
 
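`apply` now runs three phases: the `_before` filters, then the filters registered for the event's own type, then the `_after` filters. A minimal sketch of that dispatch, with hypothetical lambdas standing in for real filters:

    # Same Hash-with-default-array shape as @filters above.
    filters = Hash.new { |h, k| h[k] = [] }
    filters[:_before] << ->(e) { e[:received_at] ||= Time.now.to_i }
    filters['franz']  << ->(e) { e[:message].strip! }
    filters[:_after]  << ->(e) { e[:_index] ||= 'logs-anschel' }

    event = { type: 'franz', message: '  hello  ' }
    type  = event[:type].dup  # dup, in case a filter rewrites the type field
    filters[:_before].each { |f| f.call event }
    filters[type].each     { |f| f.call event }
    filters[:_after].each  { |f| f.call event }
    # event now carries :received_at, a stripped :message, and an :_index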
data/lib/anschel/input.rb
CHANGED
@@ -3,7 +3,7 @@ require 'jruby-kafka'
 
 module Anschel
   class Input
-    def initialize config
+    def initialize config, log
       qsize = config.delete(:queue_size) || 1000
       @queue = SizedQueue.new qsize
       consumer_group = Kafka::Group.new config
@@ -11,11 +11,14 @@ module Anschel
 
       trap('SIGINT') do
         consumer_group.shutdown
+        log.info event: 'goodbye'
         exit
       end
+
+      log.info event: 'input-loaded'
     end
 
 
-    def
+    def shift ; @queue.shift end
   end
 end
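The SizedQueue is what gives the input stage backpressure: when filtering falls behind, pushes from the Kafka consumer block until `shift` drains the queue. A self-contained demonstration (sizes hypothetical):

    require 'thread'

    q = SizedQueue.new 2
    producer = Thread.new { 5.times { |i| q.push i } }  # blocks once 2 are queued
    sleep 0.1                                           # producer is now stuck on item 2
    5.times { puts q.shift }                            # each shift unblocks one push
    producer.join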
data/lib/anschel/main.rb
CHANGED
@@ -27,41 +27,48 @@ module Anschel
     end
 
 
-    desc '
+    desc 'agent', 'Run the Anschel agent'
     option :config, \
       type: :string,
       aliases: %w[ -c ],
       desc: 'Main configuration file',
       default: '/etc/anschel.json'
     include_common_options
-    def
+    def agent
+      log.info event: 'hello'
       config = JrJackson::Json.load File.read(options.config), symbolize_keys: true
      setup_log4j config[:log4j]
 
-      input = Input.new config[:kafka]
-      filter = Filter.new config[:filter]
-      output = Output.new config[:elasticsearch]
+      input = Input.new config[:kafka], log
+      filter = Filter.new config[:filter], log
+      output = Output.new config[:elasticsearch], log
 
-      start
-      count
+      start = Time.now
+      count = 0
+      sample = 100_000
 
       ts = num_cpus.times.map do
         Thread.new do
           loop do
-            event = JrJackson::Json.load
+            event = JrJackson::Json.load \
+              input.shift.message.to_s, symbolize_keys: true
             output.push filter.apply(event)
-            if (count += 1) %
-
+            if (count += 1) % sample == 0
+              old_count, now = count, Time.now
+              elapsed, start = (now - start).to_f, now
+              rate = 1.0 * sample / elapsed
               log.info \
                 event: 'stat',
-                count:
-
-
+                count: old_count,
+                sample: sample,
+                elapsed_s: elapsed,
+                rate_eps: rate
             end
           end
        end
      end
 
+      log.info event: 'fully-loaded'
       ts.map &:join
       exit
     end
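The new stat block reports throughput per sample window rather than a lifetime average: every `sample` events, it divides the window size by the wall time since the previous report. The arithmetic in isolation (elapsed value hypothetical):

    sample  = 100_000
    elapsed = 12.5                    # seconds since the previous report
    rate    = 1.0 * sample / elapsed  # => 8000.0 events per second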
data/lib/anschel/metadata.rb
CHANGED
@@ -1,8 +1,7 @@
 module Anschel
+  # In a nutshell
   NAME = 'anschel'
-
-  # A quick summary for use in the command-line interface
-  SUMMARY = %q.Companion to Franz.
+  SUMMARY = %q.Logstash-like for moving events from Kafka into Elasticsearch.
 
   # Take credit for your work
   AUTHOR = 'Sean Clemmer'
data/lib/anschel/mjolnir.rb
CHANGED
data/lib/anschel/output.rb
CHANGED
@@ -1,30 +1,50 @@
+require 'thread'
+
+require 'typhoeus/adapters/faraday'
+require 'typhoeus'
 require 'elasticsearch'
 
 
 module Anschel
   class Output
-    def initialize config
+    def initialize config, log
       pattern = config.delete(:index_pattern)
-      bsize = config.delete(:bulk_size) || 500
       qsize = config.delete(:queue_size) || 2000
-
+      bsize = config.delete(:bulk_size) || 500
+      timeout = config.delete(:bulk_timeout) || 1.0
+      slice = timeout / bsize
       client = Elasticsearch::Client.new config
       client.transport.reload_connections!
 
+      @queue = SizedQueue.new qsize
+
       Thread.new do
         loop do
-          events =
+          events = []
+          count = 0
+          start = Time.now.to_f
+          until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
+            begin
+              events.push @queue.shift(true)
+            rescue # shift returned immediately
+              sleep slice
+            end
+          end
+
+          next if events.empty?
 
           body = events.map do |e|
-            index = e.
-            { index: { _index: index, _type: e[
+            index = e.delete(:_index) || 'logs-anschel'
+            { index: { _index: index, _type: e[:type], data: e } }
          end
 
          client.bulk body: body
        end
      end
+
+      log.info event: 'output-loaded'
    end
 
-    def push event ; @queue
+    def push event ; @queue.push event end
  end
end
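The new consumer thread batches events for the bulk API: it collects up to `bulk_size` events or until `bulk_timeout` elapses, using a non-blocking shift and a short sleep between misses. The same loop as a standalone sketch (sizes hypothetical; a plain Queue stands in for the SizedQueue):

    require 'thread'

    queue = Queue.new
    3.times { |i| queue.push({ n: i }) }

    bsize, timeout = 5, 0.2
    slice  = timeout / bsize
    events = []
    count  = 0
    start  = Time.now.to_f
    until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
      begin
        events.push queue.shift(true)  # non-blocking; raises ThreadError when empty
      rescue ThreadError
        sleep slice
      end
    end
    puts "flushing #{events.size} events"  # => flushing 3 events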
data/lib/anschel.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: anschel
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.1.0
 platform: ruby
 authors:
 - Sean Clemmer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-06-
+date: 2015-06-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,21 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: '0'
-
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  name: typhoeus
+  prerelease: false
+  type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Logstash-like for moving events from Kafka into Elasticsearch.
 email: sczizzo@gmail.com
 executables:
 - anschel
@@ -81,6 +95,7 @@ files:
 - lib/anschel/filter.rb
 - lib/anschel/filter/convert.rb
 - lib/anschel/filter/gsub.rb
+- lib/anschel/filter/index.rb
 - lib/anschel/filter/parse.rb
 - lib/anschel/filter/scan.rb
 - lib/anschel/filter/stamp.rb
@@ -112,5 +127,5 @@ rubyforge_project:
 rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
-summary:
+summary: Logstash-like for moving events from Kafka into Elasticsearch
 test_files: []