anschel 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Readme.md +141 -2
- data/VERSION +1 -1
- data/lib/anschel/filter/convert.rb +2 -0
- data/lib/anschel/filter/gsub.rb +2 -0
- data/lib/anschel/filter/index.rb +26 -0
- data/lib/anschel/filter/parse.rb +2 -0
- data/lib/anschel/filter/scan.rb +3 -0
- data/lib/anschel/filter/stamp.rb +7 -4
- data/lib/anschel/filter.rb +8 -4
- data/lib/anschel/input.rb +5 -2
- data/lib/anschel/main.rb +20 -13
- data/lib/anschel/metadata.rb +2 -3
- data/lib/anschel/mjolnir.rb +1 -0
- data/lib/anschel/output.rb +27 -7
- data/lib/anschel.rb +2 -0
- metadata +19 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4e693593d8211b7f7a7ee60d06536d463f74cb2f
+  data.tar.gz: f67b0087ae0d56c73beed719263feb46e78ec301
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0fe919474cdc8d4e31f7ed8703600d960610876613ffca815d79267cf00fdc02e1dfdc5af2e692ae72d0005e67e9372f24da289c4abb1806f09abb06ad8c6f80
+  data.tar.gz: 590ccdd14141ef0feeb4482a8063d634067f9b3ad73d239a2afe0782be6857fa99bc1751a7c2508957e3cf53782c320942eee535899ca6d24a2e69e6ab86ca7f
data/Readme.md
CHANGED
@@ -1,3 +1,142 @@
-# Anschel
+# Anschel
 
-
+Logstash-like for moving events from Kafka into Elasticsearch.
+
+
+
+## Usage, Configuration &c.
+
+### Installation
+
+Download the jarfile from the [GitHub releases page](https://github.com/sczizzo/anschel/releases)
+and run like so:
+
+    $ java -jar anschel-1.2.3.jar
+
+### Usage
+
+Just call for help!
+
+    $ java -jar anschel-1.2.3.jar help
+    Commands:
+      anschel agent           # Run the Anschel agent
+      anschel art             # View the application art
+      anschel help [COMMAND]  # Describe available commands or one specific command
+      anschel version         # Echo the application version
+
+Probably you're most interested in the `agent` command:
+
+    $ java -jar anschel-1.2.3.jar help agent
+    Usage:
+      anschel agent
+
+    Options:
+      -c, [--config=CONFIG]        # Main configuration file
+                                   # Default: /etc/anschel.json
+      -L, [--log=LOG]              # Log to file instead of STDOUT
+      -V, [--debug], [--no-debug]  # Enable DEBUG-level logging
+
+    Run the Anschel agent
+
+
+
+### Configuration
+
+It's kinda like a JSON version of the Logstash config language:
+
+    {
+      "log4j": {
+        "path": "/path/to/anschel4j.log",
+        "pattern": "[%d] %p %m (%c)%n"
+      },
+      "kafka": {
+        "queue_size": 2000,
+        "zk_connect": "localhost:2181",
+        "zk_connect_timeout": 6000,
+        "zk_session_timeout": 6000,
+        "group_id": "anschel",
+        "topic_id": "franz",
+        "reset_beginning": null,
+        "auto_offset_reset": "smallest",
+        "consumer_restart_on_error": true,
+        "auto_commit_interval": 1000,
+        "rebalance_max_retries": 4,
+        "rebalance_backoff_ms": 2000,
+        "socket_timeout_ms": 30000,
+        "socket_receive_buffer_bytes": 65536,
+        "fetch_message_max_bytes": 1048576,
+        "auto_commit_enable": true,
+        "queued_max_message_chunks": 10,
+        "fetch_min_bytes": 1,
+        "fetch_wait_max_ms": 100,
+        "refresh_leader_backoff_ms": 200,
+        "consumer_timeout_ms": -1,
+        "consumer_restart_sleep_ms": 0
+      },
+      "elasticsearch": {
+        "queue_size": 2000,
+        "bulk_size": 200,
+        "hosts": [ "localhost:9200" ],
+        "randomize_hosts": true,
+        "reload_connections": true,
+        "reload_on_failure": true,
+        "sniffer_timeout": 5
+      },
+      "filter": {
+        "type": [
+          {
+            "scan": {
+              "field": "message",
+              "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
+              "target": "guids"
+            }
+          },
+          {
+            "scan": {
+              "field": "path",
+              "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
+              "target": "guids"
+            }
+          }
+        ],
+        "_after": [
+          {
+            "gsub": {
+              "field": "type",
+              "match": "-.*",
+              "replace": ""
+            }
+          },
+          {
+            "index": {}
+          }
+        ]
+      }
+    }
+
+
+
+### Operation
+
+You might deploy Anschel with Upstart. Here's a minimal config:
+
+    #!upstart
+    description "anschel"
+
+    console log
+
+    start on startup
+    stop on shutdown
+    respawn
+
+    exec java -jar anschel-1.2.3.jar \
+      --config /etc/anschel.json --log /var/log/anschel.log
+
+
+### Changelog
+
+#### v1.0 (develop)
+
+- Initial implementation of the Kafka-to-Elasticsearch pipeline
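As an aside, the `scan` filters in the example config above amount to running Ruby's `String#scan` over a field and collecting unique matches into a target field. A standalone sketch of that behavior (plain Ruby with a hypothetical event; not Anschel's actual filter code):

    # The config above targets the "message" and "path" fields with a GUID pattern.
    pattern = /[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}/
    event   = { message: 'request 0123ABCD-0000-1111-2222-333344445555 retried' }
    guids   = event[:message].scan(pattern).flatten.uniq
    event[:guids] = guids   # => ["0123ABCD-0000-1111-2222-333344445555"]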
data/VERSION
CHANGED
@@ -1 +1 @@
-0.0.2
+0.1.0
data/lib/anschel/filter/gsub.rb
CHANGED
@@ -16,6 +16,8 @@ module Anschel
       raise 'Missing required "match" for "gsub" filter' if match.nil?
       raise 'Missing required "replace" for "gsub" filter' if replace.nil?
 
+      field = field.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         event[field].gsub! match, replace
data/lib/anschel/filter/index.rb
ADDED
@@ -0,0 +1,26 @@
+module Anschel
+  class Filter
+    def index conf
+      stamp = conf.delete(:stamp) || '@timestamp'
+      prefix = conf.delete(:prefix) || 'logs-%{type}-'
+      suffix = conf.delete(:suffix) || '%Y.%m.%d'
+
+      stamp = stamp.to_sym
+
+      iso8601 = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"
+
+      joda = org.joda.time.format.DateTimeFormat.forPattern iso8601
+      joda = joda.withDefaultYear(Time.new.year)
+      joda = joda.withOffsetParsed
+
+      lambda do |event|
+        return event unless event.has_key? stamp
+        millis = joda.parseMillis event[stamp]
+        idx_prefix = prefix % event
+        idx_suffix = Time.at(0.001 * millis).strftime(suffix)
+        event[:_index] = idx_prefix + idx_suffix
+        filtered event, conf
+      end
+    end
+  end
+end
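For intuition, the new `index` filter stamps each event with a target index built from a `sprintf`-style prefix and a date suffix. A rough plain-Ruby equivalent of that naming scheme (stdlib `Time` and made-up values standing in for the Joda parsing above):

    prefix = 'logs-%{type}-' % { type: 'franz' }   # the default 'logs-%{type}-' prefix
    suffix = Time.utc(2015, 6, 22).strftime('%Y.%m.%d')
    index  = prefix + suffix                       # => "logs-franz-2015.06.22"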
data/lib/anschel/filter/parse.rb
CHANGED
@@ -13,6 +13,8 @@ module Anschel
       raise 'Missing required "field" for "parse" filter' if field.nil?
       raise 'Missing required "pattern" for "parse" filter' if pattern.nil?
 
+      field = field.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         mdata = pattern.match event[field]
data/lib/anschel/filter/scan.rb
CHANGED
@@ -16,6 +16,9 @@ module Anschel
       raise 'Missing required "pattern" for "scan" filter' if pattern.nil?
       raise 'Missing required "target" for "convert" filter' if target.nil?
 
+      field = field.to_sym
+      target = target.to_sym
+
       lambda do |event|
         return event unless event.has_key? field
         results = event[field].scan(pattern).flatten.uniq
data/lib/anschel/filter/stamp.rb
CHANGED
@@ -20,9 +20,12 @@ module Anschel
       precision = (precision || 3).to_i
       target ||= '@timestamp'
 
+      field = field.to_sym
+      target = target.to_sym
+
       parsers = patterns.map do |p|
-        joda = org.joda.time.format.DateTimeFormat.forPattern
-        joda = joda.withDefaultYear
+        joda = org.joda.time.format.DateTimeFormat.forPattern p
+        joda = joda.withDefaultYear Time.new.year
         joda = joda.withOffsetParsed
       end
 
@@ -38,8 +41,8 @@ module Anschel
       end
 
       if error_tag
-        event[
-        event[
+        event[:tags] ||= []
+        event[:tags] << error_tag
       end
       event
     end
data/lib/anschel/filter.rb
CHANGED
@@ -1,5 +1,6 @@
 require_relative 'filter/convert'
 require_relative 'filter/gsub'
+require_relative 'filter/index'
 require_relative 'filter/parse'
 require_relative 'filter/scan'
 require_relative 'filter/stamp'
@@ -10,7 +11,7 @@ module Anschel
 
     attr_reader :filters
 
-    def initialize config
+    def initialize config, log
       @filters = Hash.new { |h,k| h[k] = [] }
       config.each do |event_type, filter_defns|
         filter_defns.each do |filter_defn|
@@ -19,13 +20,16 @@ module Anschel
           @filters[event_type] << self.send(filter_type, filter_conf)
         end
       end
+      log.info event: 'filter-loaded'
     end
 
 
     def apply event
-      raise 'Event does not have a "type" field' unless event[
-
-      filters[
+      raise 'Event does not have a "type" field' unless event[:type]
+      type = event[:type].dup # In case of modification
+      filters[:_before].each { |f| f.call event }
+      filters[type].each { |f| f.call event }
+      filters[:_after].each { |f| f.call event }
       event
     end
 
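The reworked `apply` fixes a dispatch order: global `_before` filters run first, then the chain registered for the event's own type, then global `_after` filters. A self-contained sketch of that idiom (hypothetical lambdas and event, not the filters shipped with Anschel):

    filters = Hash.new { |h, k| h[k] = [] }
    filters[:_before] << lambda { |e| e[:received_at] ||= Time.now.to_i }
    filters['syslog'] << lambda { |e| e[:message].strip! if e[:message] }
    filters[:_after]  << lambda { |e| e[:_index] ||= 'logs-anschel' }

    event = { type: 'syslog', message: '  hello  ' }
    type  = event[:type].dup  # dup'd in case a filter rewrites the type
    [:_before, type, :_after].each do |key|
      filters[key].each { |f| f.call event }
    end
    # event now has a stripped message, a received_at stamp, and a default _index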
data/lib/anschel/input.rb
CHANGED
@@ -3,7 +3,7 @@ require 'jruby-kafka'
 
 module Anschel
   class Input
-    def initialize config
+    def initialize config, log
       qsize = config.delete(:queue_size) || 1000
       @queue = SizedQueue.new qsize
       consumer_group = Kafka::Group.new config
@@ -11,11 +11,14 @@ module Anschel
 
       trap('SIGINT') do
         consumer_group.shutdown
+        log.info event: 'goodbye'
         exit
       end
+
+      log.info event: 'input-loaded'
     end
 
 
-    def
+    def shift ; @queue.shift end
   end
 end
data/lib/anschel/main.rb
CHANGED
@@ -27,41 +27,48 @@ module Anschel
     end
 
 
-    desc '
+    desc 'agent', 'Run the Anschel agent'
     option :config, \
       type: :string,
       aliases: %w[ -c ],
       desc: 'Main configuration file',
       default: '/etc/anschel.json'
     include_common_options
-    def
+    def agent
+      log.info event: 'hello'
       config = JrJackson::Json.load File.read(options.config), symbolize_keys: true
       setup_log4j config[:log4j]
 
-      input = Input.new config[:kafka]
-      filter = Filter.new config[:filter]
-      output = Output.new config[:elasticsearch]
+      input = Input.new config[:kafka], log
+      filter = Filter.new config[:filter], log
+      output = Output.new config[:elasticsearch], log
 
-      start
-      count
+      start = Time.now
+      count = 0
+      sample = 100_000
 
      ts = num_cpus.times.map do
        Thread.new do
          loop do
-            event = JrJackson::Json.load
+            event = JrJackson::Json.load \
+              input.shift.message.to_s, symbolize_keys: true
            output.push filter.apply(event)
-            if (count += 1) %
-
+            if (count += 1) % sample == 0
+              old_count, now = count, Time.now
+              elapsed, start = (now - start).to_f, now
+              rate = 1.0 * sample / elapsed
              log.info \
                event: 'stat',
-                count:
-
-
+                count: old_count,
+                sample: sample,
+                elapsed_s: elapsed,
+                rate_eps: rate
            end
          end
        end
      end
+      log.info event: 'fully-loaded'
      ts.map &:join
      exit
    end
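Each stat line reports a windowed rate: after every `sample` events, a worker divides the window size by the time elapsed since the previous window. With the hard-coded `sample` of 100,000 and an illustrative window duration:

    sample  = 100_000
    elapsed = 12.5                    # seconds since the last stat line (made up)
    rate    = 1.0 * sample / elapsed  # => 8000.0 events per second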
data/lib/anschel/metadata.rb
CHANGED
@@ -1,8 +1,7 @@
 module Anschel
+  # In a nutshell
   NAME = 'anschel'
-
-  # A quick summary for use in the command-line interface
-  SUMMARY = %q.Companion to Franz.
+  SUMMARY = %q.Logstash-like for moving events from Kafka into Elasticsearch.
 
   # Take credit for your work
   AUTHOR = 'Sean Clemmer'
data/lib/anschel/mjolnir.rb
CHANGED
data/lib/anschel/output.rb
CHANGED
@@ -1,30 +1,50 @@
+require 'thread'
+
+require 'typhoeus/adapters/faraday'
+require 'typhoeus'
 require 'elasticsearch'
 
 
 module Anschel
   class Output
-    def initialize config
+    def initialize config, log
       pattern = config.delete(:index_pattern)
-      bsize = config.delete(:bulk_size) || 500
       qsize = config.delete(:queue_size) || 2000
-
+      bsize = config.delete(:bulk_size) || 500
+      timeout = config.delete(:bulk_timeout) || 1.0
+      slice = timeout / bsize
       client = Elasticsearch::Client.new config
       client.transport.reload_connections!
 
+      @queue = SizedQueue.new qsize
+
       Thread.new do
         loop do
-          events =
+          events = []
+          count = 0
+          start = Time.now.to_f
+          until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
+            begin
+              events.push @queue.shift(true)
+            rescue # shift returned immediately
+              sleep slice
+            end
+          end
+
+          next if events.empty?
 
           body = events.map do |e|
-            index = e.
-            { index: { _index: index, _type: e[
+            index = e.delete(:_index) || 'logs-anschel'
+            { index: { _index: index, _type: e[:type], data: e } }
           end
 
           client.bulk body: body
         end
       end
+
+      log.info event: 'output-loaded'
     end
 
-    def push event ; @queue
+    def push event ; @queue.push event end
   end
 end
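The new writer loop batches bulk requests by draining the queue until either `bulk_timeout` seconds pass or `bulk_size` events accumulate; the non-blocking `SizedQueue#shift(true)` raises `ThreadError` on an empty queue, which the loop treats as a cue to sleep briefly. A cut-down sketch of that batching idiom, using the README's `bulk_size` and the code's default timeout:

    require 'thread'

    queue   = SizedQueue.new 2000
    bsize   = 200    # bulk_size from the example config
    timeout = 1.0    # bulk_timeout default, in seconds
    slice   = timeout / bsize

    events = []
    count  = 0
    start  = Time.now.to_f
    until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
      begin
        events.push queue.shift(true)  # non-blocking; raises ThreadError if empty
      rescue ThreadError
        sleep slice                    # nothing queued yet; back off briefly
      end
    end
    # events now holds at most bsize items gathered within roughly timeout seconds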
data/lib/anschel.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: anschel
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.1.0
 platform: ruby
 authors:
 - Sean Clemmer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-06-
+date: 2015-06-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,21 @@ dependencies:
   - - '>='
     - !ruby/object:Gem::Version
       version: '0'
-
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  name: typhoeus
+  prerelease: false
+  type: :runtime
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Logstash-like for moving events from Kafka into Elasticsearch.
 email: sczizzo@gmail.com
 executables:
 - anschel
@@ -81,6 +95,7 @@ files:
 - lib/anschel/filter.rb
 - lib/anschel/filter/convert.rb
 - lib/anschel/filter/gsub.rb
+- lib/anschel/filter/index.rb
 - lib/anschel/filter/parse.rb
 - lib/anschel/filter/scan.rb
 - lib/anschel/filter/stamp.rb
@@ -112,5 +127,5 @@ rubyforge_project:
 rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
-summary:
+summary: Logstash-like for moving events from Kafka into Elasticsearch
 test_files: []