anschel 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c9d437249846bfe1e7ab107fb86e8338a709d8d
4
- data.tar.gz: 75e4124f0cd424865aa63692d2fdb16001e9bb58
3
+ metadata.gz: 4e693593d8211b7f7a7ee60d06536d463f74cb2f
4
+ data.tar.gz: f67b0087ae0d56c73beed719263feb46e78ec301
5
5
  SHA512:
6
- metadata.gz: 61fd41165f874a2aa55677b3d3a562994e9283d67c949a2d8945df295e1a9d9554af508ce2c39e05b781b03dc8f8f562de1ef3d4c5af8da23c67084ef375bccd
7
- data.tar.gz: 4bb4b827b537644b7f5495a42dccf79d867e8f0a76c8cfc17c5abd2df1b91758c8f1271d8147854a2a56871ba2d34c76afa0940ec7b6d6834fca6fafb9110962
6
+ metadata.gz: 0fe919474cdc8d4e31f7ed8703600d960610876613ffca815d79267cf00fdc02e1dfdc5af2e692ae72d0005e67e9372f24da289c4abb1806f09abb06ad8c6f80
7
+ data.tar.gz: 590ccdd14141ef0feeb4482a8063d634067f9b3ad73d239a2afe0782be6857fa99bc1751a7c2508957e3cf53782c320942eee535899ca6d24a2e69e6ab86ca7f
data/Readme.md CHANGED
@@ -1,3 +1,142 @@
1
- # Anschel
1
+ # Anschel ![Version](https://img.shields.io/gem/v/anschel.svg?style=flat-square)
2
2
 
3
- Companion to Franz
3
+ Logstash-like for moving events from Kafka into Elasticsearch.
4
+
5
+
6
+
7
+ ## Usage, Configuration &c.
8
+
9
+ ### Installation
10
+
11
+ Download the jarfile from the [GitHub releases page](https://github.com/sczizzo/anschel/releases)
12
+ and run like so:
13
+
14
+ $ java -jar anschel-1.2.3.jar
15
+
16
+ ### Usage
17
+
18
+ Just call for help!
19
+
20
+ $ java -jar anschel-1.2.3.jar help
21
+ Commands:
22
+ anschel agent # Run the Anschel agent
23
+ anschel art # View the application art
24
+ anschel help [COMMAND] # Describe available commands or one specific command
25
+ anschel version # Echo the application version
26
+
27
+ Probably you're most interested in the `agent` command:
28
+
29
+ $ java -jar anschel-1.2.3.jar help agent
30
+ Usage:
31
+ anschel agent
32
+
33
+ Options:
34
+ -c, [--config=CONFIG] # Main configuration file
35
+ # Default: /etc/anschel.json
36
+ -L, [--log=LOG] # Log to file instead of STDOUT
37
+ -V, [--debug], [--no-debug] # Enable DEBUG-level logging
38
+
39
+ Run the Anschel agent
40
+
41
+
42
+
43
+ ### Configuration
44
+
45
+ It's kinda like a JSON version of the Logstash config language:
46
+
47
+ {
48
+ "log4j": {
49
+ "path": "/path/to/anschel4j.log",
50
+ "pattern": "[%d] %p %m (%c)%n"
51
+ },
52
+ "kafka": {
53
+ "queue_size": 2000,
54
+ "zk_connect": "localhost:2181",
55
+ "zk_connect_timeout": 6000,
56
+ "zk_session_timeout": 6000,
57
+ "group_id": "anschel",
58
+ "topic_id": "franz",
59
+ "reset_beginning": null,
60
+ "auto_offset_reset": "smallest",
61
+ "consumer_restart_on_error": true,
62
+ "auto_commit_interval": 1000,
63
+ "rebalance_max_retries": 4,
64
+ "rebalance_backoff_ms": 2000,
65
+ "socket_timeout_ms": 30000,
66
+ "socket_receive_buffer_bytes": 65536,
67
+ "fetch_message_max_bytes": 1048576,
68
+ "auto_commit_enable": true,
69
+ "queued_max_message_chunks": 10,
70
+ "fetch_min_bytes": 1,
71
+ "fetch_wait_max_ms": 100,
72
+ "refresh_leader_backoff_ms": 200,
73
+ "consumer_timeout_ms": -1,
74
+ "consumer_restart_sleep_ms": 0
75
+ },
76
+ "elasticsearch": {
77
+ "queue_size": 2000,
78
+ "bulk_size": 200,
79
+ "hosts": [ "localhost:9200" ],
80
+ "randomize_hosts": true,
81
+ "reload_connections": true,
82
+ "reload_on_failure": true,
83
+ "sniffer_timeout": 5
84
+ },
85
+ "filter": {
86
+ "type": [
87
+ {
88
+ "scan": {
89
+ "field": "message",
90
+ "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
91
+ "target": "guids"
92
+ }
93
+ },
94
+ {
95
+ "scan": {
96
+ "field": "path",
97
+ "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
98
+ "target": "guids"
99
+ }
100
+ }
101
+ ],
102
+ "_after": [
103
+ {
104
+ "gsub": {
105
+ "field": "type",
106
+ "match": "-.*",
107
+ "replace": ""
108
+ }
109
+ },
110
+ {
111
+ "index": {}
112
+ }
113
+ ]
114
+ }
115
+ }
116
+
117
+
118
+
119
+
120
+ ### Operation
121
+
122
+ You might deploy Anschel with Upstart. Here's a minimal config:
123
+
124
+ #!upstart
125
+ description "anschel"
126
+
127
+ console log
128
+
129
+ start on startup
130
+ stop on shutdown
131
+ respawn
132
+
133
+ exec java -jar anschel-1.2.3.jar \
134
+ --config /etc/anschel.json --log /var/log/anschel.log
135
+
136
+
137
+
138
+ ### Changelog
139
+
140
+ #### v1.0 (develop)
141
+
142
+ - Initial implementation of the Kafka-to-Elasticsearch pipeline
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.1.0
@@ -13,6 +13,8 @@ module Anschel
13
13
  raise 'Missing required "field" for "convert" filter' if field.nil?
14
14
  raise 'Missing required "type" for "convert" filter' if type.nil?
15
15
 
16
+ field = field.to_sym
17
+
16
18
  type_conversions = {
17
19
  'integer' => :to_i,
18
20
  'float' => :to_f,
@@ -16,6 +16,8 @@ module Anschel
16
16
  raise 'Missing required "match" for "gsub" filter' if match.nil?
17
17
  raise 'Missing required "replace" for "gsub" filter' if replace.nil?
18
18
 
19
+ field = field.to_sym
20
+
19
21
  lambda do |event|
20
22
  return event unless event.has_key? field
21
23
  event[field].gsub! match, replace
@@ -0,0 +1,26 @@
1
+ module Anschel
2
+ class Filter
3
+ def index conf
4
+ stamp = conf.delete(:stamp) || '@timestamp'
5
+ prefix = conf.delete(:prefix) || 'logs-%{type}-'
6
+ suffix = conf.delete(:suffix) || '%Y.%m.%d'
7
+
8
+ stamp = stamp.to_sym
9
+
10
+ iso8601 = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"
11
+
12
+ joda = org.joda.time.format.DateTimeFormat.forPattern iso8601
13
+ joda = joda.withDefaultYear(Time.new.year)
14
+ joda = joda.withOffsetParsed
15
+
16
+ lambda do |event|
17
+ return event unless event.has_key? stamp
18
+ millis = joda.parseMillis event[stamp]
19
+ idx_prefix = prefix % event
20
+ idx_suffix = Time.at(0.001 * millis).strftime(suffix)
21
+ event[:_index] = idx_prefix + idx_suffix
22
+ filtered event, conf
23
+ end
24
+ end
25
+ end
26
+ end
@@ -13,6 +13,8 @@ module Anschel
13
13
  raise 'Missing required "field" for "parse" filter' if field.nil?
14
14
  raise 'Missing required "pattern" for "parse" filter' if pattern.nil?
15
15
 
16
+ field = field.to_sym
17
+
16
18
  lambda do |event|
17
19
  return event unless event.has_key? field
18
20
  mdata = pattern.match event[field]
@@ -16,6 +16,9 @@ module Anschel
16
16
  raise 'Missing required "pattern" for "scan" filter' if pattern.nil?
17
17
  raise 'Missing required "target" for "convert" filter' if target.nil?
18
18
 
19
+ field = field.to_sym
20
+ target = target.to_sym
21
+
19
22
  lambda do |event|
20
23
  return event unless event.has_key? field
21
24
  results = event[field].scan(pattern).flatten.uniq
@@ -20,9 +20,12 @@ module Anschel
20
20
  precision = (precision || 3).to_i
21
21
  target ||= '@timestamp'
22
22
 
23
+ field = field.to_sym
24
+ target = target.to_sym
25
+
23
26
  parsers = patterns.map do |p|
24
- joda = org.joda.time.format.DateTimeFormat.forPattern(p)
25
- joda = joda.withDefaultYear(Time.new.year)
27
+ joda = org.joda.time.format.DateTimeFormat.forPattern p
28
+ joda = joda.withDefaultYear Time.new.year
26
29
  joda = joda.withOffsetParsed
27
30
  end
28
31
 
@@ -38,8 +41,8 @@ module Anschel
38
41
  end
39
42
 
40
43
  if error_tag
41
- event['tags'] ||= []
42
- event['tags'] << error_tag
44
+ event[:tags] ||= []
45
+ event[:tags] << error_tag
43
46
  end
44
47
  event
45
48
  end
@@ -1,5 +1,6 @@
1
1
  require_relative 'filter/convert'
2
2
  require_relative 'filter/gsub'
3
+ require_relative 'filter/index'
3
4
  require_relative 'filter/parse'
4
5
  require_relative 'filter/scan'
5
6
  require_relative 'filter/stamp'
@@ -10,7 +11,7 @@ module Anschel
10
11
 
11
12
  attr_reader :filters
12
13
 
13
- def initialize config
14
+ def initialize config, log
14
15
  @filters = Hash.new { |h,k| h[k] = [] }
15
16
  config.each do |event_type, filter_defns|
16
17
  filter_defns.each do |filter_defn|
@@ -19,13 +20,16 @@ module Anschel
19
20
  @filters[event_type] << self.send(filter_type, filter_conf)
20
21
  end
21
22
  end
23
+ log.info event: 'filter-loaded'
22
24
  end
23
25
 
24
26
 
25
27
  def apply event
26
- raise 'Event does not have a "type" field' unless event['type']
27
- filters[:*].each { |f| f.call event }
28
- filters[event[:type]].each { |f| f.call event }
28
+ raise 'Event does not have a "type" field' unless event[:type]
29
+ type = event[:type].dup # In case of modification
30
+ filters[:_before].each { |f| f.call event }
31
+ filters[type].each { |f| f.call event }
32
+ filters[:_after].each { |f| f.call event }
29
33
  event
30
34
  end
31
35
 
data/lib/anschel/input.rb CHANGED
@@ -3,7 +3,7 @@ require 'jruby-kafka'
3
3
 
4
4
  module Anschel
5
5
  class Input
6
- def initialize config
6
+ def initialize config, log
7
7
  qsize = config.delete(:queue_size) || 1000
8
8
  @queue = SizedQueue.new qsize
9
9
  consumer_group = Kafka::Group.new config
@@ -11,11 +11,14 @@ module Anschel
11
11
 
12
12
  trap('SIGINT') do
13
13
  consumer_group.shutdown
14
+ log.info event: 'goodbye'
14
15
  exit
15
16
  end
17
+
18
+ log.info event: 'input-loaded'
16
19
  end
17
20
 
18
21
 
19
- def pop ; @queue.pop end
22
+ def shift ; @queue.shift end
20
23
  end
21
24
  end
data/lib/anschel/main.rb CHANGED
@@ -27,41 +27,48 @@ module Anschel
27
27
  end
28
28
 
29
29
 
30
- desc 'test', 'Test the Anschel agent'
30
+ desc 'agent', 'Run the Anschel agent'
31
31
  option :config, \
32
32
  type: :string,
33
33
  aliases: %w[ -c ],
34
34
  desc: 'Main configuration file',
35
35
  default: '/etc/anschel.json'
36
36
  include_common_options
37
- def test
37
+ def agent
38
+ log.info event: 'hello'
38
39
  config = JrJackson::Json.load File.read(options.config), symbolize_keys: true
39
40
  setup_log4j config[:log4j]
40
41
 
41
- input = Input.new config[:kafka]
42
- filter = Filter.new config[:filter]
43
- output = Output.new config[:elasticsearch]
42
+ input = Input.new config[:kafka], log
43
+ filter = Filter.new config[:filter], log
44
+ output = Output.new config[:elasticsearch], log
44
45
 
45
- start = Time.now
46
- count = 0
46
+ start = Time.now
47
+ count = 0
48
+ sample = 100_000
47
49
 
48
50
  ts = num_cpus.times.map do
49
51
  Thread.new do
50
52
  loop do
51
- event = JrJackson::Json.load input.pop.message.to_s
53
+ event = JrJackson::Json.load \
54
+ input.shift.message.to_s, symbolize_keys: true
52
55
  output.push filter.apply(event)
53
- if (count += 1) % 100_000 == 0
54
- elapsed = Time.now - start
56
+ if (count += 1) % sample == 0
57
+ old_count, now = count, Time.now
58
+ elapsed, start = (now - start).to_f, now
59
+ rate = 1.0 * sample / elapsed
55
60
  log.info \
56
61
  event: 'stat',
57
- count: count,
58
- elapsed_s: elapsed.to_f,
59
- rate_eps: ( 1.0 * count / elapsed.to_f )
62
+ count: old_count,
63
+ sample: sample,
64
+ elapsed_s: elapsed,
65
+ rate_eps: rate
60
66
  end
61
67
  end
62
68
  end
63
69
  end
64
70
 
71
+ log.info event: 'fully-loaded'
65
72
  ts.map &:join
66
73
  exit
67
74
  end
@@ -1,8 +1,7 @@
1
1
  module Anschel
2
+ # In a nutshell
2
3
  NAME = 'anschel'
3
-
4
- # A quick summary for use in the command-line interface
5
- SUMMARY = %q.Companion to Franz.
4
+ SUMMARY = %q.Logstash-like for moving events from Kafka into Elasticsearch.
6
5
 
7
6
  # Take credit for your work
8
7
  AUTHOR = 'Sean Clemmer'
@@ -34,6 +34,7 @@ module Anschel
34
34
 
35
35
  # Construct a Logger given the command-line options
36
36
  def log
37
+ return @logger if defined? @logger
37
38
  @logger = Logger.new(options.log || STDOUT)
38
39
  @logger.level = Logger::DEBUG if options.debug?
39
40
  @logger
@@ -1,30 +1,50 @@
1
+ require 'thread'
2
+
3
+ require 'typhoeus/adapters/faraday'
4
+ require 'typhoeus'
1
5
  require 'elasticsearch'
2
6
 
3
7
 
4
8
  module Anschel
5
9
  class Output
6
- def initialize config
10
+ def initialize config, log
7
11
  pattern = config.delete(:index_pattern)
8
- bsize = config.delete(:bulk_size) || 500
9
12
  qsize = config.delete(:queue_size) || 2000
10
- @queue = SizedQueue.new qsize
13
+ bsize = config.delete(:bulk_size) || 500
14
+ timeout = config.delete(:bulk_timeout) || 1.0
15
+ slice = timeout / bsize
11
16
  client = Elasticsearch::Client.new config
12
17
  client.transport.reload_connections!
13
18
 
19
+ @queue = SizedQueue.new qsize
20
+
14
21
  Thread.new do
15
22
  loop do
16
- events = bsize.times.map { @queue.pop }
23
+ events = []
24
+ count = 0
25
+ start = Time.now.to_f
26
+ until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
27
+ begin
28
+ events.push @queue.shift(true)
29
+ rescue # shift returned immediately
30
+ sleep slice
31
+ end
32
+ end
33
+
34
+ next if events.empty?
17
35
 
18
36
  body = events.map do |e|
19
- index = e.fetch '@@anschel_index', 'logs-anschel'
20
- { index: { _index: index, _type: e['type'], data: e } }
37
+ index = e.delete(:_index) || 'logs-anschel'
38
+ { index: { _index: index, _type: e[:type], data: e } }
21
39
  end
22
40
 
23
41
  client.bulk body: body
24
42
  end
25
43
  end
44
+
45
+ log.info event: 'output-loaded'
26
46
  end
27
47
 
28
- def push event ; @queue << event end
48
+ def push event ; @queue.push event end
29
49
  end
30
50
  end
data/lib/anschel.rb CHANGED
@@ -24,4 +24,6 @@ def setup_log4j config
24
24
  end
25
25
  end
26
26
 
27
+ Thread.abort_on_exception = true
28
+
27
29
  require_relative 'anschel/main'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anschel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean Clemmer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-21 00:00:00.000000000 Z
11
+ date: 2015-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,21 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description: Companion to Franz.
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: typhoeus
76
+ prerelease: false
77
+ type: :runtime
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Logstash-like for moving events from Kafka into Elasticsearch.
70
84
  email: sczizzo@gmail.com
71
85
  executables:
72
86
  - anschel
@@ -81,6 +95,7 @@ files:
81
95
  - lib/anschel/filter.rb
82
96
  - lib/anschel/filter/convert.rb
83
97
  - lib/anschel/filter/gsub.rb
98
+ - lib/anschel/filter/index.rb
84
99
  - lib/anschel/filter/parse.rb
85
100
  - lib/anschel/filter/scan.rb
86
101
  - lib/anschel/filter/stamp.rb
@@ -112,5 +127,5 @@ rubyforge_project:
112
127
  rubygems_version: 2.4.6
113
128
  signing_key:
114
129
  specification_version: 4
115
- summary: Companion to Franz
130
+ summary: Logstash-like for moving events from Kafka into Elasticsearch
116
131
  test_files: []