anschel 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c9d437249846bfe1e7ab107fb86e8338a709d8d
4
- data.tar.gz: 75e4124f0cd424865aa63692d2fdb16001e9bb58
3
+ metadata.gz: 4e693593d8211b7f7a7ee60d06536d463f74cb2f
4
+ data.tar.gz: f67b0087ae0d56c73beed719263feb46e78ec301
5
5
  SHA512:
6
- metadata.gz: 61fd41165f874a2aa55677b3d3a562994e9283d67c949a2d8945df295e1a9d9554af508ce2c39e05b781b03dc8f8f562de1ef3d4c5af8da23c67084ef375bccd
7
- data.tar.gz: 4bb4b827b537644b7f5495a42dccf79d867e8f0a76c8cfc17c5abd2df1b91758c8f1271d8147854a2a56871ba2d34c76afa0940ec7b6d6834fca6fafb9110962
6
+ metadata.gz: 0fe919474cdc8d4e31f7ed8703600d960610876613ffca815d79267cf00fdc02e1dfdc5af2e692ae72d0005e67e9372f24da289c4abb1806f09abb06ad8c6f80
7
+ data.tar.gz: 590ccdd14141ef0feeb4482a8063d634067f9b3ad73d239a2afe0782be6857fa99bc1751a7c2508957e3cf53782c320942eee535899ca6d24a2e69e6ab86ca7f
data/Readme.md CHANGED
@@ -1,3 +1,142 @@
1
- # Anschel
1
+ # Anschel ![Version](https://img.shields.io/gem/v/anschel.svg?style=flat-square)
2
2
 
3
- Companion to Franz
3
+ Logstash-like for moving events from Kafka into Elasticsearch.
4
+
5
+
6
+
7
+ ## Usage, Configuration &c.
8
+
9
+ ### Installation
10
+
11
+ Download the jarfile from the [GitHub releases page](https://github.com/sczizzo/anschel/releases)
12
+ and run like so:
13
+
14
+ $ java -jar anschel-1.2.3.jar
15
+
16
+ ### Usage
17
+
18
+ Just call for help!
19
+
20
+ $ java -jar anschel-1.2.3.jar help
21
+ Commands:
22
+ anschel agent # Run the Anschel agent
23
+ anschel art # View the application art
24
+ anschel help [COMMAND] # Describe available commands or one specific command
25
+ anschel version # Echo the application version
26
+
27
+ Probably you're most interested in the `agent` command:
28
+
29
+ $ java -jar anschel-1.2.3.jar help agent
30
+ Usage:
31
+ anschel agent
32
+
33
+ Options:
34
+ -c, [--config=CONFIG] # Main configuration file
35
+ # Default: /etc/anschel.json
36
+ -L, [--log=LOG] # Log to file instead of STDOUT
37
+ -V, [--debug], [--no-debug] # Enable DEBUG-level logging
38
+
39
+ Run the Anschel agent
40
+
41
+
42
+
43
+ ### Configuration
44
+
45
+ It's kinda like a JSON version of the Logstash config language:
46
+
47
+ {
48
+ "log4j": {
49
+ "path": "/path/to/anschel4j.log",
50
+ "pattern": "[%d] %p %m (%c)%n"
51
+ },
52
+ "kafka": {
53
+ "queue_size": 2000,
54
+ "zk_connect": "localhost:2181",
55
+ "zk_connect_timeout": 6000,
56
+ "zk_session_timeout": 6000,
57
+ "group_id": "anschel",
58
+ "topic_id": "franz",
59
+ "reset_beginning": null,
60
+ "auto_offset_reset": "smallest",
61
+ "consumer_restart_on_error": true,
62
+ "auto_commit_interval": 1000,
63
+ "rebalance_max_retries": 4,
64
+ "rebalance_backoff_ms": 2000,
65
+ "socket_timeout_ms": 30000,
66
+ "socket_receive_buffer_bytes": 65536,
67
+ "fetch_message_max_bytes": 1048576,
68
+ "auto_commit_enable": true,
69
+ "queued_max_message_chunks": 10,
70
+ "fetch_min_bytes": 1,
71
+ "fetch_wait_max_ms": 100,
72
+ "refresh_leader_backoff_ms": 200,
73
+ "consumer_timeout_ms": -1,
74
+ "consumer_restart_sleep_ms": 0
75
+ },
76
+ "elasticsearch": {
77
+ "queue_size": 2000,
78
+ "bulk_size": 200,
79
+ "hosts": [ "localhost:9200" ],
80
+ "randomize_hosts": true,
81
+ "reload_connections": true,
82
+ "reload_on_failure": true,
83
+ "sniffer_timeout": 5
84
+ },
85
+ "filter": {
86
+ "type": [
87
+ {
88
+ "scan": {
89
+ "field": "message",
90
+ "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
91
+ "target": "guids"
92
+ }
93
+ },
94
+ {
95
+ "scan": {
96
+ "field": "path",
97
+ "pattern": "[A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}",
98
+ "target": "guids"
99
+ }
100
+ }
101
+ ],
102
+ "_after": [
103
+ {
104
+ "gsub": {
105
+ "field": "type",
106
+ "match": "-.*",
107
+ "replace": ""
108
+ }
109
+ },
110
+ {
111
+ "index": {}
112
+ }
113
+ ]
114
+ }
115
+ }
116
+
117
+
118
+
119
+
120
+ ### Operation
121
+
122
+ You might deploy Anschel with Upstart. Here's a minimal config:
123
+
124
+ #!upstart
125
+ description "anschel"
126
+
127
+ console log
128
+
129
+ start on startup
130
+ stop on shutdown
131
+ respawn
132
+
133
+ exec java -jar anschel-1.2.3.jar \
134
+ --config /etc/anschel.json --log /var/log/anschel.log
135
+
136
+
137
+
138
+ ### Changelog
139
+
140
+ #### v1.0 (develop)
141
+
142
+ - Initial implementation of the Kafka-to-Elasticsearch pipeline
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.1.0
@@ -13,6 +13,8 @@ module Anschel
13
13
  raise 'Missing required "field" for "convert" filter' if field.nil?
14
14
  raise 'Missing required "type" for "convert" filter' if type.nil?
15
15
 
16
+ field = field.to_sym
17
+
16
18
  type_conversions = {
17
19
  'integer' => :to_i,
18
20
  'float' => :to_f,
@@ -16,6 +16,8 @@ module Anschel
16
16
  raise 'Missing required "match" for "gsub" filter' if match.nil?
17
17
  raise 'Missing required "replace" for "gsub" filter' if replace.nil?
18
18
 
19
+ field = field.to_sym
20
+
19
21
  lambda do |event|
20
22
  return event unless event.has_key? field
21
23
  event[field].gsub! match, replace
@@ -0,0 +1,26 @@
1
+ module Anschel
2
+ class Filter
3
+ def index conf
4
+ stamp = conf.delete(:stamp) || '@timestamp'
5
+ prefix = conf.delete(:prefix) || 'logs-%{type}-'
6
+ suffix = conf.delete(:suffix) || '%Y.%m.%d'
7
+
8
+ stamp = stamp.to_sym
9
+
10
+ iso8601 = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"
11
+
12
+ joda = org.joda.time.format.DateTimeFormat.forPattern iso8601
13
+ joda = joda.withDefaultYear(Time.new.year)
14
+ joda = joda.withOffsetParsed
15
+
16
+ lambda do |event|
17
+ return event unless event.has_key? stamp
18
+ millis = joda.parseMillis event[stamp]
19
+ idx_prefix = prefix % event
20
+ idx_suffix = Time.at(0.001 * millis).strftime(suffix)
21
+ event[:_index] = idx_prefix + idx_suffix
22
+ filtered event, conf
23
+ end
24
+ end
25
+ end
26
+ end
@@ -13,6 +13,8 @@ module Anschel
13
13
  raise 'Missing required "field" for "parse" filter' if field.nil?
14
14
  raise 'Missing required "pattern" for "parse" filter' if pattern.nil?
15
15
 
16
+ field = field.to_sym
17
+
16
18
  lambda do |event|
17
19
  return event unless event.has_key? field
18
20
  mdata = pattern.match event[field]
@@ -16,6 +16,9 @@ module Anschel
16
16
  raise 'Missing required "pattern" for "scan" filter' if pattern.nil?
17
17
  raise 'Missing required "target" for "convert" filter' if target.nil?
18
18
 
19
+ field = field.to_sym
20
+ target = target.to_sym
21
+
19
22
  lambda do |event|
20
23
  return event unless event.has_key? field
21
24
  results = event[field].scan(pattern).flatten.uniq
@@ -20,9 +20,12 @@ module Anschel
20
20
  precision = (precision || 3).to_i
21
21
  target ||= '@timestamp'
22
22
 
23
+ field = field.to_sym
24
+ target = target.to_sym
25
+
23
26
  parsers = patterns.map do |p|
24
- joda = org.joda.time.format.DateTimeFormat.forPattern(p)
25
- joda = joda.withDefaultYear(Time.new.year)
27
+ joda = org.joda.time.format.DateTimeFormat.forPattern p
28
+ joda = joda.withDefaultYear Time.new.year
26
29
  joda = joda.withOffsetParsed
27
30
  end
28
31
 
@@ -38,8 +41,8 @@ module Anschel
38
41
  end
39
42
 
40
43
  if error_tag
41
- event['tags'] ||= []
42
- event['tags'] << error_tag
44
+ event[:tags] ||= []
45
+ event[:tags] << error_tag
43
46
  end
44
47
  event
45
48
  end
@@ -1,5 +1,6 @@
1
1
  require_relative 'filter/convert'
2
2
  require_relative 'filter/gsub'
3
+ require_relative 'filter/index'
3
4
  require_relative 'filter/parse'
4
5
  require_relative 'filter/scan'
5
6
  require_relative 'filter/stamp'
@@ -10,7 +11,7 @@ module Anschel
10
11
 
11
12
  attr_reader :filters
12
13
 
13
- def initialize config
14
+ def initialize config, log
14
15
  @filters = Hash.new { |h,k| h[k] = [] }
15
16
  config.each do |event_type, filter_defns|
16
17
  filter_defns.each do |filter_defn|
@@ -19,13 +20,16 @@ module Anschel
19
20
  @filters[event_type] << self.send(filter_type, filter_conf)
20
21
  end
21
22
  end
23
+ log.info event: 'filter-loaded'
22
24
  end
23
25
 
24
26
 
25
27
  def apply event
26
- raise 'Event does not have a "type" field' unless event['type']
27
- filters[:*].each { |f| f.call event }
28
- filters[event[:type]].each { |f| f.call event }
28
+ raise 'Event does not have a "type" field' unless event[:type]
29
+ type = event[:type].dup # In case of modification
30
+ filters[:_before].each { |f| f.call event }
31
+ filters[type].each { |f| f.call event }
32
+ filters[:_after].each { |f| f.call event }
29
33
  event
30
34
  end
31
35
 
data/lib/anschel/input.rb CHANGED
@@ -3,7 +3,7 @@ require 'jruby-kafka'
3
3
 
4
4
  module Anschel
5
5
  class Input
6
- def initialize config
6
+ def initialize config, log
7
7
  qsize = config.delete(:queue_size) || 1000
8
8
  @queue = SizedQueue.new qsize
9
9
  consumer_group = Kafka::Group.new config
@@ -11,11 +11,14 @@ module Anschel
11
11
 
12
12
  trap('SIGINT') do
13
13
  consumer_group.shutdown
14
+ log.info event: 'goodbye'
14
15
  exit
15
16
  end
17
+
18
+ log.info event: 'input-loaded'
16
19
  end
17
20
 
18
21
 
19
- def pop ; @queue.pop end
22
+ def shift ; @queue.shift end
20
23
  end
21
24
  end
data/lib/anschel/main.rb CHANGED
@@ -27,41 +27,48 @@ module Anschel
27
27
  end
28
28
 
29
29
 
30
- desc 'test', 'Test the Anschel agent'
30
+ desc 'agent', 'Run the Anschel agent'
31
31
  option :config, \
32
32
  type: :string,
33
33
  aliases: %w[ -c ],
34
34
  desc: 'Main configuration file',
35
35
  default: '/etc/anschel.json'
36
36
  include_common_options
37
- def test
37
+ def agent
38
+ log.info event: 'hello'
38
39
  config = JrJackson::Json.load File.read(options.config), symbolize_keys: true
39
40
  setup_log4j config[:log4j]
40
41
 
41
- input = Input.new config[:kafka]
42
- filter = Filter.new config[:filter]
43
- output = Output.new config[:elasticsearch]
42
+ input = Input.new config[:kafka], log
43
+ filter = Filter.new config[:filter], log
44
+ output = Output.new config[:elasticsearch], log
44
45
 
45
- start = Time.now
46
- count = 0
46
+ start = Time.now
47
+ count = 0
48
+ sample = 100_000
47
49
 
48
50
  ts = num_cpus.times.map do
49
51
  Thread.new do
50
52
  loop do
51
- event = JrJackson::Json.load input.pop.message.to_s
53
+ event = JrJackson::Json.load \
54
+ input.shift.message.to_s, symbolize_keys: true
52
55
  output.push filter.apply(event)
53
- if (count += 1) % 100_000 == 0
54
- elapsed = Time.now - start
56
+ if (count += 1) % sample == 0
57
+ old_count, now = count, Time.now
58
+ elapsed, start = (now - start).to_f, now
59
+ rate = 1.0 * sample / elapsed
55
60
  log.info \
56
61
  event: 'stat',
57
- count: count,
58
- elapsed_s: elapsed.to_f,
59
- rate_eps: ( 1.0 * count / elapsed.to_f )
62
+ count: old_count,
63
+ sample: sample,
64
+ elapsed_s: elapsed,
65
+ rate_eps: rate
60
66
  end
61
67
  end
62
68
  end
63
69
  end
64
70
 
71
+ log.info event: 'fully-loaded'
65
72
  ts.map &:join
66
73
  exit
67
74
  end
@@ -1,8 +1,7 @@
1
1
  module Anschel
2
+ # In a nutshell
2
3
  NAME = 'anschel'
3
-
4
- # A quick summary for use in the command-line interface
5
- SUMMARY = %q.Companion to Franz.
4
+ SUMMARY = %q.Logstash-like for moving events from Kafka into Elasticsearch.
6
5
 
7
6
  # Take credit for your work
8
7
  AUTHOR = 'Sean Clemmer'
@@ -34,6 +34,7 @@ module Anschel
34
34
 
35
35
  # Construct a Logger given the command-line options
36
36
  def log
37
+ return @logger if defined? @logger
37
38
  @logger = Logger.new(options.log || STDOUT)
38
39
  @logger.level = Logger::DEBUG if options.debug?
39
40
  @logger
@@ -1,30 +1,50 @@
1
+ require 'thread'
2
+
3
+ require 'typhoeus/adapters/faraday'
4
+ require 'typhoeus'
1
5
  require 'elasticsearch'
2
6
 
3
7
 
4
8
  module Anschel
5
9
  class Output
6
- def initialize config
10
+ def initialize config, log
7
11
  pattern = config.delete(:index_pattern)
8
- bsize = config.delete(:bulk_size) || 500
9
12
  qsize = config.delete(:queue_size) || 2000
10
- @queue = SizedQueue.new qsize
13
+ bsize = config.delete(:bulk_size) || 500
14
+ timeout = config.delete(:bulk_timeout) || 1.0
15
+ slice = timeout / bsize
11
16
  client = Elasticsearch::Client.new config
12
17
  client.transport.reload_connections!
13
18
 
19
+ @queue = SizedQueue.new qsize
20
+
14
21
  Thread.new do
15
22
  loop do
16
- events = bsize.times.map { @queue.pop }
23
+ events = []
24
+ count = 0
25
+ start = Time.now.to_f
26
+ until (Time.now.to_f - start > timeout) || ((count += 1) > bsize)
27
+ begin
28
+ events.push @queue.shift(true)
29
+ rescue # shift returned immediately
30
+ sleep slice
31
+ end
32
+ end
33
+
34
+ next if events.empty?
17
35
 
18
36
  body = events.map do |e|
19
- index = e.fetch '@@anschel_index', 'logs-anschel'
20
- { index: { _index: index, _type: e['type'], data: e } }
37
+ index = e.delete(:_index) || 'logs-anschel'
38
+ { index: { _index: index, _type: e[:type], data: e } }
21
39
  end
22
40
 
23
41
  client.bulk body: body
24
42
  end
25
43
  end
44
+
45
+ log.info event: 'output-loaded'
26
46
  end
27
47
 
28
- def push event ; @queue << event end
48
+ def push event ; @queue.push event end
29
49
  end
30
50
  end
data/lib/anschel.rb CHANGED
@@ -24,4 +24,6 @@ def setup_log4j config
24
24
  end
25
25
  end
26
26
 
27
+ Thread.abort_on_exception = true
28
+
27
29
  require_relative 'anschel/main'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anschel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean Clemmer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-21 00:00:00.000000000 Z
11
+ date: 2015-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,21 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description: Companion to Franz.
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ name: typhoeus
76
+ prerelease: false
77
+ type: :runtime
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Logstash-like for moving events from Kafka into Elasticsearch.
70
84
  email: sczizzo@gmail.com
71
85
  executables:
72
86
  - anschel
@@ -81,6 +95,7 @@ files:
81
95
  - lib/anschel/filter.rb
82
96
  - lib/anschel/filter/convert.rb
83
97
  - lib/anschel/filter/gsub.rb
98
+ - lib/anschel/filter/index.rb
84
99
  - lib/anschel/filter/parse.rb
85
100
  - lib/anschel/filter/scan.rb
86
101
  - lib/anschel/filter/stamp.rb
@@ -112,5 +127,5 @@ rubyforge_project:
112
127
  rubygems_version: 2.4.6
113
128
  signing_key:
114
129
  specification_version: 4
115
- summary: Companion to Franz
130
+ summary: Logstash-like for moving events from Kafka into Elasticsearch
116
131
  test_files: []