floom 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ # gem files
2
+ pkg
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Resolve gems from the official RubyGems index over HTTPS. The `:rubygems`
# symbol shorthand is deprecated by Bundler because it maps to a plain-HTTP
# source, so the URL is spelled out explicitly.
source 'https://rubygems.org'

# Take the dependency list from the gemspec that sits next to this Gemfile.
gemspec
@@ -0,0 +1,15 @@
1
+ # Floom
2
+
3
+ A small set of Ruby tools to make using Apache Flume a little less painful.
4
+
5
+ ## Usage
6
+
7
+ Floom ships with several command line utilities that shorten lengthy, often-used Flume commands.
8
+
9
+ `floom oneshot <flume config string>`
10
+
11
+ Executes a Flume node with the supplied config, with no watchdog and no heartbeat, exiting on failure.
12
+
13
+ `floom debug <decorator file>`
14
+
15
+ Executes a oneshot node with a console source and a console sink decorated with the supplied definition.
@@ -0,0 +1,3 @@
1
# Load Bundler, then have its gem helper install its Rake tasks.
require 'bundler'
Bundler::GemHelper.install_tasks()
3
+
@@ -0,0 +1,19 @@
1
#!/usr/bin/env ruby
# Usage: floom debug <decorator file>
#
# Launches a one-shot Flume node whose config reads from the console, passes
# through wukongDecorator("<decorator file>"), and writes to the console.
require 'pathname'

# Builds the node name and the Flume config string for a decorator file.
#
# @param decorator [Pathname] resolved path to the decorator script
# @return [Array<String>] two elements: the node name ("<basename>-debug")
#   and the config string that defines a node with that same name
def debug_flow(decorator)
  node_name = "#{decorator.basename('.rb')}-debug"
  [node_name, "#{node_name}: console | wukongDecorator(\"#{decorator}\") console;"]
end

# A missing argument or an unresolvable path both fall through to the usage
# message below instead of raising.
decorator = begin
  Pathname(ARGV[0]).realpath
rescue StandardError
  nil
end

help_string = <<HELP
Usage: floom debug <decorator file>

Launches 'decorator-debug: console | wukongDecorator("/path/to/decorator") console;'

HELP

if decorator.nil?
  puts help_string
else
  node_name, flow = debug_flow(decorator)
  # The name handed to -n is the same name the config string defines, as the
  # oneshot command does; previously -n lacked the "-debug" suffix and so did
  # not match the node defined in the config.
  system('flume', 'node_nowatch', '-1', '-s', '-n', node_name, '-c', flow)
end
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby
# Dispatcher for the floom command line: every other file that lives in the
# same directory as this script is treated as a subcommand.
require 'pathname'

this_script = Pathname(__FILE__).realpath
bin_dir     = this_script.dirname

# Everything in the directory except this script itself, by file name.
sibling_paths      = Pathname.glob(bin_dir.join('*')).reject { |path| path.fnmatch?(this_script.to_s) }
available_commands = sibling_paths.map { |path| path.basename.to_s }
command_listing    = available_commands.map { |name| "\t" + name.to_s }.join("\n")

requested = ARGV.shift

usage = <<HELP
Usage: floom <command> [options]

Available Commands:
#{command_listing}

Use floom <command> --help for specific options
HELP

if available_commands.include?(requested)
  # Replace this process with the subcommand, forwarding the remaining args.
  exec bin_dir.join(requested).to_s, *ARGV
else
  puts usage
end
@@ -0,0 +1,41 @@
1
#!/usr/bin/env ruby
# Usage: floom oneshot '<flume config string>'
#
# Validates the supplied config string and, when it looks well formed, runs
# `flume node_nowatch -1 -s` with the node name taken from the config.

# Decides whether the command line holds a usable one-shot config.
#
# @param conf [String, nil] the first command line argument
# @param extra_args [Array<String>] whatever arguments followed it
# @return [Boolean] true when conf is present, is the only argument, is not a
#   help request, contains ':', '|' and ';', and has text on both sides of a ':'
def valid_oneshot_config?(conf, extra_args)
  # Checked first: every later test calls a String method on conf, which
  # used to raise NoMethodError when no argument was given.
  return false if conf.nil?
  return false unless extra_args.empty?
  return false if conf =~ /-help/

  [':', '|', ';'].all? { |token| conf.include?(token) } && conf.split(/:/).size > 1
end

conf = ARGV.shift

help_string = <<HELP
Usage: floom oneshot '<flume config string>'

Tips:

* Make sure you use single quotes around the config string (')

* Make sure your config begins with a logical node name followed by a colon (:)

* If anything inside the config needs to be quoted, use double quotes (")

* Make sure to end the config with a semicolon (;)

* Flume config strings have the following format:
'name: source | decorator decorator ... sink;'

Examples
* 'terminal: console | console;'
* 'writer: console | text("/tmp/flume_output.txt", raw);'
* 'fake_data: asciisynth(1000, 100) | stubbornAppend rpcSink("localhost", 33333);'

HELP

if valid_oneshot_config?(conf, ARGV)
  name = conf.split(':').first
  # Argument-list form of system: the config reaches flume verbatim without
  # passing through a shell, so quotes inside it cannot break the command.
  system('flume', 'node_nowatch', '-1', '-s', '-n', name, '-c', conf)
else
  puts help_string
end
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env jruby --1.9
2
+ require 'java'
3
+ require 'socket'
4
+ require 'configliere' ; Settings.use(:commandline)
5
+
6
# Settings the client reads; each is marked required and has a default.
Settings.define(:flume_home,
                default:     ENV['FLUME_HOME'],
                required:    true,
                description: 'Set to the top-level directory of your Flume install')
Settings.define(:host,
                default:     Socket.gethostname,
                required:    true,
                flag:        'c',
                description: 'The hostname where a Flume rpcSource is running')
Settings.define(:port,
                default:     33333,
                required:    true,
                flag:        'p',
                description: 'The port where a Flume rpcSource is listening')
Settings.resolve!

# Require every jar found anywhere beneath the Flume install directory.
flume_jars = Dir[File.join(Settings.flume_home, '**/*.jar')]
flume_jars.each { |jar| require jar }
20
+
21
+ java_import 'java.net.URL'
22
+ java_import 'java.net.ConnectException'
23
+ java_import 'org.apache.avro.ipc.HttpTransceiver'
24
+ java_import 'org.apache.avro.ipc.AccountingTransceiver'
25
+ java_import 'org.apache.avro.ipc.specific.SpecificRequestor'
26
+ java_import 'com.cloudera.flume.core.EventImpl'
27
+ java_import 'com.cloudera.flume.handlers.avro.FlumeEventAvroServer'
28
+ java_import 'com.cloudera.flume.handlers.avro.AvroEventConvertUtil'
29
+ java_import 'org.slf4j.LoggerFactory'
30
+
31
# Reads lines from standard input and appends each one, wrapped as an Avro
# event, to an RPC client built on an HTTP transceiver.
class RubyRpcClient

  attr_reader :client, :transport

  # @param config [#[]] object answering [:host] and [:port]
  def initialize(config = {})
    host = config[:host]
    port = config[:port]
    @transport = avro_transport(host, port)
    @client    = create_rpc_connection(@transport)
  end

  # Lazily created slf4j logger named after this class.
  def log
    return @log if @log

    @log = Java::OrgSlf4j::LoggerFactory.getLogger(self.class.to_s)
  end

  # Wraps an HttpTransceiver for http://host:port in an AccountingTransceiver.
  def avro_transport(host, port)
    endpoint = Java::JavaNet::URL.new("http://#{host}:#{port}")
    Java::OrgApacheAvroIpc::AccountingTransceiver.new(
      Java::OrgApacheAvroIpc::HttpTransceiver.new(endpoint)
    )
  end

  # Asks SpecificRequestor for a client of the FlumeEventAvroServer interface
  # that talks over the given transport.
  def create_rpc_connection(transport)
    Java::OrgApacheAvroIpcSpecific::SpecificRequestor.getClient(
      Java::ComClouderaFlumeHandlersAvro::FlumeEventAvroServer.java_class,
      transport
    )
  end

  # Converts a Ruby string into an Avro event via EventImpl.
  def avro_event(str)
    payload = Java::ComClouderaFlumeCore::EventImpl.new(str.to_java_bytes)
    Java::ComClouderaFlumeHandlersAvro::AvroEventConvertUtil.toAvroEvent(payload)
  end

  # Sends every line of standard input as an event. The loop ends once a line
  # can no longer be read; a failed append is logged and exits with status 1.
  def run!
    while (line = next_line)
      begin
        client.append(avro_event(line))
      rescue Exception => e
        log.error("Error while appending event: #{line}")
        log.error(e.message)
        exit(1)
      end
    end
  end

  private

  # Next line of standard input without its line ending, or nil when reading
  # raises (for example at end of input).
  def next_line
    $stdin.readline.chomp
  rescue StandardError
    nil
  end

end
73
+
74
# Build a client from the resolved settings and start forwarding input.
rpc_client = RubyRpcClient.new(Settings)
rpc_client.run!
@@ -0,0 +1,23 @@
1
+ $:.unshift File.expand_path('../lib', __FILE__)
2
+ require 'floom/version'
3
+
4
# Gem metadata for floom; the version comes from Floom::VERSION.
Gem::Specification.new do |gem|
  gem.name        = 'floom'
  gem.version     = Floom::VERSION
  gem.authors     = ['Travis Dempsey']
  gem.email       = 'travis@infochimps.org'
  gem.homepage    = 'https://github.com/kornypoet/floom.git'
  gem.summary     = 'Simple Thrift class extensions for Flume'
  gem.description = <<DESC

Flume extra fun: Floom

DESC

  # Package exactly the files git tracks.
  tracked_files     = %x(git ls-files)
  gem.files         = tracked_files.split("\n")
  gem.executables   = 'floom'
  gem.require_paths = ['lib']
  gem.add_dependency('thrift', '>= 0.8.0')
end
@@ -0,0 +1,18 @@
1
+ $:.unshift File.expand_path('../thrift', __FILE__)
2
+
3
+ require 'thrift/flume_master_admin_server'
4
+ require 'thrift/flume_constants'
5
+ require 'thrift/flumeconfig_constants'
6
+ require 'thrift/flumereportserver_constants'
7
+ require 'thrift/mastercontrol_constants'
8
+ require 'thrift/thrift_flume_client_server'
9
+ require 'thrift/thrift_flume_event_server'
10
+ require 'thrift/thrift_flume_report_server'
11
+
12
+ require 'floom/models/configuration'
13
+ require 'floom/models/status'
14
+ require 'floom/models/report'
15
+ require 'floom/models/request'
16
+ require 'floom/client'
17
+ require 'floom/client/master'
18
+ require 'floom/client/reporter'
@@ -0,0 +1,33 @@
1
module Floom
  # Base class for Thrift-backed clients. Subclasses provide the Thrift client
  # class to instantiate through a `thrift_class` class method.
  class Client

    # The Thrift client object created from `thrift_class`.
    attr_reader :connection

    # Stores the endpoint and opens the connection immediately.
    #
    # @param options [Hash] :host and :port of the Thrift server
    def initialize(options = {})
      @host = options[:host]
      @port = options[:port]
      establish_connection!
    end

    def to_s
      "#<#{self.class}:#{object_id} host:#{@host} port:#{@port}>"
    end

    # Discards the current socket/transport/protocol/connection and builds a
    # fresh set. The old transport is closed first so its socket is not leaked.
    #
    # @return [Floom::Client] self
    def reset_connection!
      close_transport
      @socket = @transport = @protocol = @connection = nil
      establish_connection!
    end

    private

    # Builds any missing layer of the Thrift stack and opens the transport.
    #
    # @return [Floom::Client] self
    def establish_connection!
      @socket     ||= Thrift::Socket.new(@host, @port)
      @transport  ||= Thrift::BufferedTransport.new(@socket)
      @protocol   ||= Thrift::BinaryProtocol.new(@transport)
      @connection ||= self.class.thrift_class.new(@protocol)
      @transport.open unless @transport.open?
      self
    end

    # Best-effort close of the current transport. A reset is typically asked
    # for because the connection is already broken, so a failure to close must
    # not prevent the reconnect.
    def close_transport
      @transport.close if @transport && @transport.open?
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,106 @@
1
module Floom
  # Client for the Flume master admin Thrift service: reads configurations,
  # mappings and statuses, and submits map/unmap/config/... requests.
  class Master < Floom::Client

    def self.thrift_class
      FlumeMasterAdminServer::Client
    end

    # @return [Hash] node name => result of Floom::Configuration.parse
    def configurations
      connection.getConfigs.each_with_object({}) do |(node, conf), hsh|
        hsh[node] = Floom::Configuration.parse(conf)
      end
    end

    # @param physical_node [String, nil] forwarded as-is to getMappings
    # @return [Hash] physical node name => collection of logical node names
    #   (shape as relied on by #unmap and #logical_nodes)
    def mappings(physical_node = nil)
      connection.getMappings(physical_node)
    end

    # @return [Hash] node name => result of Floom::Status.parse
    def statuses
      connection.getNodeStatuses.each_with_object({}) do |(node, stat), hsh|
        hsh[node] = Floom::Status.parse(stat)
      end
    end

    # Every logical node name known through a configuration, mapping or status.
    def logical_nodes
      (configurations.keys + mappings.values + statuses.keys).flatten.uniq
    end

    def physical_nodes
      mappings.keys
    end

    # True when any physical node maps the logical node. The mapping values
    # are collections of names, so they are flattened before the lookup;
    # comparing a name against the collections themselves never matched.
    def mapped?(logical_node)
      mappings.values.flatten.include?(logical_node)
    end

    def configured?(logical_node)
      configurations.key?(logical_node)
    end

    def has_status?(logical_node)
      statuses.key?(logical_node)
    end

    # Builds a Floom::Request on this connection, fetches it and returns the
    # parsed result.
    def perform_request(*params)
      req = Floom::Request.new(connection, *params).fetch
      req.parse
    end

    def map(physical_node, logical_node)
      perform_request(:map, physical_node, logical_node)
    end

    # Unmaps each logical node from the physical node that currently maps it.
    # When no physical node maps it, the first physical node is used, which is
    # the fallback the original lookup had.
    #
    # @return [Array] one request result per logical node
    def unmap(*logical_nodes)
      logical_nodes.map do |logical_node|
        # Re-read per node: an earlier unmap in this call may have changed
        # the mappings.
        current = mappings
        owner, _nodes = current.detect { |_name, nodes| nodes.include?(logical_node) }
        owner ||= current.keys.first
        perform_request(:unmap, owner, logical_node)
      end
    end

    def unmap_all
      unmap(*logical_nodes.select { |node| mapped?(node) })
    end

    def decommission(*logical_nodes)
      logical_nodes.map { |logical_node| perform_request(:decommission, logical_node) }
    end

    def decommission_all
      decommission(*logical_nodes.select { |node| mapped?(node) || configured?(node) })
    end

    def purge(*logical_nodes)
      logical_nodes.map { |logical_node| perform_request(:purge, logical_node) }
    end

    def purge_all
      purge(*logical_nodes.select { |node| has_status?(node) })
    end

    def refresh(*logical_nodes)
      logical_nodes.map { |logical_node| perform_request(:refresh, logical_node) }
    end

    def refresh_all
      refresh(*logical_nodes)
    end

    # Submits a :config request for every entry of spec.
    #
    # @param spec [Hash] logical node name => Floom::Configuration,
    #   ThriftFlumeConfigData, Hash of configuration options, or Array of
    #   ready-made request parameters
    # @return [Array] one request result per entry
    # @raise [ArgumentError] when a value is of none of the supported types
    def configure(spec = {})
      spec.map do |logical_node, conf|
        perform_request(:config, logical_node.to_s, *config_params(conf))
      end
    end

    def unconfigure(*logical_nodes)
      logical_nodes.map { |logical_node| perform_request(:unconfig, logical_node) }
    end

    private

    # Normalizes one configure value into the request parameter list.
    def config_params(conf)
      case conf
      when Floom::Configuration then conf.to_params
      when ThriftFlumeConfigData
        # Configuration.parse hands back a Hash, which has no to_params;
        # rebuild a Configuration from it before asking for the parameters.
        parsed = Floom::Configuration.parse(conf)
        parsed = Floom::Configuration.create(parsed) if parsed.is_a?(Hash)
        parsed.to_params
      when Hash  then Floom::Configuration.create(conf).to_params
      when Array then conf
      else
        # Previously an unsupported value was sent with no parameters at all.
        raise ArgumentError, "unsupported configuration type: #{conf.class}"
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
module Floom
  # Client for the Flume report Thrift service, plus helpers that regroup the
  # flat report hash into master / system / jvm / physical-node sections.
  #
  # The extract_* helpers remove the entries they consume from the metrics
  # hash they are given.
  class Reporter < Client

    def self.thrift_class
      ThriftFlumeReportServer::Client
    end

    # @return [Hash] report name => result of Floom::Report.parse
    def reports
      connection.getAllReports.each_with_object({}) do |(name, report), hsh|
        hsh[name] = Floom::Report.parse(report)
      end
    end

    # Removes the "flume-master-<digits>" entry and returns it, without its
    # 'name' field, under :master ({} when there is no such entry).
    def extract_master(metrics)
      master_key = metrics.keys.detect { |key| key =~ /\Aflume-master-\d+\z/ }
      master = metrics.delete(master_key) || {}
      master.delete('name')
      { master: master }
    end

    # Removes "<namespace>.system-info" and returns it under :system_info.
    def extract_system_info(metrics, namespace = 'null')
      { system_info: extract_report(metrics, "#{namespace}.system-info") }
    end

    # Removes "<namespace>.jvm-Info" and returns it under :jvm_info.
    def extract_jvm_info(metrics, namespace = 'null')
      { jvm_info: extract_report(metrics, "#{namespace}.jvm-Info") }
    end

    # Collects every "<physical_node>.<node>.<metric>" entry, grouped as
    # node => { metric => value }, and removes those entries from metrics.
    def extract_logical_nodes(metrics, physical_node)
      # The node name is escaped so characters such as '.' in it are matched
      # literally rather than as regex syntax.
      pattern = /\A#{Regexp.escape(physical_node.to_s)}\.(?<node_name>[\w-]+)\.(?<metric>[\w.-]+)\z/
      matches = metrics.keys.map { |key| key.match(pattern) }.compact
      logical_nodes = matches.each_with_object({}) do |match, hsh|
        hsh[match[:node_name]] ||= {}
        # match.to_s is the full original key.
        hsh[match[:node_name]][match[:metric]] = metrics.delete(match.to_s)
      end
      { logical_nodes: logical_nodes }
    end

    # Collects every "pn-<name>" entry together with that node's system info,
    # jvm info and logical nodes, keyed by <name>.
    def extract_physical_nodes(metrics)
      matches = metrics.keys.map { |key| key.match(/\Apn-(?<node_name>[\w-]+)\z/) }.compact
      physical_nodes = matches.each_with_object({}) do |match, hsh|
        node_name = match[:node_name]
        full_key  = match.to_s
        hsh[node_name] ||= {}
        hsh[node_name].merge!(metrics.delete(full_key)).
                       merge!(extract_system_info(metrics, full_key)).
                       merge!(extract_jvm_info(metrics, full_key)).
                       merge!(extract_logical_nodes(metrics, node_name))
      end
      { physical_nodes: physical_nodes }
    end

    # Regroups a flat report hash into :master, :jvm_info, :system_info and
    # :physical_nodes sections. The hash passed in is consumed in the process.
    def rehash(metrics = reports)
      {}.merge!(extract_master(metrics)).
         merge!(extract_jvm_info(metrics)).
         merge!(extract_system_info(metrics)).
         merge!(extract_physical_nodes(metrics))
    end

    private

    # Removes key from metrics and returns its value without the 'name'
    # field, or {} when the key is absent.
    def extract_report(metrics, key)
      report = metrics.delete(key) || {}
      report.delete('name')
      report
    end
  end
end
@@ -0,0 +1,71 @@
1
module Floom
  # Holds the fields of a node configuration (source, sink, flow, the two
  # versions and a timestamp) and exposes them as DSL-style accessors.
  class Configuration

    # Copies the fields of a config object (answering sourceConfig,
    # sinkConfig, flowID, sourceVersion, sinkVersion, timestamp) and returns
    # them as a Hash.
    def self.parse(conf)
      attributes = {
        source:         conf.sourceConfig,
        sink:           conf.sinkConfig,
        flow:           conf.flowID,
        source_version: conf.sourceVersion,
        sink_version:   conf.sinkVersion,
        timestamp:      conf.timestamp
      }
      new(attributes).to_hash
    end

    # Builds a configuration from options; an optional block is evaluated in
    # the context of the new instance so the accessors can be used as a DSL.
    def self.create(options = {}, &blk)
      config = new(options)
      config.instance_eval(&blk) if blk
      config
    end

    def initialize(options = {})
      @source, @sink, @flow = options[:source], options[:sink], options[:flow]
      @source_version = options[:source_version]
      @sink_version   = options[:sink_version]
      @timestamp      = options[:timestamp]
    end

    # Each accessor returns the current value; when called with a truthy
    # argument it stores that value first.
    [:timestamp, :source, :sink, :flow, :source_version, :sink_version].each do |attribute|
      ivar = "@#{attribute}"
      define_method(attribute) do |val = nil|
        instance_variable_set(ivar, val) if val
        instance_variable_get(ivar)
      end
    end

    # @return [Hash] instance variable name (as a Symbol, without '@') =>
    #   value read through the matching accessor
    def to_hash
      instance_variables.each_with_object({}) do |ivar, hsh|
        name = ivar.to_s[1..-1].to_sym
        hsh[name] = send(name)
      end
    end

    # @return [Array] flow, source and sink, in that order, with nils removed
    def to_params
      params = [flow, source, sink]
      params.compact
    end

  end
end