wukong-load 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
module Wukong
  module Load

    # Driver used by the wu-source command.
    #
    # Rather than waiting for input it generates its own events: each
    # event is the string form of a consecutive integer, starting at
    # '1', passed to #receive_line once per timer period.
    class SourceDriver < Wukong::Local::StdioDriver
      include Logging

      # Seconds between events when no usable period or rate is configured.
      DEFAULT_PERIOD = 1.0

      # index      -- the integer that the next event will carry.
      # batch_size -- number of events per batch, or nil when batching is off.
      attr_accessor :index, :batch_size

      # Initializes the event counter and, when settings[:batch_size]
      # is a positive integer, enables batching.
      def post_init
        super()
        self.index = 1
        requested = settings[:batch_size]
        self.batch_size = requested.to_i if requested && requested.to_i > 0
      end

      # Builds a driver, fires the first event immediately and then
      # schedules one event per period on an EventMachine timer.
      #
      # @param label    passed through to the driver constructor
      # @param settings [Hash] may contain :period (seconds between
      #   events) or :per_sec (events per second); :period wins when
      #   both are given
      # @return [EventMachine::PeriodicTimer] the timer generating events
      def self.start(label, settings={})
        driver = new(:foobar, label, settings)
        driver.post_init

        period = period_for(settings)
        driver.create_event
        EventMachine::PeriodicTimer.new(period) { driver.create_event }
      end

      # Works out the number of seconds between events.
      #
      # settings[:period] is used verbatim when present.  Otherwise
      # settings[:per_sec] is converted to a period; numeric strings
      # (e.g. "2") are accepted.  A rate that is missing, unparseable,
      # zero, negative or infinite falls back to DEFAULT_PERIOD rather
      # than producing an infinite, negative or zero timer interval.
      def self.period_for(settings)
        return settings[:period] if settings[:period]
        return DEFAULT_PERIOD    unless settings[:per_sec]

        rate = begin
                 Float(settings[:per_sec])
               rescue ArgumentError, TypeError
                 nil
               end
        (rate && rate > 0 && rate.finite?) ? (1.0 / rate) : DEFAULT_PERIOD
      end
      private_class_method :period_for

      # Emits the next event and, when batching is enabled, finalizes
      # the dataflow after every batch_size-th event.
      #
      # The batch test uses the number of the event just emitted (not
      # the already-incremented counter) so that the first batch holds
      # batch_size events like every later one.
      def create_event
        emitted = index
        receive_line(emitted.to_s)
        self.index = emitted + 1
        finalize_dataflow if batch_size && (emitted % batch_size) == 0
      end

      # :nodoc:
      #
      # Writes a record to STDOUT and flushes immediately.
      #
      # Not sure why the explicit call to $stdout.flush is needed.
      # Supposedly $stdout.sync is called during the #setup method in
      # StdoutProcessor in wukong/widget/processors.  Doesn't that do
      # this?
      def process record
        $stdout.puts record
        $stdout.flush
      end

    end
  end
end
@@ -0,0 +1,36 @@
1
+ require_relative('source_driver')
2
module Wukong
  module Load

    # Runs the wu-source command.
    #
    # Everything except the choice of driver is inherited from
    # Wukong::Local::LocalRunner; this class only supplies the usage
    # line, the help text and the driver class.
    class SourceRunner < Wukong::Local::LocalRunner

      # Argument summary for the command's usage message (presumably
      # rendered by the runner's help output -- confirm).
      usage "PROCESSOR|DATAFLOW"

      # Long-form help text.  The gsub strips an 8-space margin from
      # each line of the heredoc.
      description <<-EOF.gsub(/^ {8}/,'')

        wu-source is a tool for using Wukong processors as sources of
        data in streams.

        Run any Wukong processor as a source for data:

        $ wu-source fake_log_data
        205.4.75.208 - 3918471017 [27/Nov/2012:05:06:57 -0600] "GET /products/eget HTTP/1.0" 200 25600
        63.181.105.15 - 3650805763 [27/Nov/2012:05:06:57 -0600] "GET /products/lacinia-nulla-vitae HTTP/1.0" 200 3790
        227.190.78.101 - 39543891 [27/Nov/2012:05:06:58 -0600] "GET /products/odio-nulla-nulla-ipsum HTTP/1.0" 200 31718
        ...

        The fake_log_data processor will receive an event once every
        second. Each event will consist of a single string giving a
        consecutive integer starting with '1' as the first event.
      EOF

      include Logging

      # The driver class this runner uses: SourceDriver.
      def driver
        SourceDriver
      end

    end
  end
end
@@ -1,6 +1,6 @@
1
1
  module Wukong
2
2
  module Load
3
3
  # The current version of Wukong-Load
4
- VERSION = '0.0.2'
4
+ VERSION = '0.1.0'
5
5
  end
6
6
  end
@@ -3,5 +3,18 @@ require 'wukong/spec_helpers'
3
3
 
4
4
  # Shared RSpec configuration and helper methods for the spec suite.
  RSpec.configure do |config|
    config.mock_with :rspec

    # NOTE(review): this is a bare `include` (not `config.include`), so the
    # module is mixed in wherever this block is evaluated -- presumably the
    # top-level object; confirm that is intended.
    include Wukong::SpecHelpers

    # Turn Wukong's logger off before each example.
    config.before(:each) do
      Wukong::Log.level = Log4r::OFF
    end

    # Memoized Pathname for the directory one level above the directory
    # containing this file.
    def root
      @root ||= Pathname.new(File.expand_path('../..', __FILE__))
    end

    # Calls the `runner` helper for Wukong::Load::LoadRunner under the
    # program name 'wu-load', forwarding any extra arguments.
    # NOTE(review): the block parameter is accepted but never used or
    # forwarded -- confirm whether `runner` should receive it.
    def load_runner *args, &block
      runner(Wukong::Load::LoadRunner, 'wu-load', *args)
    end
  end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
describe Wukong::Load::ElasticsearchLoader do

  # Loaders built directly with the options under test.
  let(:loader)                   { described_class.new }
  let(:loader_with_custom_index) { described_class.new(:index => 'custom_index') }
  let(:loader_with_custom_type)  { described_class.new(:es_type => 'custom_es_type') }
  let(:loader_with_custom_id)    { described_class.new(:id_field => '_custom_id') }

  # Sample records, with and without routing/ID fields.
  let(:record)                     { {'text' => 'hi'} }
  let(:record_with_index)          { {'text' => 'hi', '_index' => 'custom_index'} }
  let(:record_with_custom_index)   { {'text' => 'hi', '_custom_index' => 'custom_index'} }
  let(:record_with_es_type)        { {'text' => 'hi', '_es_type' => 'custom_es_type'} }
  let(:record_with_custom_es_type) { {'text' => 'hi', '_custom_es_type' => 'custom_es_type'} }
  let(:record_with_id)             { {'text' => 'hi', '_id' => 'the_id'} }
  let(:record_with_custom_id)      { {'text' => 'hi', '_custom_id' => 'the_id'} }

  it_behaves_like 'a processor', :named => :elasticsearch_loader

  context "without an Elasticsearch available" do
    before do
      # Make opening the HTTP connection fail.
      Net::HTTP.should_receive(:new).and_raise(StandardError)
    end

    it "raises an error on setup" do
      expect { processor(:elasticsearch_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "routes" do
    context "all records" do
      it "to a default index" do
        loader.index_for(record).should eq(loader.index)
      end

      it "to a given index" do
        loader_with_custom_index.index_for(record).should eq('custom_index')
      end

      it "to a default type" do
        loader.es_type_for(record).should eq(loader.es_type)
      end

      it "to a given type" do
        loader_with_custom_type.es_type_for(record).should eq('custom_es_type')
      end
    end

    context "records having a value for" do
      it "default index field to the given index" do
        loader.index_for(record_with_index).should eq('custom_index')
      end

      # NOTE(review): this loader is configured with :index => 'custom_index',
      # which is also the expected value, so the example would presumably pass
      # even if the record's '_custom_index' field were ignored -- confirm.
      it "given index field to the given index" do
        loader_with_custom_index.index_for(record_with_custom_index).should eq('custom_index')
      end

      it "default type field to the given type" do
        loader.es_type_for(record_with_es_type).should eq('custom_es_type')
      end

      # NOTE(review): same concern as the index example above -- the loader
      # is configured with :es_type => 'custom_es_type'.
      it "given type field to the given type" do
        loader_with_custom_type.es_type_for(record_with_custom_es_type).should eq('custom_es_type')
      end
    end
  end

  context "detects IDs" do
    it "based on the absence of a default ID field" do
      loader.id_for(record).should be_nil
    end

    it "based on the value of a default ID field" do
      loader.id_for(record_with_id).should eq('the_id')
    end

    it "based on the value of a custom ID field" do
      loader_with_custom_id.id_for(record_with_custom_id).should eq('the_id')
    end
  end

  context "sends" do
    it "create requests on a record without an ID" do
      loader.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
      loader.load({'_index' => 'foo', '_es_type' => 'bar'})
    end

    it "update requests on a record with an ID" do
      processor(:elasticsearch_loader) do |es|
        es.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
        es.load({'_index' => 'foo', '_es_type' => 'bar', '_id' => '1'})
      end
    end
  end

  context "receives" do
    # Builds a stand-in HTTP response that answers only #code and #body.
    def canned_response(name, code, body)
      mock(name, :code => code, :body => body)
    end

    let(:connection) { double() }
    let(:ok)         { canned_response("Net::HTTPOK",       '200', '{"ok": true}') }
    let(:created)    { canned_response("Net::HTTPCreated",  '201', '{"created": true}') }
    let(:not_found)  { canned_response("Net::HTTPNotFound", '404', '{"error": "Not found"}') }

    before do
      # Every loader in this context talks to the stand-in connection.
      Net::HTTP.should_receive(:new).and_return(connection)
    end

    context "201 Created" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created)
      end

      it "by logging an INFO message" do
        processor(:elasticsearch_loader) do |es|
          es.log.should_receive(:info)
          es.load(record)
        end
      end
    end

    context "200 OK" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok)
      end

      it "by logging an INFO message" do
        processor(:elasticsearch_loader) do |es|
          es.log.should_receive(:info)
          es.load(record_with_id)
        end
      end
    end

    context "an error response from Elasticsearch" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found)
      end

      it "by logging an ERROR message" do
        processor(:elasticsearch_loader) do |es|
          es.log.should_receive(:error)
          es.load(record)
        end
      end
    end

  end
end
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+ require 'kafka'
3
+
4
describe Wukong::Load::KafkaLoader do

  # Loaders built directly with the options under test.
  let(:loader)                             { Wukong::Load::KafkaLoader.new }
  let(:loader_with_custom_topic)           { Wukong::Load::KafkaLoader.new(:topic => 'custom') }
  let(:loader_with_custom_topic_field)     { Wukong::Load::KafkaLoader.new(:topic_field => '_custom_topic') }
  let(:loader_with_custom_partition)       { Wukong::Load::KafkaLoader.new(:partition => 1) }
  let(:loader_with_custom_partition_field) { Wukong::Load::KafkaLoader.new(:partition_field => '_custom_partition') }

  # Sample records, with and without routing fields.
  let(:record)                             { {'text' => 'hi'} }
  let(:record_with_topic_field)            { {'text' => 'hi', '_topic' => 'custom'} }
  let(:record_with_custom_topic_field)     { {'text' => 'hi', '_custom_topic' => 'custom'} }
  let(:record_with_partition_field)        { {'text' => 'hi', '_partition' => 1} }
  let(:record_with_custom_partition_field) { {'text' => 'hi', '_custom_partition' => 1} }


  it "raises an error on setup if it can't connect to Kafka" do
    Kafka::MultiProducer.should_receive(:new).and_raise(StandardError)
    expect { processor(:kafka_loader) }.to raise_error(Wukong::Error)
  end

  context "with a Kafka available" do
    before do
      # Replace the real producer with a double so no broker is needed.
      @producer = double()
      Kafka::MultiProducer.stub!(:new).and_return(@producer)
    end
    it_behaves_like 'a processor', :named => :kafka_loader

    it "produces an INFO log message on every write" do
      @producer.should_receive(:send)
      processor(:kafka_loader) do |kafka|
        kafka.log.should_receive(:info)
        kafka.load(record)
      end
    end

  end

  context "routes" do
    context "all records" do
      it "to a default topic" do
        loader.topic_for(record).should == loader.topic
      end
      it "to a given topic" do
        loader_with_custom_topic.topic_for(record).should == 'custom'
      end
      it "to a default partition" do
        loader.partition_for(record).should == loader.partition
      end
      it "to a given partition" do
        loader_with_custom_partition.partition_for(record).should == 1
      end
    end
    context "records having a value for" do
      it "default topic field to the given topic" do
        loader.topic_for(record_with_topic_field).should == 'custom'
      end
      # Uses the loader configured with a custom *field name* and a record
      # carrying that field, so the example fails if the custom field is
      # ignored (pairing a :topic loader with a '_topic' record cannot).
      it "given topic field to the given topic" do
        loader_with_custom_topic_field.topic_for(record_with_custom_topic_field).should == 'custom'
      end
      it "default partition field to the given partition" do
        loader.partition_for(record_with_partition_field).should == 1
      end
      # As above: exercise the custom partition field rather than a loader
      # whose default partition already equals the expected value.
      it "given partition field to the given partition" do
        loader_with_custom_partition_field.partition_for(record_with_custom_partition_field).should == 1
      end
    end
  end

end
@@ -0,0 +1,100 @@
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+
4
describe Wukong::Load::MongoDBLoader do

  # Loaders built directly with the options under test.
  let(:loader)                        { described_class.new }
  let(:loader_with_custom_database)   { described_class.new(:database => 'custom_database') }
  let(:loader_with_custom_collection) { described_class.new(:collection => 'custom_collection') }
  let(:loader_with_custom_id)         { described_class.new(:id_field => '_custom_id') }

  # Sample records, with and without routing/ID fields.
  let(:record)                        { {'text' => 'hi'} }
  let(:record_with_database)          { {'text' => 'hi', '_database' => 'custom_database'} }
  let(:record_with_custom_database)   { {'text' => 'hi', '_custom_database' => 'custom_database'} }
  let(:record_with_collection)        { {'text' => 'hi', '_collection' => 'custom_collection'} }
  let(:record_with_custom_collection) { {'text' => 'hi', '_custom_collection' => 'custom_collection'} }
  let(:record_with_id)                { {'text' => 'hi', '_id' => 'the_id'} }
  let(:record_with_custom_id)         { {'text' => 'hi', '_custom_id' => 'the_id'} }

  context "without an MongoDB available" do
    before do
      # Make creating the client fail.
      Mongo::MongoClient.should_receive(:new).and_raise(StandardError)
    end

    it "raises an error on setup" do
      expect { processor(:mongodb_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "with a MongoDB available" do
    before do
      # Replace the real client with a double so no server is needed.
      @client = double()
      Mongo::MongoClient.stub!(:new).and_return(@client)
    end

    it_behaves_like 'a processor', :named => :mongodb_loader

    context "routes" do
      context "all records" do
        it "to a default database" do
          loader.database_name_for(record).should eq(loader.database)
        end

        it "to a given database" do
          loader_with_custom_database.database_name_for(record).should eq('custom_database')
        end

        it "to a default collection" do
          loader.collection_name_for(record).should eq(loader.collection)
        end

        it "to a given collection" do
          loader_with_custom_collection.collection_name_for(record).should eq('custom_collection')
        end
      end

      context "records having a value for" do
        it "default database field to the given database" do
          loader.database_name_for(record_with_database).should eq('custom_database')
        end

        # NOTE(review): this loader is configured with
        # :database => 'custom_database', which is also the expected value,
        # so the example would presumably pass even if the record's
        # '_custom_database' field were ignored -- confirm.
        it "given database field to the given database" do
          loader_with_custom_database.database_name_for(record_with_custom_database).should eq('custom_database')
        end

        it "default collection field to the given collection" do
          loader.collection_name_for(record_with_collection).should eq('custom_collection')
        end

        # NOTE(review): same concern as the database example above -- the
        # loader is configured with :collection => 'custom_collection'.
        it "given collection field to the given collection" do
          loader_with_custom_collection.collection_name_for(record_with_custom_collection).should eq('custom_collection')
        end
      end
    end

    context "detects IDs" do
      it "based on the absence of a default ID field" do
        loader.id_for(record).should be_nil
      end

      it "based on the value of a default ID field" do
        loader.id_for(record_with_id).should eq('the_id')
      end

      it "based on the value of a custom ID field" do
        loader_with_custom_id.id_for(record_with_custom_id).should eq('the_id')
      end
    end

    context "sends" do
      before do
        # Hand the loader a double in place of a real collection.
        @collection = double()
        loader.stub!(:collection_for).and_return(@collection)
      end

      it "insert requests on a record without an ID" do
        @collection.should_receive(:insert).with(kind_of(Hash))
        loader.load({'_database' => 'foo', '_collection' => 'bar'})
      end

      it "update requests on a record with an ID" do
        @collection.should_receive(:update).with({:_id => '1'}, kind_of(Hash), :upsert => true).and_return({})
        loader.load({'_database' => 'foo', '_collection' => 'bar', '_id' => '1'})
      end
    end

  end
end
@@ -0,0 +1,112 @@
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'mysql2'
4
+
5
describe Wukong::Load::SQLLoader do

  # Loaders built directly with the options under test.
  let(:loader)                      { described_class.new }
  let(:loader_with_custom_database) { described_class.new(:database => 'custom_database') }
  let(:loader_with_custom_table)    { described_class.new(:table => 'custom_table') }
  let(:loader_with_custom_id)       { described_class.new(:id_field => '_custom_id') }

  # Sample records, with and without routing/ID fields.
  let(:record)                      { {'text' => 'hi'} }
  let(:record_with_database)        { {'text' => 'hi', '_database' => 'custom_database'} }
  let(:record_with_custom_database) { {'text' => 'hi', '_custom_database' => 'custom_database'} }
  let(:record_with_table)           { {'text' => 'hi', '_table' => 'custom_table'} }
  let(:record_with_custom_table)    { {'text' => 'hi', '_custom_table' => 'custom_table'} }
  let(:record_with_id)              { {'text' => 'hi', '_id' => 'the_id'} }
  let(:record_with_custom_id)       { {'text' => 'hi', '_custom_id' => 'the_id'} }

  # Runs #setup on the given loader and hands the same loader back, so an
  # example can set up and query it in a single expression.
  def set_up(a_loader)
    a_loader.setup
    a_loader
  end

  context "without an SQL available" do
    before do
      # Make creating the client fail.
      Mysql2::Client.should_receive(:new).and_raise(StandardError)
    end

    it "raises an error on setup" do
      expect { processor(:sql_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "with a SQL available" do
    before do
      # Replace the real client with a double whose #escape simply
      # stringifies its argument.
      @client = double()
      def @client.escape(value)
        value.to_s
      end
      Mysql2::Client.stub!(:new).and_return(@client)
    end

    it_behaves_like 'a processor', :named => :sql_loader

    context "routes" do
      context "all records" do
        it "to a default database" do
          set_up(loader).database_name_for(record).should eq('`wukong`')
        end

        it "to a given database" do
          set_up(loader_with_custom_database).database_name_for(record).should eq('`custom_database`')
        end

        it "to a default table" do
          set_up(loader).table_name_for(record).should eq('`streaming_record`')
        end

        it "to a given table" do
          set_up(loader_with_custom_table).table_name_for(record).should eq('`custom_table`')
        end
      end

      context "records having a value for" do
        it "default database field to the given database" do
          set_up(loader).database_name_for(record_with_database).should eq('`custom_database`')
        end

        # NOTE(review): this loader is configured with
        # :database => 'custom_database', which matches the expected value,
        # so the example would presumably pass even if the record's
        # '_custom_database' field were ignored -- confirm.
        it "given database field to the given database" do
          set_up(loader_with_custom_database).database_name_for(record_with_custom_database).should eq('`custom_database`')
        end

        it "default table field to the given table" do
          set_up(loader).table_name_for(record_with_table).should eq('`custom_table`')
        end

        # NOTE(review): same concern as the database example above -- the
        # loader is configured with :table => 'custom_table'.
        it "given table field to the given table" do
          set_up(loader_with_custom_table).table_name_for(record_with_custom_table).should eq('`custom_table`')
        end
      end
    end

    context "detects IDs" do
      it "based on the absence of a default ID field" do
        set_up(loader).id_for(record).should be_nil
      end

      it "based on the value of a default ID field" do
        set_up(loader).id_for(record_with_id).should eq('"the_id"')
      end

      it "based on the value of a custom ID field" do
        set_up(loader_with_custom_id).id_for(record_with_custom_id).should eq('"the_id"')
      end
    end

    context "sends" do
      before do
        loader.setup
      end

      it "insert requests on a record without an ID" do
        expected_sql = %Q{INSERT INTO `foo`.`bar` (`age`, `email`, `name`) VALUES (58, "jerry@nbc.com", "Jerry Seinfeld") ON DUPLICATE KEY UPDATE `age`=58, `email`="jerry@nbc.com", `name`="Jerry Seinfeld"}
        @client.should_receive(:query).with(expected_sql)
        loader.load({'_database' => 'foo', '_table' => 'bar', 'name' => 'Jerry Seinfeld', 'email' => 'jerry@nbc.com', 'age' => 58})
      end

      it "update requests on a record with an ID" do
        expected_sql = %Q{UPDATE `foo`.`bar` SET `age`=58, `email`="jerry@nbc.com", `name`="Jerry Seinfeld" WHERE `id`="1"}
        @client.should_receive(:query).with(expected_sql)
        loader.load({'_database' => 'foo', '_table' => 'bar', '_id' => '1', 'name' => 'Jerry Seinfeld', 'email' => 'jerry@nbc.com', 'age' => 58})
      end
    end

  end
end