wukong-load 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
module Wukong
  module Load

    # A driver that turns a Wukong processor or dataflow into a *source*
    # of data.  On a periodic EventMachine timer it hands the dataflow
    # the next consecutive integer (as a String, starting with "1");
    # #process prints every record it is given to STDOUT.
    #
    # Settings read by this class (all optional):
    #
    # * :period     -- seconds between events; wins over :per_sec
    # * :per_sec    -- events per second, turned into a period of 1.0 / per_sec
    # * :batch_size -- positive number of events after which
    #                  #finalize_dataflow is called
    class SourceDriver < Wukong::Local::StdioDriver
      include Logging

      # Seconds between events when no usable :period or :per_sec is given.
      DEFAULT_PERIOD = 1.0

      # index::      the integer the next event will carry
      # batch_size:: events per batch, or nil when batching is disabled
      attr_accessor :index, :batch_size

      # Resets the event counter and picks up a positive :batch_size
      # from the settings (anything else leaves batching disabled).
      def post_init
        super()
        self.index = 1
        requested = settings[:batch_size]
        self.batch_size = requested.to_i if requested && requested.to_i > 0
      end

      # Builds a driver for +label+, fires the first event immediately
      # and then one more every period.
      #
      # @param label [Object] name of the processor or dataflow to drive
      # @param settings [Hash] see the class documentation
      # @return [EventMachine::PeriodicTimer] the timer generating events
      def self.start(label, settings={})
        # NOTE(review): :foobar looks like a placeholder for the first
        # constructor argument of the parent driver -- confirm.
        driver = new(:foobar, label, settings)
        driver.post_init

        period = period_from(settings)
        driver.create_event
        EventMachine::PeriodicTimer.new(period) { driver.create_event }
      end

      # Works out the number of seconds between events.
      #
      # :period is returned untouched when present.  Otherwise :per_sec
      # is coerced with Float(); values that are not numeric, not
      # finite or not positive fall back to DEFAULT_PERIOD instead of
      # producing an infinite/negative period or hiding the error
      # behind an inline +rescue+.
      def self.period_from(settings)
        return settings[:period] if settings[:period]
        return DEFAULT_PERIOD unless settings[:per_sec]

        begin
          per_sec = Float(settings[:per_sec])
        rescue ArgumentError, TypeError
          return DEFAULT_PERIOD
        end
        (per_sec > 0 && per_sec.finite?) ? (1.0 / per_sec) : DEFAULT_PERIOD
      end
      private_class_method :period_from

      # Emits one event carrying the current index, then advances it.
      #
      # The batch boundary is tested against the index of the event
      # that was just emitted (i.e. before incrementing) so that every
      # batch -- including the first -- holds exactly batch_size events.
      def create_event
        receive_line(index.to_s)
        finalize_dataflow if batch_size && (index % batch_size).zero?
        self.index += 1
      end

      # :nodoc:
      #
      # Writes +record+ to STDOUT and flushes straight away so events
      # show up as they are produced.
      #
      # NOTE(review): the original author was unsure why the explicit
      # flush is needed given that StdoutProcessor#setup (in
      # wukong/widget/processors) supposedly calls $stdout.sync.  Bare
      # `$stdout.sync` only reads the flag; unbuffered output needs
      # `$stdout.sync = true` -- worth checking what #setup really does.
      def process(record)
        $stdout.puts record
        $stdout.flush
      end

    end
  end
end
@@ -0,0 +1,36 @@
1
+ require_relative('source_driver')
2
+ module Wukong
3
+ module Load
4
+
5
+ # Runs the wu-source command: declares its usage and description text and selects SourceDriver as the driver class (see #driver).
6
+ class SourceRunner < Wukong::Local::LocalRunner
7
+
8
+ usage "PROCESSOR|DATAFLOW"
9
+
10
+ description <<-EOF.gsub(/^ {8}/,'') # drop the leading 8 spaces of source indentation from every heredoc line
11
+
12
+ wu-source is a tool for using Wukong processors as sources of
13
+ data in streams.
14
+
15
+ Run any Wukong processor as a source for data:
16
+
17
+ $ wu-source fake_log_data
18
+ 205.4.75.208 - 3918471017 [27/Nov/2012:05:06:57 -0600] "GET /products/eget HTTP/1.0" 200 25600
19
+ 63.181.105.15 - 3650805763 [27/Nov/2012:05:06:57 -0600] "GET /products/lacinia-nulla-vitae HTTP/1.0" 200 3790
20
+ 227.190.78.101 - 39543891 [27/Nov/2012:05:06:58 -0600] "GET /products/odio-nulla-nulla-ipsum HTTP/1.0" 200 31718
21
+ ...
22
+
23
+ The fake_log_data processor will receive an event once every
24
+ second. Each event will consist of a single string giving a
25
+ consecutive integer starting with '1' as the first event.
26
+ EOF
27
+
28
+ include Logging
29
+
30
+ def driver # returns the SourceDriver class itself, not an instance
31
+ SourceDriver
32
+ end
33
+
34
+ end
35
+ end
36
+ end
@@ -1,6 +1,6 @@
1
1
  module Wukong
2
2
  module Load
3
3
  # The current version of Wukong-Load
4
- VERSION = '0.0.2'
4
+ VERSION = '0.1.0'
5
5
  end
6
6
  end
@@ -3,5 +3,18 @@ require 'wukong/spec_helpers'
3
3
 
4
4
  RSpec.configure do |config|
5
5
  config.mock_with :rspec
6
+
6
7
  include Wukong::SpecHelpers
8
+
9
+ config.before(:each) do # runs before every example: switch Wukong logging off
10
+ Wukong::Log.level = Log4r::OFF
11
+ end
12
+
13
+ def root # memoized Pathname of the directory one level above this file's own directory
14
+ @root ||= Pathname.new(File.expand_path('../..', __FILE__))
15
+ end
16
+
17
+ def load_runner *args, &block # NOTE(review): &block is accepted but never used or forwarded
18
+ runner(Wukong::Load::LoadRunner, 'wu-load', *args) # `runner` presumably comes from Wukong::SpecHelpers -- confirm
19
+ end
7
20
  end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
# Behaviour of the Elasticsearch loader: routing of records to an index and
# type, detection of document IDs, the HTTP verb chosen per record, and the
# log level used for each kind of response.
describe Wukong::Load::ElasticsearchLoader do

  # Loaders under test: the default one plus variants overriding one setting.
  let(:loader)                   { described_class.new }
  let(:loader_with_custom_index) { described_class.new(:index => 'custom_index') }
  let(:loader_with_custom_type)  { described_class.new(:es_type => 'custom_es_type') }
  let(:loader_with_custom_id)    { described_class.new(:id_field => '_custom_id') }

  # Sample records: the plain record plus one extra routing/ID key each.
  let(:record)                     { { 'text' => 'hi' } }
  let(:record_with_index)          { record.merge('_index' => 'custom_index') }
  let(:record_with_custom_index)   { record.merge('_custom_index' => 'custom_index') }
  let(:record_with_es_type)        { record.merge('_es_type' => 'custom_es_type') }
  let(:record_with_custom_es_type) { record.merge('_custom_es_type' => 'custom_es_type') }
  let(:record_with_id)             { record.merge('_id' => 'the_id') }
  let(:record_with_custom_id)      { record.merge('_custom_id' => 'the_id') }

  it_behaves_like 'a processor', :named => :elasticsearch_loader

  context "without an Elasticsearch available" do
    # Simulate an unreachable server by making the HTTP client blow up.
    before { Net::HTTP.should_receive(:new).and_raise(StandardError) }

    it "raises an error on setup" do
      expect { processor(:elasticsearch_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "routes" do
    context "all records" do
      it "to a default index" do
        loader.index_for(record).should eq(loader.index)
      end

      it "to a given index" do
        loader_with_custom_index.index_for(record).should eq('custom_index')
      end

      it "to a default type" do
        loader.es_type_for(record).should eq(loader.es_type)
      end

      it "to a given type" do
        loader_with_custom_type.es_type_for(record).should eq('custom_es_type')
      end
    end

    context "records having a value for" do
      it "default index field to the given index" do
        loader.index_for(record_with_index).should eq('custom_index')
      end

      it "given index field to the given index" do
        loader_with_custom_index.index_for(record_with_custom_index).should eq('custom_index')
      end

      it "default type field to the given type" do
        loader.es_type_for(record_with_es_type).should eq('custom_es_type')
      end

      it "given type field to the given type" do
        loader_with_custom_type.es_type_for(record_with_custom_es_type).should eq('custom_es_type')
      end
    end
  end

  context "detects IDs" do
    it "based on the absence of a default ID field" do
      loader.id_for(record).should be_nil
    end

    it "based on the value of a default ID field" do
      loader.id_for(record_with_id).should eq('the_id')
    end

    it "based on the value of a custom ID field" do
      loader_with_custom_id.id_for(record_with_custom_id).should eq('the_id')
    end
  end

  context "sends" do
    it "create requests on a record without an ID" do
      without_id = { '_index' => 'foo', '_es_type' => 'bar' }
      loader.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
      loader.load(without_id)
    end

    it "update requests on a record with an ID" do
      with_id = { '_index' => 'foo', '_es_type' => 'bar', '_id' => '1' }
      processor(:elasticsearch_loader) do |es_loader|
        es_loader.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
        es_loader.load(with_id)
      end
    end
  end

  context "receives" do
    # Stand-in for the HTTP connection handed back by Net::HTTP.new.
    let(:connection) { double }
    before { Net::HTTP.should_receive(:new).and_return(connection) }

    # Canned responses exposing only #code and #body.
    let(:ok)        { mock("Net::HTTPOK", :code => '200', :body => '{"ok": true}') }
    let(:created)   { mock("Net::HTTPCreated", :code => '201', :body => '{"created": true}') }
    let(:not_found) { mock("Net::HTTPNotFound", :code => '404', :body => '{"error": "Not found"}') }

    context "201 Created" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created)
      end

      it "by logging an INFO message" do
        processor(:elasticsearch_loader) do |es_loader|
          es_loader.log.should_receive(:info)
          es_loader.load(record)
        end
      end
    end

    context "200 OK" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok)
      end

      it "by logging an INFO message" do
        processor(:elasticsearch_loader) do |es_loader|
          es_loader.log.should_receive(:info)
          es_loader.load(record_with_id)
        end
      end
    end

    context "an error response from Elasticsearch" do
      before do
        connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found)
      end

      it "by logging an ERROR message" do
        processor(:elasticsearch_loader) do |es_loader|
          es_loader.log.should_receive(:error)
          es_loader.load(record)
        end
      end
    end

  end
end
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+ require 'kafka'
3
+
4
# Specs for the Kafka loader: failure to connect on setup, logging on every
# write, and routing of records to a topic and partition.
describe Wukong::Load::KafkaLoader do

  # Loaders under test: the default one plus variants overriding one setting.
  let(:loader)                             { Wukong::Load::KafkaLoader.new }
  let(:loader_with_custom_topic)           { Wukong::Load::KafkaLoader.new(:topic => 'custom') }
  let(:loader_with_custom_topic_field)     { Wukong::Load::KafkaLoader.new(:topic_field => '_custom_topic') }
  let(:loader_with_custom_partition)       { Wukong::Load::KafkaLoader.new(:partition => 1) }
  let(:loader_with_custom_partition_field) { Wukong::Load::KafkaLoader.new(:partition_field => '_custom_partition') }

  # Sample records: the plain record plus one routing key each.
  let(:record)                             { { 'text' => 'hi' } }
  let(:record_with_topic_field)            { { 'text' => 'hi', '_topic' => 'custom' } }
  let(:record_with_custom_topic_field)     { { 'text' => 'hi', '_custom_topic' => 'custom' } }
  let(:record_with_partition_field)        { { 'text' => 'hi', '_partition' => 1 } }
  let(:record_with_custom_partition_field) { { 'text' => 'hi', '_custom_partition' => 1 } }

  it "raises an error on setup if it can't connect to Kafka" do
    Kafka::MultiProducer.should_receive(:new).and_raise(StandardError)
    expect { processor(:kafka_loader) }.to raise_error(Wukong::Error)
  end

  context "with a Kafka available" do
    # Replace the real producer with a double so no broker is needed.
    before do
      @producer = double()
      Kafka::MultiProducer.stub!(:new).and_return(@producer)
    end

    it_behaves_like 'a processor', :named => :kafka_loader

    it "produces an INFO log message on every write" do
      @producer.should_receive(:send)
      processor(:kafka_loader) do |proc|
        proc.log.should_receive(:info)
        proc.load(record)
      end
    end
  end

  context "routes" do
    context "all records" do
      it "to a default topic" do
        loader.topic_for(record).should == loader.topic
      end

      it "to a given topic" do
        loader_with_custom_topic.topic_for(record).should == 'custom'
      end

      it "to a default partition" do
        loader.partition_for(record).should == loader.partition
      end

      it "to a given partition" do
        loader_with_custom_partition.partition_for(record).should == 1
      end
    end

    context "records having a value for" do
      it "default topic field to the given topic" do
        loader.topic_for(record_with_topic_field).should == 'custom'
      end

      it "given topic field to the given topic" do
        # Pair the custom-field loader with a record carrying only the custom
        # field; the previous pairing (custom *topic* loader + default-field
        # record) passed whether or not :topic_field was honoured.
        loader_with_custom_topic_field.topic_for(record_with_custom_topic_field).should == 'custom'
      end

      it "default partition field to the given partition" do
        loader.partition_for(record_with_partition_field).should == 1
      end

      it "given partition field to the given partition" do
        # Same reasoning as for the topic field: exercise :partition_field
        # itself rather than a loader whose default partition is already 1.
        loader_with_custom_partition_field.partition_for(record_with_custom_partition_field).should == 1
      end
    end
  end

end
@@ -0,0 +1,100 @@
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+
4
# Behaviour of the MongoDB loader: failure to connect on setup, routing of
# records to a database and collection, ID detection, and the collection
# call (insert vs. upserting update) made for each record.
describe Wukong::Load::MongoDBLoader do

  # Loaders under test: the default one plus variants overriding one setting.
  let(:loader)                        { described_class.new }
  let(:loader_with_custom_database)   { described_class.new(:database => 'custom_database') }
  let(:loader_with_custom_collection) { described_class.new(:collection => 'custom_collection') }
  let(:loader_with_custom_id)         { described_class.new(:id_field => '_custom_id') }

  # Sample records: the plain record plus one extra routing/ID key each.
  let(:record)                        { { 'text' => 'hi' } }
  let(:record_with_database)          { record.merge('_database' => 'custom_database') }
  let(:record_with_custom_database)   { record.merge('_custom_database' => 'custom_database') }
  let(:record_with_collection)        { record.merge('_collection' => 'custom_collection') }
  let(:record_with_custom_collection) { record.merge('_custom_collection' => 'custom_collection') }
  let(:record_with_id)                { record.merge('_id' => 'the_id') }
  let(:record_with_custom_id)         { record.merge('_custom_id' => 'the_id') }

  context "without an MongoDB available" do
    # Simulate an unreachable server by making the client constructor raise.
    before { Mongo::MongoClient.should_receive(:new).and_raise(StandardError) }

    it "raises an error on setup" do
      expect { processor(:mongodb_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "with a MongoDB available" do
    # Swap the real client for a double so no server is needed.
    before do
      @client = double
      Mongo::MongoClient.stub!(:new).and_return(@client)
    end

    it_behaves_like 'a processor', :named => :mongodb_loader

    context "routes" do
      context "all records" do
        it "to a default database" do
          loader.database_name_for(record).should eq(loader.database)
        end

        it "to a given database" do
          loader_with_custom_database.database_name_for(record).should eq('custom_database')
        end

        it "to a default collection" do
          loader.collection_name_for(record).should eq(loader.collection)
        end

        it "to a given collection" do
          loader_with_custom_collection.collection_name_for(record).should eq('custom_collection')
        end
      end

      context "records having a value for" do
        it "default database field to the given database" do
          loader.database_name_for(record_with_database).should eq('custom_database')
        end

        it "given database field to the given database" do
          loader_with_custom_database.database_name_for(record_with_custom_database).should eq('custom_database')
        end

        it "default collection field to the given collection" do
          loader.collection_name_for(record_with_collection).should eq('custom_collection')
        end

        it "given collection field to the given collection" do
          loader_with_custom_collection.collection_name_for(record_with_custom_collection).should eq('custom_collection')
        end
      end
    end

    context "detects IDs" do
      it "based on the absence of a default ID field" do
        loader.id_for(record).should be_nil
      end

      it "based on the value of a default ID field" do
        loader.id_for(record_with_id).should eq('the_id')
      end

      it "based on the value of a custom ID field" do
        loader_with_custom_id.id_for(record_with_custom_id).should eq('the_id')
      end
    end

    context "sends" do
      # Every record is routed to this collection double.
      let(:collection) { double }
      before { loader.stub!(:collection_for).and_return(collection) }

      it "insert requests on a record without an ID" do
        without_id = { '_database' => 'foo', '_collection' => 'bar' }
        collection.should_receive(:insert).with(kind_of(Hash))
        loader.load(without_id)
      end

      it "update requests on a record with an ID" do
        with_id = { '_database' => 'foo', '_collection' => 'bar', '_id' => '1' }
        collection.should_receive(:update).with({:_id => '1'}, kind_of(Hash), :upsert => true).and_return({})
        loader.load(with_id)
      end
    end

  end
end
@@ -0,0 +1,112 @@
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'mysql2'
4
+
5
# Behaviour of the SQL loader: failure to connect on setup, routing of
# records to a (backtick-quoted) database and table, ID detection, and the
# exact SQL statement issued for records with and without an ID.
describe Wukong::Load::SQLLoader do

  # Loaders under test: the default one plus variants overriding one setting.
  let(:loader)                      { described_class.new }
  let(:loader_with_custom_database) { described_class.new(:database => 'custom_database') }
  let(:loader_with_custom_table)    { described_class.new(:table => 'custom_table') }
  let(:loader_with_custom_id)       { described_class.new(:id_field => '_custom_id') }

  # Sample records: the plain record plus one extra routing/ID key each.
  let(:record)                      { { 'text' => 'hi' } }
  let(:record_with_database)        { record.merge('_database' => 'custom_database') }
  let(:record_with_custom_database) { record.merge('_custom_database' => 'custom_database') }
  let(:record_with_table)           { record.merge('_table' => 'custom_table') }
  let(:record_with_custom_table)    { record.merge('_custom_table' => 'custom_table') }
  let(:record_with_id)              { record.merge('_id' => 'the_id') }
  let(:record_with_custom_id)       { record.merge('_custom_id' => 'the_id') }

  context "without an SQL available" do
    # Simulate an unreachable server by making the client constructor raise.
    before { Mysql2::Client.should_receive(:new).and_raise(StandardError) }

    it "raises an error on setup" do
      expect { processor(:sql_loader) }.to raise_error(Wukong::Error)
    end
  end

  context "with a SQL available" do
    # Swap the real client for a double whose #escape simply stringifies
    # its argument.
    before do
      @client = double
      def @client.escape(record)
        record.to_s
      end
      Mysql2::Client.stub!(:new).and_return(@client)
    end

    it_behaves_like 'a processor', :named => :sql_loader

    context "routes" do
      context "all records" do
        it "to a default database" do
          loader.setup
          loader.database_name_for(record).should eq('`wukong`')
        end

        it "to a given database" do
          loader_with_custom_database.setup
          loader_with_custom_database.database_name_for(record).should eq('`custom_database`')
        end

        it "to a default table" do
          loader.setup
          loader.table_name_for(record).should eq('`streaming_record`')
        end

        it "to a given table" do
          loader_with_custom_table.setup
          loader_with_custom_table.table_name_for(record).should eq('`custom_table`')
        end
      end

      context "records having a value for" do
        it "default database field to the given database" do
          loader.setup
          loader.database_name_for(record_with_database).should eq('`custom_database`')
        end

        it "given database field to the given database" do
          loader_with_custom_database.setup
          loader_with_custom_database.database_name_for(record_with_custom_database).should eq('`custom_database`')
        end

        it "default table field to the given table" do
          loader.setup
          loader.table_name_for(record_with_table).should eq('`custom_table`')
        end

        it "given table field to the given table" do
          loader_with_custom_table.setup
          loader_with_custom_table.table_name_for(record_with_custom_table).should eq('`custom_table`')
        end
      end
    end

    context "detects IDs" do
      it "based on the absence of a default ID field" do
        loader.setup
        loader.id_for(record).should be_nil
      end

      it "based on the value of a default ID field" do
        loader.setup
        loader.id_for(record_with_id).should eq('"the_id"')
      end

      it "based on the value of a custom ID field" do
        loader_with_custom_id.setup
        loader_with_custom_id.id_for(record_with_custom_id).should eq('"the_id"')
      end
    end

    context "sends" do
      before { loader.setup }

      it "insert requests on a record without an ID" do
        expected_sql = %Q{INSERT INTO `foo`.`bar` (`age`, `email`, `name`) VALUES (58, "jerry@nbc.com", "Jerry Seinfeld") ON DUPLICATE KEY UPDATE `age`=58, `email`="jerry@nbc.com", `name`="Jerry Seinfeld"}
        without_id   = {'_database' => 'foo', '_table' => 'bar', 'name' => 'Jerry Seinfeld', 'email' => 'jerry@nbc.com', 'age' => 58}

        @client.should_receive(:query).with(expected_sql)
        loader.load(without_id)
      end

      it "update requests on a record with an ID" do
        expected_sql = %Q{UPDATE `foo`.`bar` SET `age`=58, `email`="jerry@nbc.com", `name`="Jerry Seinfeld" WHERE `id`="1"}
        with_id      = {'_database' => 'foo', '_table' => 'bar', '_id' => '1', 'name' => 'Jerry Seinfeld', 'email' => 'jerry@nbc.com', 'age' => 58}

        @client.should_receive(:query).with(expected_sql)
        loader.load(with_id)
      end
    end

  end
end