wukong-load 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +5 -0
- data/Gemfile +16 -0
- data/LICENSE.md +1 -1
- data/README.md +100 -34
- data/bin/wu-load +1 -47
- data/bin/wu-source +4 -0
- data/lib/wukong-load.rb +36 -3
- data/lib/wukong-load/load_runner.rb +64 -0
- data/lib/wukong-load/loader.rb +7 -0
- data/lib/wukong-load/loaders/elasticsearch.rb +151 -0
- data/lib/wukong-load/loaders/kafka.rb +98 -0
- data/lib/wukong-load/loaders/mongodb.rb +123 -0
- data/lib/wukong-load/loaders/sql.rb +169 -0
- data/lib/wukong-load/models/http_request.rb +60 -0
- data/lib/wukong-load/source_driver.rb +46 -0
- data/lib/wukong-load/source_runner.rb +36 -0
- data/lib/wukong-load/version.rb +1 -1
- data/spec/spec_helper.rb +13 -0
- data/spec/wukong-load/loaders/elasticsearch_spec.rb +142 -0
- data/spec/wukong-load/loaders/kafka_spec.rb +72 -0
- data/spec/wukong-load/loaders/mongodb_spec.rb +100 -0
- data/spec/wukong-load/loaders/sql_spec.rb +112 -0
- data/spec/wukong-load/models/http_request_spec.rb +21 -0
- data/wukong-load.gemspec +3 -2
- metadata +26 -10
- data/lib/wukong-load/configuration.rb +0 -8
- data/lib/wukong-load/elasticsearch.rb +0 -99
- data/lib/wukong-load/runner.rb +0 -48
- data/spec/wukong-load/elasticsearch_spec.rb +0 -140
data/spec/wukong-load/models/http_request_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::HttpRequest do
|
4
|
+
context "reporting the best IP address" do
|
5
|
+
let(:real_ip) { '10.122.122.122' }
|
6
|
+
let(:forwarded_ip) { '10.123.123.123' }
|
7
|
+
context "without an X-Forwarded-For header" do
|
8
|
+
subject { Wukong::HttpRequest.receive(:ip_address => real_ip) }
|
9
|
+
its(:best_ip_address) { should == real_ip }
|
10
|
+
end
|
11
|
+
context "with an X-Forwarded-For header" do
|
12
|
+
subject do
|
13
|
+
Wukong::HttpRequest.receive({
|
14
|
+
:ip_address => real_ip,
|
15
|
+
:headers => {'X-Forwarded-For' => [forwarded_ip, '10.124.124.124'].join(', ') }
|
16
|
+
})
|
17
|
+
end
|
18
|
+
its(:best_ip_address) { should == forwarded_ip }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/wukong-load.gemspec
CHANGED
@@ -17,14 +17,15 @@ Gem::Specification.new do |gem|
|
|
17
17
|
* MongoDB
|
18
18
|
* HBase
|
19
19
|
* MySQL
|
20
|
+
* Kafka
|
20
21
|
|
21
22
|
and others.
|
22
23
|
EOF
|
23
24
|
|
24
25
|
gem.files = `git ls-files`.split("\n")
|
25
|
-
gem.executables = ['wu-load']
|
26
|
+
gem.executables = ['wu-load', 'wu-source']
|
26
27
|
gem.test_files = gem.files.grep(/^spec/)
|
27
28
|
gem.require_paths = ['lib']
|
28
29
|
|
29
|
-
gem.add_dependency('wukong',
|
30
|
+
gem.add_dependency('wukong', '3.0.0')
|
30
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-load
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2013-02-20 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: wukong
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - '='
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 3.0.0
|
24
|
+
version: 3.0.0
|
25
25
|
type: :runtime
|
26
26
|
prerelease: false
|
27
27
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,29 +29,41 @@ dependencies:
|
|
29
29
|
requirements:
|
30
30
|
- - '='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 3.0.0
|
32
|
+
version: 3.0.0
|
33
33
|
description: ! " Lets you load data from the command-line into data stores like\n\n
|
34
|
-
\ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n\nand others.\n"
|
34
|
+
\ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n * Kafka\n\nand others.\n"
|
35
35
|
email: coders@infochimps.com
|
36
36
|
executables:
|
37
37
|
- wu-load
|
38
|
+
- wu-source
|
38
39
|
extensions: []
|
39
40
|
extra_rdoc_files: []
|
40
41
|
files:
|
41
42
|
- .gitignore
|
43
|
+
- .yardopts
|
42
44
|
- Gemfile
|
43
45
|
- LICENSE.md
|
44
46
|
- README.md
|
45
47
|
- Rakefile
|
46
48
|
- bin/wu-load
|
49
|
+
- bin/wu-source
|
47
50
|
- lib/wukong-load.rb
|
48
|
-
- lib/wukong-load/configuration.rb
|
49
|
-
- lib/wukong-load/elasticsearch.rb
|
51
|
+
- lib/wukong-load/load_runner.rb
|
50
52
|
- lib/wukong-load/loader.rb
|
51
|
-
- lib/wukong-load/runner.rb
|
53
|
+
- lib/wukong-load/loaders/elasticsearch.rb
|
54
|
+
- lib/wukong-load/loaders/kafka.rb
|
55
|
+
- lib/wukong-load/loaders/mongodb.rb
|
56
|
+
- lib/wukong-load/loaders/sql.rb
|
57
|
+
- lib/wukong-load/models/http_request.rb
|
58
|
+
- lib/wukong-load/source_driver.rb
|
59
|
+
- lib/wukong-load/source_runner.rb
|
52
60
|
- lib/wukong-load/version.rb
|
53
61
|
- spec/spec_helper.rb
|
54
|
-
- spec/wukong-load/elasticsearch_spec.rb
|
62
|
+
- spec/wukong-load/loaders/elasticsearch_spec.rb
|
63
|
+
- spec/wukong-load/loaders/kafka_spec.rb
|
64
|
+
- spec/wukong-load/loaders/mongodb_spec.rb
|
65
|
+
- spec/wukong-load/loaders/sql_spec.rb
|
66
|
+
- spec/wukong-load/models/http_request_spec.rb
|
55
67
|
- wukong-load.gemspec
|
56
68
|
homepage: https://github.com/infochimps-labs/wukong-load
|
57
69
|
licenses:
|
@@ -80,5 +92,9 @@ specification_version: 3
|
|
80
92
|
summary: Load data produced by Wukong processors and dataflows into data stores.
|
81
93
|
test_files:
|
82
94
|
- spec/spec_helper.rb
|
83
|
-
- spec/wukong-load/elasticsearch_spec.rb
|
95
|
+
- spec/wukong-load/loaders/elasticsearch_spec.rb
|
96
|
+
- spec/wukong-load/loaders/kafka_spec.rb
|
97
|
+
- spec/wukong-load/loaders/mongodb_spec.rb
|
98
|
+
- spec/wukong-load/loaders/sql_spec.rb
|
99
|
+
- spec/wukong-load/models/http_request_spec.rb
|
84
100
|
has_rdoc:
|
data/lib/wukong-load/elasticsearch.rb
DELETED
@@ -1,99 +0,0 @@
|
|
1
|
-
# This should be extracted into Wonderdog and inserted via the Wukong
|
2
|
-
# plugin mechanism.
|
3
|
-
|
4
|
-
require_relative('loader')
|
5
|
-
|
6
|
-
module Wukong
|
7
|
-
module Load
|
8
|
-
|
9
|
-
# Loads data into Elasticsearch
|
10
|
-
class ElasticsearchLoader < Loader
|
11
|
-
|
12
|
-
field :host, String, :default => 'localhost'
|
13
|
-
field :port, Integer,:default => 9200
|
14
|
-
field :index, String, :default => 'wukong'
|
15
|
-
field :es_type, String, :default => 'streaming_record'
|
16
|
-
field :index_field, String, :default => '_index'
|
17
|
-
field :es_type_field, String, :default => '_es_type'
|
18
|
-
field :id_field, String, :default => '_id'
|
19
|
-
|
20
|
-
attr_accessor :connection
|
21
|
-
|
22
|
-
def setup
|
23
|
-
h = host.gsub(%r{^http://},'')
|
24
|
-
log.debug("Connecting to Elasticsearch cluster at #{h}:#{port}...")
|
25
|
-
begin
|
26
|
-
self.connection = Net::HTTP.new(h, port)
|
27
|
-
self.connection.use_ssl = true if host =~ /^https/
|
28
|
-
rescue => e
|
29
|
-
raise Error.new(e.message)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def load record
|
34
|
-
id_for(record) ? request(Net::HTTP::Put, update_path(record), record) : request(Net::HTTP::Post, create_path(record), record)
|
35
|
-
end
|
36
|
-
|
37
|
-
def create_path record
|
38
|
-
File.join('/', index_for(record).to_s, es_type_for(record).to_s)
|
39
|
-
end
|
40
|
-
|
41
|
-
def update_path record
|
42
|
-
File.join('/', index_for(record).to_s, es_type_for(record).to_s, id_for(record).to_s)
|
43
|
-
end
|
44
|
-
|
45
|
-
def index_for record
|
46
|
-
record[index_field] || self.index
|
47
|
-
end
|
48
|
-
|
49
|
-
def es_type_for record
|
50
|
-
record[es_type_field] || self.es_type
|
51
|
-
end
|
52
|
-
|
53
|
-
def id_for record
|
54
|
-
record[id_field]
|
55
|
-
end
|
56
|
-
|
57
|
-
def request request_type, path, record
|
58
|
-
perform_request(create_request(request_type, path, record))
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def create_request request_type, path, record
|
64
|
-
request_type.new(path).tap do |req|
|
65
|
-
req.body = MultiJson.dump(record)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def perform_request req
|
70
|
-
begin
|
71
|
-
response = connection.request(req)
|
72
|
-
status = response.code.to_i
|
73
|
-
if (200..201).include?(status)
|
74
|
-
log.info("#{req.class} #{req.path} #{status}")
|
75
|
-
else
|
76
|
-
handle_elasticsearch_error(status, response)
|
77
|
-
end
|
78
|
-
rescue => e
|
79
|
-
log.error("#{e.class} - #{e.message}")
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def handle_elasticsearch_error response
|
84
|
-
begin
|
85
|
-
error = MultiJson.load(response.body)
|
86
|
-
log.error("#{response.code}: #{error['error']}")
|
87
|
-
rescue => e
|
88
|
-
log.error("Received a response code of #{status}: #{response.body}")
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
register :elasticsearch_loader
|
93
|
-
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
|
99
|
-
|
data/lib/wukong-load/runner.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module Load
|
3
|
-
class Runner
|
4
|
-
|
5
|
-
include Logging
|
6
|
-
|
7
|
-
def self.run settings
|
8
|
-
begin
|
9
|
-
new(settings).run
|
10
|
-
rescue Error => e
|
11
|
-
log.error(e.message)
|
12
|
-
exit(127)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
attr_accessor :settings
|
17
|
-
def initialize settings
|
18
|
-
self.settings = settings
|
19
|
-
end
|
20
|
-
|
21
|
-
def args
|
22
|
-
settings.rest
|
23
|
-
end
|
24
|
-
|
25
|
-
def data_store_name
|
26
|
-
args.first
|
27
|
-
end
|
28
|
-
|
29
|
-
def processor_name
|
30
|
-
case data_store_name
|
31
|
-
when 'elasticsearch' then :elasticsearch_loader
|
32
|
-
when nil
|
33
|
-
settings.dump_help
|
34
|
-
exit(1)
|
35
|
-
else
|
36
|
-
raise Error.new("No loader defined for data store: #{data_store_name}")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def run
|
41
|
-
EM.run do
|
42
|
-
StupidServer.new(processor_name, settings).run!
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
data/spec/wukong-load/elasticsearch_spec.rb
DELETED
@@ -1,140 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wukong::Load::ElasticsearchLoader do
|
4
|
-
|
5
|
-
let(:record) { {'text' => 'hi' } }
|
6
|
-
let(:record_with_index) { {'text' => 'hi', '_index' => 'custom_index' } }
|
7
|
-
let(:record_with_custom_index) { {'text' => 'hi', '_custom_index' => 'custom_index' } }
|
8
|
-
let(:record_with_es_type) { {'text' => 'hi', '_es_type' => 'custom_es_type' } }
|
9
|
-
let(:record_with_custom_es_type) { {'text' => 'hi', '_custom_es_type' => 'custom_es_type' } }
|
10
|
-
let(:record_with_id) { {'text' => 'hi', '_id' => 'the_id' } }
|
11
|
-
let(:record_with_custom_id) { {'text' => 'hi', '_custom_id' => 'the_id' } }
|
12
|
-
|
13
|
-
it_behaves_like 'a processor', :named => :elasticsearch_loader
|
14
|
-
|
15
|
-
context "without an Elasticsearch available" do
|
16
|
-
before do
|
17
|
-
Net::HTTP.should_receive(:new).and_raise(StandardError)
|
18
|
-
end
|
19
|
-
|
20
|
-
it "raises an error on setup" do
|
21
|
-
expect { processor(:elasticsearch_loader).setup }.to raise_error(Wukong::Error)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
context "routes" do
|
26
|
-
context "all records" do
|
27
|
-
it "to a default index" do
|
28
|
-
proc = processor(:elasticsearch_loader)
|
29
|
-
proc.index_for(record).should == proc.index
|
30
|
-
end
|
31
|
-
it "to a given index" do
|
32
|
-
processor(:elasticsearch_loader, :index => 'custom_index').index_for(record).should == 'custom_index'
|
33
|
-
end
|
34
|
-
it "to a default type" do
|
35
|
-
proc = processor(:elasticsearch_loader)
|
36
|
-
proc.es_type_for(record).should == proc.es_type
|
37
|
-
end
|
38
|
-
it "to a given type" do
|
39
|
-
processor(:elasticsearch_loader, :es_type => 'custom_es_type').es_type_for(record).should == 'custom_es_type'
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
context "records having a value for" do
|
44
|
-
it "default index field to the given index" do
|
45
|
-
processor(:elasticsearch_loader).index_for(record_with_index).should == 'custom_index'
|
46
|
-
end
|
47
|
-
it "given index field to the given index" do
|
48
|
-
processor(:elasticsearch_loader, :index_field => '_custom_index').index_for(record_with_custom_index).should == 'custom_index'
|
49
|
-
end
|
50
|
-
it "default type field to the given type" do
|
51
|
-
processor(:elasticsearch_loader).es_type_for(record_with_es_type).should == 'custom_es_type'
|
52
|
-
end
|
53
|
-
it "given type field to the given type" do
|
54
|
-
processor(:elasticsearch_loader, :es_type_field => '_custom_es_type').es_type_for(record_with_custom_es_type).should == 'custom_es_type'
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context "detects IDs" do
|
60
|
-
it "based on the absence of a default ID field" do
|
61
|
-
processor(:elasticsearch_loader).id_for(record).should be_nil
|
62
|
-
end
|
63
|
-
it "based on the value of a default ID field" do
|
64
|
-
processor(:elasticsearch_loader).id_for(record_with_id).should == 'the_id'
|
65
|
-
end
|
66
|
-
it "based on the value of a custom ID field" do
|
67
|
-
processor(:elasticsearch_loader, :id_field => '_custom_id').id_for(record_with_custom_id).should == 'the_id'
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
context "having made a connection to the database" do
|
72
|
-
|
73
|
-
let(:connection) { double() }
|
74
|
-
let(:log) { double() }
|
75
|
-
subject { processor(:elasticsearch_loader) }
|
76
|
-
before do
|
77
|
-
Net::HTTP.should_receive(:new).and_return(connection)
|
78
|
-
subject.stub!(:log).and_return(log)
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
context "sends" do
|
83
|
-
it "create requests on a record without an ID" do
|
84
|
-
subject.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
|
85
|
-
subject.load({'_index' => 'foo', '_es_type' => 'bar'})
|
86
|
-
end
|
87
|
-
it "update requests on a record with an ID" do
|
88
|
-
subject.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
|
89
|
-
subject.load({'_index' => 'foo', '_es_type' => 'bar', '_id' => '1'})
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
context "receives" do
|
94
|
-
|
95
|
-
let(:ok) do
|
96
|
-
mock("Net::HTTPOK").tap do |response|
|
97
|
-
response.stub!(:code).and_return('200')
|
98
|
-
response.stub!(:body).and_return('{"ok": true}')
|
99
|
-
end
|
100
|
-
end
|
101
|
-
let(:created) do
|
102
|
-
mock("Net::HTTPCreated").tap do |response|
|
103
|
-
response.stub!(:code).and_return('201')
|
104
|
-
response.stub!(:body).and_return('{"created": true}')
|
105
|
-
end
|
106
|
-
end
|
107
|
-
let(:not_found) do
|
108
|
-
mock("Net::HTTPNotFound").tap do |response|
|
109
|
-
response.stub!(:code).and_return('404')
|
110
|
-
response.stub!(:body).and_return('{"error": "Not found"}')
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
context "201 Created" do
|
115
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created) }
|
116
|
-
it "by logging an INFO message" do
|
117
|
-
log.should_receive(:info)
|
118
|
-
subject.load(record)
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
context "200 OK" do
|
123
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok) }
|
124
|
-
it "by logging an INFO message" do
|
125
|
-
log.should_receive(:info)
|
126
|
-
subject.load(record_with_id)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
context "an error response from Elasticsearch" do
|
131
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found) }
|
132
|
-
it "by logging an ERROR message" do
|
133
|
-
log.should_receive(:error)
|
134
|
-
subject.load(record)
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|