wukong-load 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +5 -0
- data/Gemfile +16 -0
- data/LICENSE.md +1 -1
- data/README.md +100 -34
- data/bin/wu-load +1 -47
- data/bin/wu-source +4 -0
- data/lib/wukong-load.rb +36 -3
- data/lib/wukong-load/load_runner.rb +64 -0
- data/lib/wukong-load/loader.rb +7 -0
- data/lib/wukong-load/loaders/elasticsearch.rb +151 -0
- data/lib/wukong-load/loaders/kafka.rb +98 -0
- data/lib/wukong-load/loaders/mongodb.rb +123 -0
- data/lib/wukong-load/loaders/sql.rb +169 -0
- data/lib/wukong-load/models/http_request.rb +60 -0
- data/lib/wukong-load/source_driver.rb +46 -0
- data/lib/wukong-load/source_runner.rb +36 -0
- data/lib/wukong-load/version.rb +1 -1
- data/spec/spec_helper.rb +13 -0
- data/spec/wukong-load/loaders/elasticsearch_spec.rb +142 -0
- data/spec/wukong-load/loaders/kafka_spec.rb +72 -0
- data/spec/wukong-load/loaders/mongodb_spec.rb +100 -0
- data/spec/wukong-load/loaders/sql_spec.rb +112 -0
- data/spec/wukong-load/models/http_request_spec.rb +21 -0
- data/wukong-load.gemspec +3 -2
- metadata +26 -10
- data/lib/wukong-load/configuration.rb +0 -8
- data/lib/wukong-load/elasticsearch.rb +0 -99
- data/lib/wukong-load/runner.rb +0 -48
- data/spec/wukong-load/elasticsearch_spec.rb +0 -140
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wukong::HttpRequest do
|
4
|
+
context "reporting the best IP address" do
|
5
|
+
let(:real_ip) { '10.122.122.122' }
|
6
|
+
let(:forwarded_ip) { '10.123.123.123' }
|
7
|
+
context "without an X-Forwarded-For header" do
|
8
|
+
subject { Wukong::HttpRequest.receive(:ip_address => real_ip) }
|
9
|
+
its(:best_ip_address) { should == real_ip }
|
10
|
+
end
|
11
|
+
context "with an X-Forwarded-For header" do
|
12
|
+
subject do
|
13
|
+
Wukong::HttpRequest.receive({
|
14
|
+
:ip_address => real_ip,
|
15
|
+
:headers => {'X-Forwarded-For' => [forwarded_ip, '10.124.124.124'].join(', ') }
|
16
|
+
})
|
17
|
+
end
|
18
|
+
its(:best_ip_address) { should == forwarded_ip }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/wukong-load.gemspec
CHANGED
@@ -17,14 +17,15 @@ Gem::Specification.new do |gem|
|
|
17
17
|
* MongoDB
|
18
18
|
* HBase
|
19
19
|
* MySQL
|
20
|
+
* Kafka
|
20
21
|
|
21
22
|
and others.
|
22
23
|
EOF
|
23
24
|
|
24
25
|
gem.files = `git ls-files`.split("\n")
|
25
|
-
gem.executables = ['wu-load']
|
26
|
+
gem.executables = ['wu-load', 'wu-source']
|
26
27
|
gem.test_files = gem.files.grep(/^spec/)
|
27
28
|
gem.require_paths = ['lib']
|
28
29
|
|
29
|
-
gem.add_dependency('wukong',
|
30
|
+
gem.add_dependency('wukong', '3.0.0')
|
30
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-load
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2013-02-20 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: wukong
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
requirements:
|
22
22
|
- - '='
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version: 3.0.0
|
24
|
+
version: 3.0.0
|
25
25
|
type: :runtime
|
26
26
|
prerelease: false
|
27
27
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,29 +29,41 @@ dependencies:
|
|
29
29
|
requirements:
|
30
30
|
- - '='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 3.0.0
|
32
|
+
version: 3.0.0
|
33
33
|
description: ! " Lets you load data from the command-line into data stores like\n\n
|
34
|
-
\ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n\nand others.\n"
|
34
|
+
\ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n * Kafka\n\nand others.\n"
|
35
35
|
email: coders@infochimps.com
|
36
36
|
executables:
|
37
37
|
- wu-load
|
38
|
+
- wu-source
|
38
39
|
extensions: []
|
39
40
|
extra_rdoc_files: []
|
40
41
|
files:
|
41
42
|
- .gitignore
|
43
|
+
- .yardopts
|
42
44
|
- Gemfile
|
43
45
|
- LICENSE.md
|
44
46
|
- README.md
|
45
47
|
- Rakefile
|
46
48
|
- bin/wu-load
|
49
|
+
- bin/wu-source
|
47
50
|
- lib/wukong-load.rb
|
48
|
-
- lib/wukong-load/configuration.rb
|
49
|
-
- lib/wukong-load/elasticsearch.rb
|
51
|
+
- lib/wukong-load/load_runner.rb
|
50
52
|
- lib/wukong-load/loader.rb
|
51
|
-
- lib/wukong-load/runner.rb
|
53
|
+
- lib/wukong-load/loaders/elasticsearch.rb
|
54
|
+
- lib/wukong-load/loaders/kafka.rb
|
55
|
+
- lib/wukong-load/loaders/mongodb.rb
|
56
|
+
- lib/wukong-load/loaders/sql.rb
|
57
|
+
- lib/wukong-load/models/http_request.rb
|
58
|
+
- lib/wukong-load/source_driver.rb
|
59
|
+
- lib/wukong-load/source_runner.rb
|
52
60
|
- lib/wukong-load/version.rb
|
53
61
|
- spec/spec_helper.rb
|
54
|
-
- spec/wukong-load/elasticsearch_spec.rb
|
62
|
+
- spec/wukong-load/loaders/elasticsearch_spec.rb
|
63
|
+
- spec/wukong-load/loaders/kafka_spec.rb
|
64
|
+
- spec/wukong-load/loaders/mongodb_spec.rb
|
65
|
+
- spec/wukong-load/loaders/sql_spec.rb
|
66
|
+
- spec/wukong-load/models/http_request_spec.rb
|
55
67
|
- wukong-load.gemspec
|
56
68
|
homepage: https://github.com/infochimps-labs/wukong-load
|
57
69
|
licenses:
|
@@ -80,5 +92,9 @@ specification_version: 3
|
|
80
92
|
summary: Load data produced by Wukong processors and dataflows into data stores.
|
81
93
|
test_files:
|
82
94
|
- spec/spec_helper.rb
|
83
|
-
- spec/wukong-load/elasticsearch_spec.rb
|
95
|
+
- spec/wukong-load/loaders/elasticsearch_spec.rb
|
96
|
+
- spec/wukong-load/loaders/kafka_spec.rb
|
97
|
+
- spec/wukong-load/loaders/mongodb_spec.rb
|
98
|
+
- spec/wukong-load/loaders/sql_spec.rb
|
99
|
+
- spec/wukong-load/models/http_request_spec.rb
|
84
100
|
has_rdoc:
|
@@ -1,99 +0,0 @@
|
|
1
|
-
# This should be extracted into Wonderdog and inserted via the Wukong
|
2
|
-
# plugin mechanism.
|
3
|
-
|
4
|
-
require_relative('loader')
|
5
|
-
|
6
|
-
module Wukong
|
7
|
-
module Load
|
8
|
-
|
9
|
-
# Loads data into Elasticsearch
|
10
|
-
class ElasticsearchLoader < Loader
|
11
|
-
|
12
|
-
field :host, String, :default => 'localhost'
|
13
|
-
field :port, Integer,:default => 9200
|
14
|
-
field :index, String, :default => 'wukong'
|
15
|
-
field :es_type, String, :default => 'streaming_record'
|
16
|
-
field :index_field, String, :default => '_index'
|
17
|
-
field :es_type_field, String, :default => '_es_type'
|
18
|
-
field :id_field, String, :default => '_id'
|
19
|
-
|
20
|
-
attr_accessor :connection
|
21
|
-
|
22
|
-
def setup
|
23
|
-
h = host.gsub(%r{^http://},'')
|
24
|
-
log.debug("Connecting to Elasticsearch cluster at #{h}:#{port}...")
|
25
|
-
begin
|
26
|
-
self.connection = Net::HTTP.new(h, port)
|
27
|
-
self.connection.use_ssl = true if host =~ /^https/
|
28
|
-
rescue => e
|
29
|
-
raise Error.new(e.message)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def load record
|
34
|
-
id_for(record) ? request(Net::HTTP::Put, update_path(record), record) : request(Net::HTTP::Post, create_path(record), record)
|
35
|
-
end
|
36
|
-
|
37
|
-
def create_path record
|
38
|
-
File.join('/', index_for(record).to_s, es_type_for(record).to_s)
|
39
|
-
end
|
40
|
-
|
41
|
-
def update_path record
|
42
|
-
File.join('/', index_for(record).to_s, es_type_for(record).to_s, id_for(record).to_s)
|
43
|
-
end
|
44
|
-
|
45
|
-
def index_for record
|
46
|
-
record[index_field] || self.index
|
47
|
-
end
|
48
|
-
|
49
|
-
def es_type_for record
|
50
|
-
record[es_type_field] || self.es_type
|
51
|
-
end
|
52
|
-
|
53
|
-
def id_for record
|
54
|
-
record[id_field]
|
55
|
-
end
|
56
|
-
|
57
|
-
def request request_type, path, record
|
58
|
-
perform_request(create_request(request_type, path, record))
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
def create_request request_type, path, record
|
64
|
-
request_type.new(path).tap do |req|
|
65
|
-
req.body = MultiJson.dump(record)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def perform_request req
|
70
|
-
begin
|
71
|
-
response = connection.request(req)
|
72
|
-
status = response.code.to_i
|
73
|
-
if (200..201).include?(status)
|
74
|
-
log.info("#{req.class} #{req.path} #{status}")
|
75
|
-
else
|
76
|
-
handle_elasticsearch_error(status, response)
|
77
|
-
end
|
78
|
-
rescue => e
|
79
|
-
log.error("#{e.class} - #{e.message}")
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def handle_elasticsearch_error response
|
84
|
-
begin
|
85
|
-
error = MultiJson.load(response.body)
|
86
|
-
log.error("#{response.code}: #{error['error']}")
|
87
|
-
rescue => e
|
88
|
-
log.error("Received a response code of #{status}: #{response.body}")
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
register :elasticsearch_loader
|
93
|
-
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
|
99
|
-
|
data/lib/wukong-load/runner.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module Load
|
3
|
-
class Runner
|
4
|
-
|
5
|
-
include Logging
|
6
|
-
|
7
|
-
def self.run settings
|
8
|
-
begin
|
9
|
-
new(settings).run
|
10
|
-
rescue Error => e
|
11
|
-
log.error(e.message)
|
12
|
-
exit(127)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
attr_accessor :settings
|
17
|
-
def initialize settings
|
18
|
-
self.settings = settings
|
19
|
-
end
|
20
|
-
|
21
|
-
def args
|
22
|
-
settings.rest
|
23
|
-
end
|
24
|
-
|
25
|
-
def data_store_name
|
26
|
-
args.first
|
27
|
-
end
|
28
|
-
|
29
|
-
def processor_name
|
30
|
-
case data_store_name
|
31
|
-
when 'elasticsearch' then :elasticsearch_loader
|
32
|
-
when nil
|
33
|
-
settings.dump_help
|
34
|
-
exit(1)
|
35
|
-
else
|
36
|
-
raise Error.new("No loader defined for data store: #{data_store_name}")
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def run
|
41
|
-
EM.run do
|
42
|
-
StupidServer.new(processor_name, settings).run!
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
@@ -1,140 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Wukong::Load::ElasticsearchLoader do
|
4
|
-
|
5
|
-
let(:record) { {'text' => 'hi' } }
|
6
|
-
let(:record_with_index) { {'text' => 'hi', '_index' => 'custom_index' } }
|
7
|
-
let(:record_with_custom_index) { {'text' => 'hi', '_custom_index' => 'custom_index' } }
|
8
|
-
let(:record_with_es_type) { {'text' => 'hi', '_es_type' => 'custom_es_type' } }
|
9
|
-
let(:record_with_custom_es_type) { {'text' => 'hi', '_custom_es_type' => 'custom_es_type' } }
|
10
|
-
let(:record_with_id) { {'text' => 'hi', '_id' => 'the_id' } }
|
11
|
-
let(:record_with_custom_id) { {'text' => 'hi', '_custom_id' => 'the_id' } }
|
12
|
-
|
13
|
-
it_behaves_like 'a processor', :named => :elasticsearch_loader
|
14
|
-
|
15
|
-
context "without an Elasticsearch available" do
|
16
|
-
before do
|
17
|
-
Net::HTTP.should_receive(:new).and_raise(StandardError)
|
18
|
-
end
|
19
|
-
|
20
|
-
it "raises an error on setup" do
|
21
|
-
expect { processor(:elasticsearch_loader).setup }.to raise_error(Wukong::Error)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
context "routes" do
|
26
|
-
context "all records" do
|
27
|
-
it "to a default index" do
|
28
|
-
proc = processor(:elasticsearch_loader)
|
29
|
-
proc.index_for(record).should == proc.index
|
30
|
-
end
|
31
|
-
it "to a given index" do
|
32
|
-
processor(:elasticsearch_loader, :index => 'custom_index').index_for(record).should == 'custom_index'
|
33
|
-
end
|
34
|
-
it "to a default type" do
|
35
|
-
proc = processor(:elasticsearch_loader)
|
36
|
-
proc.es_type_for(record).should == proc.es_type
|
37
|
-
end
|
38
|
-
it "to a given type" do
|
39
|
-
processor(:elasticsearch_loader, :es_type => 'custom_es_type').es_type_for(record).should == 'custom_es_type'
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
context "records having a value for" do
|
44
|
-
it "default index field to the given index" do
|
45
|
-
processor(:elasticsearch_loader).index_for(record_with_index).should == 'custom_index'
|
46
|
-
end
|
47
|
-
it "given index field to the given index" do
|
48
|
-
processor(:elasticsearch_loader, :index_field => '_custom_index').index_for(record_with_custom_index).should == 'custom_index'
|
49
|
-
end
|
50
|
-
it "default type field to the given type" do
|
51
|
-
processor(:elasticsearch_loader).es_type_for(record_with_es_type).should == 'custom_es_type'
|
52
|
-
end
|
53
|
-
it "given type field to the given type" do
|
54
|
-
processor(:elasticsearch_loader, :es_type_field => '_custom_es_type').es_type_for(record_with_custom_es_type).should == 'custom_es_type'
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context "detects IDs" do
|
60
|
-
it "based on the absence of a default ID field" do
|
61
|
-
processor(:elasticsearch_loader).id_for(record).should be_nil
|
62
|
-
end
|
63
|
-
it "based on the value of a default ID field" do
|
64
|
-
processor(:elasticsearch_loader).id_for(record_with_id).should == 'the_id'
|
65
|
-
end
|
66
|
-
it "based on the value of a custom ID field" do
|
67
|
-
processor(:elasticsearch_loader, :id_field => '_custom_id').id_for(record_with_custom_id).should == 'the_id'
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
context "having made a connection to the database" do
|
72
|
-
|
73
|
-
let(:connection) { double() }
|
74
|
-
let(:log) { double() }
|
75
|
-
subject { processor(:elasticsearch_loader) }
|
76
|
-
before do
|
77
|
-
Net::HTTP.should_receive(:new).and_return(connection)
|
78
|
-
subject.stub!(:log).and_return(log)
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
context "sends" do
|
83
|
-
it "create requests on a record without an ID" do
|
84
|
-
subject.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
|
85
|
-
subject.load({'_index' => 'foo', '_es_type' => 'bar'})
|
86
|
-
end
|
87
|
-
it "update requests on a record with an ID" do
|
88
|
-
subject.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
|
89
|
-
subject.load({'_index' => 'foo', '_es_type' => 'bar', '_id' => '1'})
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
context "receives" do
|
94
|
-
|
95
|
-
let(:ok) do
|
96
|
-
mock("Net::HTTPOK").tap do |response|
|
97
|
-
response.stub!(:code).and_return('200')
|
98
|
-
response.stub!(:body).and_return('{"ok": true}')
|
99
|
-
end
|
100
|
-
end
|
101
|
-
let(:created) do
|
102
|
-
mock("Net::HTTPCreated").tap do |response|
|
103
|
-
response.stub!(:code).and_return('201')
|
104
|
-
response.stub!(:body).and_return('{"created": true}')
|
105
|
-
end
|
106
|
-
end
|
107
|
-
let(:not_found) do
|
108
|
-
mock("Net::HTTPNotFound").tap do |response|
|
109
|
-
response.stub!(:code).and_return('404')
|
110
|
-
response.stub!(:body).and_return('{"error": "Not found"}')
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
context "201 Created" do
|
115
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created) }
|
116
|
-
it "by logging an INFO message" do
|
117
|
-
log.should_receive(:info)
|
118
|
-
subject.load(record)
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
context "200 OK" do
|
123
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok) }
|
124
|
-
it "by logging an INFO message" do
|
125
|
-
log.should_receive(:info)
|
126
|
-
subject.load(record_with_id)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
context "an error response from Elasticsearch" do
|
131
|
-
before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found) }
|
132
|
-
it "by logging an ERROR message" do
|
133
|
-
log.should_receive(:error)
|
134
|
-
subject.load(record)
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|