wukong-load 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::HttpRequest do
4
+ context "reporting the best IP address" do
5
+ let(:real_ip) { '10.122.122.122' }
6
+ let(:forwarded_ip) { '10.123.123.123' }
7
+ context "without an X-Forwarded-For header" do
8
+ subject { Wukong::HttpRequest.receive(:ip_address => real_ip) }
9
+ its(:best_ip_address) { should == real_ip }
10
+ end
11
+ context "with an X-Forwarded-For header" do
12
+ subject do
13
+ Wukong::HttpRequest.receive({
14
+ :ip_address => real_ip,
15
+ :headers => {'X-Forwarded-For' => [forwarded_ip, '10.124.124.124'].join(', ') }
16
+ })
17
+ end
18
+ its(:best_ip_address) { should == forwarded_ip }
19
+ end
20
+ end
21
+ end
@@ -17,14 +17,15 @@ Gem::Specification.new do |gem|
17
17
  * MongoDB
18
18
  * HBase
19
19
  * MySQL
20
+ * Kafka
20
21
 
21
22
  and others.
22
23
  EOF
23
24
 
24
25
  gem.files = `git ls-files`.split("\n")
25
- gem.executables = ['wu-load']
26
+ gem.executables = ['wu-load', 'wu-source']
26
27
  gem.test_files = gem.files.grep(/^spec/)
27
28
  gem.require_paths = ['lib']
28
29
 
29
- gem.add_dependency('wukong', '3.0.0.pre3')
30
+ gem.add_dependency('wukong', '3.0.0')
30
31
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong-load
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2012-12-18 00:00:00.000000000 Z
15
+ date: 2013-02-20 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: wukong
@@ -21,7 +21,7 @@ dependencies:
21
21
  requirements:
22
22
  - - '='
23
23
  - !ruby/object:Gem::Version
24
- version: 3.0.0.pre3
24
+ version: 3.0.0
25
25
  type: :runtime
26
26
  prerelease: false
27
27
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,29 +29,41 @@ dependencies:
29
29
  requirements:
30
30
  - - '='
31
31
  - !ruby/object:Gem::Version
32
- version: 3.0.0.pre3
32
+ version: 3.0.0
33
33
  description: ! " Lets you load data from the command-line into data stores like\n\n
34
- \ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n\nand others.\n"
34
+ \ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n * Kafka\n\nand others.\n"
35
35
  email: coders@infochimps.com
36
36
  executables:
37
37
  - wu-load
38
+ - wu-source
38
39
  extensions: []
39
40
  extra_rdoc_files: []
40
41
  files:
41
42
  - .gitignore
43
+ - .yardopts
42
44
  - Gemfile
43
45
  - LICENSE.md
44
46
  - README.md
45
47
  - Rakefile
46
48
  - bin/wu-load
49
+ - bin/wu-source
47
50
  - lib/wukong-load.rb
48
- - lib/wukong-load/configuration.rb
49
- - lib/wukong-load/elasticsearch.rb
51
+ - lib/wukong-load/load_runner.rb
50
52
  - lib/wukong-load/loader.rb
51
- - lib/wukong-load/runner.rb
53
+ - lib/wukong-load/loaders/elasticsearch.rb
54
+ - lib/wukong-load/loaders/kafka.rb
55
+ - lib/wukong-load/loaders/mongodb.rb
56
+ - lib/wukong-load/loaders/sql.rb
57
+ - lib/wukong-load/models/http_request.rb
58
+ - lib/wukong-load/source_driver.rb
59
+ - lib/wukong-load/source_runner.rb
52
60
  - lib/wukong-load/version.rb
53
61
  - spec/spec_helper.rb
54
- - spec/wukong-load/elasticsearch_spec.rb
62
+ - spec/wukong-load/loaders/elasticsearch_spec.rb
63
+ - spec/wukong-load/loaders/kafka_spec.rb
64
+ - spec/wukong-load/loaders/mongodb_spec.rb
65
+ - spec/wukong-load/loaders/sql_spec.rb
66
+ - spec/wukong-load/models/http_request_spec.rb
55
67
  - wukong-load.gemspec
56
68
  homepage: https://github.com/infochimps-labs/wukong-load
57
69
  licenses:
@@ -80,5 +92,9 @@ specification_version: 3
80
92
  summary: Load data produced by Wukong processors and dataflows into data stores.
81
93
  test_files:
82
94
  - spec/spec_helper.rb
83
- - spec/wukong-load/elasticsearch_spec.rb
95
+ - spec/wukong-load/loaders/elasticsearch_spec.rb
96
+ - spec/wukong-load/loaders/kafka_spec.rb
97
+ - spec/wukong-load/loaders/mongodb_spec.rb
98
+ - spec/wukong-load/loaders/sql_spec.rb
99
+ - spec/wukong-load/models/http_request_spec.rb
84
100
  has_rdoc:
@@ -1,8 +0,0 @@
1
- module Wukong
2
- module Load
3
-
4
- # All local configuration for Wukong-Load lives within this object.
5
- Configuration = Configliere::Param.new unless defined? Configuration
6
-
7
- end
8
- end
@@ -1,99 +0,0 @@
1
- # This should be extracted into Wonderdog and inserted via the Wukong
2
- # plugin mechanism.
3
-
4
- require_relative('loader')
5
-
6
- module Wukong
7
- module Load
8
-
9
- # Loads data into Elasticsearch
10
- class ElasticsearchLoader < Loader
11
-
12
- field :host, String, :default => 'localhost'
13
- field :port, Integer,:default => 9200
14
- field :index, String, :default => 'wukong'
15
- field :es_type, String, :default => 'streaming_record'
16
- field :index_field, String, :default => '_index'
17
- field :es_type_field, String, :default => '_es_type'
18
- field :id_field, String, :default => '_id'
19
-
20
- attr_accessor :connection
21
-
22
- def setup
23
- h = host.gsub(%r{^http://},'')
24
- log.debug("Connecting to Elasticsearch cluster at #{h}:#{port}...")
25
- begin
26
- self.connection = Net::HTTP.new(h, port)
27
- self.connection.use_ssl = true if host =~ /^https/
28
- rescue => e
29
- raise Error.new(e.message)
30
- end
31
- end
32
-
33
- def load record
34
- id_for(record) ? request(Net::HTTP::Put, update_path(record), record) : request(Net::HTTP::Post, create_path(record), record)
35
- end
36
-
37
- def create_path record
38
- File.join('/', index_for(record).to_s, es_type_for(record).to_s)
39
- end
40
-
41
- def update_path record
42
- File.join('/', index_for(record).to_s, es_type_for(record).to_s, id_for(record).to_s)
43
- end
44
-
45
- def index_for record
46
- record[index_field] || self.index
47
- end
48
-
49
- def es_type_for record
50
- record[es_type_field] || self.es_type
51
- end
52
-
53
- def id_for record
54
- record[id_field]
55
- end
56
-
57
- def request request_type, path, record
58
- perform_request(create_request(request_type, path, record))
59
- end
60
-
61
- private
62
-
63
- def create_request request_type, path, record
64
- request_type.new(path).tap do |req|
65
- req.body = MultiJson.dump(record)
66
- end
67
- end
68
-
69
- def perform_request req
70
- begin
71
- response = connection.request(req)
72
- status = response.code.to_i
73
- if (200..201).include?(status)
74
- log.info("#{req.class} #{req.path} #{status}")
75
- else
76
- handle_elasticsearch_error(status, response)
77
- end
78
- rescue => e
79
- log.error("#{e.class} - #{e.message}")
80
- end
81
- end
82
-
83
- def handle_elasticsearch_error response
84
- begin
85
- error = MultiJson.load(response.body)
86
- log.error("#{response.code}: #{error['error']}")
87
- rescue => e
88
- log.error("Received a response code of #{status}: #{response.body}")
89
- end
90
- end
91
-
92
- register :elasticsearch_loader
93
-
94
- end
95
- end
96
- end
97
-
98
-
99
-
@@ -1,48 +0,0 @@
1
- module Wukong
2
- module Load
3
- class Runner
4
-
5
- include Logging
6
-
7
- def self.run settings
8
- begin
9
- new(settings).run
10
- rescue Error => e
11
- log.error(e.message)
12
- exit(127)
13
- end
14
- end
15
-
16
- attr_accessor :settings
17
- def initialize settings
18
- self.settings = settings
19
- end
20
-
21
- def args
22
- settings.rest
23
- end
24
-
25
- def data_store_name
26
- args.first
27
- end
28
-
29
- def processor_name
30
- case data_store_name
31
- when 'elasticsearch' then :elasticsearch_loader
32
- when nil
33
- settings.dump_help
34
- exit(1)
35
- else
36
- raise Error.new("No loader defined for data store: #{data_store_name}")
37
- end
38
- end
39
-
40
- def run
41
- EM.run do
42
- StupidServer.new(processor_name, settings).run!
43
- end
44
- end
45
-
46
- end
47
- end
48
- end
@@ -1,140 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Wukong::Load::ElasticsearchLoader do
4
-
5
- let(:record) { {'text' => 'hi' } }
6
- let(:record_with_index) { {'text' => 'hi', '_index' => 'custom_index' } }
7
- let(:record_with_custom_index) { {'text' => 'hi', '_custom_index' => 'custom_index' } }
8
- let(:record_with_es_type) { {'text' => 'hi', '_es_type' => 'custom_es_type' } }
9
- let(:record_with_custom_es_type) { {'text' => 'hi', '_custom_es_type' => 'custom_es_type' } }
10
- let(:record_with_id) { {'text' => 'hi', '_id' => 'the_id' } }
11
- let(:record_with_custom_id) { {'text' => 'hi', '_custom_id' => 'the_id' } }
12
-
13
- it_behaves_like 'a processor', :named => :elasticsearch_loader
14
-
15
- context "without an Elasticsearch available" do
16
- before do
17
- Net::HTTP.should_receive(:new).and_raise(StandardError)
18
- end
19
-
20
- it "raises an error on setup" do
21
- expect { processor(:elasticsearch_loader).setup }.to raise_error(Wukong::Error)
22
- end
23
- end
24
-
25
- context "routes" do
26
- context "all records" do
27
- it "to a default index" do
28
- proc = processor(:elasticsearch_loader)
29
- proc.index_for(record).should == proc.index
30
- end
31
- it "to a given index" do
32
- processor(:elasticsearch_loader, :index => 'custom_index').index_for(record).should == 'custom_index'
33
- end
34
- it "to a default type" do
35
- proc = processor(:elasticsearch_loader)
36
- proc.es_type_for(record).should == proc.es_type
37
- end
38
- it "to a given type" do
39
- processor(:elasticsearch_loader, :es_type => 'custom_es_type').es_type_for(record).should == 'custom_es_type'
40
- end
41
- end
42
-
43
- context "records having a value for" do
44
- it "default index field to the given index" do
45
- processor(:elasticsearch_loader).index_for(record_with_index).should == 'custom_index'
46
- end
47
- it "given index field to the given index" do
48
- processor(:elasticsearch_loader, :index_field => '_custom_index').index_for(record_with_custom_index).should == 'custom_index'
49
- end
50
- it "default type field to the given type" do
51
- processor(:elasticsearch_loader).es_type_for(record_with_es_type).should == 'custom_es_type'
52
- end
53
- it "given type field to the given type" do
54
- processor(:elasticsearch_loader, :es_type_field => '_custom_es_type').es_type_for(record_with_custom_es_type).should == 'custom_es_type'
55
- end
56
- end
57
- end
58
-
59
- context "detects IDs" do
60
- it "based on the absence of a default ID field" do
61
- processor(:elasticsearch_loader).id_for(record).should be_nil
62
- end
63
- it "based on the value of a default ID field" do
64
- processor(:elasticsearch_loader).id_for(record_with_id).should == 'the_id'
65
- end
66
- it "based on the value of a custom ID field" do
67
- processor(:elasticsearch_loader, :id_field => '_custom_id').id_for(record_with_custom_id).should == 'the_id'
68
- end
69
- end
70
-
71
- context "having made a connection to the database" do
72
-
73
- let(:connection) { double() }
74
- let(:log) { double() }
75
- subject { processor(:elasticsearch_loader) }
76
- before do
77
- Net::HTTP.should_receive(:new).and_return(connection)
78
- subject.stub!(:log).and_return(log)
79
- end
80
-
81
-
82
- context "sends" do
83
- it "create requests on a record without an ID" do
84
- subject.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
85
- subject.load({'_index' => 'foo', '_es_type' => 'bar'})
86
- end
87
- it "update requests on a record with an ID" do
88
- subject.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
89
- subject.load({'_index' => 'foo', '_es_type' => 'bar', '_id' => '1'})
90
- end
91
- end
92
-
93
- context "receives" do
94
-
95
- let(:ok) do
96
- mock("Net::HTTPOK").tap do |response|
97
- response.stub!(:code).and_return('200')
98
- response.stub!(:body).and_return('{"ok": true}')
99
- end
100
- end
101
- let(:created) do
102
- mock("Net::HTTPCreated").tap do |response|
103
- response.stub!(:code).and_return('201')
104
- response.stub!(:body).and_return('{"created": true}')
105
- end
106
- end
107
- let(:not_found) do
108
- mock("Net::HTTPNotFound").tap do |response|
109
- response.stub!(:code).and_return('404')
110
- response.stub!(:body).and_return('{"error": "Not found"}')
111
- end
112
- end
113
-
114
- context "201 Created" do
115
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created) }
116
- it "by logging an INFO message" do
117
- log.should_receive(:info)
118
- subject.load(record)
119
- end
120
- end
121
-
122
- context "200 OK" do
123
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok) }
124
- it "by logging an INFO message" do
125
- log.should_receive(:info)
126
- subject.load(record_with_id)
127
- end
128
- end
129
-
130
- context "an error response from Elasticsearch" do
131
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found) }
132
- it "by logging an ERROR message" do
133
- log.should_receive(:error)
134
- subject.load(record)
135
- end
136
- end
137
-
138
- end
139
- end
140
- end