wukong-load 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong::HttpRequest do
4
+ context "reporting the best IP address" do
5
+ let(:real_ip) { '10.122.122.122' }
6
+ let(:forwarded_ip) { '10.123.123.123' }
7
+ context "without an X-Forwarded-For header" do
8
+ subject { Wukong::HttpRequest.receive(:ip_address => real_ip) }
9
+ its(:best_ip_address) { should == real_ip }
10
+ end
11
+ context "with an X-Forwarded-For header" do
12
+ subject do
13
+ Wukong::HttpRequest.receive({
14
+ :ip_address => real_ip,
15
+ :headers => {'X-Forwarded-For' => [forwarded_ip, '10.124.124.124'].join(', ') }
16
+ })
17
+ end
18
+ its(:best_ip_address) { should == forwarded_ip }
19
+ end
20
+ end
21
+ end
@@ -17,14 +17,15 @@ Gem::Specification.new do |gem|
17
17
  * MongoDB
18
18
  * HBase
19
19
  * MySQL
20
+ * Kafka
20
21
 
21
22
  and others.
22
23
  EOF
23
24
 
24
25
  gem.files = `git ls-files`.split("\n")
25
- gem.executables = ['wu-load']
26
+ gem.executables = ['wu-load', 'wu-source']
26
27
  gem.test_files = gem.files.grep(/^spec/)
27
28
  gem.require_paths = ['lib']
28
29
 
29
- gem.add_dependency('wukong', '3.0.0.pre3')
30
+ gem.add_dependency('wukong', '3.0.0')
30
31
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong-load
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2012-12-18 00:00:00.000000000 Z
15
+ date: 2013-02-20 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: wukong
@@ -21,7 +21,7 @@ dependencies:
21
21
  requirements:
22
22
  - - '='
23
23
  - !ruby/object:Gem::Version
24
- version: 3.0.0.pre3
24
+ version: 3.0.0
25
25
  type: :runtime
26
26
  prerelease: false
27
27
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,29 +29,41 @@ dependencies:
29
29
  requirements:
30
30
  - - '='
31
31
  - !ruby/object:Gem::Version
32
- version: 3.0.0.pre3
32
+ version: 3.0.0
33
33
  description: ! " Lets you load data from the command-line into data stores like\n\n
34
- \ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n\nand others.\n"
34
+ \ * Elasticsearch\n * MongoDB\n * HBase\n * MySQL\n * Kafka\n\nand others.\n"
35
35
  email: coders@infochimps.com
36
36
  executables:
37
37
  - wu-load
38
+ - wu-source
38
39
  extensions: []
39
40
  extra_rdoc_files: []
40
41
  files:
41
42
  - .gitignore
43
+ - .yardopts
42
44
  - Gemfile
43
45
  - LICENSE.md
44
46
  - README.md
45
47
  - Rakefile
46
48
  - bin/wu-load
49
+ - bin/wu-source
47
50
  - lib/wukong-load.rb
48
- - lib/wukong-load/configuration.rb
49
- - lib/wukong-load/elasticsearch.rb
51
+ - lib/wukong-load/load_runner.rb
50
52
  - lib/wukong-load/loader.rb
51
- - lib/wukong-load/runner.rb
53
+ - lib/wukong-load/loaders/elasticsearch.rb
54
+ - lib/wukong-load/loaders/kafka.rb
55
+ - lib/wukong-load/loaders/mongodb.rb
56
+ - lib/wukong-load/loaders/sql.rb
57
+ - lib/wukong-load/models/http_request.rb
58
+ - lib/wukong-load/source_driver.rb
59
+ - lib/wukong-load/source_runner.rb
52
60
  - lib/wukong-load/version.rb
53
61
  - spec/spec_helper.rb
54
- - spec/wukong-load/elasticsearch_spec.rb
62
+ - spec/wukong-load/loaders/elasticsearch_spec.rb
63
+ - spec/wukong-load/loaders/kafka_spec.rb
64
+ - spec/wukong-load/loaders/mongodb_spec.rb
65
+ - spec/wukong-load/loaders/sql_spec.rb
66
+ - spec/wukong-load/models/http_request_spec.rb
55
67
  - wukong-load.gemspec
56
68
  homepage: https://github.com/infochimps-labs/wukong-load
57
69
  licenses:
@@ -80,5 +92,9 @@ specification_version: 3
80
92
  summary: Load data produced by Wukong processors and dataflows into data stores.
81
93
  test_files:
82
94
  - spec/spec_helper.rb
83
- - spec/wukong-load/elasticsearch_spec.rb
95
+ - spec/wukong-load/loaders/elasticsearch_spec.rb
96
+ - spec/wukong-load/loaders/kafka_spec.rb
97
+ - spec/wukong-load/loaders/mongodb_spec.rb
98
+ - spec/wukong-load/loaders/sql_spec.rb
99
+ - spec/wukong-load/models/http_request_spec.rb
84
100
  has_rdoc:
@@ -1,8 +0,0 @@
1
- module Wukong
2
- module Load
3
-
4
- # All local configuration for Wukong-Load lives within this object.
5
- Configuration = Configliere::Param.new unless defined? Configuration
6
-
7
- end
8
- end
@@ -1,99 +0,0 @@
1
- # This should be extracted into Wonderdog and inserted via the Wukong
2
- # plugin mechanism.
3
-
4
- require_relative('loader')
5
-
6
- module Wukong
7
- module Load
8
-
9
- # Loads data into Elasticsearch
10
- class ElasticsearchLoader < Loader
11
-
12
- field :host, String, :default => 'localhost'
13
- field :port, Integer,:default => 9200
14
- field :index, String, :default => 'wukong'
15
- field :es_type, String, :default => 'streaming_record'
16
- field :index_field, String, :default => '_index'
17
- field :es_type_field, String, :default => '_es_type'
18
- field :id_field, String, :default => '_id'
19
-
20
- attr_accessor :connection
21
-
22
- def setup
23
- h = host.gsub(%r{^http://},'')
24
- log.debug("Connecting to Elasticsearch cluster at #{h}:#{port}...")
25
- begin
26
- self.connection = Net::HTTP.new(h, port)
27
- self.connection.use_ssl = true if host =~ /^https/
28
- rescue => e
29
- raise Error.new(e.message)
30
- end
31
- end
32
-
33
- def load record
34
- id_for(record) ? request(Net::HTTP::Put, update_path(record), record) : request(Net::HTTP::Post, create_path(record), record)
35
- end
36
-
37
- def create_path record
38
- File.join('/', index_for(record).to_s, es_type_for(record).to_s)
39
- end
40
-
41
- def update_path record
42
- File.join('/', index_for(record).to_s, es_type_for(record).to_s, id_for(record).to_s)
43
- end
44
-
45
- def index_for record
46
- record[index_field] || self.index
47
- end
48
-
49
- def es_type_for record
50
- record[es_type_field] || self.es_type
51
- end
52
-
53
- def id_for record
54
- record[id_field]
55
- end
56
-
57
- def request request_type, path, record
58
- perform_request(create_request(request_type, path, record))
59
- end
60
-
61
- private
62
-
63
- def create_request request_type, path, record
64
- request_type.new(path).tap do |req|
65
- req.body = MultiJson.dump(record)
66
- end
67
- end
68
-
69
- def perform_request req
70
- begin
71
- response = connection.request(req)
72
- status = response.code.to_i
73
- if (200..201).include?(status)
74
- log.info("#{req.class} #{req.path} #{status}")
75
- else
76
- handle_elasticsearch_error(status, response)
77
- end
78
- rescue => e
79
- log.error("#{e.class} - #{e.message}")
80
- end
81
- end
82
-
83
- def handle_elasticsearch_error response
84
- begin
85
- error = MultiJson.load(response.body)
86
- log.error("#{response.code}: #{error['error']}")
87
- rescue => e
88
- log.error("Received a response code of #{status}: #{response.body}")
89
- end
90
- end
91
-
92
- register :elasticsearch_loader
93
-
94
- end
95
- end
96
- end
97
-
98
-
99
-
@@ -1,48 +0,0 @@
1
- module Wukong
2
- module Load
3
- class Runner
4
-
5
- include Logging
6
-
7
- def self.run settings
8
- begin
9
- new(settings).run
10
- rescue Error => e
11
- log.error(e.message)
12
- exit(127)
13
- end
14
- end
15
-
16
- attr_accessor :settings
17
- def initialize settings
18
- self.settings = settings
19
- end
20
-
21
- def args
22
- settings.rest
23
- end
24
-
25
- def data_store_name
26
- args.first
27
- end
28
-
29
- def processor_name
30
- case data_store_name
31
- when 'elasticsearch' then :elasticsearch_loader
32
- when nil
33
- settings.dump_help
34
- exit(1)
35
- else
36
- raise Error.new("No loader defined for data store: #{data_store_name}")
37
- end
38
- end
39
-
40
- def run
41
- EM.run do
42
- StupidServer.new(processor_name, settings).run!
43
- end
44
- end
45
-
46
- end
47
- end
48
- end
@@ -1,140 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Wukong::Load::ElasticsearchLoader do
4
-
5
- let(:record) { {'text' => 'hi' } }
6
- let(:record_with_index) { {'text' => 'hi', '_index' => 'custom_index' } }
7
- let(:record_with_custom_index) { {'text' => 'hi', '_custom_index' => 'custom_index' } }
8
- let(:record_with_es_type) { {'text' => 'hi', '_es_type' => 'custom_es_type' } }
9
- let(:record_with_custom_es_type) { {'text' => 'hi', '_custom_es_type' => 'custom_es_type' } }
10
- let(:record_with_id) { {'text' => 'hi', '_id' => 'the_id' } }
11
- let(:record_with_custom_id) { {'text' => 'hi', '_custom_id' => 'the_id' } }
12
-
13
- it_behaves_like 'a processor', :named => :elasticsearch_loader
14
-
15
- context "without an Elasticsearch available" do
16
- before do
17
- Net::HTTP.should_receive(:new).and_raise(StandardError)
18
- end
19
-
20
- it "raises an error on setup" do
21
- expect { processor(:elasticsearch_loader).setup }.to raise_error(Wukong::Error)
22
- end
23
- end
24
-
25
- context "routes" do
26
- context "all records" do
27
- it "to a default index" do
28
- proc = processor(:elasticsearch_loader)
29
- proc.index_for(record).should == proc.index
30
- end
31
- it "to a given index" do
32
- processor(:elasticsearch_loader, :index => 'custom_index').index_for(record).should == 'custom_index'
33
- end
34
- it "to a default type" do
35
- proc = processor(:elasticsearch_loader)
36
- proc.es_type_for(record).should == proc.es_type
37
- end
38
- it "to a given type" do
39
- processor(:elasticsearch_loader, :es_type => 'custom_es_type').es_type_for(record).should == 'custom_es_type'
40
- end
41
- end
42
-
43
- context "records having a value for" do
44
- it "default index field to the given index" do
45
- processor(:elasticsearch_loader).index_for(record_with_index).should == 'custom_index'
46
- end
47
- it "given index field to the given index" do
48
- processor(:elasticsearch_loader, :index_field => '_custom_index').index_for(record_with_custom_index).should == 'custom_index'
49
- end
50
- it "default type field to the given type" do
51
- processor(:elasticsearch_loader).es_type_for(record_with_es_type).should == 'custom_es_type'
52
- end
53
- it "given type field to the given type" do
54
- processor(:elasticsearch_loader, :es_type_field => '_custom_es_type').es_type_for(record_with_custom_es_type).should == 'custom_es_type'
55
- end
56
- end
57
- end
58
-
59
- context "detects IDs" do
60
- it "based on the absence of a default ID field" do
61
- processor(:elasticsearch_loader).id_for(record).should be_nil
62
- end
63
- it "based on the value of a default ID field" do
64
- processor(:elasticsearch_loader).id_for(record_with_id).should == 'the_id'
65
- end
66
- it "based on the value of a custom ID field" do
67
- processor(:elasticsearch_loader, :id_field => '_custom_id').id_for(record_with_custom_id).should == 'the_id'
68
- end
69
- end
70
-
71
- context "having made a connection to the database" do
72
-
73
- let(:connection) { double() }
74
- let(:log) { double() }
75
- subject { processor(:elasticsearch_loader) }
76
- before do
77
- Net::HTTP.should_receive(:new).and_return(connection)
78
- subject.stub!(:log).and_return(log)
79
- end
80
-
81
-
82
- context "sends" do
83
- it "create requests on a record without an ID" do
84
- subject.should_receive(:request).with(Net::HTTP::Post, '/foo/bar', kind_of(Hash))
85
- subject.load({'_index' => 'foo', '_es_type' => 'bar'})
86
- end
87
- it "update requests on a record with an ID" do
88
- subject.should_receive(:request).with(Net::HTTP::Put, '/foo/bar/1', kind_of(Hash))
89
- subject.load({'_index' => 'foo', '_es_type' => 'bar', '_id' => '1'})
90
- end
91
- end
92
-
93
- context "receives" do
94
-
95
- let(:ok) do
96
- mock("Net::HTTPOK").tap do |response|
97
- response.stub!(:code).and_return('200')
98
- response.stub!(:body).and_return('{"ok": true}')
99
- end
100
- end
101
- let(:created) do
102
- mock("Net::HTTPCreated").tap do |response|
103
- response.stub!(:code).and_return('201')
104
- response.stub!(:body).and_return('{"created": true}')
105
- end
106
- end
107
- let(:not_found) do
108
- mock("Net::HTTPNotFound").tap do |response|
109
- response.stub!(:code).and_return('404')
110
- response.stub!(:body).and_return('{"error": "Not found"}')
111
- end
112
- end
113
-
114
- context "201 Created" do
115
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(created) }
116
- it "by logging an INFO message" do
117
- log.should_receive(:info)
118
- subject.load(record)
119
- end
120
- end
121
-
122
- context "200 OK" do
123
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Put)).and_return(ok) }
124
- it "by logging an INFO message" do
125
- log.should_receive(:info)
126
- subject.load(record_with_id)
127
- end
128
- end
129
-
130
- context "an error response from Elasticsearch" do
131
- before { connection.should_receive(:request).with(kind_of(Net::HTTP::Post)).and_return(not_found) }
132
- it "by logging an ERROR message" do
133
- log.should_receive(:error)
134
- subject.load(record)
135
- end
136
- end
137
-
138
- end
139
- end
140
- end