wukong 3.0.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/README.md +253 -45
- data/bin/wu +34 -0
- data/bin/wu-source +5 -0
- data/examples/Gemfile +0 -1
- data/examples/deploy_pack/Gemfile +0 -1
- data/examples/improver/tweet_summary.rb +73 -0
- data/examples/ruby_project/Gemfile +0 -1
- data/examples/splitter.rb +94 -0
- data/examples/twitter.rb +5 -0
- data/lib/hanuman.rb +1 -1
- data/lib/hanuman/graph.rb +39 -22
- data/lib/hanuman/stage.rb +46 -13
- data/lib/hanuman/tree.rb +67 -0
- data/lib/wukong.rb +6 -1
- data/lib/wukong/dataflow.rb +19 -48
- data/lib/wukong/driver.rb +176 -65
- data/lib/wukong/{local → driver}/event_machine_driver.rb +1 -13
- data/lib/wukong/driver/wiring.rb +68 -0
- data/lib/wukong/local.rb +6 -4
- data/lib/wukong/local/runner.rb +14 -16
- data/lib/wukong/local/stdio_driver.rb +72 -12
- data/lib/wukong/processor.rb +1 -30
- data/lib/wukong/runner.rb +2 -0
- data/lib/wukong/runner/command_runner.rb +44 -0
- data/lib/wukong/source.rb +33 -0
- data/lib/wukong/source/source_driver.rb +74 -0
- data/lib/wukong/source/source_runner.rb +38 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +0 -1
- data/lib/wukong/spec_helpers/unit_tests.rb +6 -5
- data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +4 -14
- data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +7 -8
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/echo.rb +55 -0
- data/lib/wukong/widget/{processors.rb → extract.rb} +0 -106
- data/lib/wukong/widget/filters.rb +15 -0
- data/lib/wukong/widget/logger.rb +56 -0
- data/lib/wukong/widget/operators.rb +82 -0
- data/lib/wukong/widget/reducers.rb +2 -0
- data/lib/wukong/widget/reducers/improver.rb +71 -0
- data/lib/wukong/widget/reducers/join_xml.rb +37 -0
- data/lib/wukong/widget/serializers.rb +21 -6
- data/lib/wukong/widgets.rb +6 -3
- data/spec/hanuman/graph_spec.rb +73 -10
- data/spec/hanuman/stage_spec.rb +15 -0
- data/spec/hanuman/tree_spec.rb +119 -0
- data/spec/spec_helper.rb +13 -1
- data/spec/support/example_test_helpers.rb +0 -1
- data/spec/support/model_test_helpers.rb +1 -1
- data/spec/support/shared_context_for_graphs.rb +57 -0
- data/spec/support/shared_examples_for_builders.rb +8 -15
- data/spec/wukong/driver_spec.rb +152 -0
- data/spec/wukong/local/runner_spec.rb +1 -12
- data/spec/wukong/local/stdio_driver_spec.rb +73 -0
- data/spec/wukong/processor_spec.rb +0 -1
- data/spec/wukong/runner_spec.rb +2 -2
- data/spec/wukong/source_spec.rb +6 -0
- data/spec/wukong/widget/extract_spec.rb +101 -0
- data/spec/wukong/widget/logger_spec.rb +23 -0
- data/spec/wukong/widget/operators_spec.rb +25 -0
- data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
- data/spec/wukong/wu-source_spec.rb +32 -0
- data/spec/wukong/wu_spec.rb +14 -0
- data/wukong.gemspec +1 -2
- metadata +45 -28
- data/lib/wukong/local/tcp_driver.rb +0 -47
- data/spec/wu/geo/geolocated_spec.rb +0 -247
- data/spec/wukong/widget/processors_spec.rb +0 -125
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe :logger do
|
4
|
+
it_behaves_like "a processor", :named => :logger
|
5
|
+
|
6
|
+
it "logs each event at the 'info' level by default" do
|
7
|
+
log = double("logger")
|
8
|
+
log.should_receive(:info).with('hi there')
|
9
|
+
log.should_receive(:info).with('buddy')
|
10
|
+
processor(:logger) do |proc|
|
11
|
+
proc.stub(:log).and_return(log)
|
12
|
+
end.given('hi there', 'buddy').should emit(0).records
|
13
|
+
end
|
14
|
+
|
15
|
+
it "logs each event at the a desired level set with an argument" do
|
16
|
+
log = double("logger")
|
17
|
+
log.should_receive(:debug).with('hi there')
|
18
|
+
log.should_receive(:debug).with('buddy')
|
19
|
+
processor(:logger, level: :debug) do |proc|
|
20
|
+
proc.stub(:log).and_return(log)
|
21
|
+
end.given('hi there', 'buddy').should emit(0).records
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Operators" do
|
4
|
+
|
5
|
+
describe :map do
|
6
|
+
it_behaves_like 'a processor', :named => :map
|
7
|
+
it "performs an action on each input record" do
|
8
|
+
processor(:map, action: ->(input_record) { input_record.upcase }).given('snap', 'crackle', 'pop').should emit('SNAP', 'CRACKLE', 'POP')
|
9
|
+
end
|
10
|
+
|
11
|
+
it "can simultaneously filter out records" do
|
12
|
+
processor(:map, compact: true, action: ->(input_record) { input_record + 1 if input_record > 0 }).given(2, -4, 6).should emit(3, 7)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe :flatten do
|
17
|
+
it_behaves_like 'a processor', :named => :flatten
|
18
|
+
|
19
|
+
it "yields each input record or its contents" do
|
20
|
+
processor(:flatten).given('foo', ['bar', 'baz'], 'bing').should emit('foo', 'bar', 'baz', 'bing')
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Reducers" do
|
4
|
+
describe :join_xml do
|
5
|
+
include_context "reducers"
|
6
|
+
it_behaves_like 'a processor', :named => :join_xml
|
7
|
+
|
8
|
+
it "joins XML spread out over multiple lines" do
|
9
|
+
processor(:join_xml).given('<xml>first line', 'second line', 'third line</xml>').should emit("<xml>first line\nsecond line\nthird line</xml>")
|
10
|
+
end
|
11
|
+
|
12
|
+
it "joins XML one-per-line" do
|
13
|
+
processor(:join_xml).given('<xml>first line</xml>', '<xml>second line</xml>', '<xml>third line</xml>').should emit('<xml>first line</xml>', '<xml>second line</xml>', '<xml>third line</xml>')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "joins XML split in the middle of a line" do
|
17
|
+
processor(:join_xml).given('<xml>first line', 'second</xml><xml> line', 'third line</xml>').should emit("<xml>first line\nsecond</xml>", "<xml> line\nthird line</xml>")
|
18
|
+
end
|
19
|
+
|
20
|
+
it "joins XML with a custom tag" do
|
21
|
+
processor(:join_xml, root: 'foobar').given('<foobar>first line', 'second line', 'third line</foobar>').should emit("<foobar>first line\nsecond line\nthird line</foobar>")
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'wu-source' do
|
4
|
+
|
5
|
+
let(:input) { %w[1 2 3] }
|
6
|
+
|
7
|
+
context "without any arguments" do
|
8
|
+
subject { wu_source() }
|
9
|
+
it {should exit_with(:non_zero) }
|
10
|
+
it "displays help on STDERR" do
|
11
|
+
should have_stderr(/provide a processor.*to run/)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# FIXME -- it's hard to write an integration test for wu-source
|
16
|
+
# because it doesn't self-terminate under any conditions when run
|
17
|
+
# successfully.
|
18
|
+
#
|
19
|
+
# Options:
|
20
|
+
#
|
21
|
+
# 1) Add a --max (or similar) flag to wu-source allowing it to
|
22
|
+
# exit after some number of records which could then be checked
|
23
|
+
# by an integration test.
|
24
|
+
#
|
25
|
+
# 2) Launch it in a subprocess and wait a little while (how long?)
|
26
|
+
# and ensure that it's produced a bunch of output in the meantime.
|
27
|
+
# If the `per_sec` option is high, we shouldn't have to wait very
|
28
|
+
# long to see a whole bunch of output records. This is tricky b/c
|
29
|
+
# what if the system is under load and we don't wait long enough
|
30
|
+
# for the wu-source subprocess to boot up and start emitting?
|
31
|
+
|
32
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'wu' do
|
4
|
+
|
5
|
+
let(:input) { %w[1 2 3] }
|
6
|
+
|
7
|
+
context "without any arguments" do
|
8
|
+
subject { wu() }
|
9
|
+
it {should exit_with(:non_zero) }
|
10
|
+
it "displays help on STDERR" do
|
11
|
+
should have_stderr(/provide a Wukong command to run/)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/wukong.gemspec
CHANGED
@@ -21,13 +21,12 @@ Gem::Specification.new do |gem|
|
|
21
21
|
EOF
|
22
22
|
|
23
23
|
gem.files = `git ls-files`.split("\n").reject { |path| path =~ /^(data|docpages|notes|old)/ }
|
24
|
-
gem.executables = ['wu-local']
|
24
|
+
gem.executables = ['wu-local', 'wu-source', 'wu']
|
25
25
|
gem.test_files = gem.files.grep(/^spec/)
|
26
26
|
gem.require_paths = ['lib']
|
27
27
|
|
28
28
|
gem.add_dependency('configliere', '>= 0.4.18')
|
29
29
|
gem.add_dependency('multi_json', '>= 1.3.6')
|
30
|
-
gem.add_dependency('vayacondios-client', '>= 0.1.2')
|
31
30
|
gem.add_dependency('gorillib', '>= 0.4.2')
|
32
31
|
gem.add_dependency('forgery')
|
33
32
|
gem.add_dependency('uuidtools')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1
|
4
|
+
version: 4.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2014-03-19 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: configliere
|
@@ -45,22 +45,6 @@ dependencies:
|
|
45
45
|
- - ! '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: 1.3.6
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: vayacondios-client
|
50
|
-
requirement: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
|
-
requirements:
|
53
|
-
- - ! '>='
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
version: 0.1.2
|
56
|
-
type: :runtime
|
57
|
-
prerelease: false
|
58
|
-
version_requirements: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
|
-
requirements:
|
61
|
-
- - ! '>='
|
62
|
-
- !ruby/object:Gem::Version
|
63
|
-
version: 0.1.2
|
64
48
|
- !ruby/object:Gem::Dependency
|
65
49
|
name: gorillib
|
66
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,6 +133,8 @@ description: ! " Treat your dataset like a:\n\n * stream of lines when it'
|
|
149
133
|
email: coders@infochimps.com
|
150
134
|
executables:
|
151
135
|
- wu-local
|
136
|
+
- wu-source
|
137
|
+
- wu
|
152
138
|
extensions: []
|
153
139
|
extra_rdoc_files: []
|
154
140
|
files:
|
@@ -174,6 +160,7 @@ files:
|
|
174
160
|
- bin/tabchar
|
175
161
|
- bin/uniq-ord
|
176
162
|
- bin/uniqc
|
163
|
+
- bin/wu
|
177
164
|
- bin/wu-clean-encoding
|
178
165
|
- bin/wu-date
|
179
166
|
- bin/wu-datetime
|
@@ -181,6 +168,7 @@ files:
|
|
181
168
|
- bin/wu-lign
|
182
169
|
- bin/wu-local
|
183
170
|
- bin/wu-plus
|
171
|
+
- bin/wu-source
|
184
172
|
- bin/wu-sum
|
185
173
|
- diagrams/wu_local.dot
|
186
174
|
- diagrams/wu_local.dot.png
|
@@ -215,6 +203,7 @@ files:
|
|
215
203
|
- examples/geo/tile_fetcher.rb
|
216
204
|
- examples/graph/implied_geolocation/README.md
|
217
205
|
- examples/graph/minimum_spanning_tree/airfares_graphviz.rb
|
206
|
+
- examples/improver/tweet_summary.rb
|
218
207
|
- examples/loadable.rb
|
219
208
|
- examples/munging/airline_flights/airline_flights.rake
|
220
209
|
- examples/munging/airline_flights/airplane.rb
|
@@ -266,6 +255,7 @@ files:
|
|
266
255
|
- examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
|
267
256
|
- examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
|
268
257
|
- examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
|
258
|
+
- examples/splitter.rb
|
269
259
|
- examples/string_reverser.rb
|
270
260
|
- examples/text/pig_latin/pig_latinizer.rb
|
271
261
|
- examples/text/pig_latin/pig_latinizer_widget.rb
|
@@ -273,6 +263,7 @@ files:
|
|
273
263
|
- examples/text/regional_flavor/article_wordbags.pig
|
274
264
|
- examples/text/regional_flavor/j01-article_wordbags.rb
|
275
265
|
- examples/text/regional_flavor/simple_pig_script.pig
|
266
|
+
- examples/twitter.rb
|
276
267
|
- lib/hanuman.rb
|
277
268
|
- lib/hanuman/graph.rb
|
278
269
|
- lib/hanuman/graphvizzer.rb
|
@@ -281,6 +272,7 @@ files:
|
|
281
272
|
- lib/hanuman/link.rb
|
282
273
|
- lib/hanuman/registry.rb
|
283
274
|
- lib/hanuman/stage.rb
|
275
|
+
- lib/hanuman/tree.rb
|
284
276
|
- lib/wu/geo.rb
|
285
277
|
- lib/wu/geo/geo_grids.numbers
|
286
278
|
- lib/wu/geo/geolocated.rb
|
@@ -297,11 +289,11 @@ files:
|
|
297
289
|
- lib/wukong/doc_helpers/field_handler.rb
|
298
290
|
- lib/wukong/doc_helpers/processor_handler.rb
|
299
291
|
- lib/wukong/driver.rb
|
292
|
+
- lib/wukong/driver/event_machine_driver.rb
|
293
|
+
- lib/wukong/driver/wiring.rb
|
300
294
|
- lib/wukong/local.rb
|
301
|
-
- lib/wukong/local/event_machine_driver.rb
|
302
295
|
- lib/wukong/local/runner.rb
|
303
296
|
- lib/wukong/local/stdio_driver.rb
|
304
|
-
- lib/wukong/local/tcp_driver.rb
|
305
297
|
- lib/wukong/logger.rb
|
306
298
|
- lib/wukong/model/faker.rb
|
307
299
|
- lib/wukong/model/flatpack_parser/flat.rb
|
@@ -315,8 +307,12 @@ files:
|
|
315
307
|
- lib/wukong/runner.rb
|
316
308
|
- lib/wukong/runner/boot_sequence.rb
|
317
309
|
- lib/wukong/runner/code_loader.rb
|
310
|
+
- lib/wukong/runner/command_runner.rb
|
318
311
|
- lib/wukong/runner/deploy_pack_loader.rb
|
319
312
|
- lib/wukong/runner/help_message.rb
|
313
|
+
- lib/wukong/source.rb
|
314
|
+
- lib/wukong/source/source_driver.rb
|
315
|
+
- lib/wukong/source/source_runner.rb
|
320
316
|
- lib/wukong/spec_helpers.rb
|
321
317
|
- lib/wukong/spec_helpers/integration_tests.rb
|
322
318
|
- lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb
|
@@ -327,14 +323,19 @@ files:
|
|
327
323
|
- lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb
|
328
324
|
- lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
|
329
325
|
- lib/wukong/version.rb
|
326
|
+
- lib/wukong/widget/echo.rb
|
327
|
+
- lib/wukong/widget/extract.rb
|
330
328
|
- lib/wukong/widget/filters.rb
|
331
|
-
- lib/wukong/widget/processors.rb
|
329
|
+
- lib/wukong/widget/logger.rb
|
330
|
+
- lib/wukong/widget/operators.rb
|
332
331
|
- lib/wukong/widget/reducers.rb
|
333
332
|
- lib/wukong/widget/reducers/accumulator.rb
|
334
333
|
- lib/wukong/widget/reducers/bin.rb
|
335
334
|
- lib/wukong/widget/reducers/count.rb
|
336
335
|
- lib/wukong/widget/reducers/group.rb
|
337
336
|
- lib/wukong/widget/reducers/group_concat.rb
|
337
|
+
- lib/wukong/widget/reducers/improver.rb
|
338
|
+
- lib/wukong/widget/reducers/join_xml.rb
|
338
339
|
- lib/wukong/widget/reducers/moments.rb
|
339
340
|
- lib/wukong/widget/reducers/sort.rb
|
340
341
|
- lib/wukong/widget/reducers/uniq.rb
|
@@ -355,36 +356,44 @@ files:
|
|
355
356
|
- spec/hanuman/hanuman_spec.rb
|
356
357
|
- spec/hanuman/registry_spec.rb
|
357
358
|
- spec/hanuman/stage_spec.rb
|
359
|
+
- spec/hanuman/tree_spec.rb
|
358
360
|
- spec/spec.opts
|
359
361
|
- spec/spec_helper.rb
|
360
362
|
- spec/support/example_test_helpers.rb
|
361
363
|
- spec/support/hanuman_test_helpers.rb
|
362
364
|
- spec/support/integration_helper.rb
|
363
365
|
- spec/support/model_test_helpers.rb
|
366
|
+
- spec/support/shared_context_for_graphs.rb
|
364
367
|
- spec/support/shared_context_for_reducers.rb
|
365
368
|
- spec/support/shared_examples_for_builders.rb
|
366
369
|
- spec/support/shared_examples_for_shortcuts.rb
|
367
|
-
- spec/wu/geo/geolocated_spec.rb
|
368
370
|
- spec/wu/model/reconcilable_spec.rb
|
369
371
|
- spec/wukong/dataflow_spec.rb
|
370
372
|
- spec/wukong/driver_spec.rb
|
371
373
|
- spec/wukong/local/runner_spec.rb
|
374
|
+
- spec/wukong/local/stdio_driver_spec.rb
|
372
375
|
- spec/wukong/local_spec.rb
|
373
376
|
- spec/wukong/logger_spec.rb
|
374
377
|
- spec/wukong/model/faker_spec.rb
|
375
378
|
- spec/wukong/processor_spec.rb
|
376
379
|
- spec/wukong/runner_spec.rb
|
380
|
+
- spec/wukong/source_spec.rb
|
381
|
+
- spec/wukong/widget/extract_spec.rb
|
377
382
|
- spec/wukong/widget/filters_spec.rb
|
378
|
-
- spec/wukong/widget/processors_spec.rb
|
383
|
+
- spec/wukong/widget/logger_spec.rb
|
384
|
+
- spec/wukong/widget/operators_spec.rb
|
379
385
|
- spec/wukong/widget/reducers/bin_spec.rb
|
380
386
|
- spec/wukong/widget/reducers/count_spec.rb
|
381
387
|
- spec/wukong/widget/reducers/group_spec.rb
|
388
|
+
- spec/wukong/widget/reducers/join_xml_spec.rb
|
382
389
|
- spec/wukong/widget/reducers/moments_spec.rb
|
383
390
|
- spec/wukong/widget/reducers/sort_spec.rb
|
384
391
|
- spec/wukong/widget/serializers_spec.rb
|
385
392
|
- spec/wukong/widget/sink_spec.rb
|
386
393
|
- spec/wukong/widget/source_spec.rb
|
387
394
|
- spec/wukong/wu-local_spec.rb
|
395
|
+
- spec/wukong/wu-source_spec.rb
|
396
|
+
- spec/wukong/wu_spec.rb
|
388
397
|
- spec/wukong/wukong_spec.rb
|
389
398
|
- wukong.gemspec
|
390
399
|
homepage: https://github.com/infochimps-labs/wukong
|
@@ -402,7 +411,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
402
411
|
version: '0'
|
403
412
|
segments:
|
404
413
|
- 0
|
405
|
-
hash:
|
414
|
+
hash: -3691227993438066896
|
406
415
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
407
416
|
none: false
|
408
417
|
requirements:
|
@@ -411,10 +420,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
411
420
|
version: '0'
|
412
421
|
segments:
|
413
422
|
- 0
|
414
|
-
hash:
|
423
|
+
hash: -3691227993438066896
|
415
424
|
requirements: []
|
416
425
|
rubyforge_project:
|
417
|
-
rubygems_version: 1.8.
|
426
|
+
rubygems_version: 1.8.23
|
418
427
|
signing_key:
|
419
428
|
specification_version: 3
|
420
429
|
summary: Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use
|
@@ -434,35 +443,43 @@ test_files:
|
|
434
443
|
- spec/hanuman/hanuman_spec.rb
|
435
444
|
- spec/hanuman/registry_spec.rb
|
436
445
|
- spec/hanuman/stage_spec.rb
|
446
|
+
- spec/hanuman/tree_spec.rb
|
437
447
|
- spec/spec.opts
|
438
448
|
- spec/spec_helper.rb
|
439
449
|
- spec/support/example_test_helpers.rb
|
440
450
|
- spec/support/hanuman_test_helpers.rb
|
441
451
|
- spec/support/integration_helper.rb
|
442
452
|
- spec/support/model_test_helpers.rb
|
453
|
+
- spec/support/shared_context_for_graphs.rb
|
443
454
|
- spec/support/shared_context_for_reducers.rb
|
444
455
|
- spec/support/shared_examples_for_builders.rb
|
445
456
|
- spec/support/shared_examples_for_shortcuts.rb
|
446
|
-
- spec/wu/geo/geolocated_spec.rb
|
447
457
|
- spec/wu/model/reconcilable_spec.rb
|
448
458
|
- spec/wukong/dataflow_spec.rb
|
449
459
|
- spec/wukong/driver_spec.rb
|
450
460
|
- spec/wukong/local/runner_spec.rb
|
461
|
+
- spec/wukong/local/stdio_driver_spec.rb
|
451
462
|
- spec/wukong/local_spec.rb
|
452
463
|
- spec/wukong/logger_spec.rb
|
453
464
|
- spec/wukong/model/faker_spec.rb
|
454
465
|
- spec/wukong/processor_spec.rb
|
455
466
|
- spec/wukong/runner_spec.rb
|
467
|
+
- spec/wukong/source_spec.rb
|
468
|
+
- spec/wukong/widget/extract_spec.rb
|
456
469
|
- spec/wukong/widget/filters_spec.rb
|
457
|
-
- spec/wukong/widget/processors_spec.rb
|
470
|
+
- spec/wukong/widget/logger_spec.rb
|
471
|
+
- spec/wukong/widget/operators_spec.rb
|
458
472
|
- spec/wukong/widget/reducers/bin_spec.rb
|
459
473
|
- spec/wukong/widget/reducers/count_spec.rb
|
460
474
|
- spec/wukong/widget/reducers/group_spec.rb
|
475
|
+
- spec/wukong/widget/reducers/join_xml_spec.rb
|
461
476
|
- spec/wukong/widget/reducers/moments_spec.rb
|
462
477
|
- spec/wukong/widget/reducers/sort_spec.rb
|
463
478
|
- spec/wukong/widget/serializers_spec.rb
|
464
479
|
- spec/wukong/widget/sink_spec.rb
|
465
480
|
- spec/wukong/widget/source_spec.rb
|
466
481
|
- spec/wukong/wu-local_spec.rb
|
482
|
+
- spec/wukong/wu-source_spec.rb
|
483
|
+
- spec/wukong/wu_spec.rb
|
467
484
|
- spec/wukong/wukong_spec.rb
|
468
485
|
has_rdoc:
|
@@ -1,47 +0,0 @@
|
|
1
|
-
require_relative('event_machine_driver')
|
2
|
-
module Wukong
|
3
|
-
module Local
|
4
|
-
|
5
|
-
# A class for driving processors over a TCP protocol.
|
6
|
-
class TCPDriver < EM::P::LineAndTextProtocol
|
7
|
-
include EventMachineDriver
|
8
|
-
include Processor::BufferedProcessor
|
9
|
-
include Logging
|
10
|
-
|
11
|
-
def self.start(label, settings = {})
|
12
|
-
host = (settings[:host] || Socket.gethostname) rescue 'localhost'
|
13
|
-
port = (settings[:port] || 9000).to_i rescue 9000
|
14
|
-
EM.start_server(host, port, self, label, settings)
|
15
|
-
log.info "Server started on #{host} on port #{port}"
|
16
|
-
add_signal_traps
|
17
|
-
end
|
18
|
-
|
19
|
-
def post_init
|
20
|
-
port, ip = Socket.unpack_sockaddr_in(get_peername)
|
21
|
-
log.info "Connected to #{ip} on #{port}"
|
22
|
-
setup_dataflow
|
23
|
-
end
|
24
|
-
|
25
|
-
def receive_line line
|
26
|
-
@buffer = []
|
27
|
-
operation = proc { driver.send_through_dataflow(line) }
|
28
|
-
callback = proc { flush_buffer @buffer }
|
29
|
-
EM.defer(operation, callback)
|
30
|
-
rescue => e
|
31
|
-
EM.stop
|
32
|
-
raise Wukong::Error.new(e)
|
33
|
-
end
|
34
|
-
|
35
|
-
def flush_buffer records
|
36
|
-
send_data(records.join("\n") + "\n")
|
37
|
-
records.clear
|
38
|
-
end
|
39
|
-
|
40
|
-
def unbind
|
41
|
-
finalize_and_stop_dataflow
|
42
|
-
EM.stop
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
@@ -1,247 +0,0 @@
|
|
1
|
-
require 'gorillib/data_munging'
|
2
|
-
require 'wu/geo/geolocated'
|
3
|
-
|
4
|
-
describe Wukong::Geolocated do
|
5
|
-
let(:aus_lng){ -97.759003 } # Austin, TX -- infochimps HQ
|
6
|
-
let(:aus_lat){ 30.273884 }
|
7
|
-
let(:sat_lng){ -98.486123 } # San Antonio, TX
|
8
|
-
let(:sat_lat){ 29.42575 }
|
9
|
-
let(:dpi){ 72 }
|
10
|
-
#
|
11
|
-
let(:aus_tile_x_3){ 1.82758 } # zoom level 3
|
12
|
-
let(:aus_tile_y_3){ 3.29356 }
|
13
|
-
let(:aus_pixel_x_3){ 468 }
|
14
|
-
let(:aus_pixel_y_3){ 843 }
|
15
|
-
#
|
16
|
-
let(:aus_tile_x_8){ 58.48248675555555 } # zoom level 8
|
17
|
-
let(:aus_tile_y_8){ 105.39405073699557 }
|
18
|
-
let(:aus_tile_x_11){ 467 } # zoom level 11
|
19
|
-
let(:aus_tile_y_11){ 843 }
|
20
|
-
#
|
21
|
-
let(:aus_quadkey ){ "0231301203311211" }
|
22
|
-
let(:aus_quadkey_3){ "023" }
|
23
|
-
let(:radius){ 1_000_000 } # 1,000 km
|
24
|
-
|
25
|
-
context Wukong::Geolocated::ByCoordinates do
|
26
|
-
let(:point_klass) do
|
27
|
-
module Wukong
|
28
|
-
class TestPoint
|
29
|
-
include Gorillib::Model
|
30
|
-
include Wukong::Geolocated::ByCoordinates
|
31
|
-
field :name, String, position: 0, doc: "Name of this location"
|
32
|
-
field :longitude, Float, position: 1, doc: "Longitude (X) of a point, in decimal degrees"
|
33
|
-
field :latitude, Float, position: 2, doc: "Latitude (Y) of a point, in decimal degrees"
|
34
|
-
end
|
35
|
-
end
|
36
|
-
Wukong::TestPoint
|
37
|
-
end
|
38
|
-
subject{ point_klass.new("Infochimps HQ", aus_lng, aus_lat) }
|
39
|
-
|
40
|
-
context '#tile_xf' do
|
41
|
-
it "tile X coordinate, as a float" do
|
42
|
-
subject.tile_xf(3).should be_within(0.0001).of( 1.82758)
|
43
|
-
subject.tile_xf(8).should be_within(0.0001).of(58.48248)
|
44
|
-
subject.tile_xf(11).should be_within(0.0001).of(467.8598)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
context '#tile_x' do
|
48
|
-
it "tile X coordinate, as an integer" do
|
49
|
-
subject.tile_x(3).should == 1
|
50
|
-
subject.tile_x(8).should == 58
|
51
|
-
subject.tile_x(11).should == 467
|
52
|
-
end
|
53
|
-
end
|
54
|
-
context '#tile_yf' do
|
55
|
-
it "tile Y coordinate, as a float" do
|
56
|
-
subject.tile_yf(3).should be_within(0.0001).of( 3.29356)
|
57
|
-
subject.tile_yf(8).should be_within(0.0001).of(105.394051)
|
58
|
-
subject.tile_yf(11).should be_within(0.0001).of(843.152406)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
context '#tile_x' do
|
62
|
-
it "tile Y coordinate, as an integer" do
|
63
|
-
subject.tile_y(3).should == 3
|
64
|
-
subject.tile_y(8).should == 105
|
65
|
-
subject.tile_y(11).should == 843
|
66
|
-
end
|
67
|
-
end
|
68
|
-
context '#quadkey' do
|
69
|
-
it "a string of 2-bit tile selectors" do
|
70
|
-
subject.quadkey(3).should == "023"
|
71
|
-
subject.quadkey(16).should == "0231301203311211"
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
context Wukong::Geolocated do
|
77
|
-
|
78
|
-
it "gives private methods on including class as well as the methods on itself" do
|
79
|
-
klass = Class.new{ include Wukong::Geolocated }
|
80
|
-
klass.should be_private_method_defined(:lng_lat_zl_to_tile_xy)
|
81
|
-
klass.should be_private_method_defined(:haversine_distance)
|
82
|
-
end
|
83
|
-
|
84
|
-
#
|
85
|
-
# Tile coordinates
|
86
|
-
#
|
87
|
-
|
88
|
-
it "returns a map tile size given a zoom level" do
|
89
|
-
Wukong::Geolocated.map_tile_size(3).should == 8
|
90
|
-
end
|
91
|
-
|
92
|
-
it "returns a tile_x, tile_y pair given a longitude, latitude and zoom level" do
|
93
|
-
Wukong::Geolocated.lng_lat_zl_to_tile_xy(aus_lng, aus_lat, 8).should == [ 58, 105]
|
94
|
-
Wukong::Geolocated.lng_lat_zl_to_tile_xy(aus_lng, aus_lat, 11).should == [467, 843]
|
95
|
-
end
|
96
|
-
|
97
|
-
it "returns a longitude, latitude pair given tile_x, tile_y and zoom level" do
|
98
|
-
lng, lat = Wukong::Geolocated.tile_xy_zl_to_lng_lat(aus_tile_x_8, aus_tile_y_8, 8)
|
99
|
-
lng.should be_within(0.0001).of(aus_lng)
|
100
|
-
lat.should be_within(0.0001).of(aus_lat)
|
101
|
-
end
|
102
|
-
|
103
|
-
#
|
104
|
-
# Pixel coordinates
|
105
|
-
#
|
106
|
-
|
107
|
-
it "returns a map pizel size given a zoom level" do
|
108
|
-
Wukong::Geolocated.map_pixel_size(3).should == 2048
|
109
|
-
end
|
110
|
-
|
111
|
-
it "returns a pixel_x, pixel_y pair given a longitude, latitude and zoom level" do
|
112
|
-
Wukong::Geolocated.lng_lat_zl_to_pixel_xy(aus_lng, aus_lat, 3).should == [468, 843]
|
113
|
-
end
|
114
|
-
|
115
|
-
it "returns a longitude, latitude pair given pixel_x, pixel_y and zoom level" do
|
116
|
-
lng, lat = Wukong::Geolocated.pixel_xy_zl_to_lng_lat(aus_pixel_x_3, aus_pixel_y_3, 3)
|
117
|
-
lat.round(4).should == 30.2970
|
118
|
-
lng.round(4).should == -97.7344
|
119
|
-
end
|
120
|
-
|
121
|
-
it "returns a tile x-y pair given a pixel x-y pair" do
|
122
|
-
Wukong::Geolocated.pixel_xy_to_tile_xy(aus_pixel_x_3, aus_pixel_y_3).should == [1,3]
|
123
|
-
end
|
124
|
-
|
125
|
-
it "returns a pixel x-y pair given a float tile x-y pair" do
|
126
|
-
Wukong::Geolocated.tile_xy_to_pixel_xy(aus_tile_x_3, aus_tile_y_3 ).should == [467.86048, 843.15136]
|
127
|
-
end
|
128
|
-
|
129
|
-
it "returns a pixel x-y pair given an integer tile x-y pair" do
|
130
|
-
Wukong::Geolocated.tile_xy_to_pixel_xy(aus_tile_x_3.to_i, aus_tile_y_3.to_i).should == [256, 768]
|
131
|
-
end
|
132
|
-
|
133
|
-
#
|
134
|
-
# Quadkey coordinates
|
135
|
-
#
|
136
|
-
|
137
|
-
it "returns a quadkey given a tile x-y pair and a zoom level" do
|
138
|
-
Wukong::Geolocated.tile_xy_zl_to_quadkey(aus_tile_x_3, aus_tile_y_3, 3).should == "023"
|
139
|
-
Wukong::Geolocated.tile_xy_zl_to_quadkey(aus_tile_x_8, aus_tile_y_8, 8).should == "02313012"
|
140
|
-
Wukong::Geolocated.tile_xy_zl_to_quadkey(aus_tile_x_11, aus_tile_y_11,11).should == "02313012033"
|
141
|
-
end
|
142
|
-
|
143
|
-
it "returns a quadkey given a longitude, latitude and a zoom level" do
|
144
|
-
Wukong::Geolocated.lng_lat_zl_to_quadkey(aus_lng, aus_lat, 3).should == "023"
|
145
|
-
Wukong::Geolocated.lng_lat_zl_to_quadkey(aus_lng, aus_lat, 8).should == "02313012"
|
146
|
-
Wukong::Geolocated.lng_lat_zl_to_quadkey(aus_lng, aus_lat, 11).should == "02313012033"
|
147
|
-
Wukong::Geolocated.lng_lat_zl_to_quadkey(aus_lng, aus_lat, 16).should == "0231301203311211"
|
148
|
-
end
|
149
|
-
|
150
|
-
it "returns a packed quadkey (an integer) given a tile xy and zoom level" do
|
151
|
-
Wukong::Geolocated.tile_xy_zl_to_packed_qk(aus_tile_x_3.floor, aus_tile_y_3.floor, 3).should == "023".to_i(4)
|
152
|
-
Wukong::Geolocated.tile_xy_zl_to_packed_qk(aus_tile_x_8.floor, aus_tile_y_8.floor, 8).should == "02313012".to_i(4)
|
153
|
-
Wukong::Geolocated.tile_xy_zl_to_packed_qk(aus_tile_x_11.floor, aus_tile_y_11.floor,11).should == "02313012033".to_i(4)
|
154
|
-
end
|
155
|
-
|
156
|
-
context '.packed_qk_zl_to_tile_xy' do
|
157
|
-
let(:packed_qk){ "0231301203311211".to_i(4) }
|
158
|
-
it "returns a tile xy given a packed quadkey (integer)" do
|
159
|
-
Wukong::Geolocated.packed_qk_zl_to_tile_xy(packed_qk >> 26, 3).should == [ 1, 3, 3]
|
160
|
-
Wukong::Geolocated.packed_qk_zl_to_tile_xy(packed_qk >> 16, 8).should == [ 58, 105, 8]
|
161
|
-
Wukong::Geolocated.packed_qk_zl_to_tile_xy(packed_qk >> 10, 11).should == [467, 843, 11]
|
162
|
-
end
|
163
|
-
|
164
|
-
it "defaults to zl=16 for packed quadkeys" do
|
165
|
-
Wukong::Geolocated.packed_qk_zl_to_tile_xy(packed_qk ).should == [14971, 26980, 16]
|
166
|
-
Wukong::Geolocated.packed_qk_zl_to_tile_xy(packed_qk, 16).should == [14971, 26980, 16]
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
it "returns tile x-y pair and a zoom level given a quadkey" do
|
171
|
-
Wukong::Geolocated.quadkey_to_tile_xy_zl(aus_quadkey[0..2] ).should == [1, 3, 3]
|
172
|
-
Wukong::Geolocated.quadkey_to_tile_xy_zl(aus_quadkey[0..7] ).should == [aus_tile_x_8.floor, aus_tile_y_8.floor, 8]
|
173
|
-
Wukong::Geolocated.quadkey_to_tile_xy_zl(aus_quadkey[0..10]).should == [aus_tile_x_11.floor, aus_tile_y_11.floor, 11]
|
174
|
-
end
|
175
|
-
|
176
|
-
it "allows '' to be a quadkey (whole map)" do
|
177
|
-
Wukong::Geolocated.quadkey_to_tile_xy_zl("").should == [0, 0, 0]
|
178
|
-
end
|
179
|
-
|
180
|
-
it "maps tile xyz [0,0,0] to quadkey ''" do
|
181
|
-
Wukong::Geolocated.tile_xy_zl_to_quadkey(0,0,0).should == ""
|
182
|
-
end
|
183
|
-
|
184
|
-
it "throws an error if a bad quadkey is given" do
|
185
|
-
expect{ Wukong::Geolocated.quadkey_to_tile_xy_zl("bad_key") }.to raise_error(ArgumentError, /Quadkey.*characters/)
|
186
|
-
end
|
187
|
-
|
188
|
-
it "returns a bounding box given a quadkey" do
|
189
|
-
left, btm, right, top = Wukong::Geolocated.quadkey_to_bbox(aus_quadkey_3)
|
190
|
-
left.should be_within(0.0001).of(-135.0)
|
191
|
-
right.should be_within(0.0001).of(- 90.0)
|
192
|
-
btm.should be_within(0.0001).of( 0.0)
|
193
|
-
top.should be_within(0.0001).of( 40.9799)
|
194
|
-
end
|
195
|
-
|
196
|
-
it "returns the smallest quadkey containing two points" do
|
197
|
-
Wukong::Geolocated.quadkey_containing_bbox(aus_lng, aus_lat, sat_lng, sat_lat).should == "023130"
|
198
|
-
end
|
199
|
-
|
200
|
-
it "returns a bounding box given a point and radius" do
|
201
|
-
left, btm, right, top = Wukong::Geolocated.lng_lat_rad_to_bbox(aus_lng, aus_lat, radius)
|
202
|
-
|
203
|
-
left.should be_within(0.0001).of(-108.1723)
|
204
|
-
right.should be_within(0.0001).of(- 87.3457)
|
205
|
-
btm.should be_within(0.0001).of( 21.2807)
|
206
|
-
top.should be_within(0.0001).of( 39.2671)
|
207
|
-
end
|
208
|
-
|
209
|
-
it "returns a centroid given a bounding box" do
|
210
|
-
mid_lng, mid_lat = Wukong::Geolocated.bbox_centroid([aus_lng, sat_lat], [sat_lng, aus_lat])
|
211
|
-
mid_lng.should be_within(0.0001).of(-98.1241)
|
212
|
-
mid_lat.should be_within(0.0001).of( 29.8503)
|
213
|
-
end
|
214
|
-
|
215
|
-
it "returns a pixel resolution given a latitude and zoom level" do
|
216
|
-
Wukong::Geolocated.pixel_resolution(aus_lat, 3).should be_within(0.0001).of(16880.4081)
|
217
|
-
end
|
218
|
-
|
219
|
-
it "returns a map scale given a latitude, zoom level and dpi" do
|
220
|
-
Wukong::Geolocated.map_scale_for_dpi(aus_lat, 3, dpi).should be_within(0.0001).of(47849975.8302)
|
221
|
-
end
|
222
|
-
|
223
|
-
it "calculates the haversine distance between two points" do
|
224
|
-
Wukong::Geolocated.haversine_distance(aus_lng, aus_lat, sat_lng, sat_lat).should be_within(0.0001).of(117522.1219)
|
225
|
-
end
|
226
|
-
|
227
|
-
it "calculates the haversine midpoint between two points" do
|
228
|
-
lng, lat = Wukong::Geolocated.haversine_midpoint(aus_lng, sat_lat, sat_lng, aus_lat)
|
229
|
-
lng.should be_within(0.0001).of(-98.1241)
|
230
|
-
lat.should be_within(0.0001).of( 29.8503)
|
231
|
-
end
|
232
|
-
|
233
|
-
it "calculates the point a given distance directly north from a lat/lng" do
|
234
|
-
lng, lat = Wukong::Geolocated.point_north(aus_lng, aus_lat, 1000000)
|
235
|
-
lng.should be_within(0.0001).of(-97.7590)
|
236
|
-
lat.should be_within(0.0001).of( 39.2671)
|
237
|
-
end
|
238
|
-
|
239
|
-
it "calculates the point a given distance directly east from a lat/lng" do
|
240
|
-
lng, lat = Wukong::Geolocated.point_east(aus_lng, aus_lat, 1000000)
|
241
|
-
lng.should be_within(0.0001).of(-87.3457)
|
242
|
-
lat.should be_within(0.0001).of( 30.2739)
|
243
|
-
end
|
244
|
-
|
245
|
-
|
246
|
-
end # module methods
|
247
|
-
end
|