wukong 3.0.0.pre3 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,109 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu-local' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { wu_local() }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr(/provide a processor.*to run/)
12
+ end
13
+ end
14
+
15
+ context "running outside any Ruby project" do
16
+ subject { wu_local('count').in(examples_dir('empty')) < input }
17
+ it { should exit_with(0) }
18
+ it "runs the processor" do
19
+ should have_stdout("3")
20
+ end
21
+ context "when passed a BUNDLE_GEMFILE" do
22
+ context "that doesn't belong to a deploy pack" do
23
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
24
+ it { should exit_with(0) }
25
+ it "runs the processor" do
26
+ should have_stdout("3")
27
+ end
28
+ end
29
+ context "that belongs to a deploy pack" do
30
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
31
+ it { should exit_with(0) }
32
+ it "runs the processor" do
33
+ should have_stdout("3")
34
+ end
35
+ context "loading the deploy pack" do
36
+ subject { wu_local('string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
37
+ it { should exit_with(0) }
38
+ it "runs the processor" do
39
+ should have_stdout("ih")
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ context "running within a Ruby project" do
47
+ context "at its root" do
48
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
49
+ it { should exit_with(0) }
50
+ it "runs the processor" do
51
+ should have_stdout("3")
52
+ end
53
+ end
54
+ context "deep within it" do
55
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
56
+ it { should exit_with(0) }
57
+ it "runs the processor" do
58
+ should have_stdout("3")
59
+ end
60
+ end
61
+ end
62
+
63
+ context "running within a deploy pack" do
64
+ context "at its root" do
65
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
66
+ it { should exit_with(0) }
67
+ it "runs the processor" do
68
+ should have_stdout("3")
69
+ end
70
+ context "loading the deploy pack" do
71
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
72
+ it { should exit_with(0) }
73
+ it "runs the processor" do
74
+ should have_stdout("ih")
75
+ end
76
+ end
77
+ end
78
+ context "deep within it" do
79
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
80
+ it { should exit_with(0) }
81
+ it "runs the processor" do
82
+ should have_stdout("3")
83
+ end
84
+ context "loading the deploy pack" do
85
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
86
+ it { should exit_with(0) }
87
+ it "runs the processor" do
88
+ should have_stdout("ih")
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ context "parsing records with the --from argument" do
95
+ subject { wu_local('identity --from=tsv') < "1\t2\t3" }
96
+ it { should exit_with(0) }
97
+ it "should emit a row for each input column" do
98
+ should have_stdout("1\n2\n3")
99
+ end
100
+ end
101
+
102
+ context "formatting records with the --to argument" do
103
+ subject { wu_local('identity --from=tsv --to=json') < "1\t2\t3" }
104
+ it { should exit_with(0) }
105
+ it "should emit a single JSON array for the input record" do
106
+ should have_stdout('["1","2","3"]')
107
+ end
108
+ end
109
+ end
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre3
5
- prerelease: 6
4
+ version: 3.0.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Infochimps
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-12-17 00:00:00.000000000 Z
14
+ date: 2013-02-20 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: configliere
@@ -181,8 +181,9 @@ files:
181
181
  - bin/wu-lign
182
182
  - bin/wu-local
183
183
  - bin/wu-plus
184
- - bin/wu-server
185
184
  - bin/wu-sum
185
+ - diagrams/wu_local.dot
186
+ - diagrams/wu_local.dot.png
186
187
  - examples/Gemfile
187
188
  - examples/README.md
188
189
  - examples/basic/string_reverser.rb
@@ -214,6 +215,7 @@ files:
214
215
  - examples/geo/tile_fetcher.rb
215
216
  - examples/graph/implied_geolocation/README.md
216
217
  - examples/graph/minimum_spanning_tree/airfares_graphviz.rb
218
+ - examples/loadable.rb
217
219
  - examples/munging/airline_flights/airline_flights.rake
218
220
  - examples/munging/airline_flights/airplane.rb
219
221
  - examples/munging/airline_flights/airport_id_unification.rb
@@ -264,6 +266,7 @@ files:
264
266
  - examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
265
267
  - examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
266
268
  - examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
269
+ - examples/string_reverser.rb
267
270
  - examples/text/pig_latin/pig_latinizer.rb
268
271
  - examples/text/pig_latin/pig_latinizer_widget.rb
269
272
  - examples/text/regional_flavor/README.md
@@ -288,11 +291,17 @@ files:
288
291
  - lib/wu/social/models/twitter.rb
289
292
  - lib/wu/wikipedia/models.rb
290
293
  - lib/wukong.rb
291
- - lib/wukong/boot.rb
292
- - lib/wukong/configuration.rb
293
294
  - lib/wukong/dataflow.rb
295
+ - lib/wukong/doc_helpers.rb
296
+ - lib/wukong/doc_helpers/dataflow_handler.rb
297
+ - lib/wukong/doc_helpers/field_handler.rb
298
+ - lib/wukong/doc_helpers/processor_handler.rb
294
299
  - lib/wukong/driver.rb
295
- - lib/wukong/emitter.rb
300
+ - lib/wukong/local.rb
301
+ - lib/wukong/local/event_machine_driver.rb
302
+ - lib/wukong/local/runner.rb
303
+ - lib/wukong/local/stdio_driver.rb
304
+ - lib/wukong/local/tcp_driver.rb
296
305
  - lib/wukong/logger.rb
297
306
  - lib/wukong/model/faker.rb
298
307
  - lib/wukong/model/flatpack_parser/flat.rb
@@ -300,17 +309,23 @@ files:
300
309
  - lib/wukong/model/flatpack_parser/lang.rb
301
310
  - lib/wukong/model/flatpack_parser/parser.rb
302
311
  - lib/wukong/model/flatpack_parser/tokens.rb
312
+ - lib/wukong/plugin.rb
303
313
  - lib/wukong/processor.rb
314
+ - lib/wukong/rake_helper.rb
304
315
  - lib/wukong/runner.rb
305
- - lib/wukong/server.rb
316
+ - lib/wukong/runner/boot_sequence.rb
317
+ - lib/wukong/runner/code_loader.rb
318
+ - lib/wukong/runner/deploy_pack_loader.rb
319
+ - lib/wukong/runner/help_message.rb
306
320
  - lib/wukong/spec_helpers.rb
307
- - lib/wukong/spec_helpers/integration_driver.rb
308
- - lib/wukong/spec_helpers/integration_driver_matchers.rb
309
- - lib/wukong/spec_helpers/processor_helpers.rb
310
- - lib/wukong/spec_helpers/processor_methods.rb
321
+ - lib/wukong/spec_helpers/integration_tests.rb
322
+ - lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb
323
+ - lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb
311
324
  - lib/wukong/spec_helpers/shared_examples.rb
312
- - lib/wukong/spec_helpers/spec_driver.rb
313
- - lib/wukong/spec_helpers/spec_driver_matchers.rb
325
+ - lib/wukong/spec_helpers/unit_tests.rb
326
+ - lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb
327
+ - lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb
328
+ - lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
314
329
  - lib/wukong/version.rb
315
330
  - lib/wukong/widget/filters.rb
316
331
  - lib/wukong/widget/processors.rb
@@ -351,8 +366,12 @@ files:
351
366
  - spec/wu/geo/geolocated_spec.rb
352
367
  - spec/wu/model/reconcilable_spec.rb
353
368
  - spec/wukong/dataflow_spec.rb
354
- - spec/wukong/local_runner_spec.rb
369
+ - spec/wukong/driver_spec.rb
370
+ - spec/wukong/local/runner_spec.rb
371
+ - spec/wukong/local_spec.rb
372
+ - spec/wukong/logger_spec.rb
355
373
  - spec/wukong/model/faker_spec.rb
374
+ - spec/wukong/processor_spec.rb
356
375
  - spec/wukong/runner_spec.rb
357
376
  - spec/wukong/widget/filters_spec.rb
358
377
  - spec/wukong/widget/processors_spec.rb
@@ -364,7 +383,7 @@ files:
364
383
  - spec/wukong/widget/serializers_spec.rb
365
384
  - spec/wukong/widget/sink_spec.rb
366
385
  - spec/wukong/widget/source_spec.rb
367
- - spec/wukong/wu_local_spec.rb
386
+ - spec/wukong/wu-local_spec.rb
368
387
  - spec/wukong/wukong_spec.rb
369
388
  - wukong.gemspec
370
389
  homepage: https://github.com/infochimps-labs/wukong
@@ -383,9 +402,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
383
402
  required_rubygems_version: !ruby/object:Gem::Requirement
384
403
  none: false
385
404
  requirements:
386
- - - ! '>'
405
+ - - ! '>='
387
406
  - !ruby/object:Gem::Version
388
- version: 1.3.1
407
+ version: '0'
389
408
  requirements: []
390
409
  rubyforge_project:
391
410
  rubygems_version: 1.8.23
@@ -420,8 +439,12 @@ test_files:
420
439
  - spec/wu/geo/geolocated_spec.rb
421
440
  - spec/wu/model/reconcilable_spec.rb
422
441
  - spec/wukong/dataflow_spec.rb
423
- - spec/wukong/local_runner_spec.rb
442
+ - spec/wukong/driver_spec.rb
443
+ - spec/wukong/local/runner_spec.rb
444
+ - spec/wukong/local_spec.rb
445
+ - spec/wukong/logger_spec.rb
424
446
  - spec/wukong/model/faker_spec.rb
447
+ - spec/wukong/processor_spec.rb
425
448
  - spec/wukong/runner_spec.rb
426
449
  - spec/wukong/widget/filters_spec.rb
427
450
  - spec/wukong/widget/processors_spec.rb
@@ -433,6 +456,6 @@ test_files:
433
456
  - spec/wukong/widget/serializers_spec.rb
434
457
  - spec/wukong/widget/sink_spec.rb
435
458
  - spec/wukong/widget/source_spec.rb
436
- - spec/wukong/wu_local_spec.rb
459
+ - spec/wukong/wu-local_spec.rb
437
460
  - spec/wukong/wukong_spec.rb
438
461
  has_rdoc:
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'configliere'
3
- require 'wukong'
4
- require 'eventmachine'
5
- require 'em-synchrony'
6
- require 'multi_json'
7
-
8
- Settings({
9
- host: "localhost",
10
- port: 9500
11
- })
12
-
13
- Settings.use :commandline
14
- Settings.resolve!
15
-
16
- # Load the file on the command line
17
- wu_file = ARGV.shift
18
- load wu_file
19
- $processor = File.basename(wu_file, '.rb').to_sym
20
-
21
- class Wukong::Server
22
- def prepare(options = {})
23
- dataflow_class = Wukong.dataflow(:server){ send(options[:processor]) }
24
- flow_builder = Wukong.registry.retrieve(:server)
25
- flow = flow_builder.build(processor: $processor)
26
-
27
- @buffer = []
28
- @processor = flow.stages.values.first
29
- @processor.emitter = ->(value){ @buffer << value }
30
- end
31
-
32
- def process(record)
33
- @process.process(record)
34
- end
35
-
36
- def cleanup
37
- end
38
- end
39
-
40
-
41
- # EventMachine server
42
-
43
- class WukongMachine < EM::Protocols::LineAndTextProtocol
44
- def post_init
45
- puts "[server] Client connected"
46
- @wukong = WukongInterface.new
47
- @wukong.prepare(processor: $processor)
48
- end
49
-
50
- def receive_data(data)
51
- @buffer = []
52
- input = MultiJson.load data
53
-
54
- op = proc { @wukong.process(input) }
55
- callback = proc { send_data MultiJson.dump(@buffer) + "\n" }
56
- EM.defer(op, callback)
57
-
58
- rescue MultiJson::DecodeError => ex
59
- STDERR.puts "[server] Dropped: Malformed request"
60
- end
61
-
62
- def unbind
63
- puts "[server] Client disconnected."
64
- end
65
- end
66
-
67
- EM::run {
68
- EM::start_server(Settings[:host], Settings[:port], WukongMachine)
69
- puts "Listening on #{Settings[:host]}:#{Settings[:port]}"
70
- }
@@ -1,96 +0,0 @@
1
- module Wukong
2
-
3
- # Boots Wukong, reading any and all relevant `settings` for your
4
- # processors.
5
- #
6
- # This is most useful for any script which wants to run within a
7
- # deploy. See Wukong::Deploy for more details.
8
- #
9
- # @param [Configliere::Param] settings the relevant settings object that should be configured to boot within a deploy pack
10
- def self.boot! settings
11
- # First we load the deploy pack's environment, but only if we seem
12
- # to be running within a deploy pack.
13
- Boot.load_environment if in_deploy_pack?
14
-
15
- # Next pass the `settings` to the Deploy pack itself to add
16
- # options it needs.
17
- Deploy.configure(settings) if loaded_deploy_pack?
18
-
19
- # Resolve the `settings` so we can capture all the options we need
20
- # from the command line, the environment, &c.
21
- settings.resolve!
22
-
23
- # Now boot the deploy pack itself, passing in its known location
24
- # and the `settings`
25
- Deploy.boot!(settings, Boot.deploy_pack_dir) if loaded_deploy_pack?
26
- end
27
-
28
- # Is execution likely happening within a deploy pack?
29
- #
30
- # See Wukong::Deploy for more information on deploy packs.
31
- #
32
- # @return [true, false]
33
- def self.in_deploy_pack?
34
- return @in_deploy_pack unless @in_deploy_pack.nil?
35
- @in_deploy_pack = (Boot.deploy_pack_dir != '/')
36
- end
37
-
38
- # Have we already loaded the environment of a deploy pack?
39
- #
40
- # See Wukong::Deploy for more information on deploy packs.
41
- #
42
- # @return [true, false]
43
- def self.loaded_deploy_pack?
44
- in_deploy_pack? && defined?(::Wukong::Deploy)
45
- end
46
-
47
- # Lets Wukong bootstrap by requiring an enclosing deploy pack's
48
- # environment file if available.
49
- #
50
- # We use a simple heuristic (presence of 'Gemfile' and
51
- # 'config/environment.rb' in a non-root parent directory) to
52
- # determine whether or not we are in a deploy pack.
53
- module Boot
54
-
55
- # Return the directory of the enclosing deploy pack. Will return
56
- # the root ('/') if no deeper directory is identified as a deploy
57
- # pack.
58
- #
59
- # @return [String]
60
- def self.deploy_pack_dir
61
- return File.dirname(ENV["BUNDLE_GEMFILE"]) if ENV["BUNDLE_GEMFILE"] && is_deploy_pack_dir?(File.dirname(ENV["BUNDLE_GEMFILE"]))
62
- return @deploy_pack_dir if @deploy_pack_dir
63
- wd = Dir.pwd
64
- parent = File.dirname(wd)
65
- until wd == parent
66
- return wd if is_deploy_pack_dir?(wd)
67
- wd = parent
68
- parent = File.dirname(wd)
69
- end
70
- @deploy_pack_dir = wd
71
- end
72
-
73
- # The default environment file that will be require'd when
74
- # booting.
75
- #
76
- # @return [String]
77
- def self.environment_file
78
- File.join(deploy_pack_dir, 'config', 'environment.rb')
79
- end
80
-
81
- # Load the actual deploy pack environment. Will not swallow any
82
- # load errors.
83
- def self.load_environment
84
- require environment_file
85
- end
86
-
87
- # Could `dir` be a deploy pack dir?
88
- #
89
- # @param [String] dir
90
- # @return [true, false]
91
- def self.is_deploy_pack_dir? dir
92
- dir && !dir.empty? && File.directory?(dir) && File.exist?(File.join(dir, 'Gemfile')) && File.exist?(File.join(dir, 'config', 'environment.rb'))
93
- end
94
- end
95
-
96
- end
@@ -1,8 +0,0 @@
1
- module Wukong
2
- module Local
3
-
4
- # All local configuration for Wukong lives within this object.
5
- Configuration = Configliere::Param.new unless defined? Configuration
6
-
7
- end
8
- end
@@ -1,22 +0,0 @@
1
- module Wukong
2
- class Emitter < Hanuman::DirectedLink
3
-
4
- def initialize(from, into)
5
- @from = from
6
- @into = into
7
- @from.write_attribute(:emitter, self)
8
- end
9
-
10
- def call(record)
11
- into.process(record)
12
- end
13
-
14
- def to_s
15
- "#<#{self.class}(#{from.label} -> #{into.label})>"
16
- end
17
-
18
- end
19
-
20
- Hanuman::LinkFactory.register(:emitter, ->(from_stage, into_stage){ Wukong::Emitter.new(from_stage, into_stage) })
21
-
22
- end