wukong 3.0.0.pre3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/Gemfile +1 -0
  2. data/README.md +689 -50
  3. data/bin/wu-local +1 -74
  4. data/diagrams/wu_local.dot +39 -0
  5. data/diagrams/wu_local.dot.png +0 -0
  6. data/examples/loadable.rb +2 -0
  7. data/examples/string_reverser.rb +7 -0
  8. data/lib/hanuman/stage.rb +2 -2
  9. data/lib/wukong.rb +21 -10
  10. data/lib/wukong/dataflow.rb +2 -5
  11. data/lib/wukong/doc_helpers.rb +14 -0
  12. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  13. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  14. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  15. data/lib/wukong/driver.rb +11 -1
  16. data/lib/wukong/local.rb +40 -0
  17. data/lib/wukong/local/event_machine_driver.rb +27 -0
  18. data/lib/wukong/local/runner.rb +98 -0
  19. data/lib/wukong/local/stdio_driver.rb +44 -0
  20. data/lib/wukong/local/tcp_driver.rb +47 -0
  21. data/lib/wukong/logger.rb +16 -7
  22. data/lib/wukong/plugin.rb +48 -0
  23. data/lib/wukong/processor.rb +57 -15
  24. data/lib/wukong/rake_helper.rb +6 -0
  25. data/lib/wukong/runner.rb +151 -128
  26. data/lib/wukong/runner/boot_sequence.rb +123 -0
  27. data/lib/wukong/runner/code_loader.rb +52 -0
  28. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  29. data/lib/wukong/runner/help_message.rb +42 -0
  30. data/lib/wukong/spec_helpers.rb +4 -12
  31. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  32. data/lib/wukong/spec_helpers/{integration_driver_matchers.rb → integration_tests/integration_test_matchers.rb} +28 -62
  33. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  34. data/lib/wukong/spec_helpers/shared_examples.rb +19 -10
  35. data/lib/wukong/spec_helpers/unit_tests.rb +134 -0
  36. data/lib/wukong/spec_helpers/{processor_methods.rb → unit_tests/unit_test_driver.rb} +42 -8
  37. data/lib/wukong/spec_helpers/{spec_driver_matchers.rb → unit_tests/unit_test_matchers.rb} +6 -32
  38. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +54 -0
  39. data/lib/wukong/version.rb +1 -1
  40. data/lib/wukong/widget/filters.rb +134 -8
  41. data/lib/wukong/widget/processors.rb +64 -5
  42. data/lib/wukong/widget/reducers/bin.rb +68 -18
  43. data/lib/wukong/widget/reducers/count.rb +12 -0
  44. data/lib/wukong/widget/reducers/group.rb +48 -5
  45. data/lib/wukong/widget/reducers/group_concat.rb +30 -2
  46. data/lib/wukong/widget/reducers/moments.rb +4 -4
  47. data/lib/wukong/widget/reducers/sort.rb +53 -3
  48. data/lib/wukong/widget/serializers.rb +37 -12
  49. data/lib/wukong/widget/utils.rb +1 -1
  50. data/spec/spec_helper.rb +20 -2
  51. data/spec/wukong/driver_spec.rb +2 -0
  52. data/spec/wukong/local/runner_spec.rb +40 -0
  53. data/spec/wukong/local_spec.rb +6 -0
  54. data/spec/wukong/logger_spec.rb +49 -0
  55. data/spec/wukong/processor_spec.rb +22 -0
  56. data/spec/wukong/runner_spec.rb +128 -8
  57. data/spec/wukong/widget/filters_spec.rb +28 -10
  58. data/spec/wukong/widget/processors_spec.rb +5 -5
  59. data/spec/wukong/widget/reducers/bin_spec.rb +14 -14
  60. data/spec/wukong/widget/reducers/count_spec.rb +1 -1
  61. data/spec/wukong/widget/reducers/group_spec.rb +7 -6
  62. data/spec/wukong/widget/reducers/moments_spec.rb +2 -2
  63. data/spec/wukong/widget/reducers/sort_spec.rb +1 -1
  64. data/spec/wukong/widget/serializers_spec.rb +84 -88
  65. data/spec/wukong/wu-local_spec.rb +109 -0
  66. metadata +43 -20
  67. data/bin/wu-server +0 -70
  68. data/lib/wukong/boot.rb +0 -96
  69. data/lib/wukong/configuration.rb +0 -8
  70. data/lib/wukong/emitter.rb +0 -22
  71. data/lib/wukong/server.rb +0 -119
  72. data/lib/wukong/spec_helpers/integration_driver.rb +0 -157
  73. data/lib/wukong/spec_helpers/processor_helpers.rb +0 -89
  74. data/lib/wukong/spec_helpers/spec_driver.rb +0 -28
  75. data/spec/wukong/local_runner_spec.rb +0 -31
  76. data/spec/wukong/wu_local_spec.rb +0 -125
@@ -0,0 +1,109 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu-local' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { wu_local() }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr(/provide a processor.*to run/)
12
+ end
13
+ end
14
+
15
+ context "running outside any Ruby project" do
16
+ subject { wu_local('count').in(examples_dir('empty')) < input }
17
+ it { should exit_with(0) }
18
+ it "runs the processor" do
19
+ should have_stdout("3")
20
+ end
21
+ context "when passed a BUNDLE_GEMFILE" do
22
+ context "that doesn't belong to a deploy pack" do
23
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
24
+ it { should exit_with(0) }
25
+ it "runs the processor" do
26
+ should have_stdout("3")
27
+ end
28
+ end
29
+ context "that belongs to a deploy pack" do
30
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
31
+ it { should exit_with(0) }
32
+ it "runs the processor" do
33
+ should have_stdout("3")
34
+ end
35
+ context "loading the deploy pack" do
36
+ subject { wu_local('string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
37
+ it { should exit_with(0) }
38
+ it "runs the processor" do
39
+ should have_stdout("ih")
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ context "running within a Ruby project" do
47
+ context "at its root" do
48
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
49
+ it { should exit_with(0) }
50
+ it "runs the processor" do
51
+ should have_stdout("3")
52
+ end
53
+ end
54
+ context "deep within it" do
55
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
56
+ it { should exit_with(0) }
57
+ it "runs the processor" do
58
+ should have_stdout("3")
59
+ end
60
+ end
61
+ end
62
+
63
+ context "running within a deploy pack" do
64
+ context "at its root" do
65
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
66
+ it { should exit_with(0) }
67
+ it "runs the processor" do
68
+ should have_stdout("3")
69
+ end
70
+ context "loading the deploy pack" do
71
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
72
+ it { should exit_with(0) }
73
+ it "runs the processor" do
74
+ should have_stdout("ih")
75
+ end
76
+ end
77
+ end
78
+ context "deep within it" do
79
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
80
+ it { should exit_with(0) }
81
+ it "runs the processor" do
82
+ should have_stdout("3")
83
+ end
84
+ context "loading the deploy pack" do
85
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
86
+ it { should exit_with(0) }
87
+ it "runs the processor" do
88
+ should have_stdout("ih")
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ context "parsing records with the --from argument" do
95
+ subject { wu_local('identity --from=tsv') < "1\t2\t3" }
96
+ it { should exit_with(0) }
97
+ it "should emit a row for each input column" do
98
+ should have_stdout("1\n2\n3")
99
+ end
100
+ end
101
+
102
+ context "formatting records with the --to argument" do
103
+ subject { wu_local('identity --from=tsv --to=json') < "1\t2\t3" }
104
+ it { should exit_with(0) }
105
+ it "should emit a single JSON array for the input record" do
106
+ should have_stdout('["1","2","3"]')
107
+ end
108
+ end
109
+ end
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre3
5
- prerelease: 6
4
+ version: 3.0.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Infochimps
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-12-17 00:00:00.000000000 Z
14
+ date: 2013-02-20 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: configliere
@@ -181,8 +181,9 @@ files:
181
181
  - bin/wu-lign
182
182
  - bin/wu-local
183
183
  - bin/wu-plus
184
- - bin/wu-server
185
184
  - bin/wu-sum
185
+ - diagrams/wu_local.dot
186
+ - diagrams/wu_local.dot.png
186
187
  - examples/Gemfile
187
188
  - examples/README.md
188
189
  - examples/basic/string_reverser.rb
@@ -214,6 +215,7 @@ files:
214
215
  - examples/geo/tile_fetcher.rb
215
216
  - examples/graph/implied_geolocation/README.md
216
217
  - examples/graph/minimum_spanning_tree/airfares_graphviz.rb
218
+ - examples/loadable.rb
217
219
  - examples/munging/airline_flights/airline_flights.rake
218
220
  - examples/munging/airline_flights/airplane.rb
219
221
  - examples/munging/airline_flights/airport_id_unification.rb
@@ -264,6 +266,7 @@ files:
264
266
  - examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
265
267
  - examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
266
268
  - examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
269
+ - examples/string_reverser.rb
267
270
  - examples/text/pig_latin/pig_latinizer.rb
268
271
  - examples/text/pig_latin/pig_latinizer_widget.rb
269
272
  - examples/text/regional_flavor/README.md
@@ -288,11 +291,17 @@ files:
288
291
  - lib/wu/social/models/twitter.rb
289
292
  - lib/wu/wikipedia/models.rb
290
293
  - lib/wukong.rb
291
- - lib/wukong/boot.rb
292
- - lib/wukong/configuration.rb
293
294
  - lib/wukong/dataflow.rb
295
+ - lib/wukong/doc_helpers.rb
296
+ - lib/wukong/doc_helpers/dataflow_handler.rb
297
+ - lib/wukong/doc_helpers/field_handler.rb
298
+ - lib/wukong/doc_helpers/processor_handler.rb
294
299
  - lib/wukong/driver.rb
295
- - lib/wukong/emitter.rb
300
+ - lib/wukong/local.rb
301
+ - lib/wukong/local/event_machine_driver.rb
302
+ - lib/wukong/local/runner.rb
303
+ - lib/wukong/local/stdio_driver.rb
304
+ - lib/wukong/local/tcp_driver.rb
296
305
  - lib/wukong/logger.rb
297
306
  - lib/wukong/model/faker.rb
298
307
  - lib/wukong/model/flatpack_parser/flat.rb
@@ -300,17 +309,23 @@ files:
300
309
  - lib/wukong/model/flatpack_parser/lang.rb
301
310
  - lib/wukong/model/flatpack_parser/parser.rb
302
311
  - lib/wukong/model/flatpack_parser/tokens.rb
312
+ - lib/wukong/plugin.rb
303
313
  - lib/wukong/processor.rb
314
+ - lib/wukong/rake_helper.rb
304
315
  - lib/wukong/runner.rb
305
- - lib/wukong/server.rb
316
+ - lib/wukong/runner/boot_sequence.rb
317
+ - lib/wukong/runner/code_loader.rb
318
+ - lib/wukong/runner/deploy_pack_loader.rb
319
+ - lib/wukong/runner/help_message.rb
306
320
  - lib/wukong/spec_helpers.rb
307
- - lib/wukong/spec_helpers/integration_driver.rb
308
- - lib/wukong/spec_helpers/integration_driver_matchers.rb
309
- - lib/wukong/spec_helpers/processor_helpers.rb
310
- - lib/wukong/spec_helpers/processor_methods.rb
321
+ - lib/wukong/spec_helpers/integration_tests.rb
322
+ - lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb
323
+ - lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb
311
324
  - lib/wukong/spec_helpers/shared_examples.rb
312
- - lib/wukong/spec_helpers/spec_driver.rb
313
- - lib/wukong/spec_helpers/spec_driver_matchers.rb
325
+ - lib/wukong/spec_helpers/unit_tests.rb
326
+ - lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb
327
+ - lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb
328
+ - lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
314
329
  - lib/wukong/version.rb
315
330
  - lib/wukong/widget/filters.rb
316
331
  - lib/wukong/widget/processors.rb
@@ -351,8 +366,12 @@ files:
351
366
  - spec/wu/geo/geolocated_spec.rb
352
367
  - spec/wu/model/reconcilable_spec.rb
353
368
  - spec/wukong/dataflow_spec.rb
354
- - spec/wukong/local_runner_spec.rb
369
+ - spec/wukong/driver_spec.rb
370
+ - spec/wukong/local/runner_spec.rb
371
+ - spec/wukong/local_spec.rb
372
+ - spec/wukong/logger_spec.rb
355
373
  - spec/wukong/model/faker_spec.rb
374
+ - spec/wukong/processor_spec.rb
356
375
  - spec/wukong/runner_spec.rb
357
376
  - spec/wukong/widget/filters_spec.rb
358
377
  - spec/wukong/widget/processors_spec.rb
@@ -364,7 +383,7 @@ files:
364
383
  - spec/wukong/widget/serializers_spec.rb
365
384
  - spec/wukong/widget/sink_spec.rb
366
385
  - spec/wukong/widget/source_spec.rb
367
- - spec/wukong/wu_local_spec.rb
386
+ - spec/wukong/wu-local_spec.rb
368
387
  - spec/wukong/wukong_spec.rb
369
388
  - wukong.gemspec
370
389
  homepage: https://github.com/infochimps-labs/wukong
@@ -383,9 +402,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
383
402
  required_rubygems_version: !ruby/object:Gem::Requirement
384
403
  none: false
385
404
  requirements:
386
- - - ! '>'
405
+ - - ! '>='
387
406
  - !ruby/object:Gem::Version
388
- version: 1.3.1
407
+ version: '0'
389
408
  requirements: []
390
409
  rubyforge_project:
391
410
  rubygems_version: 1.8.23
@@ -420,8 +439,12 @@ test_files:
420
439
  - spec/wu/geo/geolocated_spec.rb
421
440
  - spec/wu/model/reconcilable_spec.rb
422
441
  - spec/wukong/dataflow_spec.rb
423
- - spec/wukong/local_runner_spec.rb
442
+ - spec/wukong/driver_spec.rb
443
+ - spec/wukong/local/runner_spec.rb
444
+ - spec/wukong/local_spec.rb
445
+ - spec/wukong/logger_spec.rb
424
446
  - spec/wukong/model/faker_spec.rb
447
+ - spec/wukong/processor_spec.rb
425
448
  - spec/wukong/runner_spec.rb
426
449
  - spec/wukong/widget/filters_spec.rb
427
450
  - spec/wukong/widget/processors_spec.rb
@@ -433,6 +456,6 @@ test_files:
433
456
  - spec/wukong/widget/serializers_spec.rb
434
457
  - spec/wukong/widget/sink_spec.rb
435
458
  - spec/wukong/widget/source_spec.rb
436
- - spec/wukong/wu_local_spec.rb
459
+ - spec/wukong/wu-local_spec.rb
437
460
  - spec/wukong/wukong_spec.rb
438
461
  has_rdoc:
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'configliere'
3
- require 'wukong'
4
- require 'eventmachine'
5
- require 'em-synchrony'
6
- require 'multi_json'
7
-
8
- Settings({
9
- host: "localhost",
10
- port: 9500
11
- })
12
-
13
- Settings.use :commandline
14
- Settings.resolve!
15
-
16
- # Load the file on the command line
17
- wu_file = ARGV.shift
18
- load wu_file
19
- $processor = File.basename(wu_file, '.rb').to_sym
20
-
21
- class Wukong::Server
22
- def prepare(options = {})
23
- dataflow_class = Wukong.dataflow(:server){ send(options[:processor]) }
24
- flow_builder = Wukong.registry.retrieve(:server)
25
- flow = flow_builder.build(processor: $processor)
26
-
27
- @buffer = []
28
- @processor = flow.stages.values.first
29
- @processor.emitter = ->(value){ @buffer << value }
30
- end
31
-
32
- def process(record)
33
- @process.process(record)
34
- end
35
-
36
- def cleanup
37
- end
38
- end
39
-
40
-
41
- # EventMachine server
42
-
43
- class WukongMachine < EM::Protocols::LineAndTextProtocol
44
- def post_init
45
- puts "[server] Client connected"
46
- @wukong = WukongInterface.new
47
- @wukong.prepare(processor: $processor)
48
- end
49
-
50
- def receive_data(data)
51
- @buffer = []
52
- input = MultiJson.load data
53
-
54
- op = proc { @wukong.process(input) }
55
- callback = proc { send_data MultiJson.dump(@buffer) + "\n" }
56
- EM.defer(op, callback)
57
-
58
- rescue MultiJson::DecodeError => ex
59
- STDERR.puts "[server] Dropped: Malformed request"
60
- end
61
-
62
- def unbind
63
- puts "[server] Client disconnected."
64
- end
65
- end
66
-
67
- EM::run {
68
- EM::start_server(Settings[:host], Settings[:port], WukongMachine)
69
- puts "Listening on #{Settings[:host]}:#{Settings[:port]}"
70
- }
@@ -1,96 +0,0 @@
1
- module Wukong
2
-
3
- # Boots Wukong, reading any and all relevant `settings` for your
4
- # processors.
5
- #
6
- # This is most useful for any script which wants to run within a
7
- # deploy. See Wukong::Deploy for more details.
8
- #
9
- # @param [Configliere::Param] settings the relevant settings object that should be configured to boot within a deploy pack
10
- def self.boot! settings
11
- # First we load the deploy pack's environment, but only if we seem
12
- # to be running within a deploy pack.
13
- Boot.load_environment if in_deploy_pack?
14
-
15
- # Next pass the `settings` to the Deploy pack itself to add
16
- # options it needs.
17
- Deploy.configure(settings) if loaded_deploy_pack?
18
-
19
- # Resolve the `settings` so we can capture all the options we need
20
- # from the command line, the environment, &c.
21
- settings.resolve!
22
-
23
- # Now boot the deploy pack itself, passing in its known location
24
- # and the `settings`
25
- Deploy.boot!(settings, Boot.deploy_pack_dir) if loaded_deploy_pack?
26
- end
27
-
28
- # Is execution likely happening within a deploy pack?
29
- #
30
- # See Wukong::Deploy for more information on deploy packs.
31
- #
32
- # @return [true, false]
33
- def self.in_deploy_pack?
34
- return @in_deploy_pack unless @in_deploy_pack.nil?
35
- @in_deploy_pack = (Boot.deploy_pack_dir != '/')
36
- end
37
-
38
- # Have we already loaded the environment of a deploy pack?
39
- #
40
- # See Wukong::Deploy for more information on deploy packs.
41
- #
42
- # @return [true, false]
43
- def self.loaded_deploy_pack?
44
- in_deploy_pack? && defined?(::Wukong::Deploy)
45
- end
46
-
47
- # Lets Wukong bootstrap by requiring an enclosing deploy pack's
48
- # environment file if available.
49
- #
50
- # We use a simple heuristic (presence of 'Gemfile' and
51
- # 'config/environment.rb' in a non-root parent directory) to
52
- # determine whether or not we are in a deploy pack.
53
- module Boot
54
-
55
- # Return the directory of the enclosing deploy pack. Will return
56
- # the root ('/') if no deeper directory is identified as a deploy
57
- # pack.
58
- #
59
- # @return [String]
60
- def self.deploy_pack_dir
61
- return File.dirname(ENV["BUNDLE_GEMFILE"]) if ENV["BUNDLE_GEMFILE"] && is_deploy_pack_dir?(File.dirname(ENV["BUNDLE_GEMFILE"]))
62
- return @deploy_pack_dir if @deploy_pack_dir
63
- wd = Dir.pwd
64
- parent = File.dirname(wd)
65
- until wd == parent
66
- return wd if is_deploy_pack_dir?(wd)
67
- wd = parent
68
- parent = File.dirname(wd)
69
- end
70
- @deploy_pack_dir = wd
71
- end
72
-
73
- # The default environment file that will be require'd when
74
- # booting.
75
- #
76
- # @return [String]
77
- def self.environment_file
78
- File.join(deploy_pack_dir, 'config', 'environment.rb')
79
- end
80
-
81
- # Load the actual deploy pack environment. Will not swallow any
82
- # load errors.
83
- def self.load_environment
84
- require environment_file
85
- end
86
-
87
- # Could `dir` be a deploy pack dir?
88
- #
89
- # @param [String] dir
90
- # @return [true, false]
91
- def self.is_deploy_pack_dir? dir
92
- dir && !dir.empty? && File.directory?(dir) && File.exist?(File.join(dir, 'Gemfile')) && File.exist?(File.join(dir, 'config', 'environment.rb'))
93
- end
94
- end
95
-
96
- end
@@ -1,8 +0,0 @@
1
- module Wukong
2
- module Local
3
-
4
- # All local configuration for Wukong lives within this object.
5
- Configuration = Configliere::Param.new unless defined? Configuration
6
-
7
- end
8
- end
@@ -1,22 +0,0 @@
1
- module Wukong
2
- class Emitter < Hanuman::DirectedLink
3
-
4
- def initialize(from, into)
5
- @from = from
6
- @into = into
7
- @from.write_attribute(:emitter, self)
8
- end
9
-
10
- def call(record)
11
- into.process(record)
12
- end
13
-
14
- def to_s
15
- "#<#{self.class}(#{from.label} -> #{into.label})>"
16
- end
17
-
18
- end
19
-
20
- Hanuman::LinkFactory.register(:emitter, ->(from_stage, into_stage){ Wukong::Emitter.new(from_stage, into_stage) })
21
-
22
- end