ul-wukong 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +60 -0
  3. data/.gitmodules +6 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +19 -0
  6. data/.yardopts +6 -0
  7. data/CHANGELOG.md +7 -0
  8. data/Gemfile +17 -0
  9. data/Guardfile +12 -0
  10. data/LICENSE.md +95 -0
  11. data/NOTES-travis.md +31 -0
  12. data/README-old.md +422 -0
  13. data/README.md +1308 -0
  14. data/Rakefile +28 -0
  15. data/TODO.md +99 -0
  16. data/bin/cutc +30 -0
  17. data/bin/cuttab +5 -0
  18. data/bin/greptrue +6 -0
  19. data/bin/md5sort +20 -0
  20. data/bin/setcat +11 -0
  21. data/bin/tabchar +5 -0
  22. data/bin/uniq-ord +59 -0
  23. data/bin/uniqc +3 -0
  24. data/bin/wu +34 -0
  25. data/bin/wu-clean-encoding +31 -0
  26. data/bin/wu-date +13 -0
  27. data/bin/wu-datetime +13 -0
  28. data/bin/wu-hist +3 -0
  29. data/bin/wu-lign +186 -0
  30. data/bin/wu-local +4 -0
  31. data/bin/wu-plus +9 -0
  32. data/bin/wu-source +5 -0
  33. data/bin/wu-sum +31 -0
  34. data/diagrams/wu_local.dot +39 -0
  35. data/diagrams/wu_local.dot.png +0 -0
  36. data/examples/Gemfile +38 -0
  37. data/examples/README.md +9 -0
  38. data/examples/basic/string_reverser.rb +23 -0
  39. data/examples/basic/tiny_count.rb +8 -0
  40. data/examples/basic/word_count/accumulator.rb +26 -0
  41. data/examples/basic/word_count/tokenizer.rb +13 -0
  42. data/examples/basic/word_count/word_count.rb +6 -0
  43. data/examples/dataflow/scraper_macro_flow.rb +28 -0
  44. data/examples/deploy_pack/Gemfile +6 -0
  45. data/examples/deploy_pack/README.md +6 -0
  46. data/examples/deploy_pack/a/b/c/.gitkeep +0 -0
  47. data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
  48. data/examples/deploy_pack/config/environment.rb +1 -0
  49. data/examples/dsl/dataflow/fibonacci_series.rb +101 -0
  50. data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
  51. data/examples/dsl/dataflow/simple.rb +12 -0
  52. data/examples/dsl/dataflow/telegram.rb +45 -0
  53. data/examples/dsl/workflow/cherry_pie.dot +97 -0
  54. data/examples/dsl/workflow/cherry_pie.md +104 -0
  55. data/examples/dsl/workflow/cherry_pie.png +0 -0
  56. data/examples/dsl/workflow/cherry_pie.rb +101 -0
  57. data/examples/empty/.gitkeep +0 -0
  58. data/examples/examples_helper.rb +9 -0
  59. data/examples/geo.rb +4 -0
  60. data/examples/geo/geo_grids.numbers +0 -0
  61. data/examples/geo/geolocated.rb +331 -0
  62. data/examples/geo/quadtile.rb +69 -0
  63. data/examples/geo/spec/geolocated_spec.rb +247 -0
  64. data/examples/geo/tile_fetcher.rb +77 -0
  65. data/examples/graph/implied_geolocation/README.md +63 -0
  66. data/examples/graph/minimum_spanning_tree/airfares_graphviz.rb +73 -0
  67. data/examples/improver/tweet_summary.rb +73 -0
  68. data/examples/loadable.rb +2 -0
  69. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  70. data/examples/munging/airline_flights/airplane.rb +0 -0
  71. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  72. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  73. data/examples/munging/airline_flights/indexable.rb +75 -0
  74. data/examples/munging/airline_flights/indexable_spec.rb +90 -0
  75. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  76. data/examples/munging/airline_flights/tasks.rake +83 -0
  77. data/examples/munging/airline_flights/topcities.rb +167 -0
  78. data/examples/munging/geo/geo_json.rb +54 -0
  79. data/examples/munging/geo/geo_models.rb +69 -0
  80. data/examples/munging/geo/geonames_models.rb +107 -0
  81. data/examples/munging/geo/iso_codes.rb +172 -0
  82. data/examples/munging/geo/reconcile_countries.rb +124 -0
  83. data/examples/munging/geo/tasks.rake +71 -0
  84. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  85. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  86. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  87. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  88. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  89. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  90. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  91. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +5 -0
  92. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  93. data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
  94. data/examples/munging/wikipedia/dbpedia/extract_links.rb +260 -0
  95. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  96. data/examples/rake_helper.rb +97 -0
  97. data/examples/ruby_project/Gemfile +6 -0
  98. data/examples/ruby_project/README.md +6 -0
  99. data/examples/ruby_project/a/b/c/.gitkeep +0 -0
  100. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  101. data/examples/server_logs/logline.rb +95 -0
  102. data/examples/server_logs/models.rb +66 -0
  103. data/examples/server_logs/page_counts.pig +48 -0
  104. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  105. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  106. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  107. data/examples/server_logs/server_logs-03-breadcrumbs-full.rb +71 -0
  108. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  109. data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
  110. data/examples/serverlogs/models/logline.rb +102 -0
  111. data/examples/serverlogs/parser/apache_parser_widget.rb +46 -0
  112. data/examples/serverlogs/visit_paths/common.rb +4 -0
  113. data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
  114. data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
  115. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
  116. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
  117. data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
  118. data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
  119. data/examples/splitter.rb +94 -0
  120. data/examples/string_reverser.rb +7 -0
  121. data/examples/text/pig_latin/pig_latinizer.rb +35 -0
  122. data/examples/text/pig_latin/pig_latinizer_widget.rb +16 -0
  123. data/examples/text/regional_flavor/README.md +14 -0
  124. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  125. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  126. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  127. data/examples/twitter.rb +5 -0
  128. data/lib/hanuman.rb +36 -0
  129. data/lib/hanuman/graph.rb +97 -0
  130. data/lib/hanuman/graphvizzer.rb +206 -0
  131. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  132. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  133. data/lib/hanuman/link.rb +35 -0
  134. data/lib/hanuman/registry.rb +46 -0
  135. data/lib/hanuman/stage.rb +128 -0
  136. data/lib/hanuman/tree.rb +67 -0
  137. data/lib/wu/geo.rb +4 -0
  138. data/lib/wu/geo/geo_grids.numbers +0 -0
  139. data/lib/wu/geo/geolocated.rb +331 -0
  140. data/lib/wu/geo/quadtile.rb +69 -0
  141. data/lib/wu/graph/union_find.rb +62 -0
  142. data/lib/wu/model/reconcilable.rb +63 -0
  143. data/lib/wu/munging.rb +71 -0
  144. data/lib/wu/social/models/twitter.rb +31 -0
  145. data/lib/wu/wikipedia/models.rb +20 -0
  146. data/lib/wukong.rb +54 -0
  147. data/lib/wukong/dataflow.rb +43 -0
  148. data/lib/wukong/doc_helpers.rb +14 -0
  149. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  150. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  151. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  152. data/lib/wukong/driver.rb +214 -0
  153. data/lib/wukong/driver/event_machine_driver.rb +15 -0
  154. data/lib/wukong/driver/wiring.rb +68 -0
  155. data/lib/wukong/local.rb +42 -0
  156. data/lib/wukong/local/runner.rb +96 -0
  157. data/lib/wukong/local/stdio_driver.rb +104 -0
  158. data/lib/wukong/logger.rb +102 -0
  159. data/lib/wukong/model/faker.rb +136 -0
  160. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  161. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  162. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  163. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  164. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  165. data/lib/wukong/plugin.rb +48 -0
  166. data/lib/wukong/processor.rb +110 -0
  167. data/lib/wukong/rake_helper.rb +6 -0
  168. data/lib/wukong/runner.rb +169 -0
  169. data/lib/wukong/runner/boot_sequence.rb +123 -0
  170. data/lib/wukong/runner/code_loader.rb +52 -0
  171. data/lib/wukong/runner/command_runner.rb +44 -0
  172. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  173. data/lib/wukong/runner/help_message.rb +42 -0
  174. data/lib/wukong/source.rb +33 -0
  175. data/lib/wukong/source/source_driver.rb +74 -0
  176. data/lib/wukong/source/source_runner.rb +38 -0
  177. data/lib/wukong/spec_helpers.rb +74 -0
  178. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  179. data/lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb +207 -0
  180. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  181. data/lib/wukong/spec_helpers/shared_examples.rb +22 -0
  182. data/lib/wukong/spec_helpers/unit_tests.rb +135 -0
  183. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +132 -0
  184. data/lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb +169 -0
  185. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +60 -0
  186. data/lib/wukong/version.rb +3 -0
  187. data/lib/wukong/widget/echo.rb +55 -0
  188. data/lib/wukong/widget/extract.rb +122 -0
  189. data/lib/wukong/widget/filters.rb +452 -0
  190. data/lib/wukong/widget/logger.rb +56 -0
  191. data/lib/wukong/widget/operators.rb +82 -0
  192. data/lib/wukong/widget/reducers.rb +10 -0
  193. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  194. data/lib/wukong/widget/reducers/bin.rb +368 -0
  195. data/lib/wukong/widget/reducers/count.rb +73 -0
  196. data/lib/wukong/widget/reducers/group.rb +128 -0
  197. data/lib/wukong/widget/reducers/group_concat.rb +98 -0
  198. data/lib/wukong/widget/reducers/improver.rb +71 -0
  199. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  200. data/lib/wukong/widget/reducers/moments.rb +72 -0
  201. data/lib/wukong/widget/reducers/sort.rb +180 -0
  202. data/lib/wukong/widget/reducers/uniq.rb +91 -0
  203. data/lib/wukong/widget/serializers.rb +317 -0
  204. data/lib/wukong/widget/utils.rb +46 -0
  205. data/lib/wukong/widgets.rb +7 -0
  206. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  207. data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
  208. data/spec/examples/dataflow/parsing_spec.rb +14 -0
  209. data/spec/examples/dataflow/simple_spec.rb +34 -0
  210. data/spec/examples/dataflow/telegram_spec.rb +43 -0
  211. data/spec/examples/graph/minimum_spanning_tree_spec.rb +34 -0
  212. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  213. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  214. data/spec/examples/text/pig_latin_spec.rb +18 -0
  215. data/spec/examples/workflow/cherry_pie_spec.rb +36 -0
  216. data/spec/hanuman/graph_spec.rb +119 -0
  217. data/spec/hanuman/hanuman_spec.rb +10 -0
  218. data/spec/hanuman/registry_spec.rb +123 -0
  219. data/spec/hanuman/stage_spec.rb +81 -0
  220. data/spec/hanuman/tree_spec.rb +119 -0
  221. data/spec/spec.opts +1 -0
  222. data/spec/spec_helper.rb +43 -0
  223. data/spec/support/example_test_helpers.rb +95 -0
  224. data/spec/support/hanuman_test_helpers.rb +92 -0
  225. data/spec/support/integration_helper.rb +38 -0
  226. data/spec/support/model_test_helpers.rb +115 -0
  227. data/spec/support/shared_context_for_graphs.rb +57 -0
  228. data/spec/support/shared_context_for_reducers.rb +37 -0
  229. data/spec/support/shared_examples_for_builders.rb +94 -0
  230. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  231. data/spec/wu/model/reconcilable_spec.rb +152 -0
  232. data/spec/wukong/dataflow_spec.rb +87 -0
  233. data/spec/wukong/driver_spec.rb +154 -0
  234. data/spec/wukong/local/runner_spec.rb +29 -0
  235. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  236. data/spec/wukong/local_spec.rb +6 -0
  237. data/spec/wukong/logger_spec.rb +49 -0
  238. data/spec/wukong/model/faker_spec.rb +132 -0
  239. data/spec/wukong/processor_spec.rb +21 -0
  240. data/spec/wukong/runner_spec.rb +132 -0
  241. data/spec/wukong/source_spec.rb +6 -0
  242. data/spec/wukong/widget/extract_spec.rb +101 -0
  243. data/spec/wukong/widget/filters_spec.rb +79 -0
  244. data/spec/wukong/widget/logger_spec.rb +23 -0
  245. data/spec/wukong/widget/operators_spec.rb +25 -0
  246. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  247. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  248. data/spec/wukong/widget/reducers/group_spec.rb +21 -0
  249. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  250. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  251. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  252. data/spec/wukong/widget/reducers/uniq_spec.rb +14 -0
  253. data/spec/wukong/widget/serializers_spec.rb +114 -0
  254. data/spec/wukong/widget/sink_spec.rb +19 -0
  255. data/spec/wukong/widget/source_spec.rb +65 -0
  256. data/spec/wukong/wu-local_spec.rb +109 -0
  257. data/spec/wukong/wu-source_spec.rb +32 -0
  258. data/spec/wukong/wu_spec.rb +14 -0
  259. data/spec/wukong/wukong_spec.rb +10 -0
  260. data/wukong.gemspec +35 -0
  261. metadata +465 -0
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Reducers" do
4
+ describe :uniq do
5
+ include_context "reducers"
6
+ it_behaves_like 'a processor', :named => :uniq
7
+ it "should remove duplicate records" do
8
+ processor.given(*strings.sort).should emit(*strings.sort.uniq)
9
+ end
10
+ it "should output nothing if given no records" do
11
+ processor.given().should emit()
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,114 @@
1
+ require 'spec_helper'
2
+
3
+ describe "Serializers" do
4
+
5
+ describe :to_json do
6
+ it_behaves_like 'a processor', :named => :to_json
7
+
8
+ let(:valid_record) { { hi: 'there' } }
9
+ let(:record_as_json) { '{"hi":"there"}' }
10
+ let(:model_as_json) { '{"model":"json"}' }
11
+ let(:valid_model) { double('model', to_json: model_as_json) }
12
+
13
+ it 'serializes records to JSON' do
14
+ processor.given(valid_record).should emit(record_as_json)
15
+ end
16
+
17
+ it 'serializes records as pretty JSON when asked' do
18
+ processor(:pretty => true).given(valid_record).output.first.should include("\n")
19
+ end
20
+
21
+ it 'defers to models to let them serialize themselves as JSON' do
22
+ processor.given(valid_model).should emit(model_as_json)
23
+ end
24
+ end
25
+
26
+
27
+ describe :from_json do
28
+ it_behaves_like 'a processor', :named => :from_json
29
+
30
+ let(:valid_json) { '{"hi": "there"}' }
31
+ let(:json_parsed) { {"hi" => "there"} }
32
+ let(:invalid_json) { '{"832323:' }
33
+
34
+ it 'deserializes valid JSON' do
35
+ processor.given(valid_json).should emit(json_parsed)
36
+ end
37
+
38
+ it 'handles errors on invalid JSON' do
39
+ processor { |proc| proc.should_receive(:handle_error).with(invalid_json, kind_of(Exception)) }.given(invalid_json).should emit(0).records
40
+ end
41
+ end
42
+
43
+ describe :to_tsv do
44
+ it_behaves_like 'a processor', :named => :to_tsv
45
+
46
+ let(:valid_record) { ["foo", 2, :a] }
47
+ let(:invalid_record) { nil }
48
+ let(:record_as_tsv) { "foo\t2\ta" }
49
+ let(:model_as_tsv) { "foo\tbar\tbaz" }
50
+ let(:valid_model) { double('model', to_tsv: model_as_tsv) }
51
+
52
+ it 'serializes records to TSV' do
53
+ processor.given(valid_record).should emit(record_as_tsv)
54
+ end
55
+
56
+ it 'defers to models to let them serialize themselves as TSV' do
57
+ processor.given(valid_model).should emit(model_as_tsv)
58
+ end
59
+
60
+ it 'handles errors on bad records' do
61
+ processor { |proc| proc.should_receive(:handle_error) }.given(invalid_record).should emit(0).records
62
+ end
63
+ end
64
+
65
+ describe :from_tsv, serializer: true, handles_errors: true do
66
+ it_behaves_like 'a processor', :named => :from_tsv
67
+
68
+ let(:valid_tsv) { "foo\t2\ta" }
69
+ let(:tsv_parsed) { ["foo", "2", "a"] }
70
+ let(:invalid_tsv) { nil }
71
+
72
+ it 'deserializes valid TSV' do
73
+ processor.given(valid_tsv).should emit(tsv_parsed)
74
+ end
75
+
76
+ it "handles errors on invalid TSV" do
77
+ processor { |proc| proc.should_receive(:handle_error).with(invalid_tsv, kind_of(Exception)) }.given(invalid_tsv).should emit(0).records
78
+ end
79
+ end
80
+
81
+ describe :to_inspect do
82
+ it_behaves_like 'a processor', :named => :to_inspect
83
+
84
+ let(:valid_record) { {"a" => 1 } }
85
+ let(:record_as_inspect) { valid_record.inspect }
86
+ let(:model_as_inspect) { '<Model #13e233>' }
87
+ let(:valid_model) { double('model', inspect: model_as_inspect) }
88
+
89
+ it 'serializes records via inspect' do
90
+ processor.given(valid_record).should emit(record_as_inspect)
91
+ end
92
+
93
+ it 'defers to models to let them inspect themselves' do
94
+ processor.given(valid_model).should emit(model_as_inspect)
95
+ end
96
+ end
97
+
98
+ describe :recordize do
99
+ let(:model_instance) { double('model') }
100
+ let(:model_klass) { double('model_def', receive: model_instance) }
101
+ let(:serializer) { processor(:recordize, model: model_klass, on_error: :skip) }
102
+ let(:valid_record) { { foo: 'bar' } }
103
+ let(:invalid_record) { [1,2,3] }
104
+
105
+ it 'recordizes valid records' do
106
+ processor(model: model_klass).given(valid_record).should emit(model_instance)
107
+ end
108
+
109
+ it 'handles errors on invalid models' do
110
+ processor(model: model_klass) { |proc| proc.should_receive(:handle_error).with(invalid_record, kind_of(Exception)) }.given(invalid_record).should emit(0).records
111
+ end
112
+
113
+ end
114
+ end
@@ -0,0 +1,19 @@
1
+ # require 'spec_helper'
2
+
3
+ # describe :sinks, :helpers => true do
4
+
5
+ # describe Wukong::Sink::Stdout do
6
+ # it 'dumps records to $stdout' do
7
+ # $stdout.should_receive(:puts).with(mock_record)
8
+ # subject.process(mock_record)
9
+ # end
10
+ # end
11
+
12
+ # describe Wukong::Sink::Stderr do
13
+ # it 'dumps records to $stderr' do
14
+ # $stderr.should_receive(:puts).with(mock_record)
15
+ # subject.process(mock_record)
16
+ # end
17
+
18
+ # end
19
+ # end
@@ -0,0 +1,65 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong/widget/gibberish'
3
+
4
+ # describe :sources, :helpers => true do
5
+
6
+ # describe Wukong::Source::Iter do
7
+ # subject{ described_class.new(obj: (9 .. 14), owner: test_dataflow) }
8
+ # it 'iterates over a given collection' do
9
+ # subject.to_enum.to_a.should == [9, 10, 11, 12, 13, 14]
10
+ # end
11
+ # context 'dataflow method' do
12
+ # it 'simplified args' do
13
+ # test_dataflow.iter(9 .. 14).should == subject
14
+ # end
15
+ # end
16
+ # end
17
+
18
+ # # describe Wukong::Source::FileSource do
19
+ # # let(:example_filename){ Pathname.path_to(:data, 'text/jabberwocky.txt') }
20
+ # # subject{ described_class.receive(filename: example_filename, owner: test_dataflow) }
21
+ # # before{ subject.setup }
22
+ # # it 'iterates over a given collection' do
23
+ # # subject.to_enum.to_a[6, 4].should == ["`Twas brillig, and the slithy toves", " Did gyre and gimble in the wabe:", "All mimsy were the borogoves,", " And the mome raths outgrabe.",]
24
+ # # end
25
+ # # context 'dataflow method' do
26
+ # # it 'simplified args' do
27
+ # # test_dataflow.file_source(example_filename).should == subject
28
+ # # end
29
+ # # end
30
+ # # end
31
+
32
+ # describe Wukong::Source::Integers do
33
+ # subject{ described_class.receive(qty: 10, owner: test_dataflow) }
34
+ # before{ subject.setup }
35
+
36
+ # it 'generates integers up to the given limit' do
37
+ # subject.to_enum.to_a.should == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
38
+ # end
39
+ # it 'generates nothing if the initial range is void' do
40
+ # subject.qty = 0
41
+ # subject.to_enum.to_a.should == []
42
+ # end
43
+ # it 'generates one thing if the min and max are equal' do
44
+ # subject.qty = 1
45
+ # subject.to_enum.to_a.should == [0]
46
+ # end
47
+
48
+ # context 'dataflow method' do
49
+ # it 'takes simplified args' do
50
+ # test_dataflow.integers(10).should == subject
51
+ # end
52
+ # end
53
+ # end
54
+
55
+ # describe Wukong::Widget::Gibberish do
56
+ # subject{ described_class.receive(:qty => 4) }
57
+ # before{ subject.setup }
58
+
59
+ # it 'generates the requested number of gibberish lines' do
60
+ # subject.rng = Random.new(8675309)
61
+ # subject.to_enum.to_a.should == ["loaiaeiaeo neidgfo heeume sptfmeec naet sttptlm waaaioh detov elrrltv nii ulcsnn", "set ensr poeleaa seqi tmnreoee boooral oczncgp deaia rykcoao leo rim mmibpbfii", "artrru sto quuu doo peoehrile nto esl tia gaili tuiooey lkedotp sts kaiy weeeia", "crhi tyiiae mieubmbooa teeae roi ednz taieh zaloy syhe ret kuoa deeeo xittipl mo"]
62
+ # end
63
+
64
+ # end
65
+ # end
@@ -0,0 +1,109 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu-local' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { wu_local() }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr(/provide a processor.*to run/)
12
+ end
13
+ end
14
+
15
+ context "running outside any Ruby project" do
16
+ subject { wu_local('count').in(examples_dir('empty')) < input }
17
+ it { should exit_with(0) }
18
+ it "runs the processor" do
19
+ should have_stdout("3")
20
+ end
21
+ context "when passed a BUNDLE_GEMFILE" do
22
+ context "that doesn't belong to a deploy pack" do
23
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
24
+ it { should exit_with(0) }
25
+ it "runs the processor" do
26
+ should have_stdout("3")
27
+ end
28
+ end
29
+ context "that belongs to a deploy pack" do
30
+ subject { wu_local('count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
31
+ it { should exit_with(0) }
32
+ it "runs the processor" do
33
+ should have_stdout("3")
34
+ end
35
+ context "loading the deploy pack" do
36
+ subject { wu_local('string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
37
+ it { should exit_with(0) }
38
+ it "runs the processor" do
39
+ should have_stdout("ih")
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ context "running within a Ruby project" do
47
+ context "at its root" do
48
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
49
+ it { should exit_with(0) }
50
+ it "runs the processor" do
51
+ should have_stdout("3")
52
+ end
53
+ end
54
+ context "deep within it" do
55
+ subject { wu_local('count').in(examples_dir('ruby_project')) < input }
56
+ it { should exit_with(0) }
57
+ it "runs the processor" do
58
+ should have_stdout("3")
59
+ end
60
+ end
61
+ end
62
+
63
+ context "running within a deploy pack" do
64
+ context "at its root" do
65
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
66
+ it { should exit_with(0) }
67
+ it "runs the processor" do
68
+ should have_stdout("3")
69
+ end
70
+ context "loading the deploy pack" do
71
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
72
+ it { should exit_with(0) }
73
+ it "runs the processor" do
74
+ should have_stdout("ih")
75
+ end
76
+ end
77
+ end
78
+ context "deep within it" do
79
+ subject { wu_local('count').in(examples_dir('deploy_pack')) < input }
80
+ it { should exit_with(0) }
81
+ it "runs the processor" do
82
+ should have_stdout("3")
83
+ end
84
+ context "loading the deploy pack" do
85
+ subject { wu_local('string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
86
+ it { should exit_with(0) }
87
+ it "runs the processor" do
88
+ should have_stdout("ih")
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ context "parsing records with the --from argument" do
95
+ subject { wu_local('identity --from=tsv') < "1\t2\t3" }
96
+ it { should exit_with(0) }
97
+ it "should emit a row for each input column" do
98
+ should have_stdout("1\n2\n3")
99
+ end
100
+ end
101
+
102
+ context "formatting records with the --to argument" do
103
+ subject { wu_local('identity --from=tsv --to=json') < "1\t2\t3" }
104
+ it { should exit_with(0) }
105
+ it "should emit a single JSON array for the input record" do
106
+ should have_stdout('["1","2","3"]')
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu-source' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { wu_source() }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr(/provide a processor.*to run/)
12
+ end
13
+ end
14
+
15
+ # FIXME -- it's hard to write an integration test for wu-source
16
+ # because it doesn't self-terminate under any conditions when run
17
+ # successfully.
18
+ #
19
+ # Options:
20
+ #
21
+ # 1) Add a --max (or similar) flag to wu-source allowing it to
22
+ # exit after some number of records which could then be checked
23
+ # by an integration test.
24
+ #
25
+ # 2) Launch it in a subprocess and wait a little while (how long?)
26
+ # and ensure that it's produced a bunch of output in the meantime.
27
+ # If the `per_sec` option is high, we shouldn't have to wait very
28
+ # long to see a whole bunch of output records. This is tricky b/c
29
+ # what if the system is under load and we don't wait long enough
30
+ # for the wu-source subprocess to boot up and start emitting?
31
+
32
+ end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { wu() }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr(/provide a Wukong command to run/)
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wukong do
4
+
5
+ it_behaves_like Hanuman::Shortcuts
6
+
7
+ it{ should respond_to(:processor) }
8
+ it{ should respond_to(:dataflow) }
9
+
10
+ end
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/wukong/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = 'ul-wukong'
6
+ gem.homepage = 'https://github.com/undeadlabs/wukong'
7
+ gem.licenses = ["Apache 2.0"]
8
+ gem.email = 'coders@infochimps.com'
9
+ gem.authors = ['Infochimps', 'Philip (flip) Kromer', 'Travis Dempsey']
10
+ gem.version = Wukong::VERSION
11
+
12
+ gem.summary = 'Fork of Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.'
13
+ gem.description = <<-EOF
14
+ Treat your dataset like a:
15
+
16
+ * stream of lines when it's efficient to process by lines
17
+ * stream of field arrays when it's efficient to deal directly with fields
18
+ * stream of lightweight objects when it's efficient to deal with objects
19
+
20
+ Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
21
+ EOF
22
+
23
+ gem.files = `git ls-files`.split("\n").reject { |path| path =~ /^(data|docpages|notes|old)/ }
24
+ gem.executables = ['wu-local', 'wu-source', 'wu']
25
+ gem.test_files = gem.files.grep(/^spec/)
26
+ gem.require_paths = ['lib']
27
+
28
+ gem.add_dependency('configliere', '>= 0.4.18')
29
+ gem.add_dependency('multi_json', '>= 1.3.6')
30
+ gem.add_dependency('gorillib', '>= 0.4.2')
31
+ gem.add_dependency('forgery')
32
+ gem.add_dependency('uuidtools')
33
+ gem.add_dependency('eventmachine')
34
+ gem.add_dependency('log4r')
35
+ end
metadata ADDED
@@ -0,0 +1,465 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ul-wukong
3
+ version: !ruby/object:Gem::Version
4
+ version: 4.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Infochimps
8
+ - Philip (flip) Kromer
9
+ - Travis Dempsey
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2014-11-19 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: configliere
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.4.18
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ! '>='
27
+ - !ruby/object:Gem::Version
28
+ version: 0.4.18
29
+ - !ruby/object:Gem::Dependency
30
+ name: multi_json
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: 1.3.6
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.3.6
43
+ - !ruby/object:Gem::Dependency
44
+ name: gorillib
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: 0.4.2
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: 0.4.2
57
+ - !ruby/object:Gem::Dependency
58
+ name: forgery
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :runtime
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: uuidtools
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ - !ruby/object:Gem::Dependency
86
+ name: eventmachine
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :runtime
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ - !ruby/object:Gem::Dependency
100
+ name: log4r
101
+ requirement: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ type: :runtime
107
+ prerelease: false
108
+ version_requirements: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ! '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ description: ! " Treat your dataset like a:\n\n * stream of lines when it's
114
+ efficient to process by lines\n * stream of field arrays when it's efficient
115
+ to deal directly with fields\n * stream of lightweight objects when it's efficient
116
+ to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query
117
+ language, and the cat on your command line.\n"
118
+ email: coders@infochimps.com
119
+ executables:
120
+ - wu-local
121
+ - wu-source
122
+ - wu
123
+ extensions: []
124
+ extra_rdoc_files: []
125
+ files:
126
+ - .gitignore
127
+ - .gitmodules
128
+ - .rspec
129
+ - .travis.yml
130
+ - .yardopts
131
+ - CHANGELOG.md
132
+ - Gemfile
133
+ - Guardfile
134
+ - LICENSE.md
135
+ - NOTES-travis.md
136
+ - README-old.md
137
+ - README.md
138
+ - Rakefile
139
+ - TODO.md
140
+ - bin/cutc
141
+ - bin/cuttab
142
+ - bin/greptrue
143
+ - bin/md5sort
144
+ - bin/setcat
145
+ - bin/tabchar
146
+ - bin/uniq-ord
147
+ - bin/uniqc
148
+ - bin/wu
149
+ - bin/wu-clean-encoding
150
+ - bin/wu-date
151
+ - bin/wu-datetime
152
+ - bin/wu-hist
153
+ - bin/wu-lign
154
+ - bin/wu-local
155
+ - bin/wu-plus
156
+ - bin/wu-source
157
+ - bin/wu-sum
158
+ - diagrams/wu_local.dot
159
+ - diagrams/wu_local.dot.png
160
+ - examples/Gemfile
161
+ - examples/README.md
162
+ - examples/basic/string_reverser.rb
163
+ - examples/basic/tiny_count.rb
164
+ - examples/basic/word_count/accumulator.rb
165
+ - examples/basic/word_count/tokenizer.rb
166
+ - examples/basic/word_count/word_count.rb
167
+ - examples/dataflow/scraper_macro_flow.rb
168
+ - examples/deploy_pack/Gemfile
169
+ - examples/deploy_pack/README.md
170
+ - examples/deploy_pack/a/b/c/.gitkeep
171
+ - examples/deploy_pack/app/processors/string_reverser.rb
172
+ - examples/deploy_pack/config/environment.rb
173
+ - examples/dsl/dataflow/fibonacci_series.rb
174
+ - examples/dsl/dataflow/scraper_macro_flow.rb
175
+ - examples/dsl/dataflow/simple.rb
176
+ - examples/dsl/dataflow/telegram.rb
177
+ - examples/dsl/workflow/cherry_pie.dot
178
+ - examples/dsl/workflow/cherry_pie.md
179
+ - examples/dsl/workflow/cherry_pie.png
180
+ - examples/dsl/workflow/cherry_pie.rb
181
+ - examples/empty/.gitkeep
182
+ - examples/examples_helper.rb
183
+ - examples/geo.rb
184
+ - examples/geo/geo_grids.numbers
185
+ - examples/geo/geolocated.rb
186
+ - examples/geo/quadtile.rb
187
+ - examples/geo/spec/geolocated_spec.rb
188
+ - examples/geo/tile_fetcher.rb
189
+ - examples/graph/implied_geolocation/README.md
190
+ - examples/graph/minimum_spanning_tree/airfares_graphviz.rb
191
+ - examples/improver/tweet_summary.rb
192
+ - examples/loadable.rb
193
+ - examples/munging/airline_flights/airline_flights.rake
194
+ - examples/munging/airline_flights/airplane.rb
195
+ - examples/munging/airline_flights/airport_id_unification.rb
196
+ - examples/munging/airline_flights/airport_ok_chars.rb
197
+ - examples/munging/airline_flights/indexable.rb
198
+ - examples/munging/airline_flights/indexable_spec.rb
199
+ - examples/munging/airline_flights/reconcile_airports.rb
200
+ - examples/munging/airline_flights/tasks.rake
201
+ - examples/munging/airline_flights/topcities.rb
202
+ - examples/munging/geo/geo_json.rb
203
+ - examples/munging/geo/geo_models.rb
204
+ - examples/munging/geo/geonames_models.rb
205
+ - examples/munging/geo/iso_codes.rb
206
+ - examples/munging/geo/reconcile_countries.rb
207
+ - examples/munging/geo/tasks.rake
208
+ - examples/munging/wikipedia/articles/extract_articles-parsed.rb
209
+ - examples/munging/wikipedia/articles/extract_articles-templated.rb
210
+ - examples/munging/wikipedia/articles/textualize_articles.rb
211
+ - examples/munging/wikipedia/articles/verify_structure.rb
212
+ - examples/munging/wikipedia/articles/wp2txt-LICENSE.txt
213
+ - examples/munging/wikipedia/articles/wp2txt_article.rb
214
+ - examples/munging/wikipedia/articles/wp2txt_utils.rb
215
+ - examples/munging/wikipedia/dbpedia/dbpedia_common.rb
216
+ - examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb
217
+ - examples/munging/wikipedia/dbpedia/extract_links-cruft.rb
218
+ - examples/munging/wikipedia/dbpedia/extract_links.rb
219
+ - examples/munging/wikipedia/dbpedia/sameas_extractor.rb
220
+ - examples/rake_helper.rb
221
+ - examples/ruby_project/Gemfile
222
+ - examples/ruby_project/README.md
223
+ - examples/ruby_project/a/b/c/.gitkeep
224
+ - examples/server_logs/geo_ip_mapping/munge_geolite.rb
225
+ - examples/server_logs/logline.rb
226
+ - examples/server_logs/models.rb
227
+ - examples/server_logs/page_counts.pig
228
+ - examples/server_logs/server_logs-01-parse-script.rb
229
+ - examples/server_logs/server_logs-02-histograms-full.rb
230
+ - examples/server_logs/server_logs-02-histograms-mapper.rb
231
+ - examples/server_logs/server_logs-03-breadcrumbs-full.rb
232
+ - examples/server_logs/server_logs-04-page_page_edges-full.rb
233
+ - examples/serverlogs/geo_ip_mapping/munge_geolite.rb
234
+ - examples/serverlogs/models/logline.rb
235
+ - examples/serverlogs/parser/apache_parser_widget.rb
236
+ - examples/serverlogs/visit_paths/common.rb
237
+ - examples/serverlogs/visit_paths/page_counts.pig
238
+ - examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb
239
+ - examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb
240
+ - examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
241
+ - examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
242
+ - examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
243
+ - examples/splitter.rb
244
+ - examples/string_reverser.rb
245
+ - examples/text/pig_latin/pig_latinizer.rb
246
+ - examples/text/pig_latin/pig_latinizer_widget.rb
247
+ - examples/text/regional_flavor/README.md
248
+ - examples/text/regional_flavor/article_wordbags.pig
249
+ - examples/text/regional_flavor/j01-article_wordbags.rb
250
+ - examples/text/regional_flavor/simple_pig_script.pig
251
+ - examples/twitter.rb
252
+ - lib/hanuman.rb
253
+ - lib/hanuman/graph.rb
254
+ - lib/hanuman/graphvizzer.rb
255
+ - lib/hanuman/graphvizzer/gv_models.rb
256
+ - lib/hanuman/graphvizzer/gv_presenter.rb
257
+ - lib/hanuman/link.rb
258
+ - lib/hanuman/registry.rb
259
+ - lib/hanuman/stage.rb
260
+ - lib/hanuman/tree.rb
261
+ - lib/wu/geo.rb
262
+ - lib/wu/geo/geo_grids.numbers
263
+ - lib/wu/geo/geolocated.rb
264
+ - lib/wu/geo/quadtile.rb
265
+ - lib/wu/graph/union_find.rb
266
+ - lib/wu/model/reconcilable.rb
267
+ - lib/wu/munging.rb
268
+ - lib/wu/social/models/twitter.rb
269
+ - lib/wu/wikipedia/models.rb
270
+ - lib/wukong.rb
271
+ - lib/wukong/dataflow.rb
272
+ - lib/wukong/doc_helpers.rb
273
+ - lib/wukong/doc_helpers/dataflow_handler.rb
274
+ - lib/wukong/doc_helpers/field_handler.rb
275
+ - lib/wukong/doc_helpers/processor_handler.rb
276
+ - lib/wukong/driver.rb
277
+ - lib/wukong/driver/event_machine_driver.rb
278
+ - lib/wukong/driver/wiring.rb
279
+ - lib/wukong/local.rb
280
+ - lib/wukong/local/runner.rb
281
+ - lib/wukong/local/stdio_driver.rb
282
+ - lib/wukong/logger.rb
283
+ - lib/wukong/model/faker.rb
284
+ - lib/wukong/model/flatpack_parser/flat.rb
285
+ - lib/wukong/model/flatpack_parser/flatpack.rb
286
+ - lib/wukong/model/flatpack_parser/lang.rb
287
+ - lib/wukong/model/flatpack_parser/parser.rb
288
+ - lib/wukong/model/flatpack_parser/tokens.rb
289
+ - lib/wukong/plugin.rb
290
+ - lib/wukong/processor.rb
291
+ - lib/wukong/rake_helper.rb
292
+ - lib/wukong/runner.rb
293
+ - lib/wukong/runner/boot_sequence.rb
294
+ - lib/wukong/runner/code_loader.rb
295
+ - lib/wukong/runner/command_runner.rb
296
+ - lib/wukong/runner/deploy_pack_loader.rb
297
+ - lib/wukong/runner/help_message.rb
298
+ - lib/wukong/source.rb
299
+ - lib/wukong/source/source_driver.rb
300
+ - lib/wukong/source/source_runner.rb
301
+ - lib/wukong/spec_helpers.rb
302
+ - lib/wukong/spec_helpers/integration_tests.rb
303
+ - lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb
304
+ - lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb
305
+ - lib/wukong/spec_helpers/shared_examples.rb
306
+ - lib/wukong/spec_helpers/unit_tests.rb
307
+ - lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb
308
+ - lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb
309
+ - lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb
310
+ - lib/wukong/version.rb
311
+ - lib/wukong/widget/echo.rb
312
+ - lib/wukong/widget/extract.rb
313
+ - lib/wukong/widget/filters.rb
314
+ - lib/wukong/widget/logger.rb
315
+ - lib/wukong/widget/operators.rb
316
+ - lib/wukong/widget/reducers.rb
317
+ - lib/wukong/widget/reducers/accumulator.rb
318
+ - lib/wukong/widget/reducers/bin.rb
319
+ - lib/wukong/widget/reducers/count.rb
320
+ - lib/wukong/widget/reducers/group.rb
321
+ - lib/wukong/widget/reducers/group_concat.rb
322
+ - lib/wukong/widget/reducers/improver.rb
323
+ - lib/wukong/widget/reducers/join_xml.rb
324
+ - lib/wukong/widget/reducers/moments.rb
325
+ - lib/wukong/widget/reducers/sort.rb
326
+ - lib/wukong/widget/reducers/uniq.rb
327
+ - lib/wukong/widget/serializers.rb
328
+ - lib/wukong/widget/utils.rb
329
+ - lib/wukong/widgets.rb
330
+ - spec/examples/dataflow/fibonacci_series_spec.rb
331
+ - spec/examples/dataflow/parse_apache_logs_spec.rb
332
+ - spec/examples/dataflow/parsing_spec.rb
333
+ - spec/examples/dataflow/simple_spec.rb
334
+ - spec/examples/dataflow/telegram_spec.rb
335
+ - spec/examples/graph/minimum_spanning_tree_spec.rb
336
+ - spec/examples/munging/airline_flights/identifiers_spec.rb
337
+ - spec/examples/munging/airline_flights_spec.rb
338
+ - spec/examples/text/pig_latin_spec.rb
339
+ - spec/examples/workflow/cherry_pie_spec.rb
340
+ - spec/hanuman/graph_spec.rb
341
+ - spec/hanuman/hanuman_spec.rb
342
+ - spec/hanuman/registry_spec.rb
343
+ - spec/hanuman/stage_spec.rb
344
+ - spec/hanuman/tree_spec.rb
345
+ - spec/spec.opts
346
+ - spec/spec_helper.rb
347
+ - spec/support/example_test_helpers.rb
348
+ - spec/support/hanuman_test_helpers.rb
349
+ - spec/support/integration_helper.rb
350
+ - spec/support/model_test_helpers.rb
351
+ - spec/support/shared_context_for_graphs.rb
352
+ - spec/support/shared_context_for_reducers.rb
353
+ - spec/support/shared_examples_for_builders.rb
354
+ - spec/support/shared_examples_for_shortcuts.rb
355
+ - spec/wu/model/reconcilable_spec.rb
356
+ - spec/wukong/dataflow_spec.rb
357
+ - spec/wukong/driver_spec.rb
358
+ - spec/wukong/local/runner_spec.rb
359
+ - spec/wukong/local/stdio_driver_spec.rb
360
+ - spec/wukong/local_spec.rb
361
+ - spec/wukong/logger_spec.rb
362
+ - spec/wukong/model/faker_spec.rb
363
+ - spec/wukong/processor_spec.rb
364
+ - spec/wukong/runner_spec.rb
365
+ - spec/wukong/source_spec.rb
366
+ - spec/wukong/widget/extract_spec.rb
367
+ - spec/wukong/widget/filters_spec.rb
368
+ - spec/wukong/widget/logger_spec.rb
369
+ - spec/wukong/widget/operators_spec.rb
370
+ - spec/wukong/widget/reducers/bin_spec.rb
371
+ - spec/wukong/widget/reducers/count_spec.rb
372
+ - spec/wukong/widget/reducers/group_spec.rb
373
+ - spec/wukong/widget/reducers/join_xml_spec.rb
374
+ - spec/wukong/widget/reducers/moments_spec.rb
375
+ - spec/wukong/widget/reducers/sort_spec.rb
376
+ - spec/wukong/widget/reducers/uniq_spec.rb
377
+ - spec/wukong/widget/serializers_spec.rb
378
+ - spec/wukong/widget/sink_spec.rb
379
+ - spec/wukong/widget/source_spec.rb
380
+ - spec/wukong/wu-local_spec.rb
381
+ - spec/wukong/wu-source_spec.rb
382
+ - spec/wukong/wu_spec.rb
383
+ - spec/wukong/wukong_spec.rb
384
+ - wukong.gemspec
385
+ homepage: https://github.com/undeadlabs/wukong
386
+ licenses:
387
+ - Apache 2.0
388
+ metadata: {}
389
+ post_install_message:
390
+ rdoc_options: []
391
+ require_paths:
392
+ - lib
393
+ required_ruby_version: !ruby/object:Gem::Requirement
394
+ requirements:
395
+ - - ! '>='
396
+ - !ruby/object:Gem::Version
397
+ version: '0'
398
+ required_rubygems_version: !ruby/object:Gem::Requirement
399
+ requirements:
400
+ - - ! '>='
401
+ - !ruby/object:Gem::Version
402
+ version: '0'
403
+ requirements: []
404
+ rubyforge_project:
405
+ rubygems_version: 2.2.2
406
+ signing_key:
407
+ specification_version: 4
408
+ summary: Fork of Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee
409
+ can use it, yet handles terabyte-scale computation with ease.
410
+ test_files:
411
+ - spec/examples/dataflow/fibonacci_series_spec.rb
412
+ - spec/examples/dataflow/parse_apache_logs_spec.rb
413
+ - spec/examples/dataflow/parsing_spec.rb
414
+ - spec/examples/dataflow/simple_spec.rb
415
+ - spec/examples/dataflow/telegram_spec.rb
416
+ - spec/examples/graph/minimum_spanning_tree_spec.rb
417
+ - spec/examples/munging/airline_flights/identifiers_spec.rb
418
+ - spec/examples/munging/airline_flights_spec.rb
419
+ - spec/examples/text/pig_latin_spec.rb
420
+ - spec/examples/workflow/cherry_pie_spec.rb
421
+ - spec/hanuman/graph_spec.rb
422
+ - spec/hanuman/hanuman_spec.rb
423
+ - spec/hanuman/registry_spec.rb
424
+ - spec/hanuman/stage_spec.rb
425
+ - spec/hanuman/tree_spec.rb
426
+ - spec/spec.opts
427
+ - spec/spec_helper.rb
428
+ - spec/support/example_test_helpers.rb
429
+ - spec/support/hanuman_test_helpers.rb
430
+ - spec/support/integration_helper.rb
431
+ - spec/support/model_test_helpers.rb
432
+ - spec/support/shared_context_for_graphs.rb
433
+ - spec/support/shared_context_for_reducers.rb
434
+ - spec/support/shared_examples_for_builders.rb
435
+ - spec/support/shared_examples_for_shortcuts.rb
436
+ - spec/wu/model/reconcilable_spec.rb
437
+ - spec/wukong/dataflow_spec.rb
438
+ - spec/wukong/driver_spec.rb
439
+ - spec/wukong/local/runner_spec.rb
440
+ - spec/wukong/local/stdio_driver_spec.rb
441
+ - spec/wukong/local_spec.rb
442
+ - spec/wukong/logger_spec.rb
443
+ - spec/wukong/model/faker_spec.rb
444
+ - spec/wukong/processor_spec.rb
445
+ - spec/wukong/runner_spec.rb
446
+ - spec/wukong/source_spec.rb
447
+ - spec/wukong/widget/extract_spec.rb
448
+ - spec/wukong/widget/filters_spec.rb
449
+ - spec/wukong/widget/logger_spec.rb
450
+ - spec/wukong/widget/operators_spec.rb
451
+ - spec/wukong/widget/reducers/bin_spec.rb
452
+ - spec/wukong/widget/reducers/count_spec.rb
453
+ - spec/wukong/widget/reducers/group_spec.rb
454
+ - spec/wukong/widget/reducers/join_xml_spec.rb
455
+ - spec/wukong/widget/reducers/moments_spec.rb
456
+ - spec/wukong/widget/reducers/sort_spec.rb
457
+ - spec/wukong/widget/reducers/uniq_spec.rb
458
+ - spec/wukong/widget/serializers_spec.rb
459
+ - spec/wukong/widget/sink_spec.rb
460
+ - spec/wukong/widget/source_spec.rb
461
+ - spec/wukong/wu-local_spec.rb
462
+ - spec/wukong/wu-source_spec.rb
463
+ - spec/wukong/wu_spec.rb
464
+ - spec/wukong/wukong_spec.rb
465
+ has_rdoc: