wukong 3.0.0.pre2 → 3.0.0.pre3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146) hide show
  1. data/Gemfile +13 -0
  2. data/README.md +182 -6
  3. data/bin/wu-local +13 -5
  4. data/bin/wu-server +1 -1
  5. data/examples/Gemfile +2 -1
  6. data/examples/basic/string_reverser.rb +23 -0
  7. data/examples/{tiny_count.rb → basic/tiny_count.rb} +0 -0
  8. data/examples/{word_count → basic/word_count}/accumulator.rb +0 -0
  9. data/examples/{word_count → basic/word_count}/tokenizer.rb +0 -0
  10. data/examples/{word_count → basic/word_count}/word_count.rb +0 -0
  11. data/examples/deploy_pack/Gemfile +7 -0
  12. data/examples/deploy_pack/README.md +6 -0
  13. data/examples/{text/latinize_text.rb → deploy_pack/a/b/c/.gitkeep} +0 -0
  14. data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
  15. data/examples/deploy_pack/config/environment.rb +1 -0
  16. data/examples/{dataflow → dsl/dataflow}/fibonacci_series.rb +0 -0
  17. data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
  18. data/examples/{dataflow → dsl/dataflow}/simple.rb +0 -0
  19. data/examples/{dataflow → dsl/dataflow}/telegram.rb +0 -0
  20. data/examples/{workflow → dsl/workflow}/cherry_pie.dot +0 -0
  21. data/examples/{workflow → dsl/workflow}/cherry_pie.md +0 -0
  22. data/examples/{workflow → dsl/workflow}/cherry_pie.png +0 -0
  23. data/examples/{workflow → dsl/workflow}/cherry_pie.rb +0 -0
  24. data/examples/empty/.gitkeep +0 -0
  25. data/examples/graph/implied_geolocation/README.md +63 -0
  26. data/examples/graph/{minimum_spanning_tree.rb → minimum_spanning_tree/airfares_graphviz.rb} +0 -0
  27. data/examples/munging/airline_flights/indexable.rb +75 -0
  28. data/examples/munging/airline_flights/indexable_spec.rb +90 -0
  29. data/examples/munging/geo/geonames_models.rb +29 -0
  30. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +1 -0
  31. data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
  32. data/examples/munging/wikipedia/dbpedia/extract_links.rb +213 -146
  33. data/examples/rake_helper.rb +12 -0
  34. data/examples/ruby_project/Gemfile +7 -0
  35. data/examples/ruby_project/README.md +6 -0
  36. data/examples/ruby_project/a/b/c/.gitkeep +0 -0
  37. data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
  38. data/examples/serverlogs/models/logline.rb +102 -0
  39. data/examples/{dataflow/parse_apache_logs.rb → serverlogs/parser/apache_parser_widget.rb} +0 -0
  40. data/examples/serverlogs/visit_paths/common.rb +4 -0
  41. data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
  42. data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
  43. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
  44. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
  45. data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
  46. data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
  47. data/examples/text/{pig_latin.rb → pig_latin/pig_latinizer.rb} +0 -0
  48. data/examples/{dataflow/pig_latinizer.rb → text/pig_latin/pig_latinizer_widget.rb} +0 -0
  49. data/lib/hanuman/graph.rb +6 -1
  50. data/lib/wu/geo.rb +4 -0
  51. data/lib/wu/geo/geo_grids.numbers +0 -0
  52. data/lib/wu/geo/geolocated.rb +331 -0
  53. data/lib/wu/geo/quadtile.rb +69 -0
  54. data/{examples → lib/wu}/graph/union_find.rb +0 -0
  55. data/lib/wu/model/reconcilable.rb +63 -0
  56. data/{examples/munging/wikipedia/utils/munging_utils.rb → lib/wu/munging.rb} +7 -4
  57. data/lib/wu/social/models/twitter.rb +31 -0
  58. data/{examples/models/wikipedia.rb → lib/wu/wikipedia/models.rb} +0 -0
  59. data/lib/wukong.rb +9 -4
  60. data/lib/wukong/boot.rb +10 -1
  61. data/lib/wukong/driver.rb +65 -71
  62. data/lib/wukong/logger.rb +93 -0
  63. data/lib/wukong/processor.rb +38 -29
  64. data/lib/wukong/runner.rb +144 -0
  65. data/lib/wukong/server.rb +119 -0
  66. data/lib/wukong/spec_helpers.rb +1 -0
  67. data/lib/wukong/spec_helpers/integration_driver.rb +22 -9
  68. data/lib/wukong/spec_helpers/integration_driver_matchers.rb +26 -4
  69. data/lib/wukong/spec_helpers/processor_helpers.rb +4 -10
  70. data/lib/wukong/spec_helpers/shared_examples.rb +12 -13
  71. data/lib/wukong/version.rb +1 -1
  72. data/lib/wukong/widget/processors.rb +13 -0
  73. data/lib/wukong/widget/serializers.rb +55 -65
  74. data/lib/wukong/widgets.rb +0 -2
  75. data/spec/hanuman/graph_spec.rb +14 -0
  76. data/spec/spec_helper.rb +4 -30
  77. data/spec/support/{wukong_test_helpers.rb → example_test_helpers.rb} +29 -2
  78. data/spec/support/integration_helper.rb +38 -0
  79. data/spec/support/model_test_helpers.rb +115 -0
  80. data/spec/wu/geo/geolocated_spec.rb +247 -0
  81. data/spec/wu/model/reconcilable_spec.rb +152 -0
  82. data/spec/wukong/widget/processors_spec.rb +0 -1
  83. data/spec/wukong/widget/serializers_spec.rb +88 -62
  84. data/spec/wukong/wu_local_spec.rb +125 -0
  85. data/wukong.gemspec +3 -16
  86. metadata +72 -266
  87. data/examples/dataflow/apache_log_line.rb +0 -100
  88. data/examples/jabberwocky.txt +0 -36
  89. data/examples/munging/Gemfile +0 -8
  90. data/examples/munging/airline_flights/airline.rb +0 -57
  91. data/examples/munging/airline_flights/airport.rb +0 -211
  92. data/examples/munging/airline_flights/flight.rb +0 -156
  93. data/examples/munging/airline_flights/models.rb +0 -4
  94. data/examples/munging/airline_flights/parse.rb +0 -26
  95. data/examples/munging/airline_flights/route.rb +0 -35
  96. data/examples/munging/airline_flights/timezone_fixup.rb +0 -62
  97. data/examples/munging/airports/40_wbans.txt +0 -40
  98. data/examples/munging/airports/filter_weather_reports.rb +0 -37
  99. data/examples/munging/airports/join.pig +0 -31
  100. data/examples/munging/airports/to_tsv.rb +0 -33
  101. data/examples/munging/airports/usa_wbans.pig +0 -19
  102. data/examples/munging/airports/usa_wbans.txt +0 -2157
  103. data/examples/munging/airports/wbans.pig +0 -19
  104. data/examples/munging/airports/wbans.txt +0 -2310
  105. data/examples/munging/rake_helper.rb +0 -62
  106. data/examples/munging/weather/.gitignore +0 -1
  107. data/examples/munging/weather/Gemfile +0 -4
  108. data/examples/munging/weather/Rakefile +0 -28
  109. data/examples/munging/weather/extract_ish.rb +0 -13
  110. data/examples/munging/weather/models/weather.rb +0 -119
  111. data/examples/munging/weather/utils/noaa_downloader.rb +0 -46
  112. data/examples/munging/wikipedia/README.md +0 -34
  113. data/examples/munging/wikipedia/Rakefile +0 -193
  114. data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +0 -18
  115. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +0 -21
  116. data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +0 -27
  117. data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +0 -29
  118. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +0 -14
  119. data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +0 -25
  120. data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +0 -29
  121. data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +0 -32
  122. data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +0 -85
  123. data/examples/munging/wikipedia/pig_style_guide.md +0 -25
  124. data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +0 -19
  125. data/examples/munging/wikipedia/subuniverse/sub_articles.pig +0 -23
  126. data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +0 -24
  127. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +0 -22
  128. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +0 -22
  129. data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +0 -26
  130. data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +0 -29
  131. data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +0 -24
  132. data/examples/munging/wikipedia/utils/get_namespaces.rb +0 -86
  133. data/examples/munging/wikipedia/utils/namespaces.json +0 -1
  134. data/examples/string_reverser.rb +0 -26
  135. data/examples/twitter/locations.rb +0 -29
  136. data/examples/twitter/models.rb +0 -24
  137. data/examples/twitter/pt1-fiddle.pig +0 -8
  138. data/examples/twitter/pt2-simple_parse.pig +0 -31
  139. data/examples/twitter/pt2-simple_parse.rb +0 -18
  140. data/examples/twitter/pt3-join_on_zips.pig +0 -39
  141. data/examples/twitter/pt4-strong_links.rb +0 -20
  142. data/examples/twitter/pt5-lnglat_and_strong_links.pig +0 -16
  143. data/examples/twitter/states.tsv +0 -50
  144. data/examples/workflow/package_gem.rb +0 -55
  145. data/lib/wukong/widget/sink.rb +0 -16
  146. data/lib/wukong/widget/source.rb +0 -14
@@ -0,0 +1,125 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'wu-local' do
4
+
5
+ let(:input) { %w[1 2 3] }
6
+
7
+ context "without any arguments" do
8
+ subject { command('wu-local') }
9
+ it {should exit_with(:non_zero) }
10
+ it "displays help on STDERR" do
11
+ should have_stderr("usage: wu-local")
12
+ end
13
+ end
14
+
15
+ context "running outside any Ruby project" do
16
+ subject { command('wu-local count').in(examples_dir('empty')) < input }
17
+ it { should exit_with(0) }
18
+ it "runs the processor" do
19
+ should have_stdout("3")
20
+ end
21
+ context "when passed a BUNDLE_GEMFILE" do
22
+ context "that doesn't belong to a deploy pack" do
23
+ subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
24
+ it { should exit_with(0) }
25
+ it "runs the processor" do
26
+ should have_stdout("3")
27
+ end
28
+ end
29
+ context "that belongs to a deploy pack" do
30
+ subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
31
+ it { should exit_with(0) }
32
+ it "runs the processor" do
33
+ should have_stdout("3")
34
+ end
35
+ context "loading the deploy pack" do
36
+ subject { command('wu-local string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
37
+ it { should exit_with(0) }
38
+ it "runs the processor" do
39
+ should have_stdout("ih")
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ context "running within a Ruby project" do
47
+ context "at its root" do
48
+ subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
49
+ it { should exit_with(0) }
50
+ it "runs the processor" do
51
+ should have_stdout("3")
52
+ end
53
+ end
54
+ context "deep within it" do
55
+ subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
56
+ it { should exit_with(0) }
57
+ it "runs the processor" do
58
+ should have_stdout("3")
59
+ end
60
+ end
61
+ end
62
+
63
+ context "running within a deploy pack" do
64
+ context "at its root" do
65
+ subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
66
+ it { should exit_with(0) }
67
+ it "runs the processor" do
68
+ should have_stdout("3")
69
+ end
70
+ context "loading the deploy pack" do
71
+ subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
72
+ it { should exit_with(0) }
73
+ it "runs the processor" do
74
+ should have_stdout("ih")
75
+ end
76
+ end
77
+ end
78
+ context "deep within it" do
79
+ subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
80
+ it { should exit_with(0) }
81
+ it "runs the processor" do
82
+ should have_stdout("3")
83
+ end
84
+ context "loading the deploy pack" do
85
+ subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
86
+ it { should exit_with(0) }
87
+ it "runs the processor" do
88
+ should have_stdout("ih")
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ # context "running within a deploy pack" do
95
+ # context "at its root" do
96
+ # let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack')) }
97
+ # end
98
+ # context "deep within it" do
99
+ # let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack', 'a','b','c')) }
100
+ # end
101
+ # end
102
+
103
+ # context "in local mode" do
104
+ # context "on a map-only job" do
105
+ # let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
106
+ # it { should exit_with(0) }
107
+ # it { should have_stdout('Shall', 'I', 'compare', 'thee', 'to', 'a', "summer's", 'day') }
108
+ # end
109
+
110
+ # context "on a map-reduce job" do
111
+ # let(:subject) { command('wu-hadoop', example_script('word_count.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
112
+ # it { should exit_with(0) }
113
+ # it { should have_stdout(/complexion\s+1/, /Death\s+1/, /temperate\s+1/) }
114
+ # end
115
+ # end
116
+
117
+ # context "in Hadoop mode" do
118
+ # context "on a map-only job" do
119
+ # let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=hadoop", "--input=/data/in", "--output=/data/out", "--dry_run") }
120
+ # it { should exit_with(0) }
121
+ # it { should have_stdout(%r{jar.*hadoop.*streaming.*\.jar}, %r{-mapper.+tokenizer\.rb}, %r{-input.*/data/in}, %r{-output.*/data/out}) }
122
+ # end
123
+ # end
124
+
125
+ end
data/wukong.gemspec CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
5
5
  gem.name = 'wukong'
6
6
  gem.homepage = 'https://github.com/infochimps-labs/wukong'
7
7
  gem.licenses = ["Apache 2.0"]
8
- gem.email = 'coders@infochimps.org'
8
+ gem.email = 'coders@infochimps.com'
9
9
  gem.authors = ['Infochimps', 'Philip (flip) Kromer', 'Travis Dempsey']
10
10
  gem.version = Wukong::VERSION
11
11
 
@@ -25,25 +25,12 @@ EOF
25
25
  gem.test_files = gem.files.grep(/^spec/)
26
26
  gem.require_paths = ['lib']
27
27
 
28
- gem.add_dependency('configliere', '~> 0.4')
28
+ gem.add_dependency('configliere', '>= 0.4.18')
29
29
  gem.add_dependency('multi_json', '>= 1.3.6')
30
- gem.add_dependency('vayacondios-client', '>= 0.0.3')
30
+ gem.add_dependency('vayacondios-client', '>= 0.1.2')
31
31
  gem.add_dependency('gorillib', '>= 0.4.2')
32
32
  gem.add_dependency('forgery')
33
33
  gem.add_dependency('uuidtools')
34
34
  gem.add_dependency('eventmachine')
35
35
  gem.add_dependency('log4r')
36
-
37
- gem.add_development_dependency('bundler', '~> 1.1')
38
- gem.add_development_dependency('rake', '>= 0.9')
39
- gem.add_development_dependency('rspec', '>= 2.8')
40
- gem.add_development_dependency('guard', '>= 1.0')
41
- gem.add_development_dependency('guard-rspec', '>= 0.6')
42
- gem.add_development_dependency('simplecov', '>= 0.5')
43
- gem.add_development_dependency('pry')
44
- gem.add_development_dependency('yard')
45
- gem.add_development_dependency('redcarpet')
46
- gem.add_development_dependency('addressable')
47
- gem.add_development_dependency('htmlentities')
48
-
49
36
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.pre2
4
+ version: 3.0.0.pre3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -11,24 +11,24 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-12-01 00:00:00.000000000 Z
14
+ date: 2012-12-17 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: configliere
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
- - - ~>
21
+ - - ! '>='
22
22
  - !ruby/object:Gem::Version
23
- version: '0.4'
23
+ version: 0.4.18
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
27
27
  none: false
28
28
  requirements:
29
- - - ~>
29
+ - - ! '>='
30
30
  - !ruby/object:Gem::Version
31
- version: '0.4'
31
+ version: 0.4.18
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: multi_json
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -52,7 +52,7 @@ dependencies:
52
52
  requirements:
53
53
  - - ! '>='
54
54
  - !ruby/object:Gem::Version
55
- version: 0.0.3
55
+ version: 0.1.2
56
56
  type: :runtime
57
57
  prerelease: false
58
58
  version_requirements: !ruby/object:Gem::Requirement
@@ -60,7 +60,7 @@ dependencies:
60
60
  requirements:
61
61
  - - ! '>='
62
62
  - !ruby/object:Gem::Version
63
- version: 0.0.3
63
+ version: 0.1.2
64
64
  - !ruby/object:Gem::Dependency
65
65
  name: gorillib
66
66
  requirement: !ruby/object:Gem::Requirement
@@ -141,188 +141,12 @@ dependencies:
141
141
  - - ! '>='
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
- - !ruby/object:Gem::Dependency
145
- name: bundler
146
- requirement: !ruby/object:Gem::Requirement
147
- none: false
148
- requirements:
149
- - - ~>
150
- - !ruby/object:Gem::Version
151
- version: '1.1'
152
- type: :development
153
- prerelease: false
154
- version_requirements: !ruby/object:Gem::Requirement
155
- none: false
156
- requirements:
157
- - - ~>
158
- - !ruby/object:Gem::Version
159
- version: '1.1'
160
- - !ruby/object:Gem::Dependency
161
- name: rake
162
- requirement: !ruby/object:Gem::Requirement
163
- none: false
164
- requirements:
165
- - - ! '>='
166
- - !ruby/object:Gem::Version
167
- version: '0.9'
168
- type: :development
169
- prerelease: false
170
- version_requirements: !ruby/object:Gem::Requirement
171
- none: false
172
- requirements:
173
- - - ! '>='
174
- - !ruby/object:Gem::Version
175
- version: '0.9'
176
- - !ruby/object:Gem::Dependency
177
- name: rspec
178
- requirement: !ruby/object:Gem::Requirement
179
- none: false
180
- requirements:
181
- - - ! '>='
182
- - !ruby/object:Gem::Version
183
- version: '2.8'
184
- type: :development
185
- prerelease: false
186
- version_requirements: !ruby/object:Gem::Requirement
187
- none: false
188
- requirements:
189
- - - ! '>='
190
- - !ruby/object:Gem::Version
191
- version: '2.8'
192
- - !ruby/object:Gem::Dependency
193
- name: guard
194
- requirement: !ruby/object:Gem::Requirement
195
- none: false
196
- requirements:
197
- - - ! '>='
198
- - !ruby/object:Gem::Version
199
- version: '1.0'
200
- type: :development
201
- prerelease: false
202
- version_requirements: !ruby/object:Gem::Requirement
203
- none: false
204
- requirements:
205
- - - ! '>='
206
- - !ruby/object:Gem::Version
207
- version: '1.0'
208
- - !ruby/object:Gem::Dependency
209
- name: guard-rspec
210
- requirement: !ruby/object:Gem::Requirement
211
- none: false
212
- requirements:
213
- - - ! '>='
214
- - !ruby/object:Gem::Version
215
- version: '0.6'
216
- type: :development
217
- prerelease: false
218
- version_requirements: !ruby/object:Gem::Requirement
219
- none: false
220
- requirements:
221
- - - ! '>='
222
- - !ruby/object:Gem::Version
223
- version: '0.6'
224
- - !ruby/object:Gem::Dependency
225
- name: simplecov
226
- requirement: !ruby/object:Gem::Requirement
227
- none: false
228
- requirements:
229
- - - ! '>='
230
- - !ruby/object:Gem::Version
231
- version: '0.5'
232
- type: :development
233
- prerelease: false
234
- version_requirements: !ruby/object:Gem::Requirement
235
- none: false
236
- requirements:
237
- - - ! '>='
238
- - !ruby/object:Gem::Version
239
- version: '0.5'
240
- - !ruby/object:Gem::Dependency
241
- name: pry
242
- requirement: !ruby/object:Gem::Requirement
243
- none: false
244
- requirements:
245
- - - ! '>='
246
- - !ruby/object:Gem::Version
247
- version: '0'
248
- type: :development
249
- prerelease: false
250
- version_requirements: !ruby/object:Gem::Requirement
251
- none: false
252
- requirements:
253
- - - ! '>='
254
- - !ruby/object:Gem::Version
255
- version: '0'
256
- - !ruby/object:Gem::Dependency
257
- name: yard
258
- requirement: !ruby/object:Gem::Requirement
259
- none: false
260
- requirements:
261
- - - ! '>='
262
- - !ruby/object:Gem::Version
263
- version: '0'
264
- type: :development
265
- prerelease: false
266
- version_requirements: !ruby/object:Gem::Requirement
267
- none: false
268
- requirements:
269
- - - ! '>='
270
- - !ruby/object:Gem::Version
271
- version: '0'
272
- - !ruby/object:Gem::Dependency
273
- name: redcarpet
274
- requirement: !ruby/object:Gem::Requirement
275
- none: false
276
- requirements:
277
- - - ! '>='
278
- - !ruby/object:Gem::Version
279
- version: '0'
280
- type: :development
281
- prerelease: false
282
- version_requirements: !ruby/object:Gem::Requirement
283
- none: false
284
- requirements:
285
- - - ! '>='
286
- - !ruby/object:Gem::Version
287
- version: '0'
288
- - !ruby/object:Gem::Dependency
289
- name: addressable
290
- requirement: !ruby/object:Gem::Requirement
291
- none: false
292
- requirements:
293
- - - ! '>='
294
- - !ruby/object:Gem::Version
295
- version: '0'
296
- type: :development
297
- prerelease: false
298
- version_requirements: !ruby/object:Gem::Requirement
299
- none: false
300
- requirements:
301
- - - ! '>='
302
- - !ruby/object:Gem::Version
303
- version: '0'
304
- - !ruby/object:Gem::Dependency
305
- name: htmlentities
306
- requirement: !ruby/object:Gem::Requirement
307
- none: false
308
- requirements:
309
- - - ! '>='
310
- - !ruby/object:Gem::Version
311
- version: '0'
312
- type: :development
313
- prerelease: false
314
- version_requirements: !ruby/object:Gem::Requirement
315
- none: false
316
- requirements:
317
- - - ! '>='
318
- - !ruby/object:Gem::Version
319
- version: '0'
320
144
  description: ! " Treat your dataset like a:\n\n * stream of lines when it's
321
145
  efficient to process by lines\n * stream of field arrays when it's efficient
322
146
  to deal directly with fields\n * stream of lightweight objects when it's efficient
323
147
  to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query
324
148
  language, and the cat on your command line.\n"
325
- email: coders@infochimps.org
149
+ email: coders@infochimps.com
326
150
  executables:
327
151
  - wu-local
328
152
  extensions: []
@@ -361,13 +185,26 @@ files:
361
185
  - bin/wu-sum
362
186
  - examples/Gemfile
363
187
  - examples/README.md
364
- - examples/dataflow/apache_log_line.rb
365
- - examples/dataflow/fibonacci_series.rb
366
- - examples/dataflow/parse_apache_logs.rb
367
- - examples/dataflow/pig_latinizer.rb
188
+ - examples/basic/string_reverser.rb
189
+ - examples/basic/tiny_count.rb
190
+ - examples/basic/word_count/accumulator.rb
191
+ - examples/basic/word_count/tokenizer.rb
192
+ - examples/basic/word_count/word_count.rb
368
193
  - examples/dataflow/scraper_macro_flow.rb
369
- - examples/dataflow/simple.rb
370
- - examples/dataflow/telegram.rb
194
+ - examples/deploy_pack/Gemfile
195
+ - examples/deploy_pack/README.md
196
+ - examples/deploy_pack/a/b/c/.gitkeep
197
+ - examples/deploy_pack/app/processors/string_reverser.rb
198
+ - examples/deploy_pack/config/environment.rb
199
+ - examples/dsl/dataflow/fibonacci_series.rb
200
+ - examples/dsl/dataflow/scraper_macro_flow.rb
201
+ - examples/dsl/dataflow/simple.rb
202
+ - examples/dsl/dataflow/telegram.rb
203
+ - examples/dsl/workflow/cherry_pie.dot
204
+ - examples/dsl/workflow/cherry_pie.md
205
+ - examples/dsl/workflow/cherry_pie.png
206
+ - examples/dsl/workflow/cherry_pie.rb
207
+ - examples/empty/.gitkeep
371
208
  - examples/examples_helper.rb
372
209
  - examples/geo.rb
373
210
  - examples/geo/geo_grids.numbers
@@ -375,48 +212,23 @@ files:
375
212
  - examples/geo/quadtile.rb
376
213
  - examples/geo/spec/geolocated_spec.rb
377
214
  - examples/geo/tile_fetcher.rb
378
- - examples/graph/minimum_spanning_tree.rb
379
- - examples/graph/union_find.rb
380
- - examples/jabberwocky.txt
381
- - examples/models/wikipedia.rb
382
- - examples/munging/Gemfile
383
- - examples/munging/airline_flights/airline.rb
215
+ - examples/graph/implied_geolocation/README.md
216
+ - examples/graph/minimum_spanning_tree/airfares_graphviz.rb
384
217
  - examples/munging/airline_flights/airline_flights.rake
385
218
  - examples/munging/airline_flights/airplane.rb
386
- - examples/munging/airline_flights/airport.rb
387
219
  - examples/munging/airline_flights/airport_id_unification.rb
388
220
  - examples/munging/airline_flights/airport_ok_chars.rb
389
- - examples/munging/airline_flights/flight.rb
390
- - examples/munging/airline_flights/models.rb
391
- - examples/munging/airline_flights/parse.rb
221
+ - examples/munging/airline_flights/indexable.rb
222
+ - examples/munging/airline_flights/indexable_spec.rb
392
223
  - examples/munging/airline_flights/reconcile_airports.rb
393
- - examples/munging/airline_flights/route.rb
394
224
  - examples/munging/airline_flights/tasks.rake
395
- - examples/munging/airline_flights/timezone_fixup.rb
396
225
  - examples/munging/airline_flights/topcities.rb
397
- - examples/munging/airports/40_wbans.txt
398
- - examples/munging/airports/filter_weather_reports.rb
399
- - examples/munging/airports/join.pig
400
- - examples/munging/airports/to_tsv.rb
401
- - examples/munging/airports/usa_wbans.pig
402
- - examples/munging/airports/usa_wbans.txt
403
- - examples/munging/airports/wbans.pig
404
- - examples/munging/airports/wbans.txt
405
226
  - examples/munging/geo/geo_json.rb
406
227
  - examples/munging/geo/geo_models.rb
407
228
  - examples/munging/geo/geonames_models.rb
408
229
  - examples/munging/geo/iso_codes.rb
409
230
  - examples/munging/geo/reconcile_countries.rb
410
231
  - examples/munging/geo/tasks.rake
411
- - examples/munging/rake_helper.rb
412
- - examples/munging/weather/.gitignore
413
- - examples/munging/weather/Gemfile
414
- - examples/munging/weather/Rakefile
415
- - examples/munging/weather/extract_ish.rb
416
- - examples/munging/weather/models/weather.rb
417
- - examples/munging/weather/utils/noaa_downloader.rb
418
- - examples/munging/wikipedia/README.md
419
- - examples/munging/wikipedia/Rakefile
420
232
  - examples/munging/wikipedia/articles/extract_articles-parsed.rb
421
233
  - examples/munging/wikipedia/articles/extract_articles-templated.rb
422
234
  - examples/munging/wikipedia/articles/textualize_articles.rb
@@ -426,30 +238,13 @@ files:
426
238
  - examples/munging/wikipedia/articles/wp2txt_utils.rb
427
239
  - examples/munging/wikipedia/dbpedia/dbpedia_common.rb
428
240
  - examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb
241
+ - examples/munging/wikipedia/dbpedia/extract_links-cruft.rb
429
242
  - examples/munging/wikipedia/dbpedia/extract_links.rb
430
243
  - examples/munging/wikipedia/dbpedia/sameas_extractor.rb
431
- - examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig
432
- - examples/munging/wikipedia/page_metadata/extract_page_metadata.rb
433
- - examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old
434
- - examples/munging/wikipedia/pagelinks/augment_pagelinks.pig
435
- - examples/munging/wikipedia/pagelinks/extract_pagelinks.rb
436
- - examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old
437
- - examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig
438
- - examples/munging/wikipedia/pageviews/augment_pageviews.pig
439
- - examples/munging/wikipedia/pageviews/extract_pageviews.rb
440
- - examples/munging/wikipedia/pig_style_guide.md
441
- - examples/munging/wikipedia/redirects/redirects_page_metadata.pig
442
- - examples/munging/wikipedia/subuniverse/sub_articles.pig
443
- - examples/munging/wikipedia/subuniverse/sub_page_metadata.pig
444
- - examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig
445
- - examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig
446
- - examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig
447
- - examples/munging/wikipedia/subuniverse/sub_pageviews.pig
448
- - examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig
449
- - examples/munging/wikipedia/utils/get_namespaces.rb
450
- - examples/munging/wikipedia/utils/munging_utils.rb
451
- - examples/munging/wikipedia/utils/namespaces.json
452
244
  - examples/rake_helper.rb
245
+ - examples/ruby_project/Gemfile
246
+ - examples/ruby_project/README.md
247
+ - examples/ruby_project/a/b/c/.gitkeep
453
248
  - examples/server_logs/geo_ip_mapping/munge_geolite.rb
454
249
  - examples/server_logs/logline.rb
455
250
  - examples/server_logs/models.rb
@@ -459,31 +254,22 @@ files:
459
254
  - examples/server_logs/server_logs-02-histograms-mapper.rb
460
255
  - examples/server_logs/server_logs-03-breadcrumbs-full.rb
461
256
  - examples/server_logs/server_logs-04-page_page_edges-full.rb
462
- - examples/string_reverser.rb
463
- - examples/text/latinize_text.rb
464
- - examples/text/pig_latin.rb
257
+ - examples/serverlogs/geo_ip_mapping/munge_geolite.rb
258
+ - examples/serverlogs/models/logline.rb
259
+ - examples/serverlogs/parser/apache_parser_widget.rb
260
+ - examples/serverlogs/visit_paths/common.rb
261
+ - examples/serverlogs/visit_paths/page_counts.pig
262
+ - examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb
263
+ - examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb
264
+ - examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
265
+ - examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
266
+ - examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
267
+ - examples/text/pig_latin/pig_latinizer.rb
268
+ - examples/text/pig_latin/pig_latinizer_widget.rb
465
269
  - examples/text/regional_flavor/README.md
466
270
  - examples/text/regional_flavor/article_wordbags.pig
467
271
  - examples/text/regional_flavor/j01-article_wordbags.rb
468
272
  - examples/text/regional_flavor/simple_pig_script.pig
469
- - examples/tiny_count.rb
470
- - examples/twitter/locations.rb
471
- - examples/twitter/models.rb
472
- - examples/twitter/pt1-fiddle.pig
473
- - examples/twitter/pt2-simple_parse.pig
474
- - examples/twitter/pt2-simple_parse.rb
475
- - examples/twitter/pt3-join_on_zips.pig
476
- - examples/twitter/pt4-strong_links.rb
477
- - examples/twitter/pt5-lnglat_and_strong_links.pig
478
- - examples/twitter/states.tsv
479
- - examples/word_count/accumulator.rb
480
- - examples/word_count/tokenizer.rb
481
- - examples/word_count/word_count.rb
482
- - examples/workflow/cherry_pie.dot
483
- - examples/workflow/cherry_pie.md
484
- - examples/workflow/cherry_pie.png
485
- - examples/workflow/cherry_pie.rb
486
- - examples/workflow/package_gem.rb
487
273
  - lib/hanuman.rb
488
274
  - lib/hanuman/graph.rb
489
275
  - lib/hanuman/graphvizzer.rb
@@ -492,12 +278,22 @@ files:
492
278
  - lib/hanuman/link.rb
493
279
  - lib/hanuman/registry.rb
494
280
  - lib/hanuman/stage.rb
281
+ - lib/wu/geo.rb
282
+ - lib/wu/geo/geo_grids.numbers
283
+ - lib/wu/geo/geolocated.rb
284
+ - lib/wu/geo/quadtile.rb
285
+ - lib/wu/graph/union_find.rb
286
+ - lib/wu/model/reconcilable.rb
287
+ - lib/wu/munging.rb
288
+ - lib/wu/social/models/twitter.rb
289
+ - lib/wu/wikipedia/models.rb
495
290
  - lib/wukong.rb
496
291
  - lib/wukong/boot.rb
497
292
  - lib/wukong/configuration.rb
498
293
  - lib/wukong/dataflow.rb
499
294
  - lib/wukong/driver.rb
500
295
  - lib/wukong/emitter.rb
296
+ - lib/wukong/logger.rb
501
297
  - lib/wukong/model/faker.rb
502
298
  - lib/wukong/model/flatpack_parser/flat.rb
503
299
  - lib/wukong/model/flatpack_parser/flatpack.rb
@@ -505,6 +301,8 @@ files:
505
301
  - lib/wukong/model/flatpack_parser/parser.rb
506
302
  - lib/wukong/model/flatpack_parser/tokens.rb
507
303
  - lib/wukong/processor.rb
304
+ - lib/wukong/runner.rb
305
+ - lib/wukong/server.rb
508
306
  - lib/wukong/spec_helpers.rb
509
307
  - lib/wukong/spec_helpers/integration_driver.rb
510
308
  - lib/wukong/spec_helpers/integration_driver_matchers.rb
@@ -525,8 +323,6 @@ files:
525
323
  - lib/wukong/widget/reducers/moments.rb
526
324
  - lib/wukong/widget/reducers/sort.rb
527
325
  - lib/wukong/widget/serializers.rb
528
- - lib/wukong/widget/sink.rb
529
- - lib/wukong/widget/source.rb
530
326
  - lib/wukong/widget/utils.rb
531
327
  - lib/wukong/widgets.rb
532
328
  - spec/examples/dataflow/fibonacci_series_spec.rb
@@ -545,11 +341,15 @@ files:
545
341
  - spec/hanuman/stage_spec.rb
546
342
  - spec/spec.opts
547
343
  - spec/spec_helper.rb
344
+ - spec/support/example_test_helpers.rb
548
345
  - spec/support/hanuman_test_helpers.rb
346
+ - spec/support/integration_helper.rb
347
+ - spec/support/model_test_helpers.rb
549
348
  - spec/support/shared_context_for_reducers.rb
550
349
  - spec/support/shared_examples_for_builders.rb
551
350
  - spec/support/shared_examples_for_shortcuts.rb
552
- - spec/support/wukong_test_helpers.rb
351
+ - spec/wu/geo/geolocated_spec.rb
352
+ - spec/wu/model/reconcilable_spec.rb
553
353
  - spec/wukong/dataflow_spec.rb
554
354
  - spec/wukong/local_runner_spec.rb
555
355
  - spec/wukong/model/faker_spec.rb
@@ -564,6 +364,7 @@ files:
564
364
  - spec/wukong/widget/serializers_spec.rb
565
365
  - spec/wukong/widget/sink_spec.rb
566
366
  - spec/wukong/widget/source_spec.rb
367
+ - spec/wukong/wu_local_spec.rb
567
368
  - spec/wukong/wukong_spec.rb
568
369
  - wukong.gemspec
569
370
  homepage: https://github.com/infochimps-labs/wukong
@@ -609,11 +410,15 @@ test_files:
609
410
  - spec/hanuman/stage_spec.rb
610
411
  - spec/spec.opts
611
412
  - spec/spec_helper.rb
413
+ - spec/support/example_test_helpers.rb
612
414
  - spec/support/hanuman_test_helpers.rb
415
+ - spec/support/integration_helper.rb
416
+ - spec/support/model_test_helpers.rb
613
417
  - spec/support/shared_context_for_reducers.rb
614
418
  - spec/support/shared_examples_for_builders.rb
615
419
  - spec/support/shared_examples_for_shortcuts.rb
616
- - spec/support/wukong_test_helpers.rb
420
+ - spec/wu/geo/geolocated_spec.rb
421
+ - spec/wu/model/reconcilable_spec.rb
617
422
  - spec/wukong/dataflow_spec.rb
618
423
  - spec/wukong/local_runner_spec.rb
619
424
  - spec/wukong/model/faker_spec.rb
@@ -628,5 +433,6 @@ test_files:
628
433
  - spec/wukong/widget/serializers_spec.rb
629
434
  - spec/wukong/widget/sink_spec.rb
630
435
  - spec/wukong/widget/source_spec.rb
436
+ - spec/wukong/wu_local_spec.rb
631
437
  - spec/wukong/wukong_spec.rb
632
438
  has_rdoc: