wukong 3.0.0.pre2 → 3.0.0.pre3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +13 -0
- data/README.md +182 -6
- data/bin/wu-local +13 -5
- data/bin/wu-server +1 -1
- data/examples/Gemfile +2 -1
- data/examples/basic/string_reverser.rb +23 -0
- data/examples/{tiny_count.rb → basic/tiny_count.rb} +0 -0
- data/examples/{word_count → basic/word_count}/accumulator.rb +0 -0
- data/examples/{word_count → basic/word_count}/tokenizer.rb +0 -0
- data/examples/{word_count → basic/word_count}/word_count.rb +0 -0
- data/examples/deploy_pack/Gemfile +7 -0
- data/examples/deploy_pack/README.md +6 -0
- data/examples/{text/latinize_text.rb → deploy_pack/a/b/c/.gitkeep} +0 -0
- data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
- data/examples/deploy_pack/config/environment.rb +1 -0
- data/examples/{dataflow → dsl/dataflow}/fibonacci_series.rb +0 -0
- data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
- data/examples/{dataflow → dsl/dataflow}/simple.rb +0 -0
- data/examples/{dataflow → dsl/dataflow}/telegram.rb +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.dot +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.md +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.png +0 -0
- data/examples/{workflow → dsl/workflow}/cherry_pie.rb +0 -0
- data/examples/empty/.gitkeep +0 -0
- data/examples/graph/implied_geolocation/README.md +63 -0
- data/examples/graph/{minimum_spanning_tree.rb → minimum_spanning_tree/airfares_graphviz.rb} +0 -0
- data/examples/munging/airline_flights/indexable.rb +75 -0
- data/examples/munging/airline_flights/indexable_spec.rb +90 -0
- data/examples/munging/geo/geonames_models.rb +29 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +1 -0
- data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +213 -146
- data/examples/rake_helper.rb +12 -0
- data/examples/ruby_project/Gemfile +7 -0
- data/examples/ruby_project/README.md +6 -0
- data/examples/ruby_project/a/b/c/.gitkeep +0 -0
- data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/serverlogs/models/logline.rb +102 -0
- data/examples/{dataflow/parse_apache_logs.rb → serverlogs/parser/apache_parser_widget.rb} +0 -0
- data/examples/serverlogs/visit_paths/common.rb +4 -0
- data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
- data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
- data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
- data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
- data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
- data/examples/text/{pig_latin.rb → pig_latin/pig_latinizer.rb} +0 -0
- data/examples/{dataflow/pig_latinizer.rb → text/pig_latin/pig_latinizer_widget.rb} +0 -0
- data/lib/hanuman/graph.rb +6 -1
- data/lib/wu/geo.rb +4 -0
- data/lib/wu/geo/geo_grids.numbers +0 -0
- data/lib/wu/geo/geolocated.rb +331 -0
- data/lib/wu/geo/quadtile.rb +69 -0
- data/{examples → lib/wu}/graph/union_find.rb +0 -0
- data/lib/wu/model/reconcilable.rb +63 -0
- data/{examples/munging/wikipedia/utils/munging_utils.rb → lib/wu/munging.rb} +7 -4
- data/lib/wu/social/models/twitter.rb +31 -0
- data/{examples/models/wikipedia.rb → lib/wu/wikipedia/models.rb} +0 -0
- data/lib/wukong.rb +9 -4
- data/lib/wukong/boot.rb +10 -1
- data/lib/wukong/driver.rb +65 -71
- data/lib/wukong/logger.rb +93 -0
- data/lib/wukong/processor.rb +38 -29
- data/lib/wukong/runner.rb +144 -0
- data/lib/wukong/server.rb +119 -0
- data/lib/wukong/spec_helpers.rb +1 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +22 -9
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +26 -4
- data/lib/wukong/spec_helpers/processor_helpers.rb +4 -10
- data/lib/wukong/spec_helpers/shared_examples.rb +12 -13
- data/lib/wukong/version.rb +1 -1
- data/lib/wukong/widget/processors.rb +13 -0
- data/lib/wukong/widget/serializers.rb +55 -65
- data/lib/wukong/widgets.rb +0 -2
- data/spec/hanuman/graph_spec.rb +14 -0
- data/spec/spec_helper.rb +4 -30
- data/spec/support/{wukong_test_helpers.rb → example_test_helpers.rb} +29 -2
- data/spec/support/integration_helper.rb +38 -0
- data/spec/support/model_test_helpers.rb +115 -0
- data/spec/wu/geo/geolocated_spec.rb +247 -0
- data/spec/wu/model/reconcilable_spec.rb +152 -0
- data/spec/wukong/widget/processors_spec.rb +0 -1
- data/spec/wukong/widget/serializers_spec.rb +88 -62
- data/spec/wukong/wu_local_spec.rb +125 -0
- data/wukong.gemspec +3 -16
- metadata +72 -266
- data/examples/dataflow/apache_log_line.rb +0 -100
- data/examples/jabberwocky.txt +0 -36
- data/examples/munging/Gemfile +0 -8
- data/examples/munging/airline_flights/airline.rb +0 -57
- data/examples/munging/airline_flights/airport.rb +0 -211
- data/examples/munging/airline_flights/flight.rb +0 -156
- data/examples/munging/airline_flights/models.rb +0 -4
- data/examples/munging/airline_flights/parse.rb +0 -26
- data/examples/munging/airline_flights/route.rb +0 -35
- data/examples/munging/airline_flights/timezone_fixup.rb +0 -62
- data/examples/munging/airports/40_wbans.txt +0 -40
- data/examples/munging/airports/filter_weather_reports.rb +0 -37
- data/examples/munging/airports/join.pig +0 -31
- data/examples/munging/airports/to_tsv.rb +0 -33
- data/examples/munging/airports/usa_wbans.pig +0 -19
- data/examples/munging/airports/usa_wbans.txt +0 -2157
- data/examples/munging/airports/wbans.pig +0 -19
- data/examples/munging/airports/wbans.txt +0 -2310
- data/examples/munging/rake_helper.rb +0 -62
- data/examples/munging/weather/.gitignore +0 -1
- data/examples/munging/weather/Gemfile +0 -4
- data/examples/munging/weather/Rakefile +0 -28
- data/examples/munging/weather/extract_ish.rb +0 -13
- data/examples/munging/weather/models/weather.rb +0 -119
- data/examples/munging/weather/utils/noaa_downloader.rb +0 -46
- data/examples/munging/wikipedia/README.md +0 -34
- data/examples/munging/wikipedia/Rakefile +0 -193
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +0 -18
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +0 -21
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +0 -27
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +0 -29
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +0 -14
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +0 -25
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +0 -29
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +0 -32
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +0 -85
- data/examples/munging/wikipedia/pig_style_guide.md +0 -25
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +0 -19
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +0 -23
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +0 -24
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +0 -22
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +0 -22
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +0 -26
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +0 -29
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +0 -24
- data/examples/munging/wikipedia/utils/get_namespaces.rb +0 -86
- data/examples/munging/wikipedia/utils/namespaces.json +0 -1
- data/examples/string_reverser.rb +0 -26
- data/examples/twitter/locations.rb +0 -29
- data/examples/twitter/models.rb +0 -24
- data/examples/twitter/pt1-fiddle.pig +0 -8
- data/examples/twitter/pt2-simple_parse.pig +0 -31
- data/examples/twitter/pt2-simple_parse.rb +0 -18
- data/examples/twitter/pt3-join_on_zips.pig +0 -39
- data/examples/twitter/pt4-strong_links.rb +0 -20
- data/examples/twitter/pt5-lnglat_and_strong_links.pig +0 -16
- data/examples/twitter/states.tsv +0 -50
- data/examples/workflow/package_gem.rb +0 -55
- data/lib/wukong/widget/sink.rb +0 -16
- data/lib/wukong/widget/source.rb +0 -14
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'wu-local' do
|
4
|
+
|
5
|
+
let(:input) { %w[1 2 3] }
|
6
|
+
|
7
|
+
context "without any arguments" do
|
8
|
+
subject { command('wu-local') }
|
9
|
+
it {should exit_with(:non_zero) }
|
10
|
+
it "displays help on STDERR" do
|
11
|
+
should have_stderr("usage: wu-local")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
context "running outside any Ruby project" do
|
16
|
+
subject { command('wu-local count').in(examples_dir('empty')) < input }
|
17
|
+
it { should exit_with(0) }
|
18
|
+
it "runs the processor" do
|
19
|
+
should have_stdout("3")
|
20
|
+
end
|
21
|
+
context "when passed a BUNDLE_GEMFILE" do
|
22
|
+
context "that doesn't belong to a deploy pack" do
|
23
|
+
subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('ruby_project', 'Gemfile').to_s)) < input }
|
24
|
+
it { should exit_with(0) }
|
25
|
+
it "runs the processor" do
|
26
|
+
should have_stdout("3")
|
27
|
+
end
|
28
|
+
end
|
29
|
+
context "that belongs to a deploy pack" do
|
30
|
+
subject { command('wu-local count').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < input }
|
31
|
+
it { should exit_with(0) }
|
32
|
+
it "runs the processor" do
|
33
|
+
should have_stdout("3")
|
34
|
+
end
|
35
|
+
context "loading the deploy pack" do
|
36
|
+
subject { command('wu-local string_reverser').in(examples_dir('empty')).using(integration_env.merge("BUNDLE_GEMFILE" => examples_dir('deploy_pack', 'Gemfile').to_s)) < 'hi' }
|
37
|
+
it { should exit_with(0) }
|
38
|
+
it "runs the processor" do
|
39
|
+
should have_stdout("ih")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
context "running within a Ruby project" do
|
47
|
+
context "at its root" do
|
48
|
+
subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
|
49
|
+
it { should exit_with(0) }
|
50
|
+
it "runs the processor" do
|
51
|
+
should have_stdout("3")
|
52
|
+
end
|
53
|
+
end
|
54
|
+
context "deep within it" do
|
55
|
+
subject { command('wu-local count').in(examples_dir('ruby_project')) < input }
|
56
|
+
it { should exit_with(0) }
|
57
|
+
it "runs the processor" do
|
58
|
+
should have_stdout("3")
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context "running within a deploy pack" do
|
64
|
+
context "at its root" do
|
65
|
+
subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
|
66
|
+
it { should exit_with(0) }
|
67
|
+
it "runs the processor" do
|
68
|
+
should have_stdout("3")
|
69
|
+
end
|
70
|
+
context "loading the deploy pack" do
|
71
|
+
subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
|
72
|
+
it { should exit_with(0) }
|
73
|
+
it "runs the processor" do
|
74
|
+
should have_stdout("ih")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
context "deep within it" do
|
79
|
+
subject { command('wu-local count').in(examples_dir('deploy_pack')) < input }
|
80
|
+
it { should exit_with(0) }
|
81
|
+
it "runs the processor" do
|
82
|
+
should have_stdout("3")
|
83
|
+
end
|
84
|
+
context "loading the deploy pack" do
|
85
|
+
subject { command('wu-local string_reverser').in(examples_dir('deploy_pack')) < 'hi' }
|
86
|
+
it { should exit_with(0) }
|
87
|
+
it "runs the processor" do
|
88
|
+
should have_stdout("ih")
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# context "running within a deploy pack" do
|
95
|
+
# context "at its root" do
|
96
|
+
# let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack')) }
|
97
|
+
# end
|
98
|
+
# context "deep within it" do
|
99
|
+
# let(:subject) { command('wu-local', :cwd => examples_dir('deploy_pack', 'a','b','c')) }
|
100
|
+
# end
|
101
|
+
# end
|
102
|
+
|
103
|
+
# context "in local mode" do
|
104
|
+
# context "on a map-only job" do
|
105
|
+
# let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
|
106
|
+
# it { should exit_with(0) }
|
107
|
+
# it { should have_stdout('Shall', 'I', 'compare', 'thee', 'to', 'a', "summer's", 'day') }
|
108
|
+
# end
|
109
|
+
|
110
|
+
# context "on a map-reduce job" do
|
111
|
+
# let(:subject) { command('wu-hadoop', example_script('word_count.rb'), "--mode=local", "--input=#{example_script('sonnet_18.txt')}") }
|
112
|
+
# it { should exit_with(0) }
|
113
|
+
# it { should have_stdout(/complexion\s+1/, /Death\s+1/, /temperate\s+1/) }
|
114
|
+
# end
|
115
|
+
# end
|
116
|
+
|
117
|
+
# context "in Hadoop mode" do
|
118
|
+
# context "on a map-only job" do
|
119
|
+
# let(:subject) { command('wu-hadoop', example_script('tokenizer.rb'), "--mode=hadoop", "--input=/data/in", "--output=/data/out", "--dry_run") }
|
120
|
+
# it { should exit_with(0) }
|
121
|
+
# it { should have_stdout(%r{jar.*hadoop.*streaming.*\.jar}, %r{-mapper.+tokenizer\.rb}, %r{-input.*/data/in}, %r{-output.*/data/out}) }
|
122
|
+
# end
|
123
|
+
# end
|
124
|
+
|
125
|
+
end
|
data/wukong.gemspec
CHANGED
@@ -5,7 +5,7 @@ Gem::Specification.new do |gem|
|
|
5
5
|
gem.name = 'wukong'
|
6
6
|
gem.homepage = 'https://github.com/infochimps-labs/wukong'
|
7
7
|
gem.licenses = ["Apache 2.0"]
|
8
|
-
gem.email = 'coders@infochimps.
|
8
|
+
gem.email = 'coders@infochimps.com'
|
9
9
|
gem.authors = ['Infochimps', 'Philip (flip) Kromer', 'Travis Dempsey']
|
10
10
|
gem.version = Wukong::VERSION
|
11
11
|
|
@@ -25,25 +25,12 @@ EOF
|
|
25
25
|
gem.test_files = gem.files.grep(/^spec/)
|
26
26
|
gem.require_paths = ['lib']
|
27
27
|
|
28
|
-
gem.add_dependency('configliere', '
|
28
|
+
gem.add_dependency('configliere', '>= 0.4.18')
|
29
29
|
gem.add_dependency('multi_json', '>= 1.3.6')
|
30
|
-
gem.add_dependency('vayacondios-client', '>= 0.
|
30
|
+
gem.add_dependency('vayacondios-client', '>= 0.1.2')
|
31
31
|
gem.add_dependency('gorillib', '>= 0.4.2')
|
32
32
|
gem.add_dependency('forgery')
|
33
33
|
gem.add_dependency('uuidtools')
|
34
34
|
gem.add_dependency('eventmachine')
|
35
35
|
gem.add_dependency('log4r')
|
36
|
-
|
37
|
-
gem.add_development_dependency('bundler', '~> 1.1')
|
38
|
-
gem.add_development_dependency('rake', '>= 0.9')
|
39
|
-
gem.add_development_dependency('rspec', '>= 2.8')
|
40
|
-
gem.add_development_dependency('guard', '>= 1.0')
|
41
|
-
gem.add_development_dependency('guard-rspec', '>= 0.6')
|
42
|
-
gem.add_development_dependency('simplecov', '>= 0.5')
|
43
|
-
gem.add_development_dependency('pry')
|
44
|
-
gem.add_development_dependency('yard')
|
45
|
-
gem.add_development_dependency('redcarpet')
|
46
|
-
gem.add_development_dependency('addressable')
|
47
|
-
gem.add_development_dependency('htmlentities')
|
48
|
-
|
49
36
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0.pre2
|
4
|
+
version: 3.0.0.pre3
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,24 +11,24 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-12-
|
14
|
+
date: 2012-12-17 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: configliere
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
|
-
- -
|
21
|
+
- - ! '>='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 0.4.18
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
27
27
|
none: false
|
28
28
|
requirements:
|
29
|
-
- -
|
29
|
+
- - ! '>='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: 0.4.18
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: multi_json
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: 0.
|
55
|
+
version: 0.1.2
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
58
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ! '>='
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 0.
|
63
|
+
version: 0.1.2
|
64
64
|
- !ruby/object:Gem::Dependency
|
65
65
|
name: gorillib
|
66
66
|
requirement: !ruby/object:Gem::Requirement
|
@@ -141,188 +141,12 @@ dependencies:
|
|
141
141
|
- - ! '>='
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
|
-
- !ruby/object:Gem::Dependency
|
145
|
-
name: bundler
|
146
|
-
requirement: !ruby/object:Gem::Requirement
|
147
|
-
none: false
|
148
|
-
requirements:
|
149
|
-
- - ~>
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '1.1'
|
152
|
-
type: :development
|
153
|
-
prerelease: false
|
154
|
-
version_requirements: !ruby/object:Gem::Requirement
|
155
|
-
none: false
|
156
|
-
requirements:
|
157
|
-
- - ~>
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '1.1'
|
160
|
-
- !ruby/object:Gem::Dependency
|
161
|
-
name: rake
|
162
|
-
requirement: !ruby/object:Gem::Requirement
|
163
|
-
none: false
|
164
|
-
requirements:
|
165
|
-
- - ! '>='
|
166
|
-
- !ruby/object:Gem::Version
|
167
|
-
version: '0.9'
|
168
|
-
type: :development
|
169
|
-
prerelease: false
|
170
|
-
version_requirements: !ruby/object:Gem::Requirement
|
171
|
-
none: false
|
172
|
-
requirements:
|
173
|
-
- - ! '>='
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
version: '0.9'
|
176
|
-
- !ruby/object:Gem::Dependency
|
177
|
-
name: rspec
|
178
|
-
requirement: !ruby/object:Gem::Requirement
|
179
|
-
none: false
|
180
|
-
requirements:
|
181
|
-
- - ! '>='
|
182
|
-
- !ruby/object:Gem::Version
|
183
|
-
version: '2.8'
|
184
|
-
type: :development
|
185
|
-
prerelease: false
|
186
|
-
version_requirements: !ruby/object:Gem::Requirement
|
187
|
-
none: false
|
188
|
-
requirements:
|
189
|
-
- - ! '>='
|
190
|
-
- !ruby/object:Gem::Version
|
191
|
-
version: '2.8'
|
192
|
-
- !ruby/object:Gem::Dependency
|
193
|
-
name: guard
|
194
|
-
requirement: !ruby/object:Gem::Requirement
|
195
|
-
none: false
|
196
|
-
requirements:
|
197
|
-
- - ! '>='
|
198
|
-
- !ruby/object:Gem::Version
|
199
|
-
version: '1.0'
|
200
|
-
type: :development
|
201
|
-
prerelease: false
|
202
|
-
version_requirements: !ruby/object:Gem::Requirement
|
203
|
-
none: false
|
204
|
-
requirements:
|
205
|
-
- - ! '>='
|
206
|
-
- !ruby/object:Gem::Version
|
207
|
-
version: '1.0'
|
208
|
-
- !ruby/object:Gem::Dependency
|
209
|
-
name: guard-rspec
|
210
|
-
requirement: !ruby/object:Gem::Requirement
|
211
|
-
none: false
|
212
|
-
requirements:
|
213
|
-
- - ! '>='
|
214
|
-
- !ruby/object:Gem::Version
|
215
|
-
version: '0.6'
|
216
|
-
type: :development
|
217
|
-
prerelease: false
|
218
|
-
version_requirements: !ruby/object:Gem::Requirement
|
219
|
-
none: false
|
220
|
-
requirements:
|
221
|
-
- - ! '>='
|
222
|
-
- !ruby/object:Gem::Version
|
223
|
-
version: '0.6'
|
224
|
-
- !ruby/object:Gem::Dependency
|
225
|
-
name: simplecov
|
226
|
-
requirement: !ruby/object:Gem::Requirement
|
227
|
-
none: false
|
228
|
-
requirements:
|
229
|
-
- - ! '>='
|
230
|
-
- !ruby/object:Gem::Version
|
231
|
-
version: '0.5'
|
232
|
-
type: :development
|
233
|
-
prerelease: false
|
234
|
-
version_requirements: !ruby/object:Gem::Requirement
|
235
|
-
none: false
|
236
|
-
requirements:
|
237
|
-
- - ! '>='
|
238
|
-
- !ruby/object:Gem::Version
|
239
|
-
version: '0.5'
|
240
|
-
- !ruby/object:Gem::Dependency
|
241
|
-
name: pry
|
242
|
-
requirement: !ruby/object:Gem::Requirement
|
243
|
-
none: false
|
244
|
-
requirements:
|
245
|
-
- - ! '>='
|
246
|
-
- !ruby/object:Gem::Version
|
247
|
-
version: '0'
|
248
|
-
type: :development
|
249
|
-
prerelease: false
|
250
|
-
version_requirements: !ruby/object:Gem::Requirement
|
251
|
-
none: false
|
252
|
-
requirements:
|
253
|
-
- - ! '>='
|
254
|
-
- !ruby/object:Gem::Version
|
255
|
-
version: '0'
|
256
|
-
- !ruby/object:Gem::Dependency
|
257
|
-
name: yard
|
258
|
-
requirement: !ruby/object:Gem::Requirement
|
259
|
-
none: false
|
260
|
-
requirements:
|
261
|
-
- - ! '>='
|
262
|
-
- !ruby/object:Gem::Version
|
263
|
-
version: '0'
|
264
|
-
type: :development
|
265
|
-
prerelease: false
|
266
|
-
version_requirements: !ruby/object:Gem::Requirement
|
267
|
-
none: false
|
268
|
-
requirements:
|
269
|
-
- - ! '>='
|
270
|
-
- !ruby/object:Gem::Version
|
271
|
-
version: '0'
|
272
|
-
- !ruby/object:Gem::Dependency
|
273
|
-
name: redcarpet
|
274
|
-
requirement: !ruby/object:Gem::Requirement
|
275
|
-
none: false
|
276
|
-
requirements:
|
277
|
-
- - ! '>='
|
278
|
-
- !ruby/object:Gem::Version
|
279
|
-
version: '0'
|
280
|
-
type: :development
|
281
|
-
prerelease: false
|
282
|
-
version_requirements: !ruby/object:Gem::Requirement
|
283
|
-
none: false
|
284
|
-
requirements:
|
285
|
-
- - ! '>='
|
286
|
-
- !ruby/object:Gem::Version
|
287
|
-
version: '0'
|
288
|
-
- !ruby/object:Gem::Dependency
|
289
|
-
name: addressable
|
290
|
-
requirement: !ruby/object:Gem::Requirement
|
291
|
-
none: false
|
292
|
-
requirements:
|
293
|
-
- - ! '>='
|
294
|
-
- !ruby/object:Gem::Version
|
295
|
-
version: '0'
|
296
|
-
type: :development
|
297
|
-
prerelease: false
|
298
|
-
version_requirements: !ruby/object:Gem::Requirement
|
299
|
-
none: false
|
300
|
-
requirements:
|
301
|
-
- - ! '>='
|
302
|
-
- !ruby/object:Gem::Version
|
303
|
-
version: '0'
|
304
|
-
- !ruby/object:Gem::Dependency
|
305
|
-
name: htmlentities
|
306
|
-
requirement: !ruby/object:Gem::Requirement
|
307
|
-
none: false
|
308
|
-
requirements:
|
309
|
-
- - ! '>='
|
310
|
-
- !ruby/object:Gem::Version
|
311
|
-
version: '0'
|
312
|
-
type: :development
|
313
|
-
prerelease: false
|
314
|
-
version_requirements: !ruby/object:Gem::Requirement
|
315
|
-
none: false
|
316
|
-
requirements:
|
317
|
-
- - ! '>='
|
318
|
-
- !ruby/object:Gem::Version
|
319
|
-
version: '0'
|
320
144
|
description: ! " Treat your dataset like a:\n\n * stream of lines when it's
|
321
145
|
efficient to process by lines\n * stream of field arrays when it's efficient
|
322
146
|
to deal directly with fields\n * stream of lightweight objects when it's efficient
|
323
147
|
to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query
|
324
148
|
language, and the cat on your command line.\n"
|
325
|
-
email: coders@infochimps.
|
149
|
+
email: coders@infochimps.com
|
326
150
|
executables:
|
327
151
|
- wu-local
|
328
152
|
extensions: []
|
@@ -361,13 +185,26 @@ files:
|
|
361
185
|
- bin/wu-sum
|
362
186
|
- examples/Gemfile
|
363
187
|
- examples/README.md
|
364
|
-
- examples/dataflow/apache_log_line.rb
|
365
|
-
- examples/dataflow/fibonacci_series.rb
|
366
|
-
- examples/dataflow/parse_apache_logs.rb
|
367
|
-
- examples/dataflow/pig_latinizer.rb
|
188
|
+
- examples/basic/string_reverser.rb
|
189
|
+
- examples/basic/tiny_count.rb
|
190
|
+
- examples/basic/word_count/accumulator.rb
|
191
|
+
- examples/basic/word_count/tokenizer.rb
|
192
|
+
- examples/basic/word_count/word_count.rb
|
368
193
|
- examples/dataflow/scraper_macro_flow.rb
|
369
|
-
- examples/dataflow/simple.rb
|
370
|
-
- examples/dataflow/telegram.rb
|
194
|
+
- examples/deploy_pack/Gemfile
|
195
|
+
- examples/deploy_pack/README.md
|
196
|
+
- examples/deploy_pack/a/b/c/.gitkeep
|
197
|
+
- examples/deploy_pack/app/processors/string_reverser.rb
|
198
|
+
- examples/deploy_pack/config/environment.rb
|
199
|
+
- examples/dsl/dataflow/fibonacci_series.rb
|
200
|
+
- examples/dsl/dataflow/scraper_macro_flow.rb
|
201
|
+
- examples/dsl/dataflow/simple.rb
|
202
|
+
- examples/dsl/dataflow/telegram.rb
|
203
|
+
- examples/dsl/workflow/cherry_pie.dot
|
204
|
+
- examples/dsl/workflow/cherry_pie.md
|
205
|
+
- examples/dsl/workflow/cherry_pie.png
|
206
|
+
- examples/dsl/workflow/cherry_pie.rb
|
207
|
+
- examples/empty/.gitkeep
|
371
208
|
- examples/examples_helper.rb
|
372
209
|
- examples/geo.rb
|
373
210
|
- examples/geo/geo_grids.numbers
|
@@ -375,48 +212,23 @@ files:
|
|
375
212
|
- examples/geo/quadtile.rb
|
376
213
|
- examples/geo/spec/geolocated_spec.rb
|
377
214
|
- examples/geo/tile_fetcher.rb
|
378
|
-
- examples/graph/minimum_spanning_tree.rb
|
379
|
-
- examples/graph/union_find.rb
|
380
|
-
- examples/jabberwocky.txt
|
381
|
-
- examples/models/wikipedia.rb
|
382
|
-
- examples/munging/Gemfile
|
383
|
-
- examples/munging/airline_flights/airline.rb
|
215
|
+
- examples/graph/implied_geolocation/README.md
|
216
|
+
- examples/graph/minimum_spanning_tree/airfares_graphviz.rb
|
384
217
|
- examples/munging/airline_flights/airline_flights.rake
|
385
218
|
- examples/munging/airline_flights/airplane.rb
|
386
|
-
- examples/munging/airline_flights/airport.rb
|
387
219
|
- examples/munging/airline_flights/airport_id_unification.rb
|
388
220
|
- examples/munging/airline_flights/airport_ok_chars.rb
|
389
|
-
- examples/munging/airline_flights/flight.rb
|
390
|
-
- examples/munging/airline_flights/models.rb
|
391
|
-
- examples/munging/airline_flights/parse.rb
|
221
|
+
- examples/munging/airline_flights/indexable.rb
|
222
|
+
- examples/munging/airline_flights/indexable_spec.rb
|
392
223
|
- examples/munging/airline_flights/reconcile_airports.rb
|
393
|
-
- examples/munging/airline_flights/route.rb
|
394
224
|
- examples/munging/airline_flights/tasks.rake
|
395
|
-
- examples/munging/airline_flights/timezone_fixup.rb
|
396
225
|
- examples/munging/airline_flights/topcities.rb
|
397
|
-
- examples/munging/airports/40_wbans.txt
|
398
|
-
- examples/munging/airports/filter_weather_reports.rb
|
399
|
-
- examples/munging/airports/join.pig
|
400
|
-
- examples/munging/airports/to_tsv.rb
|
401
|
-
- examples/munging/airports/usa_wbans.pig
|
402
|
-
- examples/munging/airports/usa_wbans.txt
|
403
|
-
- examples/munging/airports/wbans.pig
|
404
|
-
- examples/munging/airports/wbans.txt
|
405
226
|
- examples/munging/geo/geo_json.rb
|
406
227
|
- examples/munging/geo/geo_models.rb
|
407
228
|
- examples/munging/geo/geonames_models.rb
|
408
229
|
- examples/munging/geo/iso_codes.rb
|
409
230
|
- examples/munging/geo/reconcile_countries.rb
|
410
231
|
- examples/munging/geo/tasks.rake
|
411
|
-
- examples/munging/rake_helper.rb
|
412
|
-
- examples/munging/weather/.gitignore
|
413
|
-
- examples/munging/weather/Gemfile
|
414
|
-
- examples/munging/weather/Rakefile
|
415
|
-
- examples/munging/weather/extract_ish.rb
|
416
|
-
- examples/munging/weather/models/weather.rb
|
417
|
-
- examples/munging/weather/utils/noaa_downloader.rb
|
418
|
-
- examples/munging/wikipedia/README.md
|
419
|
-
- examples/munging/wikipedia/Rakefile
|
420
232
|
- examples/munging/wikipedia/articles/extract_articles-parsed.rb
|
421
233
|
- examples/munging/wikipedia/articles/extract_articles-templated.rb
|
422
234
|
- examples/munging/wikipedia/articles/textualize_articles.rb
|
@@ -426,30 +238,13 @@ files:
|
|
426
238
|
- examples/munging/wikipedia/articles/wp2txt_utils.rb
|
427
239
|
- examples/munging/wikipedia/dbpedia/dbpedia_common.rb
|
428
240
|
- examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb
|
241
|
+
- examples/munging/wikipedia/dbpedia/extract_links-cruft.rb
|
429
242
|
- examples/munging/wikipedia/dbpedia/extract_links.rb
|
430
243
|
- examples/munging/wikipedia/dbpedia/sameas_extractor.rb
|
431
|
-
- examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig
|
432
|
-
- examples/munging/wikipedia/page_metadata/extract_page_metadata.rb
|
433
|
-
- examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old
|
434
|
-
- examples/munging/wikipedia/pagelinks/augment_pagelinks.pig
|
435
|
-
- examples/munging/wikipedia/pagelinks/extract_pagelinks.rb
|
436
|
-
- examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old
|
437
|
-
- examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig
|
438
|
-
- examples/munging/wikipedia/pageviews/augment_pageviews.pig
|
439
|
-
- examples/munging/wikipedia/pageviews/extract_pageviews.rb
|
440
|
-
- examples/munging/wikipedia/pig_style_guide.md
|
441
|
-
- examples/munging/wikipedia/redirects/redirects_page_metadata.pig
|
442
|
-
- examples/munging/wikipedia/subuniverse/sub_articles.pig
|
443
|
-
- examples/munging/wikipedia/subuniverse/sub_page_metadata.pig
|
444
|
-
- examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig
|
445
|
-
- examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig
|
446
|
-
- examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig
|
447
|
-
- examples/munging/wikipedia/subuniverse/sub_pageviews.pig
|
448
|
-
- examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig
|
449
|
-
- examples/munging/wikipedia/utils/get_namespaces.rb
|
450
|
-
- examples/munging/wikipedia/utils/munging_utils.rb
|
451
|
-
- examples/munging/wikipedia/utils/namespaces.json
|
452
244
|
- examples/rake_helper.rb
|
245
|
+
- examples/ruby_project/Gemfile
|
246
|
+
- examples/ruby_project/README.md
|
247
|
+
- examples/ruby_project/a/b/c/.gitkeep
|
453
248
|
- examples/server_logs/geo_ip_mapping/munge_geolite.rb
|
454
249
|
- examples/server_logs/logline.rb
|
455
250
|
- examples/server_logs/models.rb
|
@@ -459,31 +254,22 @@ files:
|
|
459
254
|
- examples/server_logs/server_logs-02-histograms-mapper.rb
|
460
255
|
- examples/server_logs/server_logs-03-breadcrumbs-full.rb
|
461
256
|
- examples/server_logs/server_logs-04-page_page_edges-full.rb
|
462
|
-
- examples/string_reverser.rb
|
463
|
-
- examples/text/latinize_text.rb
|
464
|
-
- examples/text/pig_latin.rb
|
257
|
+
- examples/serverlogs/geo_ip_mapping/munge_geolite.rb
|
258
|
+
- examples/serverlogs/models/logline.rb
|
259
|
+
- examples/serverlogs/parser/apache_parser_widget.rb
|
260
|
+
- examples/serverlogs/visit_paths/common.rb
|
261
|
+
- examples/serverlogs/visit_paths/page_counts.pig
|
262
|
+
- examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb
|
263
|
+
- examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb
|
264
|
+
- examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb
|
265
|
+
- examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb
|
266
|
+
- examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb
|
267
|
+
- examples/text/pig_latin/pig_latinizer.rb
|
268
|
+
- examples/text/pig_latin/pig_latinizer_widget.rb
|
465
269
|
- examples/text/regional_flavor/README.md
|
466
270
|
- examples/text/regional_flavor/article_wordbags.pig
|
467
271
|
- examples/text/regional_flavor/j01-article_wordbags.rb
|
468
272
|
- examples/text/regional_flavor/simple_pig_script.pig
|
469
|
-
- examples/tiny_count.rb
|
470
|
-
- examples/twitter/locations.rb
|
471
|
-
- examples/twitter/models.rb
|
472
|
-
- examples/twitter/pt1-fiddle.pig
|
473
|
-
- examples/twitter/pt2-simple_parse.pig
|
474
|
-
- examples/twitter/pt2-simple_parse.rb
|
475
|
-
- examples/twitter/pt3-join_on_zips.pig
|
476
|
-
- examples/twitter/pt4-strong_links.rb
|
477
|
-
- examples/twitter/pt5-lnglat_and_strong_links.pig
|
478
|
-
- examples/twitter/states.tsv
|
479
|
-
- examples/word_count/accumulator.rb
|
480
|
-
- examples/word_count/tokenizer.rb
|
481
|
-
- examples/word_count/word_count.rb
|
482
|
-
- examples/workflow/cherry_pie.dot
|
483
|
-
- examples/workflow/cherry_pie.md
|
484
|
-
- examples/workflow/cherry_pie.png
|
485
|
-
- examples/workflow/cherry_pie.rb
|
486
|
-
- examples/workflow/package_gem.rb
|
487
273
|
- lib/hanuman.rb
|
488
274
|
- lib/hanuman/graph.rb
|
489
275
|
- lib/hanuman/graphvizzer.rb
|
@@ -492,12 +278,22 @@ files:
|
|
492
278
|
- lib/hanuman/link.rb
|
493
279
|
- lib/hanuman/registry.rb
|
494
280
|
- lib/hanuman/stage.rb
|
281
|
+
- lib/wu/geo.rb
|
282
|
+
- lib/wu/geo/geo_grids.numbers
|
283
|
+
- lib/wu/geo/geolocated.rb
|
284
|
+
- lib/wu/geo/quadtile.rb
|
285
|
+
- lib/wu/graph/union_find.rb
|
286
|
+
- lib/wu/model/reconcilable.rb
|
287
|
+
- lib/wu/munging.rb
|
288
|
+
- lib/wu/social/models/twitter.rb
|
289
|
+
- lib/wu/wikipedia/models.rb
|
495
290
|
- lib/wukong.rb
|
496
291
|
- lib/wukong/boot.rb
|
497
292
|
- lib/wukong/configuration.rb
|
498
293
|
- lib/wukong/dataflow.rb
|
499
294
|
- lib/wukong/driver.rb
|
500
295
|
- lib/wukong/emitter.rb
|
296
|
+
- lib/wukong/logger.rb
|
501
297
|
- lib/wukong/model/faker.rb
|
502
298
|
- lib/wukong/model/flatpack_parser/flat.rb
|
503
299
|
- lib/wukong/model/flatpack_parser/flatpack.rb
|
@@ -505,6 +301,8 @@ files:
|
|
505
301
|
- lib/wukong/model/flatpack_parser/parser.rb
|
506
302
|
- lib/wukong/model/flatpack_parser/tokens.rb
|
507
303
|
- lib/wukong/processor.rb
|
304
|
+
- lib/wukong/runner.rb
|
305
|
+
- lib/wukong/server.rb
|
508
306
|
- lib/wukong/spec_helpers.rb
|
509
307
|
- lib/wukong/spec_helpers/integration_driver.rb
|
510
308
|
- lib/wukong/spec_helpers/integration_driver_matchers.rb
|
@@ -525,8 +323,6 @@ files:
|
|
525
323
|
- lib/wukong/widget/reducers/moments.rb
|
526
324
|
- lib/wukong/widget/reducers/sort.rb
|
527
325
|
- lib/wukong/widget/serializers.rb
|
528
|
-
- lib/wukong/widget/sink.rb
|
529
|
-
- lib/wukong/widget/source.rb
|
530
326
|
- lib/wukong/widget/utils.rb
|
531
327
|
- lib/wukong/widgets.rb
|
532
328
|
- spec/examples/dataflow/fibonacci_series_spec.rb
|
@@ -545,11 +341,15 @@ files:
|
|
545
341
|
- spec/hanuman/stage_spec.rb
|
546
342
|
- spec/spec.opts
|
547
343
|
- spec/spec_helper.rb
|
344
|
+
- spec/support/example_test_helpers.rb
|
548
345
|
- spec/support/hanuman_test_helpers.rb
|
346
|
+
- spec/support/integration_helper.rb
|
347
|
+
- spec/support/model_test_helpers.rb
|
549
348
|
- spec/support/shared_context_for_reducers.rb
|
550
349
|
- spec/support/shared_examples_for_builders.rb
|
551
350
|
- spec/support/shared_examples_for_shortcuts.rb
|
552
|
-
- spec/support/wukong_test_helpers.rb
|
351
|
+
- spec/wu/geo/geolocated_spec.rb
|
352
|
+
- spec/wu/model/reconcilable_spec.rb
|
553
353
|
- spec/wukong/dataflow_spec.rb
|
554
354
|
- spec/wukong/local_runner_spec.rb
|
555
355
|
- spec/wukong/model/faker_spec.rb
|
@@ -564,6 +364,7 @@ files:
|
|
564
364
|
- spec/wukong/widget/serializers_spec.rb
|
565
365
|
- spec/wukong/widget/sink_spec.rb
|
566
366
|
- spec/wukong/widget/source_spec.rb
|
367
|
+
- spec/wukong/wu_local_spec.rb
|
567
368
|
- spec/wukong/wukong_spec.rb
|
568
369
|
- wukong.gemspec
|
569
370
|
homepage: https://github.com/infochimps-labs/wukong
|
@@ -609,11 +410,15 @@ test_files:
|
|
609
410
|
- spec/hanuman/stage_spec.rb
|
610
411
|
- spec/spec.opts
|
611
412
|
- spec/spec_helper.rb
|
413
|
+
- spec/support/example_test_helpers.rb
|
612
414
|
- spec/support/hanuman_test_helpers.rb
|
415
|
+
- spec/support/integration_helper.rb
|
416
|
+
- spec/support/model_test_helpers.rb
|
613
417
|
- spec/support/shared_context_for_reducers.rb
|
614
418
|
- spec/support/shared_examples_for_builders.rb
|
615
419
|
- spec/support/shared_examples_for_shortcuts.rb
|
616
|
-
- spec/support/wukong_test_helpers.rb
|
420
|
+
- spec/wu/geo/geolocated_spec.rb
|
421
|
+
- spec/wu/model/reconcilable_spec.rb
|
617
422
|
- spec/wukong/dataflow_spec.rb
|
618
423
|
- spec/wukong/local_runner_spec.rb
|
619
424
|
- spec/wukong/model/faker_spec.rb
|
@@ -628,5 +433,6 @@ test_files:
|
|
628
433
|
- spec/wukong/widget/serializers_spec.rb
|
629
434
|
- spec/wukong/widget/sink_spec.rb
|
630
435
|
- spec/wukong/widget/source_spec.rb
|
436
|
+
- spec/wukong/wu_local_spec.rb
|
631
437
|
- spec/wukong/wukong_spec.rb
|
632
438
|
has_rdoc:
|