wukong 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +34 -7
- data/bin/cutc +1 -1
- data/bin/cuttab +1 -1
- data/bin/greptrue +1 -3
- data/bin/hdp-cat +1 -1
- data/bin/hdp-catd +1 -1
- data/bin/hdp-du +11 -6
- data/bin/hdp-get +1 -1
- data/bin/hdp-kill +1 -1
- data/bin/hdp-ls +1 -1
- data/bin/hdp-mkdir +1 -1
- data/bin/hdp-mv +1 -1
- data/bin/hdp-ps +1 -1
- data/bin/hdp-put +1 -1
- data/bin/hdp-rm +1 -1
- data/bin/hdp-sort +39 -19
- data/bin/hdp-stream +39 -19
- data/bin/hdp-stream-flat +9 -5
- data/bin/hdp-stream2 +39 -0
- data/bin/tabchar +1 -1
- data/bin/wu-date +13 -0
- data/bin/wu-datetime +13 -0
- data/bin/wu-plus +9 -0
- data/docpages/INSTALL.textile +0 -2
- data/docpages/index.textile +4 -2
- data/examples/apache_log_parser.rb +26 -14
- data/examples/graph/gen_symmetric_links.rb +10 -0
- data/examples/sample_records.rb +6 -8
- data/lib/wukong/datatypes/enum.rb +2 -2
- data/lib/wukong/dfs.rb +10 -9
- data/lib/wukong/encoding.rb +22 -4
- data/lib/wukong/extensions/emittable.rb +1 -1
- data/lib/wukong/extensions/hash_keys.rb +16 -0
- data/lib/wukong/extensions/hash_like.rb +17 -0
- data/lib/wukong/models/graph.rb +18 -20
- data/lib/wukong/schema.rb +13 -11
- data/lib/wukong/script.rb +26 -8
- data/lib/wukong/script/hadoop_command.rb +108 -2
- data/lib/wukong/streamer.rb +2 -0
- data/lib/wukong/streamer/base.rb +1 -0
- data/lib/wukong/streamer/record_streamer.rb +14 -0
- data/lib/wukong/streamer/struct_streamer.rb +2 -2
- data/spec/data/a_atsigns_b.tsv +64 -0
- data/spec/data/a_follows_b.tsv +53 -0
- data/spec/data/tweet.tsv +167 -0
- data/spec/data/twitter_user.tsv +55 -0
- data/wukong.gemspec +13 -3
- metadata +13 -23
@@ -0,0 +1,55 @@
|
|
1
|
+
# rsrc id screen_name scat fo fr st st crat
|
2
|
+
twitter_user 1000000001 jerry 20060101080808 18 6 6 0 20060101080808
|
3
|
+
twitter_user 1000000002 costanza81 20060101080812 5 14 0 0 20060101080812
|
4
|
+
twitter_user 1000000003 elaine 20060101080600 8 28 0 20060101080600
|
5
|
+
twitter_user 1000000004 puddy 20080401080808 7 20080401080808
|
6
|
+
twitter_user 1000000005 bania 20080402080808 7 20080402080808
|
7
|
+
twitter_user 1000000006 bob_sacamano 20080403080808 20080403080808
|
8
|
+
twitter_user 1000000007 original_kramer 20080404080808 20080404080808
|
9
|
+
twitter_user 1000000008 newman 20080405080808 20080405080808
|
10
|
+
twitter_user 1000000009 lomez 20080411080808 20080411080808
|
11
|
+
twitter_user 1000000010 man_hands 20080422080808 20080422080808
|
12
|
+
twitter_user 1000000011 fusilli_jerry 20080501080808 20080501080808
|
13
|
+
twitter_user 1000000012 bubble_boy 20080601080808 20080601080808
|
14
|
+
twitter_user 1000000013 jpeterman 20080601080808 20080601080808
|
15
|
+
twitter_user 1000000014 jpetermanco 20080601080908 20080601080908
|
16
|
+
twitter_user 1000000015 UNCLE_LEO 20080601080808 20080601080808
|
17
|
+
twitter_user 1000000016 serenity_now 20080601080808 20080601080808
|
18
|
+
twitter_user 1000000017 JackieChilesEsq 20080601080808 20080601080808
|
19
|
+
twitter_user 1000000018 mulva 20080601080808 20080601080808
|
20
|
+
twitter_user 1000000019 babubhatt 20080812080808 20080812080808
|
21
|
+
twitter_user 1000000020 marla 20080801080808 20080801080808
|
22
|
+
twitter_user 1000000021 ESTELLECOSTNAZA 20080908080808 20080908080808
|
23
|
+
twitter_user 1000000022 semischke 20080601080808 20080601080808
|
24
|
+
twitter_user 1000000023 bigstein 20080601080808 20080601080808
|
25
|
+
twitter_user 1000000024 superman 20080601080808 20080601080808
|
26
|
+
twitter_user 1000000025 ArtVandelay 20060101080812 5 14 0 0 20060101080812
|
27
|
+
twitter_user 1000000026 HJPennypacker 20080601080808 20080601080808
|
28
|
+
twitter_user 1000000027 KelVarnsen 20080601080808 20080601080808
|
29
|
+
twitter_user 1000000028 derek_Jeter
|
30
|
+
twitter_user 1000000029 BernieWilliams
|
31
|
+
twitter_user 1000000030 cougar_mom
|
32
|
+
twitter_user 1000000032 thementee
|
33
|
+
twitter_user 1000000069 bania
|
34
|
+
twitter_user 1000000506 elainebenes
|
35
|
+
twitter_user 1000000632 jerry
|
36
|
+
twitter_user 1000001431 Newman
|
37
|
+
twitter_user 1000004517 costanza81
|
38
|
+
twitter_user 1000005467 FrankCostanza
|
39
|
+
twitter_user 1000202970 bigstein
|
40
|
+
twitter_user 1000296583 Ross
|
41
|
+
twitter_user 1000313364 BabuBhatt
|
42
|
+
twitter_user 1000322339 Cushman
|
43
|
+
twitter_user 1000410101 Ronnie
|
44
|
+
twitter_user 1000431520 dean_jones
|
45
|
+
twitter_user 1000530581 Katya
|
46
|
+
twitter_user 1000566783 LloydBraun
|
47
|
+
twitter_user 1000606862 JackieChiles
|
48
|
+
twitter_user 1000616766 Pam
|
49
|
+
twitter_user 1000641417 JPeterman
|
50
|
+
twitter_user 1000724245 cosmo
|
51
|
+
twitter_user 1008980889 vannostrandmd 20080601080808 20080601080808
|
52
|
+
twitter_user 1000911320 Puddy
|
53
|
+
twitter_user created_at reply_sn
|
54
|
+
|
55
|
+
|
data/wukong.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{wukong}
|
8
|
-
s.version = "1.4.0"
|
8
|
+
s.version = "1.4.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
12
|
-
s.date = %q{2009-
|
12
|
+
s.date = %q{2009-12-15}
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
14
14
|
|
15
15
|
* stream of lines when it’s efficient to process by lines
|
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
|
20
20
|
}
|
21
21
|
s.email = %q{flip@infochimps.org}
|
22
|
-
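s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]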
|
22
|
+
s.executables = ["hdp-du", "hdp-sync", "hdp-wc", "wu-lign", "wu-sum", "hdp-parts_to_keys.rb"]
|
23
23
|
s.extra_rdoc_files = [
|
24
24
|
"LICENSE.textile",
|
25
25
|
"README.textile"
|
@@ -46,13 +46,17 @@ Gem::Specification.new do |s|
|
|
46
46
|
"bin/hdp-sort",
|
47
47
|
"bin/hdp-stream",
|
48
48
|
"bin/hdp-stream-flat",
|
49
|
+
"bin/hdp-stream2",
|
49
50
|
"bin/hdp-sync",
|
50
51
|
"bin/hdp-wc",
|
51
52
|
"bin/md5sort",
|
52
53
|
"bin/tabchar",
|
53
54
|
"bin/uniqc",
|
55
|
+
"bin/wu-date",
|
56
|
+
"bin/wu-datetime",
|
54
57
|
"bin/wu-hist",
|
55
58
|
"bin/wu-lign",
|
59
|
+
"bin/wu-plus",
|
56
60
|
"bin/wu-sum",
|
57
61
|
"docpages/INSTALL.textile",
|
58
62
|
"docpages/INSTALL.textile",
|
@@ -129,6 +133,7 @@ Gem::Specification.new do |s|
|
|
129
133
|
"lib/wukong/extensions/date_time.rb",
|
130
134
|
"lib/wukong/extensions/emittable.rb",
|
131
135
|
"lib/wukong/extensions/hash.rb",
|
136
|
+
"lib/wukong/extensions/hash_keys.rb",
|
132
137
|
"lib/wukong/extensions/hash_like.rb",
|
133
138
|
"lib/wukong/extensions/hashlike_class.rb",
|
134
139
|
"lib/wukong/extensions/module.rb",
|
@@ -153,6 +158,7 @@ Gem::Specification.new do |s|
|
|
153
158
|
"lib/wukong/streamer/list_reducer.rb",
|
154
159
|
"lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
|
155
160
|
"lib/wukong/streamer/rank_and_bin_reducer.rb",
|
161
|
+
"lib/wukong/streamer/record_streamer.rb",
|
156
162
|
"lib/wukong/streamer/set_reducer.rb",
|
157
163
|
"lib/wukong/streamer/struct_streamer.rb",
|
158
164
|
"lib/wukong/streamer/summing_reducer.rb",
|
@@ -160,6 +166,10 @@ Gem::Specification.new do |s|
|
|
160
166
|
"lib/wukong/typed_struct.rb",
|
161
167
|
"lib/wukong/wukong_class.rb",
|
162
168
|
"spec/bin/hdp-wc_spec.rb",
|
169
|
+
"spec/data/a_atsigns_b.tsv",
|
170
|
+
"spec/data/a_follows_b.tsv",
|
171
|
+
"spec/data/tweet.tsv",
|
172
|
+
"spec/data/twitter_user.tsv",
|
163
173
|
"spec/spec_helper.rb",
|
164
174
|
"wukong.gemspec"
|
165
175
|
]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philip (flip) Kromer
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-12-15 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -45,32 +45,12 @@ dependencies:
|
|
45
45
|
description: " Treat your dataset like a:\n\n * stream of lines when it\xE2\x80\x99s efficient to process by lines\n * stream of field arrays when it\xE2\x80\x99s efficient to deal directly with fields\n * stream of lightweight objects when it\xE2\x80\x99s efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n"
|
46
46
|
email: flip@infochimps.org
|
47
47
|
executables:
|
48
|
-
- cutc
|
49
|
-
- cuttab
|
50
|
-
- greptrue
|
51
|
-
- hdp-cat
|
52
|
-
- hdp-catd
|
53
48
|
- hdp-du
|
54
|
-
- hdp-get
|
55
|
-
- hdp-kill
|
56
|
-
- hdp-ls
|
57
|
-
- hdp-mkdir
|
58
|
-
- hdp-mv
|
59
|
-
- hdp-parts_to_keys.rb
|
60
|
-
- hdp-ps
|
61
|
-
- hdp-put
|
62
|
-
- hdp-rm
|
63
|
-
- hdp-sort
|
64
|
-
- hdp-stream
|
65
|
-
- hdp-stream-flat
|
66
49
|
- hdp-sync
|
67
50
|
- hdp-wc
|
68
|
-
- md5sort
|
69
|
-
- tabchar
|
70
|
-
- uniqc
|
71
|
-
- wu-hist
|
72
51
|
- wu-lign
|
73
52
|
- wu-sum
|
53
|
+
- hdp-parts_to_keys.rb
|
74
54
|
extensions: []
|
75
55
|
|
76
56
|
extra_rdoc_files:
|
@@ -98,13 +78,17 @@ files:
|
|
98
78
|
- bin/hdp-sort
|
99
79
|
- bin/hdp-stream
|
100
80
|
- bin/hdp-stream-flat
|
81
|
+
- bin/hdp-stream2
|
101
82
|
- bin/hdp-sync
|
102
83
|
- bin/hdp-wc
|
103
84
|
- bin/md5sort
|
104
85
|
- bin/tabchar
|
105
86
|
- bin/uniqc
|
87
|
+
- bin/wu-date
|
88
|
+
- bin/wu-datetime
|
106
89
|
- bin/wu-hist
|
107
90
|
- bin/wu-lign
|
91
|
+
- bin/wu-plus
|
108
92
|
- bin/wu-sum
|
109
93
|
- docpages/INSTALL.textile
|
110
94
|
- docpages/LICENSE.textile
|
@@ -166,6 +150,7 @@ files:
|
|
166
150
|
- lib/wukong/extensions/date_time.rb
|
167
151
|
- lib/wukong/extensions/emittable.rb
|
168
152
|
- lib/wukong/extensions/hash.rb
|
153
|
+
- lib/wukong/extensions/hash_keys.rb
|
169
154
|
- lib/wukong/extensions/hash_like.rb
|
170
155
|
- lib/wukong/extensions/hashlike_class.rb
|
171
156
|
- lib/wukong/extensions/module.rb
|
@@ -190,6 +175,7 @@ files:
|
|
190
175
|
- lib/wukong/streamer/list_reducer.rb
|
191
176
|
- lib/wukong/streamer/preprocess_with_pipe_streamer.rb
|
192
177
|
- lib/wukong/streamer/rank_and_bin_reducer.rb
|
178
|
+
- lib/wukong/streamer/record_streamer.rb
|
193
179
|
- lib/wukong/streamer/set_reducer.rb
|
194
180
|
- lib/wukong/streamer/struct_streamer.rb
|
195
181
|
- lib/wukong/streamer/summing_reducer.rb
|
@@ -197,6 +183,10 @@ files:
|
|
197
183
|
- lib/wukong/typed_struct.rb
|
198
184
|
- lib/wukong/wukong_class.rb
|
199
185
|
- spec/bin/hdp-wc_spec.rb
|
186
|
+
- spec/data/a_atsigns_b.tsv
|
187
|
+
- spec/data/a_follows_b.tsv
|
188
|
+
- spec/data/tweet.tsv
|
189
|
+
- spec/data/twitter_user.tsv
|
200
190
|
- spec/spec_helper.rb
|
201
191
|
- wukong.gemspec
|
202
192
|
has_rdoc: true
|