wukong 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/README.textile +34 -7
  2. data/bin/cutc +1 -1
  3. data/bin/cuttab +1 -1
  4. data/bin/greptrue +1 -3
  5. data/bin/hdp-cat +1 -1
  6. data/bin/hdp-catd +1 -1
  7. data/bin/hdp-du +11 -6
  8. data/bin/hdp-get +1 -1
  9. data/bin/hdp-kill +1 -1
  10. data/bin/hdp-ls +1 -1
  11. data/bin/hdp-mkdir +1 -1
  12. data/bin/hdp-mv +1 -1
  13. data/bin/hdp-ps +1 -1
  14. data/bin/hdp-put +1 -1
  15. data/bin/hdp-rm +1 -1
  16. data/bin/hdp-sort +39 -19
  17. data/bin/hdp-stream +39 -19
  18. data/bin/hdp-stream-flat +9 -5
  19. data/bin/hdp-stream2 +39 -0
  20. data/bin/tabchar +1 -1
  21. data/bin/wu-date +13 -0
  22. data/bin/wu-datetime +13 -0
  23. data/bin/wu-plus +9 -0
  24. data/docpages/INSTALL.textile +0 -2
  25. data/docpages/index.textile +4 -2
  26. data/examples/apache_log_parser.rb +26 -14
  27. data/examples/graph/gen_symmetric_links.rb +10 -0
  28. data/examples/sample_records.rb +6 -8
  29. data/lib/wukong/datatypes/enum.rb +2 -2
  30. data/lib/wukong/dfs.rb +10 -9
  31. data/lib/wukong/encoding.rb +22 -4
  32. data/lib/wukong/extensions/emittable.rb +1 -1
  33. data/lib/wukong/extensions/hash_keys.rb +16 -0
  34. data/lib/wukong/extensions/hash_like.rb +17 -0
  35. data/lib/wukong/models/graph.rb +18 -20
  36. data/lib/wukong/schema.rb +13 -11
  37. data/lib/wukong/script.rb +26 -8
  38. data/lib/wukong/script/hadoop_command.rb +108 -2
  39. data/lib/wukong/streamer.rb +2 -0
  40. data/lib/wukong/streamer/base.rb +1 -0
  41. data/lib/wukong/streamer/record_streamer.rb +14 -0
  42. data/lib/wukong/streamer/struct_streamer.rb +2 -2
  43. data/spec/data/a_atsigns_b.tsv +64 -0
  44. data/spec/data/a_follows_b.tsv +53 -0
  45. data/spec/data/tweet.tsv +167 -0
  46. data/spec/data/twitter_user.tsv +55 -0
  47. data/wukong.gemspec +13 -3
  48. metadata +13 -23
@@ -0,0 +1,55 @@
1
+ # rsrc id screen_name scat fo fr st st crat
2
+ twitter_user 1000000001 jerry 20060101080808 18 6 6 0 20060101080808
3
+ twitter_user 1000000002 costanza81 20060101080812 5 14 0 0 20060101080812
4
+ twitter_user 1000000003 elaine 20060101080600 8 28 0 20060101080600
5
+ twitter_user 1000000004 puddy 20080401080808 7 20080401080808
6
+ twitter_user 1000000005 bania 20080402080808 7 20080402080808
7
+ twitter_user 1000000006 bob_sacamano 20080403080808 20080403080808
8
+ twitter_user 1000000007 original_kramer 20080404080808 20080404080808
9
+ twitter_user 1000000008 newman 20080405080808 20080405080808
10
+ twitter_user 1000000009 lomez 20080411080808 20080411080808
11
+ twitter_user 1000000010 man_hands 20080422080808 20080422080808
12
+ twitter_user 1000000011 fusilli_jerry 20080501080808 20080501080808
13
+ twitter_user 1000000012 bubble_boy 20080601080808 20080601080808
14
+ twitter_user 1000000013 jpeterman 20080601080808 20080601080808
15
+ twitter_user 1000000014 jpetermanco 20080601080908 20080601080908
16
+ twitter_user 1000000015 UNCLE_LEO 20080601080808 20080601080808
17
+ twitter_user 1000000016 serenity_now 20080601080808 20080601080808
18
+ twitter_user 1000000017 JackieChilesEsq 20080601080808 20080601080808
19
+ twitter_user 1000000018 mulva 20080601080808 20080601080808
20
+ twitter_user 1000000019 babubhatt 20080812080808 20080812080808
21
+ twitter_user 1000000020 marla 20080801080808 20080801080808
22
+ twitter_user 1000000021 ESTELLECOSTNAZA 20080908080808 20080908080808
23
+ twitter_user 1000000022 semischke 20080601080808 20080601080808
24
+ twitter_user 1000000023 bigstein 20080601080808 20080601080808
25
+ twitter_user 1000000024 superman 20080601080808 20080601080808
26
+ twitter_user 1000000025 ArtVandelay 20060101080812 5 14 0 0 20060101080812
27
+ twitter_user 1000000026 HJPennypacker 20080601080808 20080601080808
28
+ twitter_user 1000000027 KelVarnsen 20080601080808 20080601080808
29
+ twitter_user 1000000028 derek_Jeter
30
+ twitter_user 1000000029 BernieWilliams
31
+ twitter_user 1000000030 cougar_mom
32
+ twitter_user 1000000032 thementee
33
+ twitter_user 1000000069 bania
34
+ twitter_user 1000000506 elainebenes
35
+ twitter_user 1000000632 jerry
36
+ twitter_user 1000001431 Newman
37
+ twitter_user 1000004517 costanza81
38
+ twitter_user 1000005467 FrankCostanza
39
+ twitter_user 1000202970 bigstein
40
+ twitter_user 1000296583 Ross
41
+ twitter_user 1000313364 BabuBhatt
42
+ twitter_user 1000322339 Cushman
43
+ twitter_user 1000410101 Ronnie
44
+ twitter_user 1000431520 dean_jones
45
+ twitter_user 1000530581 Katya
46
+ twitter_user 1000566783 LloydBraun
47
+ twitter_user 1000606862 JackieChiles
48
+ twitter_user 1000616766 Pam
49
+ twitter_user 1000641417 JPeterman
50
+ twitter_user 1000724245 cosmo
51
+ twitter_user 1008980889 vannostrandmd 20080601080808 20080601080808
52
+ twitter_user 1000911320 Puddy
53
+ twitter_user created_at reply_sn
54
+
55
+
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.4.0"
8
+ s.version = "1.4.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2009-10-12}
12
+ s.date = %q{2009-12-15}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it’s efficient to process by lines
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
20
20
  }
21
21
  s.email = %q{flip@infochimps.org}
22
- s.executables = ["cutc", "cuttab", "greptrue", "hdp-cat", "hdp-catd", "hdp-du", "hdp-get", "hdp-kill", "hdp-ls", "hdp-mkdir", "hdp-mv", "hdp-parts_to_keys.rb", "hdp-ps", "hdp-put", "hdp-rm", "hdp-sort", "hdp-stream", "hdp-stream-flat", "hdp-sync", "hdp-wc", "md5sort", "tabchar", "uniqc", "wu-hist", "wu-lign", "wu-sum"]
22
+ s.executables = ["hdp-du", "hdp-sync", "hdp-wc", "wu-lign", "wu-sum", "hdp-parts_to_keys.rb"]
23
23
  s.extra_rdoc_files = [
24
24
  "LICENSE.textile",
25
25
  "README.textile"
@@ -46,13 +46,17 @@ Gem::Specification.new do |s|
46
46
  "bin/hdp-sort",
47
47
  "bin/hdp-stream",
48
48
  "bin/hdp-stream-flat",
49
+ "bin/hdp-stream2",
49
50
  "bin/hdp-sync",
50
51
  "bin/hdp-wc",
51
52
  "bin/md5sort",
52
53
  "bin/tabchar",
53
54
  "bin/uniqc",
55
+ "bin/wu-date",
56
+ "bin/wu-datetime",
54
57
  "bin/wu-hist",
55
58
  "bin/wu-lign",
59
+ "bin/wu-plus",
56
60
  "bin/wu-sum",
57
61
  "docpages/INSTALL.textile",
58
62
  "docpages/INSTALL.textile",
@@ -129,6 +133,7 @@ Gem::Specification.new do |s|
129
133
  "lib/wukong/extensions/date_time.rb",
130
134
  "lib/wukong/extensions/emittable.rb",
131
135
  "lib/wukong/extensions/hash.rb",
136
+ "lib/wukong/extensions/hash_keys.rb",
132
137
  "lib/wukong/extensions/hash_like.rb",
133
138
  "lib/wukong/extensions/hashlike_class.rb",
134
139
  "lib/wukong/extensions/module.rb",
@@ -153,6 +158,7 @@ Gem::Specification.new do |s|
153
158
  "lib/wukong/streamer/list_reducer.rb",
154
159
  "lib/wukong/streamer/preprocess_with_pipe_streamer.rb",
155
160
  "lib/wukong/streamer/rank_and_bin_reducer.rb",
161
+ "lib/wukong/streamer/record_streamer.rb",
156
162
  "lib/wukong/streamer/set_reducer.rb",
157
163
  "lib/wukong/streamer/struct_streamer.rb",
158
164
  "lib/wukong/streamer/summing_reducer.rb",
@@ -160,6 +166,10 @@ Gem::Specification.new do |s|
160
166
  "lib/wukong/typed_struct.rb",
161
167
  "lib/wukong/wukong_class.rb",
162
168
  "spec/bin/hdp-wc_spec.rb",
169
+ "spec/data/a_atsigns_b.tsv",
170
+ "spec/data/a_follows_b.tsv",
171
+ "spec/data/tweet.tsv",
172
+ "spec/data/twitter_user.tsv",
163
173
  "spec/spec_helper.rb",
164
174
  "wukong.gemspec"
165
175
  ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip (flip) Kromer
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-12 00:00:00 -05:00
12
+ date: 2009-12-15 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -45,32 +45,12 @@ dependencies:
45
45
  description: " Treat your dataset like a:\n\n * stream of lines when it\xE2\x80\x99s efficient to process by lines\n * stream of field arrays when it\xE2\x80\x99s efficient to deal directly with fields\n * stream of lightweight objects when it\xE2\x80\x99s efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n"
46
46
  email: flip@infochimps.org
47
47
  executables:
48
- - cutc
49
- - cuttab
50
- - greptrue
51
- - hdp-cat
52
- - hdp-catd
53
48
  - hdp-du
54
- - hdp-get
55
- - hdp-kill
56
- - hdp-ls
57
- - hdp-mkdir
58
- - hdp-mv
59
- - hdp-parts_to_keys.rb
60
- - hdp-ps
61
- - hdp-put
62
- - hdp-rm
63
- - hdp-sort
64
- - hdp-stream
65
- - hdp-stream-flat
66
49
  - hdp-sync
67
50
  - hdp-wc
68
- - md5sort
69
- - tabchar
70
- - uniqc
71
- - wu-hist
72
51
  - wu-lign
73
52
  - wu-sum
53
+ - hdp-parts_to_keys.rb
74
54
  extensions: []
75
55
 
76
56
  extra_rdoc_files:
@@ -98,13 +78,17 @@ files:
98
78
  - bin/hdp-sort
99
79
  - bin/hdp-stream
100
80
  - bin/hdp-stream-flat
81
+ - bin/hdp-stream2
101
82
  - bin/hdp-sync
102
83
  - bin/hdp-wc
103
84
  - bin/md5sort
104
85
  - bin/tabchar
105
86
  - bin/uniqc
87
+ - bin/wu-date
88
+ - bin/wu-datetime
106
89
  - bin/wu-hist
107
90
  - bin/wu-lign
91
+ - bin/wu-plus
108
92
  - bin/wu-sum
109
93
  - docpages/INSTALL.textile
110
94
  - docpages/LICENSE.textile
@@ -166,6 +150,7 @@ files:
166
150
  - lib/wukong/extensions/date_time.rb
167
151
  - lib/wukong/extensions/emittable.rb
168
152
  - lib/wukong/extensions/hash.rb
153
+ - lib/wukong/extensions/hash_keys.rb
169
154
  - lib/wukong/extensions/hash_like.rb
170
155
  - lib/wukong/extensions/hashlike_class.rb
171
156
  - lib/wukong/extensions/module.rb
@@ -190,6 +175,7 @@ files:
190
175
  - lib/wukong/streamer/list_reducer.rb
191
176
  - lib/wukong/streamer/preprocess_with_pipe_streamer.rb
192
177
  - lib/wukong/streamer/rank_and_bin_reducer.rb
178
+ - lib/wukong/streamer/record_streamer.rb
193
179
  - lib/wukong/streamer/set_reducer.rb
194
180
  - lib/wukong/streamer/struct_streamer.rb
195
181
  - lib/wukong/streamer/summing_reducer.rb
@@ -197,6 +183,10 @@ files:
197
183
  - lib/wukong/typed_struct.rb
198
184
  - lib/wukong/wukong_class.rb
199
185
  - spec/bin/hdp-wc_spec.rb
186
+ - spec/data/a_atsigns_b.tsv
187
+ - spec/data/a_follows_b.tsv
188
+ - spec/data/tweet.tsv
189
+ - spec/data/twitter_user.tsv
200
190
  - spec/spec_helper.rb
201
191
  - wukong.gemspec
202
192
  has_rdoc: true