wukong 1.4.11 → 1.4.12
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/hdp-rm +3 -3
- data/lib/wukong/and_pig.rb +31 -3
- data/lib/wukong/extensions/date_time.rb +7 -5
- data/lib/wukong/keystore/tyrant_db.rb +21 -8
- data/wukong.gemspec +18 -18
- metadata +20 -20
data/bin/hdp-rm
CHANGED
@@ -14,9 +14,9 @@ if [ "$1" == "-r" ] ; then
|
|
14
14
|
shift
|
15
15
|
hadoop fs -test -e "$@"
|
16
16
|
if [ "$?" == "0" ] ; then
|
17
|
-
|
18
|
-
echo hadoop dfs -rmr "$@"
|
19
|
-
exec hadoop dfs -rmr "$@"
|
17
|
+
# echo "File exists, skipping trash, removing it..."
|
18
|
+
echo hadoop dfs -rmr -skipTrash "$@"
|
19
|
+
exec hadoop dfs -rmr -skipTrash "$@"
|
20
20
|
fi
|
21
21
|
else
|
22
22
|
hadoop fs -test -e "$@"
|
data/lib/wukong/and_pig.rb
CHANGED
@@ -2,10 +2,16 @@ module Enumerable
|
|
2
2
|
#
|
3
3
|
# Convert an array of values to a string representing it as a pig tuple
|
4
4
|
#
|
5
|
+
# def to_pig_tuple
|
6
|
+
# map{|*vals| '(' + vals.join(',') + ')' }
|
7
|
+
# end
|
8
|
+
|
9
|
+
#
|
10
|
+
# Convert an array to a pig tuple
|
11
|
+
#
|
5
12
|
def to_pig_tuple
|
6
|
-
|
13
|
+
'(' + self.join(',') + ')'
|
7
14
|
end
|
8
|
-
|
9
15
|
#
|
10
16
|
# Convert an array of values to a string pig format
|
11
17
|
# Delegates to to_pig_tuple -- see also to_pig_bag
|
@@ -17,7 +23,29 @@ module Enumerable
|
|
17
23
|
#
|
18
24
|
# Convert an array of values to a string representing it as a pig bag
|
19
25
|
#
|
26
|
+
# def to_pig_bag
|
27
|
+
# '{' + self.join(',') + '}'
|
28
|
+
# end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Convert an array of values to a string representing it as a pig bag
|
32
|
+
#
|
20
33
|
def to_pig_bag
|
21
|
-
'{' + self.join(
|
34
|
+
'{' + self.map{|*vals| vals.to_pig_tuple}.join(",") + '}'
|
35
|
+
end
|
36
|
+
|
37
|
+
#
|
38
|
+
# Convert a string representing a pig bag into a nested array
|
39
|
+
#
|
40
|
+
def from_pig_bag
|
41
|
+
self.split("),(").map{|t| t.gsub(/[\{\}]/, '').from_pig_tuple} rescue []
|
42
|
+
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# Convert a string representing a pig tuple into an array
|
46
|
+
#
|
47
|
+
def from_pig_tuple
|
48
|
+
self.gsub(/[\(\)]/, '').split(',')
|
22
49
|
end
|
50
|
+
|
23
51
|
end
|
@@ -1,21 +1,23 @@
|
|
1
1
|
require 'time'
|
2
|
+
require 'date'
|
2
3
|
DateTime.class_eval do
|
3
4
|
#
|
4
5
|
# Parses the time but never fails.
|
5
6
|
# Return value is always in the UTC time zone.
|
6
7
|
#
|
7
|
-
# A flattened
|
8
|
-
#
|
8
|
+
# A flattened datetime -- a 12-digit YYYYmmddHHMMSS -- is fixed to the UTC
|
9
|
+
# time zone by parsing it as YYYYmmddHHMMSSZ <- 'Z' at end
|
9
10
|
#
|
10
11
|
def self.parse_safely dt
|
12
|
+
return nil if dt.blank?
|
11
13
|
begin
|
12
14
|
if dt.to_s =~ /\A\d{12}Z?\z/
|
13
|
-
parse(dt+'Z', true)
|
15
|
+
parse(dt+'Z', true)
|
14
16
|
else
|
15
17
|
parse(dt, true).utc
|
16
18
|
end
|
17
|
-
rescue StandardError
|
18
|
-
|
19
|
+
rescue StandardError => e
|
20
|
+
Log.info e
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
@@ -53,7 +53,6 @@ class TokyoTyrant::Balancer::Base
|
|
53
53
|
def close
|
54
54
|
@servers.all?{ |server| server.close rescue nil}
|
55
55
|
end
|
56
|
-
|
57
56
|
end
|
58
57
|
|
59
58
|
module TokyoDbConnection
|
@@ -67,11 +66,27 @@ module TokyoDbConnection
|
|
67
66
|
].freeze unless defined?(TokyoDbConnection::TyrantDb::DB_SERVERS)
|
68
67
|
|
69
68
|
DB_PORTS = {
|
70
|
-
:
|
71
|
-
:
|
72
|
-
|
73
|
-
:
|
74
|
-
:
|
69
|
+
:screen_names => 12002,
|
70
|
+
:search_ids => 12003,
|
71
|
+
#
|
72
|
+
:tw_user_info => 14000,
|
73
|
+
:tw_wordbag => 14101,
|
74
|
+
:tw_influence => 14102,
|
75
|
+
:tw_trstrank => 14103,
|
76
|
+
:tw_conversation => 14104,
|
77
|
+
#
|
78
|
+
:screen_names2 => 12004,
|
79
|
+
:search_ids2 => 12005,
|
80
|
+
#
|
81
|
+
:tw_user_info2 => 14200,
|
82
|
+
:tw_wordbag2 => 14201,
|
83
|
+
:tw_influence2 => 14202,
|
84
|
+
:tw_trstrank2 => 14203,
|
85
|
+
:tw_conversation2 => 14204,
|
86
|
+
:tw_strong_links2 => 14205,
|
87
|
+
:tw_word_stats2 => 14206,
|
88
|
+
#
|
89
|
+
:ip_geo_census => 14400,
|
75
90
|
} unless defined?(TokyoDbConnection::TyrantDb::DB_PORTS)
|
76
91
|
|
77
92
|
def initialize dataset
|
@@ -82,8 +97,6 @@ module TokyoDbConnection
|
|
82
97
|
return @db if @db
|
83
98
|
port = DB_PORTS[dataset] or raise "Don't know how to reach dataset #{dataset}"
|
84
99
|
@db = TokyoTyrant::Balancer::DB.new(DB_SERVERS.map{|s| s+':'+port.to_s})
|
85
|
-
# @db = TokyoTyrant::DB.new(DB_SERVERS.first, port.to_i)
|
86
|
-
@db
|
87
100
|
end
|
88
101
|
|
89
102
|
def [](*args) ; db[*args] ; end
|
data/wukong.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{wukong}
|
8
|
-
s.version = "1.4.11"
|
8
|
+
s.version = "1.4.12"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-08-03}
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
14
14
|
|
15
15
|
* stream of lines when it's efficient to process by lines
|
@@ -245,31 +245,31 @@ Gem::Specification.new do |s|
|
|
245
245
|
"spec/spec_helper.rb",
|
246
246
|
"spec/wukong/encoding_spec.rb",
|
247
247
|
"spec/wukong/script_spec.rb",
|
248
|
-
"examples/binning_percentile_estimator.rb",
|
249
|
-
"examples/contrib/jeans/normalize.rb",
|
250
|
-
"examples/contrib/jeans/sizes.rb",
|
251
|
-
"examples/corpus/words_to_bigrams.rb",
|
252
|
-
"examples/count_keys.rb",
|
253
|
-
"examples/count_keys_at_mapper.rb",
|
254
|
-
"examples/keystore/cassandra_batch_test.rb",
|
255
|
-
"examples/keystore/conditional_outputter_example.rb",
|
256
|
-
"examples/network_graph/adjacency_list.rb",
|
257
|
-
"examples/network_graph/breadth_first_search.rb",
|
258
|
-
"examples/network_graph/gen_2paths.rb",
|
259
|
-
"examples/network_graph/gen_multi_edge.rb",
|
260
|
-
"examples/network_graph/gen_symmetric_links.rb",
|
261
248
|
"examples/pagerank/pagerank.rb",
|
262
249
|
"examples/pagerank/pagerank_initialize.rb",
|
263
|
-
"examples/rank_and_bin.rb",
|
264
250
|
"examples/sample_records.rb",
|
265
251
|
"examples/server_logs/apache_log_parser.rb",
|
266
252
|
"examples/server_logs/breadcrumbs.rb",
|
267
253
|
"examples/server_logs/user_agent.rb",
|
254
|
+
"examples/corpus/words_to_bigrams.rb",
|
255
|
+
"examples/count_keys.rb",
|
256
|
+
"examples/rank_and_bin.rb",
|
257
|
+
"examples/binning_percentile_estimator.rb",
|
268
258
|
"examples/size.rb",
|
269
|
-
"examples/stats/avg_value_frequency.rb",
|
270
259
|
"examples/store/chunked_store_example.rb",
|
260
|
+
"examples/network_graph/breadth_first_search.rb",
|
261
|
+
"examples/network_graph/gen_symmetric_links.rb",
|
262
|
+
"examples/network_graph/gen_multi_edge.rb",
|
263
|
+
"examples/network_graph/adjacency_list.rb",
|
264
|
+
"examples/network_graph/gen_2paths.rb",
|
265
|
+
"examples/keystore/cassandra_batch_test.rb",
|
266
|
+
"examples/keystore/conditional_outputter_example.rb",
|
267
|
+
"examples/stats/avg_value_frequency.rb",
|
268
|
+
"examples/contrib/jeans/sizes.rb",
|
269
|
+
"examples/contrib/jeans/normalize.rb",
|
270
|
+
"examples/word_count.rb",
|
271
271
|
"examples/stupidly_simple_filter.rb",
|
272
|
-
"examples/
|
272
|
+
"examples/count_keys_at_mapper.rb"
|
273
273
|
]
|
274
274
|
|
275
275
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 4
|
9
|
-
- 11
|
10
|
-
version: 1.4.11
|
9
|
+
- 12
|
10
|
+
version: 1.4.12
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Philip (flip) Kromer
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-08-03 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -367,28 +367,28 @@ test_files:
|
|
367
367
|
- spec/spec_helper.rb
|
368
368
|
- spec/wukong/encoding_spec.rb
|
369
369
|
- spec/wukong/script_spec.rb
|
370
|
-
- examples/binning_percentile_estimator.rb
|
371
|
-
- examples/contrib/jeans/normalize.rb
|
372
|
-
- examples/contrib/jeans/sizes.rb
|
373
|
-
- examples/corpus/words_to_bigrams.rb
|
374
|
-
- examples/count_keys.rb
|
375
|
-
- examples/count_keys_at_mapper.rb
|
376
|
-
- examples/keystore/cassandra_batch_test.rb
|
377
|
-
- examples/keystore/conditional_outputter_example.rb
|
378
|
-
- examples/network_graph/adjacency_list.rb
|
379
|
-
- examples/network_graph/breadth_first_search.rb
|
380
|
-
- examples/network_graph/gen_2paths.rb
|
381
|
-
- examples/network_graph/gen_multi_edge.rb
|
382
|
-
- examples/network_graph/gen_symmetric_links.rb
|
383
370
|
- examples/pagerank/pagerank.rb
|
384
371
|
- examples/pagerank/pagerank_initialize.rb
|
385
|
-
- examples/rank_and_bin.rb
|
386
372
|
- examples/sample_records.rb
|
387
373
|
- examples/server_logs/apache_log_parser.rb
|
388
374
|
- examples/server_logs/breadcrumbs.rb
|
389
375
|
- examples/server_logs/user_agent.rb
|
376
|
+
- examples/corpus/words_to_bigrams.rb
|
377
|
+
- examples/count_keys.rb
|
378
|
+
- examples/rank_and_bin.rb
|
379
|
+
- examples/binning_percentile_estimator.rb
|
390
380
|
- examples/size.rb
|
391
|
-
- examples/stats/avg_value_frequency.rb
|
392
381
|
- examples/store/chunked_store_example.rb
|
393
|
-
- examples/
|
382
|
+
- examples/network_graph/breadth_first_search.rb
|
383
|
+
- examples/network_graph/gen_symmetric_links.rb
|
384
|
+
- examples/network_graph/gen_multi_edge.rb
|
385
|
+
- examples/network_graph/adjacency_list.rb
|
386
|
+
- examples/network_graph/gen_2paths.rb
|
387
|
+
- examples/keystore/cassandra_batch_test.rb
|
388
|
+
- examples/keystore/conditional_outputter_example.rb
|
389
|
+
- examples/stats/avg_value_frequency.rb
|
390
|
+
- examples/contrib/jeans/sizes.rb
|
391
|
+
- examples/contrib/jeans/normalize.rb
|
394
392
|
- examples/word_count.rb
|
393
|
+
- examples/stupidly_simple_filter.rb
|
394
|
+
- examples/count_keys_at_mapper.rb
|