wukong 1.4.11 → 1.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/hdp-rm +3 -3
- data/lib/wukong/and_pig.rb +31 -3
- data/lib/wukong/extensions/date_time.rb +7 -5
- data/lib/wukong/keystore/tyrant_db.rb +21 -8
- data/wukong.gemspec +18 -18
- metadata +20 -20
data/bin/hdp-rm
CHANGED
|
@@ -14,9 +14,9 @@ if [ "$1" == "-r" ] ; then
|
|
|
14
14
|
shift
|
|
15
15
|
hadoop fs -test -e "$@"
|
|
16
16
|
if [ "$?" == "0" ] ; then
|
|
17
|
-
|
|
18
|
-
echo hadoop dfs -rmr "$@"
|
|
19
|
-
exec hadoop dfs -rmr "$@"
|
|
17
|
+
# echo "File exists, skipping trash, removing it..."
|
|
18
|
+
echo hadoop dfs -rmr -skipTrash "$@"
|
|
19
|
+
exec hadoop dfs -rmr -skipTrash "$@"
|
|
20
20
|
fi
|
|
21
21
|
else
|
|
22
22
|
hadoop fs -test -e "$@"
|
data/lib/wukong/and_pig.rb
CHANGED
|
@@ -2,10 +2,16 @@ module Enumerable
|
|
|
2
2
|
#
|
|
3
3
|
# Convert an array of values to a string representing it as a pig tuple
|
|
4
4
|
#
|
|
5
|
+
# def to_pig_tuple
|
|
6
|
+
# map{|*vals| '(' + vals.join(',') + ')' }
|
|
7
|
+
# end
|
|
8
|
+
|
|
9
|
+
#
|
|
10
|
+
# Convert an array to a pig tuple
|
|
11
|
+
#
|
|
5
12
|
def to_pig_tuple
|
|
6
|
-
|
|
13
|
+
'(' + self.join(',') + ')'
|
|
7
14
|
end
|
|
8
|
-
|
|
9
15
|
#
|
|
10
16
|
# Convert an array of values to a string pig format
|
|
11
17
|
# Delegates to to_pig_tuple -- see also to_pig_bag
|
|
@@ -17,7 +23,29 @@ module Enumerable
|
|
|
17
23
|
#
|
|
18
24
|
# Convert an array of values to a string representing it as a pig bag
|
|
19
25
|
#
|
|
26
|
+
# def to_pig_bag
|
|
27
|
+
# '{' + self.join(',') + '}'
|
|
28
|
+
# end
|
|
29
|
+
|
|
30
|
+
#
|
|
31
|
+
# Convert and array of values to a string representing it as a pig bag
|
|
32
|
+
#
|
|
20
33
|
def to_pig_bag
|
|
21
|
-
'{' + self.join(',') + '}'
|
|
34
|
+
'{' + self.map{|*vals| vals.to_pig_tuple}.join(",") + '}'
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
#
|
|
38
|
+
# Convert a string representing a pig bag into a nested array
|
|
39
|
+
#
|
|
40
|
+
def from_pig_bag
|
|
41
|
+
self.split("),(").map{|t| t.gsub(/[\{\}]/, '').from_pig_tuple} rescue []
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
#
|
|
45
|
+
# Convert a string representing a pig tuple into an array
|
|
46
|
+
#
|
|
47
|
+
def from_pig_tuple
|
|
48
|
+
self.gsub(/[\(\)]/, '').split(',')
|
|
22
49
|
end
|
|
50
|
+
|
|
23
51
|
end
|
data/lib/wukong/extensions/date_time.rb
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
require 'time'
|
|
2
|
+
require 'date'
|
|
2
3
|
DateTime.class_eval do
|
|
3
4
|
#
|
|
4
5
|
# Parses the time but never fails.
|
|
5
6
|
# Return value is always in the UTC time zone.
|
|
6
7
|
#
|
|
7
|
-
# A flattened
|
|
8
|
-
#
|
|
8
|
+
# A flattened datetime -- a 12-digit YYYYmmddHHMMMSS -- is fixed to the UTC
|
|
9
|
+
# time zone by parsing it as YYYYmmddHHMMMSSZ <- 'Z' at end
|
|
9
10
|
#
|
|
10
11
|
def self.parse_safely dt
|
|
12
|
+
return nil if dt.blank?
|
|
11
13
|
begin
|
|
12
14
|
if dt.to_s =~ /\A\d{12}Z?\z/
|
|
13
|
-
parse(dt+'Z', true)
|
|
15
|
+
parse(dt+'Z', true)
|
|
14
16
|
else
|
|
15
17
|
parse(dt, true).utc
|
|
16
18
|
end
|
|
17
|
-
rescue StandardError
|
|
18
|
-
|
|
19
|
+
rescue StandardError => e
|
|
20
|
+
Log.info e
|
|
19
21
|
end
|
|
20
22
|
end
|
|
21
23
|
|
|
data/lib/wukong/keystore/tyrant_db.rb
CHANGED
|
@@ -53,7 +53,6 @@ class TokyoTyrant::Balancer::Base
|
|
|
53
53
|
def close
|
|
54
54
|
@servers.all?{ |server| server.close rescue nil}
|
|
55
55
|
end
|
|
56
|
-
|
|
57
56
|
end
|
|
58
57
|
|
|
59
58
|
module TokyoDbConnection
|
|
@@ -67,11 +66,27 @@ module TokyoDbConnection
|
|
|
67
66
|
].freeze unless defined?(TokyoDbConnection::TyrantDb::DB_SERVERS)
|
|
68
67
|
|
|
69
68
|
DB_PORTS = {
|
|
70
|
-
:
|
|
71
|
-
:
|
|
72
|
-
|
|
73
|
-
:
|
|
74
|
-
:
|
|
69
|
+
:screen_names => 12002,
|
|
70
|
+
:search_ids => 12003,
|
|
71
|
+
#
|
|
72
|
+
:tw_user_info => 14000,
|
|
73
|
+
:tw_wordbag => 14101,
|
|
74
|
+
:tw_influence => 14102,
|
|
75
|
+
:tw_trstrank => 14103,
|
|
76
|
+
:tw_conversation => 14104,
|
|
77
|
+
#
|
|
78
|
+
:screen_names2 => 12004,
|
|
79
|
+
:search_ids2 => 12005,
|
|
80
|
+
#
|
|
81
|
+
:tw_user_info2 => 14200,
|
|
82
|
+
:tw_wordbag2 => 14201,
|
|
83
|
+
:tw_influence2 => 14202,
|
|
84
|
+
:tw_trstrank2 => 14203,
|
|
85
|
+
:tw_conversation2 => 14204,
|
|
86
|
+
:tw_strong_links2 => 14205,
|
|
87
|
+
:tw_word_stats2 => 14206,
|
|
88
|
+
#
|
|
89
|
+
:ip_geo_census => 14400,
|
|
75
90
|
} unless defined?(TokyoDbConnection::TyrantDb::DB_PORTS)
|
|
76
91
|
|
|
77
92
|
def initialize dataset
|
|
@@ -82,8 +97,6 @@ module TokyoDbConnection
|
|
|
82
97
|
return @db if @db
|
|
83
98
|
port = DB_PORTS[dataset] or raise "Don't know how to reach dataset #{dataset}"
|
|
84
99
|
@db = TokyoTyrant::Balancer::DB.new(DB_SERVERS.map{|s| s+':'+port.to_s})
|
|
85
|
-
# @db = TokyoTyrant::DB.new(DB_SERVERS.first, port.to_i)
|
|
86
|
-
@db
|
|
87
100
|
end
|
|
88
101
|
|
|
89
102
|
def [](*args) ; db[*args] ; end
|
data/wukong.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{wukong}
|
|
8
|
-
s.version = "1.4.11"
|
|
8
|
+
s.version = "1.4.12"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
|
12
|
-
s.date = %q{2010-
|
|
12
|
+
s.date = %q{2010-08-03}
|
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
|
14
14
|
|
|
15
15
|
* stream of lines when it's efficient to process by lines
|
|
@@ -245,31 +245,31 @@ Gem::Specification.new do |s|
|
|
|
245
245
|
"spec/spec_helper.rb",
|
|
246
246
|
"spec/wukong/encoding_spec.rb",
|
|
247
247
|
"spec/wukong/script_spec.rb",
|
|
248
|
-
"examples/binning_percentile_estimator.rb",
|
|
249
|
-
"examples/contrib/jeans/normalize.rb",
|
|
250
|
-
"examples/contrib/jeans/sizes.rb",
|
|
251
|
-
"examples/corpus/words_to_bigrams.rb",
|
|
252
|
-
"examples/count_keys.rb",
|
|
253
|
-
"examples/count_keys_at_mapper.rb",
|
|
254
|
-
"examples/keystore/cassandra_batch_test.rb",
|
|
255
|
-
"examples/keystore/conditional_outputter_example.rb",
|
|
256
|
-
"examples/network_graph/adjacency_list.rb",
|
|
257
|
-
"examples/network_graph/breadth_first_search.rb",
|
|
258
|
-
"examples/network_graph/gen_2paths.rb",
|
|
259
|
-
"examples/network_graph/gen_multi_edge.rb",
|
|
260
|
-
"examples/network_graph/gen_symmetric_links.rb",
|
|
261
248
|
"examples/pagerank/pagerank.rb",
|
|
262
249
|
"examples/pagerank/pagerank_initialize.rb",
|
|
263
|
-
"examples/rank_and_bin.rb",
|
|
264
250
|
"examples/sample_records.rb",
|
|
265
251
|
"examples/server_logs/apache_log_parser.rb",
|
|
266
252
|
"examples/server_logs/breadcrumbs.rb",
|
|
267
253
|
"examples/server_logs/user_agent.rb",
|
|
254
|
+
"examples/corpus/words_to_bigrams.rb",
|
|
255
|
+
"examples/count_keys.rb",
|
|
256
|
+
"examples/rank_and_bin.rb",
|
|
257
|
+
"examples/binning_percentile_estimator.rb",
|
|
268
258
|
"examples/size.rb",
|
|
269
|
-
"examples/stats/avg_value_frequency.rb",
|
|
270
259
|
"examples/store/chunked_store_example.rb",
|
|
260
|
+
"examples/network_graph/breadth_first_search.rb",
|
|
261
|
+
"examples/network_graph/gen_symmetric_links.rb",
|
|
262
|
+
"examples/network_graph/gen_multi_edge.rb",
|
|
263
|
+
"examples/network_graph/adjacency_list.rb",
|
|
264
|
+
"examples/network_graph/gen_2paths.rb",
|
|
265
|
+
"examples/keystore/cassandra_batch_test.rb",
|
|
266
|
+
"examples/keystore/conditional_outputter_example.rb",
|
|
267
|
+
"examples/stats/avg_value_frequency.rb",
|
|
268
|
+
"examples/contrib/jeans/sizes.rb",
|
|
269
|
+
"examples/contrib/jeans/normalize.rb",
|
|
270
|
+
"examples/word_count.rb",
|
|
271
271
|
"examples/stupidly_simple_filter.rb",
|
|
272
|
-
"examples/word_count.rb"
|
|
272
|
+
"examples/count_keys_at_mapper.rb"
|
|
273
273
|
]
|
|
274
274
|
|
|
275
275
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wukong
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 31
|
|
5
5
|
prerelease: false
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
8
|
- 4
|
|
9
|
-
- 11
|
|
10
|
-
version: 1.4.11
|
|
9
|
+
- 12
|
|
10
|
+
version: 1.4.12
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Philip (flip) Kromer
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2010-
|
|
18
|
+
date: 2010-08-03 00:00:00 +00:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -367,28 +367,28 @@ test_files:
|
|
|
367
367
|
- spec/spec_helper.rb
|
|
368
368
|
- spec/wukong/encoding_spec.rb
|
|
369
369
|
- spec/wukong/script_spec.rb
|
|
370
|
-
- examples/binning_percentile_estimator.rb
|
|
371
|
-
- examples/contrib/jeans/normalize.rb
|
|
372
|
-
- examples/contrib/jeans/sizes.rb
|
|
373
|
-
- examples/corpus/words_to_bigrams.rb
|
|
374
|
-
- examples/count_keys.rb
|
|
375
|
-
- examples/count_keys_at_mapper.rb
|
|
376
|
-
- examples/keystore/cassandra_batch_test.rb
|
|
377
|
-
- examples/keystore/conditional_outputter_example.rb
|
|
378
|
-
- examples/network_graph/adjacency_list.rb
|
|
379
|
-
- examples/network_graph/breadth_first_search.rb
|
|
380
|
-
- examples/network_graph/gen_2paths.rb
|
|
381
|
-
- examples/network_graph/gen_multi_edge.rb
|
|
382
|
-
- examples/network_graph/gen_symmetric_links.rb
|
|
383
370
|
- examples/pagerank/pagerank.rb
|
|
384
371
|
- examples/pagerank/pagerank_initialize.rb
|
|
385
|
-
- examples/rank_and_bin.rb
|
|
386
372
|
- examples/sample_records.rb
|
|
387
373
|
- examples/server_logs/apache_log_parser.rb
|
|
388
374
|
- examples/server_logs/breadcrumbs.rb
|
|
389
375
|
- examples/server_logs/user_agent.rb
|
|
376
|
+
- examples/corpus/words_to_bigrams.rb
|
|
377
|
+
- examples/count_keys.rb
|
|
378
|
+
- examples/rank_and_bin.rb
|
|
379
|
+
- examples/binning_percentile_estimator.rb
|
|
390
380
|
- examples/size.rb
|
|
391
|
-
- examples/stats/avg_value_frequency.rb
|
|
392
381
|
- examples/store/chunked_store_example.rb
|
|
393
|
-
- examples/stupidly_simple_filter.rb
|
|
382
|
+
- examples/network_graph/breadth_first_search.rb
|
|
383
|
+
- examples/network_graph/gen_symmetric_links.rb
|
|
384
|
+
- examples/network_graph/gen_multi_edge.rb
|
|
385
|
+
- examples/network_graph/adjacency_list.rb
|
|
386
|
+
- examples/network_graph/gen_2paths.rb
|
|
387
|
+
- examples/keystore/cassandra_batch_test.rb
|
|
388
|
+
- examples/keystore/conditional_outputter_example.rb
|
|
389
|
+
- examples/stats/avg_value_frequency.rb
|
|
390
|
+
- examples/contrib/jeans/sizes.rb
|
|
391
|
+
- examples/contrib/jeans/normalize.rb
|
|
394
392
|
- examples/word_count.rb
|
|
393
|
+
- examples/stupidly_simple_filter.rb
|
|
394
|
+
- examples/count_keys_at_mapper.rb
|