wukong 1.4.11 → 1.4.12

Sign up to get free protection for your applications and to get access to all the features.
data/bin/hdp-rm CHANGED
@@ -14,9 +14,9 @@ if [ "$1" == "-r" ] ; then
14
14
  shift
15
15
  hadoop fs -test -e "$@"
16
16
  if [ "$?" == "0" ] ; then
17
- # echo "File exists, skipping trash, removing it..."
18
- echo hadoop dfs -rmr "$@"
19
- exec hadoop dfs -rmr "$@"
17
+ # echo "File exists, skipping trash, removing it..."
18
+ echo hadoop dfs -rmr -skipTrash "$@"
19
+ exec hadoop dfs -rmr -skipTrash "$@"
20
20
  fi
21
21
  else
22
22
  hadoop fs -test -e "$@"
@@ -2,10 +2,16 @@ module Enumerable
2
2
  #
3
3
  # Convert an array of values to a string representing it as a pig tuple
4
4
  #
5
+ # def to_pig_tuple
6
+ # map{|*vals| '(' + vals.join(',') + ')' }
7
+ # end
8
+
9
+ #
10
+ # Convert an array to a pig tuple
11
+ #
5
12
  def to_pig_tuple
6
- map{|*vals| '(' + vals.join(',') + ')' }
13
+ '(' + self.join(',') + ')'
7
14
  end
8
-
9
15
  #
10
16
  # Convert an array of values to a string pig format
11
17
  # Delegates to to_pig_tuple -- see also to_pig_bag
@@ -17,7 +23,29 @@ module Enumerable
17
23
  #
18
24
  # Convert an array of values to a string representing it as a pig bag
19
25
  #
26
+ # def to_pig_bag
27
+ # '{' + self.join(',') + '}'
28
+ # end
29
+
30
+ #
31
+ # Convert an array of values to a string representing it as a pig bag
32
+ #
20
33
  def to_pig_bag
21
- '{' + self.join(',') + '}'
34
+ '{' + self.map{|*vals| vals.to_pig_tuple}.join(",") + '}'
35
+ end
36
+
37
+ #
38
+ # Convert a string representing a pig bag into a nested array
39
+ #
40
+ def from_pig_bag
41
+ self.split("),(").map{|t| t.gsub(/[\{\}]/, '').from_pig_tuple} rescue []
42
+ end
43
+
44
+ #
45
+ # Convert a string representing a pig tuple into an array
46
+ #
47
+ def from_pig_tuple
48
+ self.gsub(/[\(\)]/, '').split(',')
22
49
  end
50
+
23
51
  end
@@ -1,21 +1,23 @@
1
1
  require 'time'
2
+ require 'date'
2
3
  DateTime.class_eval do
3
4
  #
4
5
  # Parses the time but never fails.
5
6
  # Return value is always in the UTC time zone.
6
7
  #
7
- # A flattened time -- a 12-digit YYYYmmddHHMMMSS -- is treated as a UTC
8
- # datetime.
8
+ # A flattened datetime -- a 12-digit YYYYmmddHHMMMSS -- is fixed to the UTC
9
+ # time zone by parsing it as YYYYmmddHHMMMSSZ <- 'Z' at end
9
10
  #
10
11
  def self.parse_safely dt
12
+ return nil if dt.blank?
11
13
  begin
12
14
  if dt.to_s =~ /\A\d{12}Z?\z/
13
- parse(dt+'Z', true).utc
15
+ parse(dt+'Z', true)
14
16
  else
15
17
  parse(dt, true).utc
16
18
  end
17
- rescue StandardError
18
- nil
19
+ rescue StandardError => e
20
+ Log.info e
19
21
  end
20
22
  end
21
23
 
@@ -53,7 +53,6 @@ class TokyoTyrant::Balancer::Base
53
53
  def close
54
54
  @servers.all?{ |server| server.close rescue nil}
55
55
  end
56
-
57
56
  end
58
57
 
59
58
  module TokyoDbConnection
@@ -67,11 +66,27 @@ module TokyoDbConnection
67
66
  ].freeze unless defined?(TokyoDbConnection::TyrantDb::DB_SERVERS)
68
67
 
69
68
  DB_PORTS = {
70
- :user_ids => 12001,
71
- :screen_names => 12002,
72
- :search_ids => 12003,
73
- :tweets_parsed => 12004,
74
- :users_parsed => 12005,
69
+ :screen_names => 12002,
70
+ :search_ids => 12003,
71
+ #
72
+ :tw_user_info => 14000,
73
+ :tw_wordbag => 14101,
74
+ :tw_influence => 14102,
75
+ :tw_trstrank => 14103,
76
+ :tw_conversation => 14104,
77
+ #
78
+ :screen_names2 => 12004,
79
+ :search_ids2 => 12005,
80
+ #
81
+ :tw_user_info2 => 14200,
82
+ :tw_wordbag2 => 14201,
83
+ :tw_influence2 => 14202,
84
+ :tw_trstrank2 => 14203,
85
+ :tw_conversation2 => 14204,
86
+ :tw_strong_links2 => 14205,
87
+ :tw_word_stats2 => 14206,
88
+ #
89
+ :ip_geo_census => 14400,
75
90
  } unless defined?(TokyoDbConnection::TyrantDb::DB_PORTS)
76
91
 
77
92
  def initialize dataset
@@ -82,8 +97,6 @@ module TokyoDbConnection
82
97
  return @db if @db
83
98
  port = DB_PORTS[dataset] or raise "Don't know how to reach dataset #{dataset}"
84
99
  @db = TokyoTyrant::Balancer::DB.new(DB_SERVERS.map{|s| s+':'+port.to_s})
85
- # @db = TokyoTyrant::DB.new(DB_SERVERS.first, port.to_i)
86
- @db
87
100
  end
88
101
 
89
102
  def [](*args) ; db[*args] ; end
data/wukong.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{wukong}
8
- s.version = "1.4.11"
8
+ s.version = "1.4.12"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Philip (flip) Kromer"]
12
- s.date = %q{2010-07-30}
12
+ s.date = %q{2010-08-03}
13
13
  s.description = %q{ Treat your dataset like a:
14
14
 
15
15
  * stream of lines when it's efficient to process by lines
@@ -245,31 +245,31 @@ Gem::Specification.new do |s|
245
245
  "spec/spec_helper.rb",
246
246
  "spec/wukong/encoding_spec.rb",
247
247
  "spec/wukong/script_spec.rb",
248
- "examples/binning_percentile_estimator.rb",
249
- "examples/contrib/jeans/normalize.rb",
250
- "examples/contrib/jeans/sizes.rb",
251
- "examples/corpus/words_to_bigrams.rb",
252
- "examples/count_keys.rb",
253
- "examples/count_keys_at_mapper.rb",
254
- "examples/keystore/cassandra_batch_test.rb",
255
- "examples/keystore/conditional_outputter_example.rb",
256
- "examples/network_graph/adjacency_list.rb",
257
- "examples/network_graph/breadth_first_search.rb",
258
- "examples/network_graph/gen_2paths.rb",
259
- "examples/network_graph/gen_multi_edge.rb",
260
- "examples/network_graph/gen_symmetric_links.rb",
261
248
  "examples/pagerank/pagerank.rb",
262
249
  "examples/pagerank/pagerank_initialize.rb",
263
- "examples/rank_and_bin.rb",
264
250
  "examples/sample_records.rb",
265
251
  "examples/server_logs/apache_log_parser.rb",
266
252
  "examples/server_logs/breadcrumbs.rb",
267
253
  "examples/server_logs/user_agent.rb",
254
+ "examples/corpus/words_to_bigrams.rb",
255
+ "examples/count_keys.rb",
256
+ "examples/rank_and_bin.rb",
257
+ "examples/binning_percentile_estimator.rb",
268
258
  "examples/size.rb",
269
- "examples/stats/avg_value_frequency.rb",
270
259
  "examples/store/chunked_store_example.rb",
260
+ "examples/network_graph/breadth_first_search.rb",
261
+ "examples/network_graph/gen_symmetric_links.rb",
262
+ "examples/network_graph/gen_multi_edge.rb",
263
+ "examples/network_graph/adjacency_list.rb",
264
+ "examples/network_graph/gen_2paths.rb",
265
+ "examples/keystore/cassandra_batch_test.rb",
266
+ "examples/keystore/conditional_outputter_example.rb",
267
+ "examples/stats/avg_value_frequency.rb",
268
+ "examples/contrib/jeans/sizes.rb",
269
+ "examples/contrib/jeans/normalize.rb",
270
+ "examples/word_count.rb",
271
271
  "examples/stupidly_simple_filter.rb",
272
- "examples/word_count.rb"
272
+ "examples/count_keys_at_mapper.rb"
273
273
  ]
274
274
 
275
275
  if s.respond_to? :specification_version then
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 31
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 4
9
- - 11
10
- version: 1.4.11
9
+ - 12
10
+ version: 1.4.12
11
11
  platform: ruby
12
12
  authors:
13
13
  - Philip (flip) Kromer
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-30 00:00:00 -05:00
18
+ date: 2010-08-03 00:00:00 +00:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -367,28 +367,28 @@ test_files:
367
367
  - spec/spec_helper.rb
368
368
  - spec/wukong/encoding_spec.rb
369
369
  - spec/wukong/script_spec.rb
370
- - examples/binning_percentile_estimator.rb
371
- - examples/contrib/jeans/normalize.rb
372
- - examples/contrib/jeans/sizes.rb
373
- - examples/corpus/words_to_bigrams.rb
374
- - examples/count_keys.rb
375
- - examples/count_keys_at_mapper.rb
376
- - examples/keystore/cassandra_batch_test.rb
377
- - examples/keystore/conditional_outputter_example.rb
378
- - examples/network_graph/adjacency_list.rb
379
- - examples/network_graph/breadth_first_search.rb
380
- - examples/network_graph/gen_2paths.rb
381
- - examples/network_graph/gen_multi_edge.rb
382
- - examples/network_graph/gen_symmetric_links.rb
383
370
  - examples/pagerank/pagerank.rb
384
371
  - examples/pagerank/pagerank_initialize.rb
385
- - examples/rank_and_bin.rb
386
372
  - examples/sample_records.rb
387
373
  - examples/server_logs/apache_log_parser.rb
388
374
  - examples/server_logs/breadcrumbs.rb
389
375
  - examples/server_logs/user_agent.rb
376
+ - examples/corpus/words_to_bigrams.rb
377
+ - examples/count_keys.rb
378
+ - examples/rank_and_bin.rb
379
+ - examples/binning_percentile_estimator.rb
390
380
  - examples/size.rb
391
- - examples/stats/avg_value_frequency.rb
392
381
  - examples/store/chunked_store_example.rb
393
- - examples/stupidly_simple_filter.rb
382
+ - examples/network_graph/breadth_first_search.rb
383
+ - examples/network_graph/gen_symmetric_links.rb
384
+ - examples/network_graph/gen_multi_edge.rb
385
+ - examples/network_graph/adjacency_list.rb
386
+ - examples/network_graph/gen_2paths.rb
387
+ - examples/keystore/cassandra_batch_test.rb
388
+ - examples/keystore/conditional_outputter_example.rb
389
+ - examples/stats/avg_value_frequency.rb
390
+ - examples/contrib/jeans/sizes.rb
391
+ - examples/contrib/jeans/normalize.rb
394
392
  - examples/word_count.rb
393
+ - examples/stupidly_simple_filter.rb
394
+ - examples/count_keys_at_mapper.rb