wukong 1.4.10 → 1.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.textile +11 -0
- data/bin/hdp-mkdirp +12 -0
- data/bin/hdp-rm +26 -5
- data/bin/hdp-sort +1 -1
- data/bin/hdp-stream +1 -1
- data/bin/hdp-stream-flat +1 -1
- data/examples/store/chunked_store_example.rb +18 -0
- data/lib/wukong/and_pig.rb +23 -0
- data/lib/wukong/monitor.rb +7 -0
- data/lib/wukong/monitor/chunked_store.rb +23 -0
- data/lib/wukong/monitor/periodic_logger.rb +34 -0
- data/lib/wukong/monitor/periodic_monitor.rb +72 -0
- data/lib/wukong/script/hadoop_command.rb +4 -0
- data/lib/wukong/store.rb +14 -0
- data/lib/wukong/store/base.rb +29 -0
- data/lib/wukong/store/chunked_flat_file_store.rb +37 -0
- data/lib/wukong/store/conditional_store.rb +57 -0
- data/lib/wukong/store/factory.rb +8 -0
- data/lib/wukong/store/flat_file_store.rb +90 -0
- data/lib/wukong/store/key_store.rb +51 -0
- data/lib/wukong/store/null_store.rb +15 -0
- data/lib/wukong/store/read_thru_store.rb +22 -0
- data/lib/wukong/store/tokyo_tdb_key_store.rb +33 -0
- data/lib/wukong/store/tyrant_rdb_key_store.rb +57 -0
- data/lib/wukong/store/tyrant_tdb_key_store.rb +20 -0
- data/wukong.gemspec +37 -17
- metadata +39 -19
data/CHANGELOG.textile
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
h2. Wukong v1.4.11 2010-07-30
|
|
2
|
+
|
|
3
|
+
* added the @max_(maps|reduces)_per_(node|cluster)@ jobconfs.
|
|
4
|
+
* added jobconfs for io_job_mb and friends.
|
|
5
|
+
* added a loadable module to convert output data to pig bags and tuples
|
|
6
|
+
* pulled in several methods from active_support, incl. Enumerable#sum
|
|
7
|
+
* Scripts to find percentile rank of elements in a dataset
|
|
8
|
+
* We are starting to move wukong to a model where streaming is from a generic
|
|
9
|
+
source into a generic sink. Several stores have been landed in the code, but
|
|
10
|
+
many are in a half- or un-baked state. Please ignore this for the moment.
|
|
11
|
+
|
|
1
12
|
h2. Wukong v1.4.8 2010-06-05
|
|
2
13
|
|
|
3
14
|
* made scripts inject a helpful job name using mapred.job.name
|
data/bin/hdp-mkdirp
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Despite arguments and deliberation, this IS a necessary script. Azkaban, should you choose to
|
|
5
|
+
# use it, will fail if (it seems) ANY of its spawned subprocesses fails
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
hadoop fs -test -e "$@"
|
|
9
|
+
if [ "$?" != "0" ] ; then
|
|
10
|
+
# echo "File does not exist, making..."
|
|
11
|
+
exec hadoop fs -mkdir "$@"
|
|
12
|
+
fi
|
data/bin/hdp-rm
CHANGED
|
@@ -1,11 +1,32 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
|
|
3
|
+
#
|
|
4
|
+
# Documentation for hadoop fs -rmr says "acts just like Unix rm -rf command". If this
|
|
5
|
+
# is true then we need to ignore directories that don't exist and still return 0.
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
#
|
|
9
|
+
# All the dirty conditional logic here does is test whether a directory exists. If so, remove it
|
|
10
|
+
#
|
|
3
11
|
if [ "$1" == "-r" ] ; then
|
|
12
|
+
shift
|
|
13
|
+
if [ "$1" == "-skipTrash" ] ; then
|
|
4
14
|
shift
|
|
5
|
-
|
|
15
|
+
hadoop fs -test -e "$@"
|
|
16
|
+
if [ "$?" == "0" ] ; then
|
|
17
|
+
# echo "File exists, skipping trash, removing it..."
|
|
18
|
+
echo hadoop dfs -rmr "$@"
|
|
19
|
+
exec hadoop dfs -rmr "$@"
|
|
20
|
+
fi
|
|
21
|
+
else
|
|
22
|
+
hadoop fs -test -e "$@"
|
|
23
|
+
if [ "$?" == "0" ] ; then
|
|
24
|
+
# echo "File exists, removing it..."
|
|
25
|
+
echo hadoop dfs -rmr "$@"
|
|
26
|
+
exec hadoop dfs -rmr "$@"
|
|
27
|
+
fi
|
|
28
|
+
fi
|
|
6
29
|
else
|
|
7
|
-
|
|
30
|
+
echo hadoop dfs -rm "$@"
|
|
31
|
+
exec hadoop dfs -rm "$@"
|
|
8
32
|
fi
|
|
9
|
-
echo hadoop dfs -$action "$@"
|
|
10
|
-
# read -p "Hit ctrl-C to abort or enter to do this...."
|
|
11
|
-
exec hadoop dfs -$action "$@"
|
data/bin/hdp-sort
CHANGED
|
@@ -13,7 +13,7 @@ if [ "$output_file" == "" ] ; then echo "$0 input_file output_file [mapper=/bin/
|
|
|
13
13
|
HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}
|
|
14
14
|
|
|
15
15
|
cmd="${HADOOP_HOME}/bin/hadoop \
|
|
16
|
-
jar ${HADOOP_HOME}/contrib/streaming/hadoop
|
|
16
|
+
jar ${HADOOP_HOME}/contrib/streaming/hadoop-*streaming*.jar
|
|
17
17
|
-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
|
|
18
18
|
-jobconf num.key.fields.for.partition=\"$partfields\"
|
|
19
19
|
-jobconf stream.num.map.output.key.fields=\"$sortfields\"
|
data/bin/hdp-stream
CHANGED
|
@@ -13,7 +13,7 @@ if [ "$output_file" == "" ] ; then echo "$0 input_file output_file [mapper=/bin/
|
|
|
13
13
|
HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}
|
|
14
14
|
|
|
15
15
|
cmd="${HADOOP_HOME}/bin/hadoop \
|
|
16
|
-
jar ${HADOOP_HOME}/contrib/streaming/hadoop
|
|
16
|
+
jar ${HADOOP_HOME}/contrib/streaming/hadoop-*streaming*.jar
|
|
17
17
|
-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
|
|
18
18
|
-jobconf num.key.fields.for.partition=\"$partfields\"
|
|
19
19
|
-jobconf stream.num.map.output.key.fields=\"$sortfields\"
|
data/bin/hdp-stream-flat
CHANGED
|
@@ -14,7 +14,7 @@ HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}
|
|
|
14
14
|
# -jobconf mapred.reduce.tasks=3 \
|
|
15
15
|
|
|
16
16
|
exec ${HADOOP_HOME}/bin/hadoop \
|
|
17
|
-
jar ${HADOOP_HOME}/contrib/streaming/hadoop
|
|
17
|
+
jar ${HADOOP_HOME}/contrib/streaming/hadoop-*streaming*.jar \
|
|
18
18
|
"$@" \
|
|
19
19
|
-jobconf "mapred.job.name=`basename $0`-$map_script-$input_file-$output_file" \
|
|
20
20
|
-mapper "$map_script" \
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'wukong'
|
|
4
|
+
# require 'wukong/store'
|
|
5
|
+
|
|
6
|
+
require 'configliere'
|
|
7
|
+
Configliere.use :commandline, :define, :config_file
|
|
8
|
+
Settings.read('foo.yaml')
|
|
9
|
+
|
|
10
|
+
# store = ChunkedFlatFileStore.new(Settings)
|
|
11
|
+
|
|
12
|
+
100.times do |iter|
|
|
13
|
+
# store.save [iter, Time.now.to_flat].join("\t")
|
|
14
|
+
$stdout.puts [iter, Time.now.to_flat].join("\t")
|
|
15
|
+
sleep 2
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module Enumerable
|
|
2
|
+
#
|
|
3
|
+
# Convert an array of values to a string representing it as a pig tuple
|
|
4
|
+
#
|
|
5
|
+
def to_pig_tuple
|
|
6
|
+
map{|*vals| '(' + vals.join(',') + ')' }
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
#
|
|
10
|
+
# Convert an array of values to a string pig format
|
|
11
|
+
# Delegates to to_pig_tuple -- see also to_pig_bag
|
|
12
|
+
#
|
|
13
|
+
def to_pig *args
|
|
14
|
+
to_pig_tuple *args
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Convert an array of values to a string representing it as a pig bag
|
|
19
|
+
#
|
|
20
|
+
def to_pig_bag
|
|
21
|
+
'{' + self.join(',') + '}'
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'monkeyshines/monitor/periodic_monitor'
|
|
2
|
+
module Monkeyshines
|
|
3
|
+
module Monitor
|
|
4
|
+
module ChunkedStore
|
|
5
|
+
attr_accessor :file_pattern
|
|
6
|
+
def initialize file_pattern
|
|
7
|
+
self.file_pattern = file_pattern
|
|
8
|
+
super file_pattern.make
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def close_and_reopen
|
|
12
|
+
close
|
|
13
|
+
self.filename = file_pattern.make
|
|
14
|
+
dump_file
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def save *args
|
|
18
|
+
chunk_monitor.periodically{ close_rename_and_open }
|
|
19
|
+
super *args
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Monitor
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# Emits a log line but only every +iter_interval+ calls or +time_interval+
|
|
6
|
+
# lapse.
|
|
7
|
+
#
|
|
8
|
+
# Since the contents of the block aren't called until the criteria are met,
|
|
9
|
+
# you can put relatively expensive operations in the log without killing
|
|
10
|
+
# your iteration time.
|
|
11
|
+
#
|
|
12
|
+
class PeriodicLogger < PeriodicMonitor
|
|
13
|
+
#
|
|
14
|
+
# Call with a block that returns a string or array to log.
|
|
15
|
+
# If you return nil, only the iteration count, elapsed time and instantaneous rate are logged.
|
|
16
|
+
#
|
|
17
|
+
# Ex: log if it has been at least 5 minutes since last announcement:
|
|
18
|
+
#
|
|
19
|
+
# periodic_logger = Monkeyshines::Monitor::PeriodicLogger.new(:time => 300)
|
|
20
|
+
# loop do
|
|
21
|
+
# # ... stuff ...
|
|
22
|
+
# periodic_logger.periodically{ [morbenfactor, crunkosity, exuberance] }
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
def periodically &block
|
|
26
|
+
super do
|
|
27
|
+
now = Time.now.utc.to_f
|
|
28
|
+
result = [ "%10d"%iter, "%7.1f"%since, "%7.1f"%inst_rate(now), (block ? block.call : nil) ].flatten.compact
|
|
29
|
+
Log.info result.join("\t")
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Monitor
|
|
3
|
+
#
|
|
4
|
+
# Accepts a lightweight call every iteration.
|
|
5
|
+
#
|
|
6
|
+
# Once either a time or an iteration criterion is met, executes the block
|
|
7
|
+
# and resets the timer until next execution.
|
|
8
|
+
#
|
|
9
|
+
# Note that the +time_interval+ is measured *execution to execution* and not
|
|
10
|
+
# in multiples of iter_interval. Say I set a time_interval of 300s, and
|
|
11
|
+
# happen to iterate at 297s and 310s after start. Then the monitor will
|
|
12
|
+
# execute at 310s, and the next execution will happen on or after 610s.
|
|
13
|
+
#
|
|
14
|
+
# Also note that when *either* criterion is met, *both* criteria are
|
|
15
|
+
# reset. Say I set a time interval of 300s and an +iter_interval+ of 10_000;
|
|
16
|
+
# and that at 250s I reach iteration 10_000. Then the monitor will execute
|
|
17
|
+
# on or after 20_000 iterations or 550s, whichever happens first.
|
|
18
|
+
#
|
|
19
|
+
class PeriodicMonitor
|
|
20
|
+
attr_accessor :time_interval, :iter_interval
|
|
21
|
+
attr_accessor :last_time, :current_iter, :iter, :started_at
|
|
22
|
+
|
|
23
|
+
def initialize options={}
|
|
24
|
+
self.started_at = Time.now.utc.to_f
|
|
25
|
+
self.last_time = started_at
|
|
26
|
+
self.iter = 0
|
|
27
|
+
self.current_iter = 0
|
|
28
|
+
self.time_interval = options[:time]
|
|
29
|
+
self.iter_interval = options[:iters]
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# True if the total iteration count is a multiple of +iter_interval+ (nil when no +iter_interval+ is set).
|
|
33
|
+
def enough_iterations?
|
|
34
|
+
iter % iter_interval == 0 if iter_interval
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# True if more than +time_interval+ has elapsed since last execution.
|
|
38
|
+
def enough_time? now
|
|
39
|
+
(now - last_time) > time_interval if time_interval
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Time since monitor was created
|
|
43
|
+
def since
|
|
44
|
+
Time.now.utc.to_f - started_at
|
|
45
|
+
end
|
|
46
|
+
# Overall iterations per second
|
|
47
|
+
def rate
|
|
48
|
+
iter.to_f / since.to_f
|
|
49
|
+
end
|
|
50
|
+
# "Instantaneous" iterations per second
|
|
51
|
+
def inst_rate now
|
|
52
|
+
current_iter.to_f / (now-last_time).to_f
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
#
|
|
56
|
+
# if the interval conditions are met, executes block; otherwise just does
|
|
57
|
+
# bookkeeping and returns.
|
|
58
|
+
#
|
|
59
|
+
def periodically &block
|
|
60
|
+
self.iter += 1
|
|
61
|
+
self.current_iter += 1
|
|
62
|
+
now = Time.now.utc.to_f
|
|
63
|
+
if enough_iterations? || enough_time?(now)
|
|
64
|
+
block.call(iter, (now-last_time))
|
|
65
|
+
self.last_time = now
|
|
66
|
+
self.current_iter = 0
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -32,6 +32,10 @@ module Wukong
|
|
|
32
32
|
Settings.define :noempty, :description => "don't create zero-byte reduce files (hadoop mode only)", :wukong => true
|
|
33
33
|
Settings.define :job_name, :jobconf => true, :description => 'mapred.job.name', :wukong => true
|
|
34
34
|
# mapred.linerecordreader.maxlength :description => "Safeguards against corrupted data: lines longer than this (in bytes) are treated as bad records."
|
|
35
|
+
Settings.define :max_reduces_per_node, :jobconf => true, :description => 'mapred.max.reduces.per.node', :wukong => true
|
|
36
|
+
Settings.define :max_reduces_per_cluster,:jobconf => true, :description => 'mapred.max.reduces.per.cluster', :wukong => true
|
|
37
|
+
Settings.define :max_maps_per_node, :jobconf => true, :description => 'mapred.max.maps.per.node', :wukong => true
|
|
38
|
+
Settings.define :max_maps_per_cluster, :jobconf => true, :description => 'mapred.max.maps.per.cluster', :wukong => true
|
|
35
39
|
|
|
36
40
|
# emit a -jobconf hadoop option if the simplified command line arg is present
|
|
37
41
|
# if not, the resulting nil will be elided later
|
data/lib/wukong/store.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
extend FactoryModule
|
|
4
|
+
autoload :Base, 'monkeyshines/store/base'
|
|
5
|
+
autoload :FlatFileStore, 'monkeyshines/store/flat_file_store'
|
|
6
|
+
autoload :ConditionalStore, 'monkeyshines/store/conditional_store'
|
|
7
|
+
autoload :ChunkedFlatFileStore, 'monkeyshines/store/chunked_flat_file_store'
|
|
8
|
+
autoload :KeyStore, 'monkeyshines/store/key_store'
|
|
9
|
+
autoload :TokyoTdbKeyStore, 'monkeyshines/store/tokyo_tdb_key_store'
|
|
10
|
+
autoload :TyrantTdbKeyStore, 'monkeyshines/store/tyrant_tdb_key_store'
|
|
11
|
+
autoload :TyrantRdbKeyStore, 'monkeyshines/store/tyrant_rdb_key_store'
|
|
12
|
+
autoload :ReadThruStore, 'monkeyshines/store/read_thru_store'
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
class Base
|
|
4
|
+
attr_accessor :options
|
|
5
|
+
def initialize _options={}
|
|
6
|
+
self.options = _options
|
|
7
|
+
Log.info "Creating #{self.class}"
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
#
|
|
11
|
+
def each_as klass, &block
|
|
12
|
+
self.each do |*args|
|
|
13
|
+
begin
|
|
14
|
+
item = klass.new *args[1..-1]
|
|
15
|
+
rescue Exception => e
|
|
16
|
+
Log.info [args, e.to_s, self].join("\t")
|
|
17
|
+
raise e
|
|
18
|
+
end
|
|
19
|
+
yield item
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def log_line
|
|
24
|
+
nil
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
class ChunkedFlatFileStore < Monkeyshines::Store::FlatFileStore
|
|
4
|
+
attr_accessor :filename_pattern, :chunk_monitor, :handle
|
|
5
|
+
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
:chunktime => 4*60*60, # default 4 hours
|
|
8
|
+
:pattern => ":rootdir/:date/:handle+:timestamp-:pid.tsv",
|
|
9
|
+
:rootdir => nil,
|
|
10
|
+
:filemode => 'w',
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
def initialize _options
|
|
14
|
+
self.options = DEFAULT_OPTIONS.deep_merge(_options)
|
|
15
|
+
raise "You don't really want a chunk time this small: #{options[:chunktime]}" unless options[:chunktime] > 600
|
|
16
|
+
self.chunk_monitor = Monkeyshines::Monitor::PeriodicMonitor.new( :time => options[:chunktime] )
|
|
17
|
+
self.handle = options[:handle] || Monkeyshines::CONFIG[:handle]
|
|
18
|
+
self.filename_pattern = Monkeyshines::Utils::FilenamePattern.new(options[:pattern], :handle => handle, :rootdir => options[:rootdir])
|
|
19
|
+
super options.merge(:filename => filename_pattern.make())
|
|
20
|
+
self.mkdir!
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def save *args
|
|
24
|
+
result = super *args
|
|
25
|
+
chunk_monitor.periodically do
|
|
26
|
+
new_filename = filename_pattern.make()
|
|
27
|
+
Log.info "Rotating chunked file #{filename} into #{new_filename}"
|
|
28
|
+
self.close
|
|
29
|
+
@filename = new_filename
|
|
30
|
+
self.mkdir!
|
|
31
|
+
end
|
|
32
|
+
result
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
class ConditionalStore < Monkeyshines::Store::Base
|
|
4
|
+
attr_accessor :options, :cache, :store, :misses
|
|
5
|
+
|
|
6
|
+
DEFAULT_OPTIONS = {
|
|
7
|
+
:cache => { :type => :tyrant_rdb_key_store },
|
|
8
|
+
:store => { :type => :chunked_flat_file_store },
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
# +cache+ must behave like a hash (Hash and
|
|
14
|
+
# Monkeyshines::Store::TyrantRdbKeyStore are both cromulent
|
|
15
|
+
# choices).
|
|
16
|
+
#
|
|
17
|
+
#
|
|
18
|
+
#
|
|
19
|
+
def initialize _options
|
|
20
|
+
self.options = DEFAULT_OPTIONS.deep_merge(_options)
|
|
21
|
+
self.cache = Monkeyshines::Store.create(options[:cache])
|
|
22
|
+
self.store = Monkeyshines::Store.create(options[:store])
|
|
23
|
+
self.misses = 0
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
#
|
|
27
|
+
# If key is absent, save the result of calling the block.
|
|
28
|
+
# If key is present, block is never called.
|
|
29
|
+
#
|
|
30
|
+
# Ex:
|
|
31
|
+
# rt_store.set(url) do
|
|
32
|
+
# fetcher.get url # will only be called if url isn't in rt_store
|
|
33
|
+
# end
|
|
34
|
+
#
|
|
35
|
+
def set key, force=nil, &block
|
|
36
|
+
return if (!force) && cache.include?(key)
|
|
37
|
+
cache_val, store_val = block.call()
|
|
38
|
+
return unless cache_val
|
|
39
|
+
cache.set_nr key, cache_val # update cache
|
|
40
|
+
store << store_val # save value
|
|
41
|
+
self.misses += 1 # track the cache miss
|
|
42
|
+
store_val
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def size() cache.size end
|
|
46
|
+
|
|
47
|
+
def log_line
|
|
48
|
+
[size, "%8d misses"%misses]
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def close()
|
|
52
|
+
cache.close
|
|
53
|
+
store.close
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
require 'fileutils'; include FileUtils
|
|
2
|
+
|
|
3
|
+
module Monkeyshines
|
|
4
|
+
module Store
|
|
5
|
+
#
|
|
6
|
+
class FlatFileStore < Store::Base
|
|
7
|
+
attr_accessor :filename, :filemode
|
|
8
|
+
|
|
9
|
+
#
|
|
10
|
+
# +filename_root+ : first part of name for files
|
|
11
|
+
#
|
|
12
|
+
def initialize options={}
|
|
13
|
+
Log.debug "New #{self.class} as #{options.inspect}"
|
|
14
|
+
self.filename = options[:filename] or raise "Missing filename in #{self.class}"
|
|
15
|
+
self.filemode = options[:filemode] || 'r'
|
|
16
|
+
skip!(options[:skip]) if options[:skip]
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
#
|
|
20
|
+
#
|
|
21
|
+
#
|
|
22
|
+
def each &block
|
|
23
|
+
file.each do |line|
|
|
24
|
+
next if line[0..0] == '#'
|
|
25
|
+
attrs = line.chomp.split("\t")
|
|
26
|
+
next if attrs.blank?
|
|
27
|
+
yield *attrs
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
#
|
|
32
|
+
# Read ahead n_lines lines in the file
|
|
33
|
+
#
|
|
34
|
+
def skip! n_lines
|
|
35
|
+
Log.info "Skipping #{n_lines} in #{self.class}:#{filename}"
|
|
36
|
+
n_lines.times do
|
|
37
|
+
file.readline
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
#
|
|
42
|
+
# Open the timestamped file,
|
|
43
|
+
# ensuring its directory exists
|
|
44
|
+
#
|
|
45
|
+
def file
|
|
46
|
+
return @file if @file
|
|
47
|
+
Log.info "Opening file #{filename} with mode #{filemode}"
|
|
48
|
+
@file = File.open(filename, filemode)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Close the dump file
|
|
52
|
+
def close
|
|
53
|
+
@file.close if @file
|
|
54
|
+
@file = nil
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Ensure the file's directory exists
|
|
58
|
+
def mkdir!
|
|
59
|
+
dir = File.dirname(filename)
|
|
60
|
+
return if File.directory?(dir)
|
|
61
|
+
Log.info "Making directory #{dir}"
|
|
62
|
+
FileUtils.mkdir_p dir
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# write to the file
|
|
66
|
+
def save obj
|
|
67
|
+
file << obj.to_flat.join("\t")+"\n"
|
|
68
|
+
obj
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# returns the size of the current file
|
|
72
|
+
def size
|
|
73
|
+
return 0 if !@file
|
|
74
|
+
File.size(filename)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def set key, *args, &block
|
|
78
|
+
tok, obj = block.call
|
|
79
|
+
save obj
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# delegates to +#save+ -- writes the object to the file
|
|
83
|
+
def <<(obj)
|
|
84
|
+
save obj
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
class KeyStore < Monkeyshines::Store::Base
|
|
4
|
+
# The actual backing store; should respond to #set and #get methods
|
|
5
|
+
attr_accessor :db
|
|
6
|
+
|
|
7
|
+
#
|
|
8
|
+
# Executes block once for each element in the whole DB, in whatever order
|
|
9
|
+
# the DB thinks you should see it.
|
|
10
|
+
#
|
|
11
|
+
# Your block will see |key, val|
|
|
12
|
+
#
|
|
13
|
+
# key_store.each do |key, val|
|
|
14
|
+
# # ... stuff ...
|
|
15
|
+
# end
|
|
16
|
+
#
|
|
17
|
+
def each &block
|
|
18
|
+
db.iterinit
|
|
19
|
+
loop do
|
|
20
|
+
key = db.iternext or break
|
|
21
|
+
val = db[key]
|
|
22
|
+
yield key, val
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Save the value into the database
|
|
28
|
+
def set(key, val)
|
|
29
|
+
return unless val
|
|
30
|
+
db[key] = val
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
alias_method :save, :set
|
|
34
|
+
def get(key) db[key] end
|
|
35
|
+
def [](key) db[key] end
|
|
36
|
+
def close() db.close end
|
|
37
|
+
def size() db.size end
|
|
38
|
+
|
|
39
|
+
#
|
|
40
|
+
# Load from standard command-line options
|
|
41
|
+
#
|
|
42
|
+
# obvs only works when there's just one store
|
|
43
|
+
#
|
|
44
|
+
def self.new_from_command_line cmdline_opts, default_opts={}
|
|
45
|
+
options = default_opts.merge(cmdline_opts)
|
|
46
|
+
store = self.new(options[:store_db])
|
|
47
|
+
store
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Monkeyshines
|
|
2
|
+
module Store
|
|
3
|
+
class ReadThruStore < Monkeyshines::Store::TyrantTdbKeyStore
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
# If key is absent, save the result of calling the block.
|
|
7
|
+
# If key is present, block is never called.
|
|
8
|
+
#
|
|
9
|
+
# Ex:
|
|
10
|
+
# rt_store.set(url) do
|
|
11
|
+
# fetcher.get url # will only be called if url isn't in rt_store
|
|
12
|
+
# end
|
|
13
|
+
#
|
|
14
|
+
def set key, force=nil, &block
|
|
15
|
+
return if !force && db.has_key?(key)
|
|
16
|
+
result = block.call() or return
|
|
17
|
+
super(key, result)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
require 'tokyocabinet'
|
|
2
|
+
module Monkeyshines
|
|
3
|
+
module Store
|
|
4
|
+
#
|
|
5
|
+
# Implementation of KeyStore with a Local TokyoCabinet table database (TDB)
|
|
6
|
+
#
|
|
7
|
+
class TokyoTdbKeyStore < Monkeyshines::Store::KeyStore
|
|
8
|
+
|
|
9
|
+
# pass in the filename or URI of a tokyo cabinet table-style DB
|
|
10
|
+
# set create_db = true if you want to create a missing DB file
|
|
11
|
+
def initialize db_uri, *args
|
|
12
|
+
self.db = TokyoCabinet::TDB.new
|
|
13
|
+
db.open(db_uri, TokyoCabinet::TDB::OWRITER) or raise "#{self.class.to_s}: Can't open TokyoCabinet TDB #{db_uri}"
|
|
14
|
+
super *args
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def each_as klass, &block
|
|
19
|
+
self.each do |key, hsh|
|
|
20
|
+
yield klass.from_hash hsh
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
# Delegate to store
|
|
24
|
+
def set(key, val)
|
|
25
|
+
return unless val
|
|
26
|
+
db.put key, val.to_hash.compact
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def size() db.rnum end
|
|
30
|
+
|
|
31
|
+
end #class
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
require 'tokyotyrant'
|
|
2
|
+
module Monkeyshines
|
|
3
|
+
module Store
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
# Implementation of KeyStore backed by a TokyoTyrant remote database (RDB) reached at host:port
|
|
7
|
+
#
|
|
8
|
+
class TyrantRdbKeyStore < Monkeyshines::Store::KeyStore
|
|
9
|
+
attr_accessor :db_host, :db_port
|
|
10
|
+
|
|
11
|
+
# pass in the host:port uri of the key store.
|
|
12
|
+
def initialize options
|
|
13
|
+
raise "URI for #{self.class} is required" if options[:uri].blank?
|
|
14
|
+
self.db_host, self.db_port = options[:uri].to_s.split(':')
|
|
15
|
+
self.db_host.gsub!(/^(localhost|127\.0\.0\.1)$/,'')
|
|
16
|
+
super options
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def db
|
|
20
|
+
return @db if @db
|
|
21
|
+
@db ||= TokyoTyrant::RDB.new
|
|
22
|
+
@db.open(db_host, db_port) or raise("Can't open DB at host #{db_host} port #{db_port}. Pass in host:port' #{@db.ecode}: #{@db.errmsg(@db.ecode)}")
|
|
23
|
+
@db
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def close
|
|
27
|
+
@db.close if @db
|
|
28
|
+
@db = nil
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Save the value into the database without waiting for a response.
|
|
32
|
+
def set_nr(key, val)
|
|
33
|
+
db.putnr key, val if val
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def size() db.rnum end
|
|
37
|
+
def include? *args
|
|
38
|
+
db.has_key? *args
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# require 'memcache'
|
|
42
|
+
# def initialize db_uri=nil, *args
|
|
43
|
+
# # db_uri ||= ':1978'
|
|
44
|
+
# # self.db_host, self.db_port = db_uri.split(':')
|
|
45
|
+
# self.db = MemCache.new(db_uri, :no_reply => true)
|
|
46
|
+
# if !self.db then raise("Can't open DB #{db_uri}. Pass in host:port, default is ':1978' #{db.ecode}: #{db.errmsg(db.ecode)}") end
|
|
47
|
+
# super *args
|
|
48
|
+
# end
|
|
49
|
+
#
|
|
50
|
+
# def size
|
|
51
|
+
# db.stats
|
|
52
|
+
# end
|
|
53
|
+
|
|
54
|
+
end #class
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require 'tokyotyrant'
|
|
2
|
+
require 'tyrant_rdb_key_store'
|
|
3
|
+
module Monkeyshines
|
|
4
|
+
module Store
|
|
5
|
+
#
|
|
6
|
+
# Implementation of KeyStore backed by a TokyoTyrant remote table database (RDBTBL) reached at host:port
|
|
7
|
+
#
|
|
8
|
+
class TyrantRdbKeyStore < TyrantRdbKeyStore Monkeyshines::Store::KeyStore
|
|
9
|
+
|
|
10
|
+
def db
|
|
11
|
+
return @db if @db
|
|
12
|
+
@db ||= TokyoTyrant::RDBTBL.new
|
|
13
|
+
@db.open(db_host, db_port) or raise("Can't open DB #{db_host}:#{db_port}. Pass in host:port' #{@db.ecode}: #{@db.errmsg(@db.ecode)}")
|
|
14
|
+
@db
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
end #class
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
data/wukong.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{wukong}
|
|
8
|
-
s.version = "1.4.
|
|
8
|
+
s.version = "1.4.11"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Philip (flip) Kromer"]
|
|
12
|
-
s.date = %q{2010-07-
|
|
12
|
+
s.date = %q{2010-07-30}
|
|
13
13
|
s.description = %q{ Treat your dataset like a:
|
|
14
14
|
|
|
15
15
|
* stream of lines when it's efficient to process by lines
|
|
@@ -42,6 +42,7 @@ Gem::Specification.new do |s|
|
|
|
42
42
|
"bin/hdp-kill-task",
|
|
43
43
|
"bin/hdp-ls",
|
|
44
44
|
"bin/hdp-mkdir",
|
|
45
|
+
"bin/hdp-mkdirp",
|
|
45
46
|
"bin/hdp-mv",
|
|
46
47
|
"bin/hdp-parts_to_keys.rb",
|
|
47
48
|
"bin/hdp-ps",
|
|
@@ -151,9 +152,11 @@ Gem::Specification.new do |s|
|
|
|
151
152
|
"examples/size.rb",
|
|
152
153
|
"examples/stats/avg_value_frequency.rb",
|
|
153
154
|
"examples/stats/data/avg_value_frequency.tsv",
|
|
155
|
+
"examples/store/chunked_store_example.rb",
|
|
154
156
|
"examples/stupidly_simple_filter.rb",
|
|
155
157
|
"examples/word_count.rb",
|
|
156
158
|
"lib/wukong.rb",
|
|
159
|
+
"lib/wukong/and_pig.rb",
|
|
157
160
|
"lib/wukong/bad_record.rb",
|
|
158
161
|
"lib/wukong/datatypes.rb",
|
|
159
162
|
"lib/wukong/datatypes/enum.rb",
|
|
@@ -182,12 +185,28 @@ Gem::Specification.new do |s|
|
|
|
182
185
|
"lib/wukong/keystore/tyrant_notes.textile",
|
|
183
186
|
"lib/wukong/logger.rb",
|
|
184
187
|
"lib/wukong/models/graph.rb",
|
|
188
|
+
"lib/wukong/monitor.rb",
|
|
189
|
+
"lib/wukong/monitor/chunked_store.rb",
|
|
190
|
+
"lib/wukong/monitor/periodic_logger.rb",
|
|
191
|
+
"lib/wukong/monitor/periodic_monitor.rb",
|
|
185
192
|
"lib/wukong/periodic_monitor.rb",
|
|
186
193
|
"lib/wukong/rdf.rb",
|
|
187
194
|
"lib/wukong/schema.rb",
|
|
188
195
|
"lib/wukong/script.rb",
|
|
189
196
|
"lib/wukong/script/hadoop_command.rb",
|
|
190
197
|
"lib/wukong/script/local_command.rb",
|
|
198
|
+
"lib/wukong/store.rb",
|
|
199
|
+
"lib/wukong/store/base.rb",
|
|
200
|
+
"lib/wukong/store/chunked_flat_file_store.rb",
|
|
201
|
+
"lib/wukong/store/conditional_store.rb",
|
|
202
|
+
"lib/wukong/store/factory.rb",
|
|
203
|
+
"lib/wukong/store/flat_file_store.rb",
|
|
204
|
+
"lib/wukong/store/key_store.rb",
|
|
205
|
+
"lib/wukong/store/null_store.rb",
|
|
206
|
+
"lib/wukong/store/read_thru_store.rb",
|
|
207
|
+
"lib/wukong/store/tokyo_tdb_key_store.rb",
|
|
208
|
+
"lib/wukong/store/tyrant_rdb_key_store.rb",
|
|
209
|
+
"lib/wukong/store/tyrant_tdb_key_store.rb",
|
|
191
210
|
"lib/wukong/streamer.rb",
|
|
192
211
|
"lib/wukong/streamer/accumulating_reducer.rb",
|
|
193
212
|
"lib/wukong/streamer/base.rb",
|
|
@@ -226,30 +245,31 @@ Gem::Specification.new do |s|
|
|
|
226
245
|
"spec/spec_helper.rb",
|
|
227
246
|
"spec/wukong/encoding_spec.rb",
|
|
228
247
|
"spec/wukong/script_spec.rb",
|
|
248
|
+
"examples/binning_percentile_estimator.rb",
|
|
249
|
+
"examples/contrib/jeans/normalize.rb",
|
|
250
|
+
"examples/contrib/jeans/sizes.rb",
|
|
251
|
+
"examples/corpus/words_to_bigrams.rb",
|
|
252
|
+
"examples/count_keys.rb",
|
|
253
|
+
"examples/count_keys_at_mapper.rb",
|
|
254
|
+
"examples/keystore/cassandra_batch_test.rb",
|
|
255
|
+
"examples/keystore/conditional_outputter_example.rb",
|
|
256
|
+
"examples/network_graph/adjacency_list.rb",
|
|
257
|
+
"examples/network_graph/breadth_first_search.rb",
|
|
258
|
+
"examples/network_graph/gen_2paths.rb",
|
|
259
|
+
"examples/network_graph/gen_multi_edge.rb",
|
|
260
|
+
"examples/network_graph/gen_symmetric_links.rb",
|
|
229
261
|
"examples/pagerank/pagerank.rb",
|
|
230
262
|
"examples/pagerank/pagerank_initialize.rb",
|
|
263
|
+
"examples/rank_and_bin.rb",
|
|
231
264
|
"examples/sample_records.rb",
|
|
232
265
|
"examples/server_logs/apache_log_parser.rb",
|
|
233
266
|
"examples/server_logs/breadcrumbs.rb",
|
|
234
267
|
"examples/server_logs/user_agent.rb",
|
|
235
|
-
"examples/corpus/words_to_bigrams.rb",
|
|
236
|
-
"examples/count_keys.rb",
|
|
237
|
-
"examples/rank_and_bin.rb",
|
|
238
|
-
"examples/binning_percentile_estimator.rb",
|
|
239
268
|
"examples/size.rb",
|
|
240
|
-
"examples/network_graph/breadth_first_search.rb",
|
|
241
|
-
"examples/network_graph/gen_symmetric_links.rb",
|
|
242
|
-
"examples/network_graph/gen_multi_edge.rb",
|
|
243
|
-
"examples/network_graph/adjacency_list.rb",
|
|
244
|
-
"examples/network_graph/gen_2paths.rb",
|
|
245
|
-
"examples/keystore/cassandra_batch_test.rb",
|
|
246
|
-
"examples/keystore/conditional_outputter_example.rb",
|
|
247
269
|
"examples/stats/avg_value_frequency.rb",
|
|
248
|
-
"examples/
|
|
249
|
-
"examples/contrib/jeans/normalize.rb",
|
|
250
|
-
"examples/word_count.rb",
|
|
270
|
+
"examples/store/chunked_store_example.rb",
|
|
251
271
|
"examples/stupidly_simple_filter.rb",
|
|
252
|
-
"examples/
|
|
272
|
+
"examples/word_count.rb"
|
|
253
273
|
]
|
|
254
274
|
|
|
255
275
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: wukong
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 17
|
|
5
5
|
prerelease: false
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
8
|
- 4
|
|
9
|
-
-
|
|
10
|
-
version: 1.4.
|
|
9
|
+
- 11
|
|
10
|
+
version: 1.4.11
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Philip (flip) Kromer
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2010-07-
|
|
18
|
+
date: 2010-07-30 00:00:00 -05:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -136,6 +136,7 @@ files:
|
|
|
136
136
|
- bin/hdp-kill-task
|
|
137
137
|
- bin/hdp-ls
|
|
138
138
|
- bin/hdp-mkdir
|
|
139
|
+
- bin/hdp-mkdirp
|
|
139
140
|
- bin/hdp-mv
|
|
140
141
|
- bin/hdp-parts_to_keys.rb
|
|
141
142
|
- bin/hdp-ps
|
|
@@ -245,9 +246,11 @@ files:
|
|
|
245
246
|
- examples/size.rb
|
|
246
247
|
- examples/stats/avg_value_frequency.rb
|
|
247
248
|
- examples/stats/data/avg_value_frequency.tsv
|
|
249
|
+
- examples/store/chunked_store_example.rb
|
|
248
250
|
- examples/stupidly_simple_filter.rb
|
|
249
251
|
- examples/word_count.rb
|
|
250
252
|
- lib/wukong.rb
|
|
253
|
+
- lib/wukong/and_pig.rb
|
|
251
254
|
- lib/wukong/bad_record.rb
|
|
252
255
|
- lib/wukong/datatypes.rb
|
|
253
256
|
- lib/wukong/datatypes/enum.rb
|
|
@@ -276,12 +279,28 @@ files:
|
|
|
276
279
|
- lib/wukong/keystore/tyrant_notes.textile
|
|
277
280
|
- lib/wukong/logger.rb
|
|
278
281
|
- lib/wukong/models/graph.rb
|
|
282
|
+
- lib/wukong/monitor.rb
|
|
283
|
+
- lib/wukong/monitor/chunked_store.rb
|
|
284
|
+
- lib/wukong/monitor/periodic_logger.rb
|
|
285
|
+
- lib/wukong/monitor/periodic_monitor.rb
|
|
279
286
|
- lib/wukong/periodic_monitor.rb
|
|
280
287
|
- lib/wukong/rdf.rb
|
|
281
288
|
- lib/wukong/schema.rb
|
|
282
289
|
- lib/wukong/script.rb
|
|
283
290
|
- lib/wukong/script/hadoop_command.rb
|
|
284
291
|
- lib/wukong/script/local_command.rb
|
|
292
|
+
- lib/wukong/store.rb
|
|
293
|
+
- lib/wukong/store/base.rb
|
|
294
|
+
- lib/wukong/store/chunked_flat_file_store.rb
|
|
295
|
+
- lib/wukong/store/conditional_store.rb
|
|
296
|
+
- lib/wukong/store/factory.rb
|
|
297
|
+
- lib/wukong/store/flat_file_store.rb
|
|
298
|
+
- lib/wukong/store/key_store.rb
|
|
299
|
+
- lib/wukong/store/null_store.rb
|
|
300
|
+
- lib/wukong/store/read_thru_store.rb
|
|
301
|
+
- lib/wukong/store/tokyo_tdb_key_store.rb
|
|
302
|
+
- lib/wukong/store/tyrant_rdb_key_store.rb
|
|
303
|
+
- lib/wukong/store/tyrant_tdb_key_store.rb
|
|
285
304
|
- lib/wukong/streamer.rb
|
|
286
305
|
- lib/wukong/streamer/accumulating_reducer.rb
|
|
287
306
|
- lib/wukong/streamer/base.rb
|
|
@@ -348,27 +367,28 @@ test_files:
|
|
|
348
367
|
- spec/spec_helper.rb
|
|
349
368
|
- spec/wukong/encoding_spec.rb
|
|
350
369
|
- spec/wukong/script_spec.rb
|
|
370
|
+
- examples/binning_percentile_estimator.rb
|
|
371
|
+
- examples/contrib/jeans/normalize.rb
|
|
372
|
+
- examples/contrib/jeans/sizes.rb
|
|
373
|
+
- examples/corpus/words_to_bigrams.rb
|
|
374
|
+
- examples/count_keys.rb
|
|
375
|
+
- examples/count_keys_at_mapper.rb
|
|
376
|
+
- examples/keystore/cassandra_batch_test.rb
|
|
377
|
+
- examples/keystore/conditional_outputter_example.rb
|
|
378
|
+
- examples/network_graph/adjacency_list.rb
|
|
379
|
+
- examples/network_graph/breadth_first_search.rb
|
|
380
|
+
- examples/network_graph/gen_2paths.rb
|
|
381
|
+
- examples/network_graph/gen_multi_edge.rb
|
|
382
|
+
- examples/network_graph/gen_symmetric_links.rb
|
|
351
383
|
- examples/pagerank/pagerank.rb
|
|
352
384
|
- examples/pagerank/pagerank_initialize.rb
|
|
385
|
+
- examples/rank_and_bin.rb
|
|
353
386
|
- examples/sample_records.rb
|
|
354
387
|
- examples/server_logs/apache_log_parser.rb
|
|
355
388
|
- examples/server_logs/breadcrumbs.rb
|
|
356
389
|
- examples/server_logs/user_agent.rb
|
|
357
|
-
- examples/corpus/words_to_bigrams.rb
|
|
358
|
-
- examples/count_keys.rb
|
|
359
|
-
- examples/rank_and_bin.rb
|
|
360
|
-
- examples/binning_percentile_estimator.rb
|
|
361
390
|
- examples/size.rb
|
|
362
|
-
- examples/network_graph/breadth_first_search.rb
|
|
363
|
-
- examples/network_graph/gen_symmetric_links.rb
|
|
364
|
-
- examples/network_graph/gen_multi_edge.rb
|
|
365
|
-
- examples/network_graph/adjacency_list.rb
|
|
366
|
-
- examples/network_graph/gen_2paths.rb
|
|
367
|
-
- examples/keystore/cassandra_batch_test.rb
|
|
368
|
-
- examples/keystore/conditional_outputter_example.rb
|
|
369
391
|
- examples/stats/avg_value_frequency.rb
|
|
370
|
-
- examples/
|
|
371
|
-
- examples/contrib/jeans/normalize.rb
|
|
372
|
-
- examples/word_count.rb
|
|
392
|
+
- examples/store/chunked_store_example.rb
|
|
373
393
|
- examples/stupidly_simple_filter.rb
|
|
374
|
-
- examples/
|
|
394
|
+
- examples/word_count.rb
|