wukong 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.textile +107 -0
- data/README.textile +166 -0
- data/bin/cutc +30 -0
- data/bin/cuttab +5 -0
- data/bin/greptrue +8 -0
- data/bin/hdp-cat +3 -0
- data/bin/hdp-catd +3 -0
- data/bin/hdp-du +81 -0
- data/bin/hdp-get +3 -0
- data/bin/hdp-kill +3 -0
- data/bin/hdp-ls +10 -0
- data/bin/hdp-mkdir +3 -0
- data/bin/hdp-mv +3 -0
- data/bin/hdp-parts_to_keys.rb +77 -0
- data/bin/hdp-ps +3 -0
- data/bin/hdp-put +3 -0
- data/bin/hdp-rm +11 -0
- data/bin/hdp-sort +29 -0
- data/bin/hdp-stream +29 -0
- data/bin/hdp-stream-flat +18 -0
- data/bin/hdp-sync +17 -0
- data/bin/hdp-wc +67 -0
- data/bin/md5sort +20 -0
- data/bin/tabchar +5 -0
- data/bin/uniqc +3 -0
- data/bin/wu-hist +3 -0
- data/bin/wu-lign +177 -0
- data/bin/wu-sum +30 -0
- data/doc/INSTALL.textile +41 -0
- data/doc/LICENSE.textile +107 -0
- data/doc/README-tutorial.textile +163 -0
- data/doc/README-wulign.textile +59 -0
- data/doc/README-wutils.textile +128 -0
- data/doc/TODO.textile +61 -0
- data/doc/UsingWukong-part1-setup.textile +2 -0
- data/doc/UsingWukong-part2-scraping.textile +2 -0
- data/doc/UsingWukong-part3-parsing.textile +132 -0
- data/doc/code/api_response_example.txt +20 -0
- data/doc/code/parser_skeleton.rb +38 -0
- data/doc/hadoop-nfs.textile +51 -0
- data/doc/hadoop-setup.textile +29 -0
- data/doc/index.textile +124 -0
- data/doc/intro_to_map_reduce/MapReduceDiagram.graffle +0 -0
- data/doc/links.textile +42 -0
- data/doc/overview.textile +91 -0
- data/doc/pig/PigLatinExpressionsList.txt +122 -0
- data/doc/pig/PigLatinReferenceManual.html +19134 -0
- data/doc/pig/PigLatinReferenceManual.txt +1640 -0
- data/doc/tips.textile +116 -0
- data/doc/usage.textile +102 -0
- data/doc/utils.textile +48 -0
- data/examples/README.txt +17 -0
- data/examples/and_pig/sample_queries.rb +128 -0
- data/examples/apache_log_parser.rb +53 -0
- data/examples/count_keys.rb +56 -0
- data/examples/count_keys_at_mapper.rb +57 -0
- data/examples/graph/adjacency_list.rb +74 -0
- data/examples/graph/breadth_first_search.rb +79 -0
- data/examples/graph/gen_2paths.rb +68 -0
- data/examples/graph/gen_multi_edge.rb +103 -0
- data/examples/graph/gen_symmetric_links.rb +53 -0
- data/examples/package-local.rb +100 -0
- data/examples/package.rb +96 -0
- data/examples/pagerank/README.textile +6 -0
- data/examples/pagerank/gen_initial_pagerank_graph.pig +57 -0
- data/examples/pagerank/pagerank.rb +88 -0
- data/examples/pagerank/pagerank_initialize.rb +46 -0
- data/examples/pagerank/run_pagerank.sh +19 -0
- data/examples/rank_and_bin.rb +173 -0
- data/examples/run_all.sh +47 -0
- data/examples/sample_records.rb +44 -0
- data/examples/size.rb +60 -0
- data/examples/word_count.rb +95 -0
- data/lib/wukong.rb +11 -0
- data/lib/wukong/and_pig.rb +62 -0
- data/lib/wukong/and_pig/README.textile +12 -0
- data/lib/wukong/and_pig/as.rb +37 -0
- data/lib/wukong/and_pig/data_types.rb +30 -0
- data/lib/wukong/and_pig/functions.rb +50 -0
- data/lib/wukong/and_pig/generate.rb +85 -0
- data/lib/wukong/and_pig/generate/variable_inflections.rb +82 -0
- data/lib/wukong/and_pig/junk.rb +51 -0
- data/lib/wukong/and_pig/operators.rb +8 -0
- data/lib/wukong/and_pig/operators/compound.rb +29 -0
- data/lib/wukong/and_pig/operators/evaluators.rb +7 -0
- data/lib/wukong/and_pig/operators/execution.rb +15 -0
- data/lib/wukong/and_pig/operators/file_methods.rb +29 -0
- data/lib/wukong/and_pig/operators/foreach.rb +98 -0
- data/lib/wukong/and_pig/operators/groupies.rb +212 -0
- data/lib/wukong/and_pig/operators/load_store.rb +65 -0
- data/lib/wukong/and_pig/operators/meta.rb +42 -0
- data/lib/wukong/and_pig/operators/relational.rb +129 -0
- data/lib/wukong/and_pig/pig_struct.rb +48 -0
- data/lib/wukong/and_pig/pig_var.rb +95 -0
- data/lib/wukong/and_pig/symbol.rb +29 -0
- data/lib/wukong/and_pig/utils.rb +0 -0
- data/lib/wukong/bad_record.rb +18 -0
- data/lib/wukong/boot.rb +47 -0
- data/lib/wukong/datatypes.rb +24 -0
- data/lib/wukong/datatypes/enum.rb +123 -0
- data/lib/wukong/dfs.rb +80 -0
- data/lib/wukong/encoding.rb +111 -0
- data/lib/wukong/extensions.rb +15 -0
- data/lib/wukong/extensions/array.rb +18 -0
- data/lib/wukong/extensions/blank.rb +93 -0
- data/lib/wukong/extensions/class.rb +189 -0
- data/lib/wukong/extensions/date_time.rb +24 -0
- data/lib/wukong/extensions/emittable.rb +82 -0
- data/lib/wukong/extensions/hash.rb +120 -0
- data/lib/wukong/extensions/hash_like.rb +119 -0
- data/lib/wukong/extensions/hashlike_class.rb +47 -0
- data/lib/wukong/extensions/module.rb +2 -0
- data/lib/wukong/extensions/pathname.rb +27 -0
- data/lib/wukong/extensions/string.rb +65 -0
- data/lib/wukong/extensions/struct.rb +17 -0
- data/lib/wukong/extensions/symbol.rb +11 -0
- data/lib/wukong/logger.rb +53 -0
- data/lib/wukong/models/graph.rb +27 -0
- data/lib/wukong/rdf.rb +104 -0
- data/lib/wukong/schema.rb +37 -0
- data/lib/wukong/script.rb +265 -0
- data/lib/wukong/script/hadoop_command.rb +111 -0
- data/lib/wukong/script/local_command.rb +14 -0
- data/lib/wukong/streamer.rb +13 -0
- data/lib/wukong/streamer/accumulating_reducer.rb +89 -0
- data/lib/wukong/streamer/base.rb +76 -0
- data/lib/wukong/streamer/count_keys.rb +30 -0
- data/lib/wukong/streamer/count_lines.rb +26 -0
- data/lib/wukong/streamer/filter.rb +20 -0
- data/lib/wukong/streamer/line_streamer.rb +12 -0
- data/lib/wukong/streamer/list_reducer.rb +20 -0
- data/lib/wukong/streamer/preprocess_with_pipe_streamer.rb +22 -0
- data/lib/wukong/streamer/rank_and_bin_reducer.rb +145 -0
- data/lib/wukong/streamer/set_reducer.rb +14 -0
- data/lib/wukong/streamer/struct_streamer.rb +48 -0
- data/lib/wukong/streamer/summing_reducer.rb +29 -0
- data/lib/wukong/streamer/uniq_by_last_reducer.rb +44 -0
- data/lib/wukong/typed_struct.rb +12 -0
- data/lib/wukong/wukong_class.rb +21 -0
- data/spec/bin/hdp-wc_spec.rb +4 -0
- data/spec/spec_helper.rb +0 -0
- data/wukong.gemspec +179 -0
- metadata +214 -0
data/examples/run_all.sh
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
src_path="tmp/README.textile"
|
4
|
+
out_root="tmp/test"
|
5
|
+
hdp_opts="--map_tasks=1 --reduce_tasks=1"
|
6
|
+
|
7
|
+
# ---------------------------------------------------------------------------
|
8
|
+
#
|
9
|
+
# Set up directories and copy over sample input
|
10
|
+
#
|
11
|
+
|
12
|
+
# hdp-rm ${src_path}
|
13
|
+
# hdp-put `dirname $0`/../README.textile tmp/
|
14
|
+
# hdp-mkdir $out_root
|
15
|
+
|
16
|
+
# ---------------------------------------------------------------------------
|
17
|
+
#
|
18
|
+
# Run scripts
|
19
|
+
#
|
20
|
+
|
21
|
+
cmd="word_count"
|
22
|
+
# hdp-rm -r ${out_root}/${cmd}
|
23
|
+
# ./examples/${cmd}.rb --run $hdp_opts $src_path ${out_root}/${cmd}
|
24
|
+
# hdp-catd ${out_root}/${cmd} | head -n 20
|
25
|
+
word_count=${out_root}/${cmd}
|
26
|
+
|
27
|
+
cmd="sample_records"
|
28
|
+
# hdp-rm -r ${out_root}/${cmd}
|
29
|
+
# ./examples/${cmd}.rb --sampling_fraction=0.8 \
|
30
|
+
# --run $hdp_opts $src_path ${out_root}/${cmd}
|
31
|
+
# hdp-catd ${out_root}/${cmd} | head -n 200 | tail -n 20
|
32
|
+
sample_records=${out_root}/${cmd}
|
33
|
+
|
34
|
+
|
35
|
+
# cmd="size"
|
36
|
+
# hdp-rm -r ${out_root}/${cmd}
|
37
|
+
# ./examples/${cmd}.rb --run $hdp_opts $src_path ${out_root}/${cmd}
|
38
|
+
# hdp-catd ${out_root}/${cmd}
|
39
|
+
# size=${out_root}/${cmd}
|
40
|
+
|
41
|
+
|
42
|
+
cmd="count_keys"
|
43
|
+
hdp-rm -r ${out_root}/${cmd}
|
44
|
+
./examples/${cmd}.rb --run $hdp_opts $word_count ${out_root}/${cmd}
|
45
|
+
hdp-catd ${out_root}/${cmd} | head -n 200 | tail -n 20
|
46
|
+
count_keys=${out_root}/${cmd}
|
47
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../lib'
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
#
|
6
|
+
# Probabilistically emit some fraction of record/lines
|
7
|
+
#
|
8
|
+
# Set the sampling fraction at the command line using the
|
9
|
+
# --sampling_fraction=
|
10
|
+
# option: for example, to take a random 1/1000th of the lines in huge_files,
|
11
|
+
# ./examples/sample_records.rb --sampling_fraction=0.001 --run huge_files sampled_files
|
12
|
+
#
|
13
|
+
class Mapper < Wukong::Streamer::LineStreamer
|
14
|
+
include Wukong::Streamer::Filter
|
15
|
+
|
16
|
+
#
|
17
|
+
# floating-point number between 0 and 1 giving the fraction of lines to emit:
|
18
|
+
# at sampling_fraction=1 all records are emitted, at 0 none are.
|
19
|
+
#
|
20
|
+
# Takes its value from a mandatory command-line option
|
21
|
+
#
|
22
|
+
def sampling_fraction
  # Memoized: once a fraction has been parsed, reuse it.
  return @sampling_fraction if @sampling_fraction
  # The option is mandatory; a missing (nil/false) value is an error.
  @sampling_fraction = options[:sampling_fraction] && options[:sampling_fraction].to_f
  return @sampling_fraction if @sampling_fraction
  raise "Please supply a --sampling_fraction= argument, a decimal number between 0 and 1"
end
|
26
|
+
|
27
|
+
#
|
28
|
+
# randomly decide to emit +sampling_fraction+ fraction of lines
|
29
|
+
#
|
30
|
+
def emit? line
|
31
|
+
rand < self.sampling_fraction
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class Script < Wukong::Script
|
36
|
+
def default_options
|
37
|
+
super.merge :reduce_tasks => 0
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Executes the script
|
43
|
+
#
|
44
|
+
Script.new( Mapper, nil ).run
|
data/examples/size.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../lib'
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
module Size
|
6
|
+
#
|
7
|
+
# Feed the entire dataset through wc and sum the results
|
8
|
+
#
|
9
|
+
class Script < Wukong::Script
|
10
|
+
#
|
11
|
+
# Don't implement a wukong script to do something if there's a unix command
|
12
|
+
# that does it faster: just override map_command or reduce_command in your
|
13
|
+
# subclass of Wukong::Script to return the complete command line
|
14
|
+
#
|
15
|
+
def map_command
|
16
|
+
'/usr/bin/wc'
|
17
|
+
end
|
18
|
+
|
19
|
+
# Make all records go to one reducer
|
20
|
+
def default_options
|
21
|
+
super.merge :reduce_tasks => 1
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
# Sums the numeric value of each column in its input
|
27
|
+
#
|
28
|
+
class Reducer < Wukong::Streamer::Base
  # Running per-column totals; nil until the first record has been processed.
  attr_accessor :sums

  #
  # The unix +wc+ command uses whitespace, not tabs, so we'll recordize
  # accordingly.
  #
  def recordize line
    line.strip.split(/\s+/)
  end

  #
  # Add each column of this record to the running total for that column.
  #
  # Rows may differ in width: the totals always keep the widest width seen so
  # far, and a column missing from either side counts as 0 (nil.to_i == 0).
  # Zipping against the current row alone would truncate the totals whenever
  # a shorter row arrived.
  #
  def process *vals
    totals = sums || []
    width  = [vals.length, totals.length].max
    self.sums = (0...width).map{|col| vals[col].to_i + totals[col].to_i }
  end

  #
  # Run through the whole reduction input and then output the total.
  # NOTE(review): sums is still nil if no record was ever processed --
  # confirm how emit treats nil.
  #
  def stream *args
    super(*args)
    emit sums
  end
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Execute the script
|
57
|
+
Size::Script.new(
|
58
|
+
nil,
|
59
|
+
Size::Reducer
|
60
|
+
).run
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$: << File.dirname(__FILE__)+'/../lib'
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
module WordCount
|
6
|
+
class Mapper < Wukong::Streamer::LineStreamer
|
7
|
+
#
|
8
|
+
# Split a string into its constituent words.
|
9
|
+
#
|
10
|
+
# This is pretty simpleminded:
|
11
|
+
# * downcase the word
|
12
|
+
# * Split at any non-alphanumeric boundary, including '_'
|
13
|
+
# * However, preserve the special cases of 's or 't at the end of a
|
14
|
+
# word.
|
15
|
+
#
|
16
|
+
# tokenize("Jim's dawg won't hunt: dawg_hunt error #3007a4")
|
17
|
+
# # => ["jim's", "dawg", "won't", "hunt", "dawg", "hunt", "error", "3007a4"]
|
18
|
+
#
|
19
|
+
def tokenize str
  return [] unless str
  str.downcase.
    # collapse every run of characters other than letters, digits and
    # apostrophes into one space (this splits at hyphens and '_' too)
    gsub(/[^a-z0-9\']+/, ' ').
    # protect a trailing 's or 't by temporarily swapping its apostrophe for
    # '!' (safe: the previous step already removed any literal '!')
    gsub(/(\w)\'([st])\b/, '\1!\2').
    # every remaining apostrophe is punctuation
    gsub(/\'/, ' ').
    # restore the protected apostrophes
    gsub(/!/, "'").
    # awk-style split: breaks on whitespace runs and ignores leading/trailing
    # whitespace, so no empty tokens can be produced
    split(' ')
end
|
32
|
+
|
33
|
+
#
|
34
|
+
# Emit each word in each line.
|
35
|
+
#
|
36
|
+
def process line
|
37
|
+
tokenize(line).each{|word| yield [word, 1] }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Accumulate the sum record-by-record:
|
43
|
+
#
|
44
|
+
class Reducer0 < Wukong::Streamer::Base
  # Number of records seen so far for the word currently being counted.
  attr_accessor :key_count

  #
  # Count consecutive records that share the same word. When the word
  # changes, yield [previous_word, its_count] and start counting the new one.
  #
  # Each record counts as 1; the +count+ field itself is not added.
  # Assumes records arrive grouped by word -- TODO confirm against the
  # caller's sort/partition settings.
  #
  def process word, count
    @last_word ||= word
    if @last_word == word
      # key_count is nil before the first record; nil.to_i is 0
      self.key_count = key_count.to_i + 1
    else
      yield [ @last_word, key_count ]
      @last_word = word
      # the record that triggered the change is the first for the new word
      self.key_count = 1
    end
  end

  #
  # Run through the whole input, then flush the final word, which no later
  # key change will ever flush. Follows the same super-then-emit shape as
  # Size::Reducer#stream, including emitting a single array record.
  #
  def stream *args
    super(*args)
    emit [ @last_word, key_count ] if @last_word
  end
end
|
59
|
+
|
60
|
+
#
|
61
|
+
# You can stack up all the values in a list then sum them at once:
|
62
|
+
#
|
63
|
+
require 'active_support/core_ext/enumerable'
|
64
|
+
class Reducer1 < Wukong::Streamer::ListReducer
  #
  # Yield [key, total], where total adds up the integer value of the last
  # field of every entry in +values+.
  #
  def finalize
    counts = values.map{|record| record.last.to_i }
    yield [ key, counts.sum ]
  end
end
|
69
|
+
|
70
|
+
#
|
71
|
+
# A bit kinder to your memory manager: accumulate the sum record-by-record:
|
72
|
+
#
|
73
|
+
class Reducer2 < Wukong::Streamer::AccumulatingReducer
  # Running count of records for the current key.
  attr_accessor :key_count

  # Reset the running count to zero.
  def start!(*args)
    self.key_count = 0
  end

  # Count one more record; the record's contents are ignored.
  def accumulate(*args)
    self.key_count += 1
  end

  # Yield the [key, count] pair.
  def finalize
    yield [ key, key_count ]
  end
end
|
81
|
+
|
82
|
+
#
|
83
|
+
# ... easiest of all, though: this is common enough that it's already included
|
84
|
+
#
|
85
|
+
require 'wukong/streamer/count_keys'
|
86
|
+
class Reducer3 < Wukong::Streamer::CountKeys
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
# Execute the script
|
92
|
+
Wukong::Script.new(
|
93
|
+
WordCount::Mapper,
|
94
|
+
WordCount::Reducer1
|
95
|
+
).run
|
data/lib/wukong.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'wukong/boot'
|
2
|
+
require 'wukong/extensions'
|
3
|
+
require 'wukong/datatypes'
|
4
|
+
require 'wukong/logger'
|
5
|
+
require 'wukong/bad_record'
|
6
|
+
autoload :TypedStruct, 'wukong/typed_struct'
|
7
|
+
module Wukong
|
8
|
+
autoload :Dfs, 'wukong/dfs'
|
9
|
+
autoload :Script, 'wukong/script'
|
10
|
+
autoload :Streamer, 'wukong/streamer'
|
11
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'wukong/and_pig/pig_var'
|
2
|
+
require 'wukong/and_pig/as'
|
3
|
+
require 'wukong/and_pig/functions'
|
4
|
+
require 'wukong/and_pig/operators'
|
5
|
+
require 'wukong/and_pig/data_types'
|
6
|
+
require 'wukong/and_pig/pig_struct'
|
7
|
+
require 'wukong/and_pig/generate'
|
8
|
+
require 'wukong/and_pig/symbol'
|
9
|
+
require 'wukong/and_pig/utils'
|
10
|
+
|
11
|
+
module Wukong
|
12
|
+
#
|
13
|
+
# Wukong::AndPig lets you generate and run pig[http://hadoop.apache.org/pig]
|
14
|
+
# code from within ruby (and interactively, from the +irb+ console).
|
15
|
+
#
|
16
|
+
# It uses the same typed structures you've defined for Wukong to create
|
17
|
+
# pig-types aware commands. For example, the Wukong class
|
18
|
+
#
|
19
|
+
# class Customer < TypedStruct.new( [:id, Integer],
|
20
|
+
# [:name, String], [:postal_code, Integer], [:balance, Float] )
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# will generate a LOAD command for pig as
|
24
|
+
#
|
25
|
+
# Customer1.pig_load('q4_reports/customers.tsv').set!
|
26
|
+
# # => Q4ReportsCustomers2 = LOAD 'q4_reports/customers.tsv'
|
27
|
+
# AS (id: int, name: chararray, postal_code: int, balance: float) ;
|
28
|
+
#
|
29
|
+
# You can write anonymous chains
|
30
|
+
#
|
31
|
+
# q1 = Customer1.
|
32
|
+
# pig_load('q4_reports/customers.tsv').set!.
|
33
|
+
# distinct.set! ;
|
34
|
+
# q1.
|
35
|
+
# group(:by => :postal_code).set!.
|
36
|
+
# generate([:group, :postal_code], ["COUNT(#{q1.relation})", :customers_per_zip]).set!.
|
37
|
+
# store!
|
38
|
+
#
|
39
|
+
# Q4ReportsCustomers35 = LOAD 'q4_reports/customers.tsv' AS (id: int,name: chararray,postal_code: int,balance: float) ;
|
40
|
+
# Q4ReportsCustomers36 = DISTINCT Q4ReportsCustomers35 ;
|
41
|
+
# Q4ReportsCustomers37 = GROUP Q4ReportsCustomers36 BY postal_code ;
|
42
|
+
# Q4ReportsCustomers38 = FOREACH Q4ReportsCustomers37 GENERATE
|
43
|
+
# group AS postal_code,
|
44
|
+
# COUNT(Q4ReportsCustomers36) AS customers_per_zip ;
|
45
|
+
#
|
46
|
+
# ---------------------------------------------------------------------------
|
47
|
+
#
|
48
|
+
# Note on pig:
|
49
|
+
#
|
50
|
+
# 1) Reverse the order of your tables in your join statement. Pig always
|
51
|
+
# streams the keys of the last input, (materializing in memory the keys of
|
52
|
+
# the first), so if one of your inputs has fewer instances of a given key
|
53
|
+
# this may help.
|
54
|
+
#
|
55
|
+
# 2) Reduce the number of maps and reducers per machine and give it all the
|
56
|
+
# memory you can.
|
57
|
+
#
|
58
|
+
#
|
59
|
+
module AndPig
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Wukong::AndPig is a small library to more easily generate code for the
|
2
|
+
"Pig":http://hadoop.apache.org/pig data analysis language.
|
3
|
+
|
4
|
+
Wukong::AndPig lets you use the structs from your Wukong scripts to
|
5
|
+
generate Pig instructions that know their types and structure -- even through
|
6
|
+
multiple pig commands. For example, if you use +FOREACH ... GENERATE+ to select
|
7
|
+
only a few of those fields, Wukong::AndPig will know that the result has only
|
8
|
+
those fields.
|
9
|
+
|
10
|
+
We're still trying to figure out if this is a stupid and crazy idea, or just a
|
11
|
+
crazy idea: Yeah, we're using a functional/OO scripting language to generate code for an
|
12
|
+
imperative query language that generates Java code for ad-hoc map-reduce operations.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#
# One "expression AS name:type" clause: an expression, an optional relation
# reference it is qualified by, an optional alias and type, and a hash of
# boolean option flags (to_s reads :skip_name and :skip_type).
#
class AS
  attr_accessor :expr, :name, :type, :ref, :options

  #
  # expr::         the expression, or another AS to copy from
  # name::         alias; when given, overrides a copied name
  # type::         type; when given, overrides a copied type
  # ref::          relation prefix; when given, overrides a copied ref
  # option_flags:: each flag is stored in +options+ as flag => true
  #
  def initialize expr, name=nil, type=nil, ref=nil, *option_flags
    if expr.is_a?(AS)
      self.expr    = expr.expr
      self.name    = expr.name
      self.type    = expr.type
      self.ref     = expr.ref
      # Copy the hash: sharing it would let the flags set below leak back
      # into the AS being copied from.
      self.options = expr.options && expr.options.dup
    end
    self.expr    ||= expr
    self.name      = name if name
    self.type      = type if type
    self.ref       = ref  if ref
    self.options ||= { }
    option_flags.each{|option| self.options[option] = true }
  end

  #
  # Render the clause: "ref::expr" (or just "expr") left-justified to 30
  # columns and followed by " \t"; then, only if a name is set, "AS name"
  # (unless :skip_name) and ":type" via type.typify (unless there is no type
  # or :skip_type is set).
  #
  def to_s
    clause = "%-30s \t" % [ref, expr].compact.join('::')
    if name
      clause << "AS #{name}"        unless options[:skip_name]
      clause << ":#{type.typify}"   unless ((!type) || options[:skip_type])
    end
    clause
  end

  # Shorthand constructor: AS[expr, name, ...] is AS.new(expr, name, ...)
  def self.[] *args
    new(*args)
  end

  # Useful for feeding back into TypedStruct
  def name_type
    [name, type]
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# == SimpleDataTypes ==
|
2
|
+
# int
|
3
|
+
# long
|
4
|
+
# double
|
5
|
+
# arrays
|
6
|
+
# chararray
|
7
|
+
# bytearray
|
8
|
+
#
|
9
|
+
# == ComplexDataTypes ==
|
10
|
+
# tuple
|
11
|
+
# bag
|
12
|
+
# map
|
13
|
+
|
14
|
+
module Wukong
|
15
|
+
module AndPig
|
16
|
+
class PigVar
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# class ScalarInteger < TypedStruct.new [
|
23
|
+
# [:count, Integer ],
|
24
|
+
# ]
|
25
|
+
# include Wukong::AndPig::PigEmitter
|
26
|
+
# def self.load_scalar path
|
27
|
+
# var = super path
|
28
|
+
# var.to_i
|
29
|
+
# end
|
30
|
+
# end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
# == Built-in Functions
|
3
|
+
# EvalFunctions
|
4
|
+
# AVG
|
5
|
+
# CONCAT
|
6
|
+
# COUNT
|
7
|
+
# DIFF
|
8
|
+
# MIN
|
9
|
+
# MAX
|
10
|
+
# SIZE
|
11
|
+
# SUM
|
12
|
+
# TOKENIZE
|
13
|
+
|
14
|
+
# == NullOperators
|
15
|
+
# isnull
|
16
|
+
# isnotnull
|
17
|
+
#
|
18
|
+
# == BooleanOperators
|
19
|
+
# and
|
20
|
+
# or
|
21
|
+
# not
|
22
|
+
#
|
23
|
+
# == DereferenceOperators
|
24
|
+
# tupledereference.
|
25
|
+
# mapdereference#
|
26
|
+
#
|
27
|
+
# == SignOperators
|
28
|
+
# positive+
|
29
|
+
# negative-
|
30
|
+
#
|
31
|
+
# == CastOperators
|
32
|
+
# (type)$0
|
33
|
+
# (type)alias
|
34
|
+
#
|
35
|
+
# == ArithmeticOperators
|
36
|
+
# addition+
|
37
|
+
# subtraction-
|
38
|
+
# multiplication*
|
39
|
+
# division/
|
40
|
+
# modulo%
|
41
|
+
# bincond?
|
42
|
+
#
|
43
|
+
# == ComparisonOperators
|
44
|
+
# Equal==
|
45
|
+
# notequal!=
|
46
|
+
# lessthan<
|
47
|
+
# greaterthan>
|
48
|
+
# lessthanorequalto<=
|
49
|
+
# greaterthanorequalto>=
|
50
|
+
# patternmatchingmatches
|