wukong 0.1.4 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL.textile +89 -0
- data/README.textile +41 -74
- data/docpages/INSTALL.textile +94 -0
- data/{doc → docpages}/LICENSE.textile +0 -0
- data/{doc → docpages}/README-wulign.textile +6 -0
- data/docpages/UsingWukong-part1-get_ready.textile +17 -0
- data/{doc/overview.textile → docpages/UsingWukong-part2-ThinkingBigData.textile} +8 -24
- data/{doc → docpages}/UsingWukong-part3-parsing.textile +8 -2
- data/docpages/_config.yml +39 -0
- data/{doc/tips.textile → docpages/bigdata-tips.textile} +71 -44
- data/{doc → docpages}/code/api_response_example.txt +0 -0
- data/{doc → docpages}/code/parser_skeleton.rb +0 -0
- data/{doc/intro_to_map_reduce → docpages/diagrams}/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +16 -0
- data/docpages/hadoop-tips.textile +83 -0
- data/docpages/index.textile +90 -0
- data/docpages/intro.textile +8 -0
- data/docpages/moreinfo.textile +174 -0
- data/docpages/news.html +24 -0
- data/{doc → docpages}/pig/PigLatinExpressionsList.txt +0 -0
- data/{doc → docpages}/pig/PigLatinReferenceManual.html +0 -0
- data/{doc → docpages}/pig/PigLatinReferenceManual.txt +0 -0
- data/docpages/tutorial.textile +283 -0
- data/docpages/usage.textile +195 -0
- data/docpages/wutils.textile +263 -0
- data/wukong.gemspec +80 -50
- metadata +87 -54
- data/doc/INSTALL.textile +0 -41
- data/doc/README-tutorial.textile +0 -163
- data/doc/README-wutils.textile +0 -128
- data/doc/TODO.textile +0 -61
- data/doc/UsingWukong-part1-setup.textile +0 -2
- data/doc/UsingWukong-part2-scraping.textile +0 -2
- data/doc/hadoop-nfs.textile +0 -51
- data/doc/hadoop-setup.textile +0 -29
- data/doc/index.textile +0 -124
- data/doc/links.textile +0 -42
- data/doc/usage.textile +0 -102
- data/doc/utils.textile +0 -48
- data/examples/and_pig/sample_queries.rb +0 -128
- data/lib/wukong/and_pig.rb +0 -62
- data/lib/wukong/and_pig/README.textile +0 -12
- data/lib/wukong/and_pig/as.rb +0 -37
- data/lib/wukong/and_pig/data_types.rb +0 -30
- data/lib/wukong/and_pig/functions.rb +0 -50
- data/lib/wukong/and_pig/generate.rb +0 -85
- data/lib/wukong/and_pig/generate/variable_inflections.rb +0 -82
- data/lib/wukong/and_pig/junk.rb +0 -51
- data/lib/wukong/and_pig/operators.rb +0 -8
- data/lib/wukong/and_pig/operators/compound.rb +0 -29
- data/lib/wukong/and_pig/operators/evaluators.rb +0 -7
- data/lib/wukong/and_pig/operators/execution.rb +0 -15
- data/lib/wukong/and_pig/operators/file_methods.rb +0 -29
- data/lib/wukong/and_pig/operators/foreach.rb +0 -98
- data/lib/wukong/and_pig/operators/groupies.rb +0 -212
- data/lib/wukong/and_pig/operators/load_store.rb +0 -65
- data/lib/wukong/and_pig/operators/meta.rb +0 -42
- data/lib/wukong/and_pig/operators/relational.rb +0 -129
- data/lib/wukong/and_pig/pig_struct.rb +0 -48
- data/lib/wukong/and_pig/pig_var.rb +0 -95
- data/lib/wukong/and_pig/symbol.rb +0 -29
- data/lib/wukong/and_pig/utils.rb +0 -0
@@ -1,129 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# == RelationalOperators
|
3
|
-
#
|
4
|
-
# GROUP, COGROUP, JOIN see groupies.rb
|
5
|
-
# CROSS see
|
6
|
-
|
7
|
-
# distinct
|
8
|
-
# filter
|
9
|
-
# limit
|
10
|
-
# order
|
11
|
-
# split
|
12
|
-
# union
|
13
|
-
|
14
|
-
#
|
15
|
-
# stream
|
16
|
-
# load
|
17
|
-
# store
|
18
|
-
#
|
19
|
-
module Wukong
|
20
|
-
module AndPig
|
21
|
-
class PigVar
|
22
|
-
|
23
|
-
# ===========================================================================
|
24
|
-
#
|
25
|
-
# Options
|
26
|
-
#
|
27
|
-
def self.parallelize! str, options
|
28
|
-
str << " PARALLEL #{options[:parallel]}" if options[:parallel]
|
29
|
-
end
|
30
|
-
|
31
|
-
# ===========================================================================
|
32
|
-
#
|
33
|
-
# DISTINCT
|
34
|
-
#
|
35
|
-
def distinct lval, options={}
|
36
|
-
self.class.distinct lval, self, options
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.distinct lval, rel, options={ }
|
40
|
-
cmd_str = rel.relationize
|
41
|
-
parallelize! cmd_str, options
|
42
|
-
simple_operation lval, rel, :distinct, cmd_str
|
43
|
-
end
|
44
|
-
|
45
|
-
# ===========================================================================
|
46
|
-
#
|
47
|
-
# FILTER
|
48
|
-
#
|
49
|
-
def filter by_str
|
50
|
-
new_in_chain klass, "FILTER #{relation} BY #{by_str}"
|
51
|
-
end
|
52
|
-
def self.filter lval, rel, by_str
|
53
|
-
simple_operation lval, rel, "FILTER", "#{rel.relation} BY #{by_str}"
|
54
|
-
end
|
55
|
-
|
56
|
-
# ===========================================================================
|
57
|
-
#
|
58
|
-
# LIMIT
|
59
|
-
#
|
60
|
-
def limit n
|
61
|
-
new_in_chain klass, "LIMIT #{relation} #{n}"
|
62
|
-
end
|
63
|
-
|
64
|
-
# ===========================================================================
|
65
|
-
#
|
66
|
-
# ORDER
|
67
|
-
#
|
68
|
-
# alias = ORDER alias BY { * [ASC|DESC] |
|
69
|
-
# field_alias [ASC|DESC] [, field_alias [ASC|DESC] …]
|
70
|
-
# } [PARALLEL n];
|
71
|
-
#
|
72
|
-
def order cmd_str, options={}
|
73
|
-
result = new_in_chain klass, "ORDER #{relation} BY #{cmd_str}"
|
74
|
-
parallelize! result.cmd, options
|
75
|
-
result
|
76
|
-
end
|
77
|
-
|
78
|
-
# ===========================================================================
|
79
|
-
#
|
80
|
-
# SPLIT
|
81
|
-
#
|
82
|
-
# SPLIT alias INTO alias IF expression, alias IF expression [, alias IF expression …];
|
83
|
-
#
|
84
|
-
#
|
85
|
-
def split relation_tests={}
|
86
|
-
split_str = relation_tests.map do |out_rel, test|
|
87
|
-
"#{out_rel} IF #{test}"
|
88
|
-
end.join(", ")
|
89
|
-
new_in_chain klass, "SPLIT #{relation} INTO #{split_str}"
|
90
|
-
end
|
91
|
-
|
92
|
-
# ===========================================================================
|
93
|
-
#
|
94
|
-
# CROSS
|
95
|
-
#
|
96
|
-
def cross *relations
|
97
|
-
options = relations.extract_options!
|
98
|
-
raise CrossArgumentError unless relations.length >= 1
|
99
|
-
relations_str = [self, *relations].map(&:relation).join(", ")
|
100
|
-
result = new_in_chain relations.first.klass, "CROSS #{relations_str}"
|
101
|
-
parallelize! result.cmd, options
|
102
|
-
result
|
103
|
-
end
|
104
|
-
|
105
|
-
# ===========================================================================
|
106
|
-
#
|
107
|
-
# UNION
|
108
|
-
#
|
109
|
-
# def self.union *relations
|
110
|
-
# raise UnionArgumentError unless relations.length >= 2
|
111
|
-
# new_in_chain relations.first.klass, "UNION #{relations}"
|
112
|
-
# end
|
113
|
-
|
114
|
-
# UNION as method
|
115
|
-
def union lval, *relations
|
116
|
-
self.class.union lval, [self]+relations
|
117
|
-
end
|
118
|
-
|
119
|
-
def self.union lval, *relations
|
120
|
-
raise UnionArgumentError unless relations.length >= 2
|
121
|
-
relations_str = relations.map(&:relation).join(", ")
|
122
|
-
simple_operation lval, relations.first, :union, relations_str
|
123
|
-
end
|
124
|
-
|
125
|
-
end
|
126
|
-
CrossArgumentError = ArgumentError.new("CROSS requires at least two relations. Heh heh: relations.")
|
127
|
-
UnionArgumentError = ArgumentError.new("UNION requires at least two relations. Heh heh: relations.")
|
128
|
-
end
|
129
|
-
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module PigStructMethods
|
3
|
-
module ClassMethods
|
4
|
-
#
|
5
|
-
# Pig type string --
|
6
|
-
# the pig type strings for each sub-element.
|
7
|
-
#
|
8
|
-
def typify has_rsrc=nil
|
9
|
-
vars_str = members.zip(mtypes).map do |attr, mtype|
|
10
|
-
"%s: %s" % [attr, mtype.typify]
|
11
|
-
end
|
12
|
-
vars_str = ["rsrc: chararray"] + vars_str if has_rsrc
|
13
|
-
"(#{vars_str.join(', ')})"
|
14
|
-
end
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
def pig_load rel, *args
|
20
|
-
Wukong::AndPig::PigVar.pig_load rel, self, *args
|
21
|
-
end
|
22
|
-
|
23
|
-
#
|
24
|
-
# Returns type for a fieldspec
|
25
|
-
#
|
26
|
-
def field_type field
|
27
|
-
case field
|
28
|
-
when Symbol then members_types[field]
|
29
|
-
# when Array
|
30
|
-
# if field.length > 1 then members_types[field.first].field_type(field[1..-1])
|
31
|
-
# else field_type field.first
|
32
|
-
# end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
def self.included base
|
38
|
-
base.extend ClassMethods
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
Struct.class_eval do
|
44
|
-
include Wukong::PigStructMethods
|
45
|
-
def self.mtypes
|
46
|
-
members
|
47
|
-
end
|
48
|
-
end
|
@@ -1,95 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module AndPig
|
3
|
-
|
4
|
-
#
|
5
|
-
# Make a PigVar understand the struct it describes
|
6
|
-
#
|
7
|
-
class PigVar
|
8
|
-
attr_accessor :klass, :name, :cmd
|
9
|
-
cattr_accessor :working_dir ; self.working_dir = '.'
|
10
|
-
def initialize klass, name, cmd
|
11
|
-
self.klass = klass
|
12
|
-
self.name = name
|
13
|
-
self.cmd = cmd
|
14
|
-
end
|
15
|
-
|
16
|
-
# Sugar for PigVar.new_relation
|
17
|
-
def self.[]= name, *args
|
18
|
-
set name, *args
|
19
|
-
end
|
20
|
-
# Sugar for PigVar.new_relation
|
21
|
-
def self.[] name
|
22
|
-
PIG_SYMBOLS[name]
|
23
|
-
end
|
24
|
-
|
25
|
-
# extract a field from an alias
|
26
|
-
def _ field
|
27
|
-
as_name = [name, field].join("_").to_sym
|
28
|
-
AS["#{relationize}.(#{field})", as_name, Bag.new([field, field_type(field)]), nil, :skip_type]
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
def self.set name, rval
|
33
|
-
PIG_SYMBOLS[name] = rval
|
34
|
-
rval.name = name
|
35
|
-
emit_setter rval.relation, rval
|
36
|
-
end
|
37
|
-
|
38
|
-
def relation
|
39
|
-
name.relationize
|
40
|
-
end
|
41
|
-
alias_method :relationize, :relation
|
42
|
-
|
43
|
-
#
|
44
|
-
# Create a name for a new anonymous relation
|
45
|
-
#
|
46
|
-
def self.anon slug
|
47
|
-
idx = (Wukong::AndPig.anon_var_idx += 1)
|
48
|
-
"anon_#{slug}_#{idx}_".to_sym
|
49
|
-
end
|
50
|
-
# Create a name building off this one
|
51
|
-
def anon
|
52
|
-
slug = name.to_s.gsub(/^anon_/,'').gsub(/_\d+_$/,'')
|
53
|
-
self.class.anon slug
|
54
|
-
end
|
55
|
-
|
56
|
-
#
|
57
|
-
def new_in_chain lval, l_klass, l_cmd
|
58
|
-
rval = self.class.new l_klass, lval, l_cmd
|
59
|
-
self.class.set lval, rval
|
60
|
-
end
|
61
|
-
|
62
|
-
# Delegate to klass
|
63
|
-
def field_type *args
|
64
|
-
self.klass.field_type *args
|
65
|
-
end
|
66
|
-
|
67
|
-
# Fields in this relation
|
68
|
-
def fields
|
69
|
-
klass.members.map(&:to_sym)
|
70
|
-
end
|
71
|
-
|
72
|
-
#
|
73
|
-
# Side-effect free operation
|
74
|
-
#
|
75
|
-
def simple_operation op
|
76
|
-
self.class.emit "#{op.to_s.upcase} #{relation}"
|
77
|
-
self
|
78
|
-
end
|
79
|
-
|
80
|
-
def self.simple_operation lval, rel, op, r_str
|
81
|
-
cmd = "%-8s %s" % [op.to_s.upcase, r_str]
|
82
|
-
rval = new(rel.klass, lval, cmd)
|
83
|
-
set lval, rval
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.simple_declaration op, r_str
|
87
|
-
cmd = "%-8s %s" % [op.to_s.upcase, r_str]
|
88
|
-
emit cmd
|
89
|
-
end
|
90
|
-
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
|
@@ -1,29 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module AndPig
|
3
|
-
PIG_SYMBOLS = { }
|
4
|
-
mattr_accessor :anon_var_idx
|
5
|
-
self.anon_var_idx = 0
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
|
10
|
-
Symbol.class_eval do
|
11
|
-
def << relation
|
12
|
-
case
|
13
|
-
when relation.is_a?(Wukong::AndPig::PigVar)
|
14
|
-
Wukong::AndPig::PigVar.new_relation(self, relation)
|
15
|
-
when relation.is_a?(Symbol) && (pig_var = Wukong::AndPig::PIG_SYMBOLS[relation])
|
16
|
-
Wukong::AndPig::PigVar.new_relation(self, pig_var)
|
17
|
-
else raise "Don't know how to pigify RHS #{relation.inspect}"
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def method_missing method, *args
|
22
|
-
pig_var = Wukong::AndPig::PIG_SYMBOLS[self]
|
23
|
-
if pig_var && pig_var.respond_to?(method)
|
24
|
-
pig_var.send(method, *args)
|
25
|
-
else
|
26
|
-
super method, *args
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
data/lib/wukong/and_pig/utils.rb
DELETED
File without changes
|