wukong 0.1.4 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL.textile +89 -0
- data/README.textile +41 -74
- data/docpages/INSTALL.textile +94 -0
- data/{doc → docpages}/LICENSE.textile +0 -0
- data/{doc → docpages}/README-wulign.textile +6 -0
- data/docpages/UsingWukong-part1-get_ready.textile +17 -0
- data/{doc/overview.textile → docpages/UsingWukong-part2-ThinkingBigData.textile} +8 -24
- data/{doc → docpages}/UsingWukong-part3-parsing.textile +8 -2
- data/docpages/_config.yml +39 -0
- data/{doc/tips.textile → docpages/bigdata-tips.textile} +71 -44
- data/{doc → docpages}/code/api_response_example.txt +0 -0
- data/{doc → docpages}/code/parser_skeleton.rb +0 -0
- data/{doc/intro_to_map_reduce → docpages/diagrams}/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +16 -0
- data/docpages/hadoop-tips.textile +83 -0
- data/docpages/index.textile +90 -0
- data/docpages/intro.textile +8 -0
- data/docpages/moreinfo.textile +174 -0
- data/docpages/news.html +24 -0
- data/{doc → docpages}/pig/PigLatinExpressionsList.txt +0 -0
- data/{doc → docpages}/pig/PigLatinReferenceManual.html +0 -0
- data/{doc → docpages}/pig/PigLatinReferenceManual.txt +0 -0
- data/docpages/tutorial.textile +283 -0
- data/docpages/usage.textile +195 -0
- data/docpages/wutils.textile +263 -0
- data/wukong.gemspec +80 -50
- metadata +87 -54
- data/doc/INSTALL.textile +0 -41
- data/doc/README-tutorial.textile +0 -163
- data/doc/README-wutils.textile +0 -128
- data/doc/TODO.textile +0 -61
- data/doc/UsingWukong-part1-setup.textile +0 -2
- data/doc/UsingWukong-part2-scraping.textile +0 -2
- data/doc/hadoop-nfs.textile +0 -51
- data/doc/hadoop-setup.textile +0 -29
- data/doc/index.textile +0 -124
- data/doc/links.textile +0 -42
- data/doc/usage.textile +0 -102
- data/doc/utils.textile +0 -48
- data/examples/and_pig/sample_queries.rb +0 -128
- data/lib/wukong/and_pig.rb +0 -62
- data/lib/wukong/and_pig/README.textile +0 -12
- data/lib/wukong/and_pig/as.rb +0 -37
- data/lib/wukong/and_pig/data_types.rb +0 -30
- data/lib/wukong/and_pig/functions.rb +0 -50
- data/lib/wukong/and_pig/generate.rb +0 -85
- data/lib/wukong/and_pig/generate/variable_inflections.rb +0 -82
- data/lib/wukong/and_pig/junk.rb +0 -51
- data/lib/wukong/and_pig/operators.rb +0 -8
- data/lib/wukong/and_pig/operators/compound.rb +0 -29
- data/lib/wukong/and_pig/operators/evaluators.rb +0 -7
- data/lib/wukong/and_pig/operators/execution.rb +0 -15
- data/lib/wukong/and_pig/operators/file_methods.rb +0 -29
- data/lib/wukong/and_pig/operators/foreach.rb +0 -98
- data/lib/wukong/and_pig/operators/groupies.rb +0 -212
- data/lib/wukong/and_pig/operators/load_store.rb +0 -65
- data/lib/wukong/and_pig/operators/meta.rb +0 -42
- data/lib/wukong/and_pig/operators/relational.rb +0 -129
- data/lib/wukong/and_pig/pig_struct.rb +0 -48
- data/lib/wukong/and_pig/pig_var.rb +0 -95
- data/lib/wukong/and_pig/symbol.rb +0 -29
- data/lib/wukong/and_pig/utils.rb +0 -0
@@ -1,129 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# == RelationalOperators
|
3
|
-
#
|
4
|
-
# GROUP, COGROUP, JOIN see groupies.rb
|
5
|
-
# CROSS see
|
6
|
-
|
7
|
-
# distinct
|
8
|
-
# filter
|
9
|
-
# limit
|
10
|
-
# order
|
11
|
-
# split
|
12
|
-
# union
|
13
|
-
|
14
|
-
#
|
15
|
-
# stream
|
16
|
-
# load
|
17
|
-
# store
|
18
|
-
#
|
19
|
-
module Wukong
  module AndPig
    #
    # Relational operators for PigVar: DISTINCT, FILTER, LIMIT, ORDER, SPLIT,
    # CROSS and UNION. Every method here only assembles the text of a Pig Latin
    # statement and hands it to +new_in_chain+ or +simple_operation+, both of
    # which are defined outside this file.
    #
    # NOTE(review): the chained forms (filter, limit, order, split, cross) call
    # new_in_chain with (klass, cmd) -- confirm that this matches the arity of
    # new_in_chain where it is defined.
    #
    class PigVar

      # ===========================================================================
      #
      # Options
      #

      # Appends " PARALLEL n" to +str+ *in place* when options[:parallel] is
      # set. Returns the modified string, or nil when the option is absent.
      def self.parallelize! str, options
        str << " PARALLEL #{options[:parallel]}" if options[:parallel]
      end

      # ===========================================================================
      #
      # DISTINCT
      #

      # Instance form: DISTINCT of this relation, assigned to +lval+.
      def distinct lval, options={}
        self.class.distinct lval, self, options
      end

      # Class form: DISTINCT of +rel+, assigned to +lval+.
      # Honors options[:parallel].
      def self.distinct lval, rel, options={ }
        # Work on a copy: parallelize! appends in place and must not alter a
        # string that +rel+ may hand out again later.
        cmd_str = rel.relationize.to_s.dup
        parallelize! cmd_str, options
        simple_operation lval, rel, :distinct, cmd_str
      end

      # ===========================================================================
      #
      # FILTER
      #

      # Chained form: FILTER this relation BY the raw expression +by_str+.
      def filter by_str
        new_in_chain klass, "FILTER #{relation} BY #{by_str}"
      end

      # Class form: FILTER +rel+ BY +by_str+, assigned to +lval+.
      def self.filter lval, rel, by_str
        simple_operation lval, rel, "FILTER", "#{rel.relation} BY #{by_str}"
      end

      # ===========================================================================
      #
      # LIMIT
      #

      # Chained form: LIMIT this relation to +n+.
      def limit n
        new_in_chain klass, "LIMIT #{relation} #{n}"
      end

      # ===========================================================================
      #
      # ORDER
      #
      #   alias = ORDER alias BY { * [ASC|DESC] |
      #             field_alias [ASC|DESC] [, field_alias [ASC|DESC] ...]
      #           } [PARALLEL n];
      #
      # +cmd_str+ is the raw text placed after BY. Honors options[:parallel].
      #
      def order cmd_str, options={}
        result = new_in_chain klass, "ORDER #{relation} BY #{cmd_str}"
        # parallelize! exists only as a class method, so it has to be reached
        # through self.class from instance scope.
        self.class.parallelize! result.cmd, options
        result
      end

      # ===========================================================================
      #
      # SPLIT
      #
      #   SPLIT alias INTO alias IF expression, alias IF expression [, alias IF expression ...];
      #
      # +relation_tests+ maps each output relation to the expression that
      # selects its rows.
      #
      def split relation_tests={}
        split_str = relation_tests.map do |out_rel, test|
          "#{out_rel} IF #{test}"
        end.join(", ")
        new_in_chain klass, "SPLIT #{relation} INTO #{split_str}"
      end

      # ===========================================================================
      #
      # CROSS
      #
      # Crosses this relation with one or more others. A trailing options hash
      # is removed from the argument list via extract_options! and may carry
      # :parallel. Raises CrossArgumentError when no other relation is given.
      #
      def cross *relations
        options = relations.extract_options!
        raise CrossArgumentError unless relations.length >= 1
        relations_str = [self, *relations].map(&:relation).join(", ")
        result = new_in_chain relations.first.klass, "CROSS #{relations_str}"
        # Class-level helper; see the matching call in #order.
        self.class.parallelize! result.cmd, options
        result
      end

      # ===========================================================================
      #
      # UNION
      #
      # def self.union *relations
      #   raise UnionArgumentError unless relations.length >= 2
      #   new_in_chain relations.first.klass, "UNION #{relations}"
      # end

      # UNION as method: this relation followed by +relations+, assigned to +lval+.
      def union lval, *relations
        # Pass the relations as separate arguments; handing over one array
        # would always look like a single relation to the class-level check.
        self.class.union lval, self, *relations
      end

      # Class form: UNION of two or more relations, assigned to +lval+.
      # Raises UnionArgumentError when fewer than two relations are given.
      def self.union lval, *relations
        raise UnionArgumentError unless relations.length >= 2
        relations_str = relations.map(&:relation).join(", ")
        simple_operation lval, relations.first, :union, relations_str
      end

    end
    # Pre-built ArgumentError instances raised by #cross and .union.
    CrossArgumentError = ArgumentError.new("CROSS requires at least two relations.  Heh heh: relations.")
    UnionArgumentError = ArgumentError.new("UNION requires at least two relations.  Heh heh: relations.")
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module Wukong
  module PigStructMethods
    # Class-level helpers; added to any class that includes PigStructMethods.
    module ClassMethods
      #
      # Pig type string --
      #   one "name: type" entry per member, where the type comes from calling
      #   typify on the matching element of mtypes. With +has_rsrc+ set, a
      #   leading "rsrc: chararray" entry is added. The entries are joined
      #   with ", " and wrapped in parentheses.
      #
      def typify(has_rsrc=nil)
        entries = has_rsrc ? ["rsrc: chararray"] : []
        members.zip(mtypes).each do |member, member_type|
          entries << "#{member}: #{member_type.typify}"
        end
        "(" + entries.join(', ') + ")"
      end

      #
      # Hands off to Wukong::AndPig::PigVar.pig_load, passing this class as
      # the second argument.
      #
      def pig_load(rel, *args)
        Wukong::AndPig::PigVar.pig_load(rel, self, *args)
      end

      #
      # Returns type for a fieldspec: a Symbol is looked up in members_types,
      # anything else yields nil.
      #
      def field_type(field)
        return nil unless field.is_a?(Symbol)
        # Array (nested) fieldspecs were sketched but never enabled:
        #   if field.length > 1 then members_types[field.first].field_type(field[1..-1])
        #   else field_type field.first
        #   end
        members_types[field]
      end

    end

    # Hook run on include: extends the including class with ClassMethods.
    def self.included(base)
      base.extend(ClassMethods)
    end
  end
end
|
42
|
-
|
43
|
-
# Mix the Pig helpers into every Struct.
Struct.class_eval do
  include Wukong::PigStructMethods

  class << self
    # The member "types" of a plain Struct are simply its members.
    def mtypes
      members
    end
  end
end
|
@@ -1,95 +0,0 @@
|
|
1
|
-
module Wukong
  module AndPig

    #
    # Make a PigVar understand the struct it describes
    #
    # A PigVar holds three things: +klass+, +name+ and +cmd+.
    #
    class PigVar
      attr_accessor :klass, :name, :cmd
      cattr_accessor :working_dir ; self.working_dir = '.'

      def initialize klass, name, cmd
        self.klass = klass
        self.name  = name
        self.cmd   = cmd
      end

      # Sugar for PigVar.set:  PigVar[name] = rval
      def self.[]= name, *args
        set name, *args
      end

      # Looks up the value stored under +name+ in PIG_SYMBOLS
      def self.[] name
        PIG_SYMBOLS[name]
      end

      # extract a field from an alias
      def _ field
        as_name = [name, field].join("_").to_sym
        AS["#{relationize}.(#{field})", as_name, Bag.new([field, field_type(field)]), nil, :skip_type]
      end


      # Stores +rval+ in PIG_SYMBOLS under +name+, renames it to +name+, and
      # passes it to emit_setter. Returns whatever emit_setter returns.
      def self.set name, rval
        PIG_SYMBOLS[name] = rval
        rval.name = name
        emit_setter rval.relation, rval
      end

      # The relation string for this variable: name.relationize
      def relation
        name.relationize
      end
      alias_method :relationize, :relation

      #
      # Create a name for a new anonymous relation
      #
      # Bumps Wukong::AndPig.anon_var_idx and returns :"anon_<slug>_<idx>_"
      #
      def self.anon slug
        idx = (Wukong::AndPig.anon_var_idx += 1)
        "anon_#{slug}_#{idx}_".to_sym
      end

      # Create a name building off this one
      def anon
        # Strip one leading "anon_" and one trailing "_<digits>_" so repeated
        # calls do not stack prefixes. \A and \z anchor to the whole string;
        # ^ and $ would anchor to each line.
        slug = name.to_s.sub(/\Aanon_/, '').sub(/_\d+_\z/, '')
        self.class.anon slug
      end

      #
      # Builds a new PigVar of +l_klass+ with command +l_cmd+ and stores it
      # under +lval+ via PigVar.set.
      #
      def new_in_chain lval, l_klass, l_cmd
        rval = self.class.new l_klass, lval, l_cmd
        self.class.set lval, rval
      end

      # Delegate to klass
      def field_type *args
        klass.field_type(*args)
      end

      # Fields in this relation
      def fields
        klass.members.map(&:to_sym)
      end

      #
      # Emits "<OP> <relation>" through PigVar.emit and returns self.
      #
      def simple_operation op
        self.class.emit "#{op.to_s.upcase} #{relation}"
        self
      end

      # Builds "<OP padded to 8 columns> <r_str>", wraps it in a new PigVar
      # that shares rel.klass, and stores it under +lval+ via PigVar.set.
      def self.simple_operation lval, rel, op, r_str
        cmd  = "%-8s %s" % [op.to_s.upcase, r_str]
        rval = new(rel.klass, lval, cmd)
        set lval, rval
      end

      # Builds the same padded "<OP> <r_str>" text and emits it directly.
      def self.simple_declaration op, r_str
        cmd = "%-8s %s" % [op.to_s.upcase, r_str]
        emit cmd
      end

    end
  end
end
|
94
|
-
|
95
|
-
|
@@ -1,29 +0,0 @@
|
|
1
|
-
module Wukong
  module AndPig
    # Lookup table, initially empty.
    PIG_SYMBOLS = Hash.new

    # Module-level counter, starting at zero.
    mattr_accessor(:anon_var_idx)
    self.anon_var_idx = 0
  end
end
|
8
|
-
|
9
|
-
|
10
|
-
# Lets a bare Symbol stand in for the value registered under it in
# Wukong::AndPig::PIG_SYMBOLS.
Symbol.class_eval do
  #
  # :name << relation
  #
  # Calls PigVar.new_relation(self, rhs), where the right-hand side is either
  # a PigVar, or a Symbol that has an entry in PIG_SYMBOLS. Anything else
  # raises a RuntimeError.
  #
  def << relation
    case
    when relation.is_a?(Wukong::AndPig::PigVar)
      Wukong::AndPig::PigVar.new_relation(self, relation)
    when relation.is_a?(Symbol) && (pig_var = Wukong::AndPig::PIG_SYMBOLS[relation])
      Wukong::AndPig::PigVar.new_relation(self, pig_var)
    else raise "Don't know how to pigify RHS #{relation.inspect}"
    end
  end

  #
  # Forwards unknown methods to the PIG_SYMBOLS entry for this symbol when
  # that entry responds to them; otherwise falls through to the normal
  # NoMethodError. A block given by the caller is forwarded as well.
  #
  def method_missing method, *args, &block
    pig_var = Wukong::AndPig::PIG_SYMBOLS[self]
    if pig_var && pig_var.respond_to?(method)
      pig_var.send(method, *args, &block)
    else
      super
    end
  end

  #
  # Keeps respond_to? in agreement with method_missing: true exactly when the
  # PIG_SYMBOLS entry for this symbol responds to +method+.
  #
  def respond_to_missing? method, include_private=false
    pig_var = Wukong::AndPig::PIG_SYMBOLS[self]
    return true if pig_var && pig_var.respond_to?(method)
    super
  end
end
|
data/lib/wukong/and_pig/utils.rb
DELETED
File without changes
|