piglet 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/README.rdoc +15 -0
- data/lib/piglet.rb +12 -4
- data/lib/piglet/field/binary_conditional.rb +4 -3
- data/lib/piglet/field/call_expression.rb +6 -6
- data/lib/piglet/field/field.rb +134 -0
- data/lib/piglet/field/infix_expression.rb +25 -2
- data/lib/piglet/field/literal.rb +19 -2
- data/lib/piglet/field/prefix_expression.rb +4 -2
- data/lib/piglet/field/reference.rb +8 -3
- data/lib/piglet/field/rename.rb +5 -3
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/inout/load.rb +5 -0
- data/lib/piglet/interpreter.rb +2 -4
- data/lib/piglet/relation/cogroup.rb +15 -0
- data/lib/piglet/relation/cross.rb +5 -0
- data/lib/piglet/relation/foreach.rb +5 -0
- data/lib/piglet/relation/group.rb +16 -0
- data/lib/piglet/relation/join.rb +5 -0
- data/lib/piglet/relation/relation.rb +17 -2
- data/lib/piglet/relation/union.rb +1 -1
- data/lib/piglet/schema/bag.rb +21 -0
- data/lib/piglet/schema/tuple.rb +111 -0
- data/spec/piglet/field/binary_conditional_spec.rb +47 -0
- data/spec/piglet/field/field_spec.rb +103 -0
- data/spec/piglet/field/infix_expression_spec.rb +69 -0
- data/spec/piglet/field/literal_spec.rb +27 -0
- data/spec/piglet/field/reference_spec.rb +15 -1
- data/spec/piglet/interpreter_spec.rb +8 -395
- data/spec/piglet/relation/relation_spec.rb +4 -0
- data/spec/piglet/relation/union_spec.rb +37 -0
- data/spec/piglet/schema/tuple_spec.rb +121 -0
- data/spec/piglet_spec.rb +664 -0
- metadata +17 -3
- data/lib/piglet/field/operators.rb +0 -80
@@ -8,6 +8,21 @@ module Piglet
|
|
8
8
|
@sources = @join_fields.keys
|
9
9
|
@parallel = description[:parallel]
|
10
10
|
end
|
11
|
+
|
12
|
+
def schema
|
13
|
+
first_schema = @sources.first.schema
|
14
|
+
join_fields = @join_fields[@sources.first]
|
15
|
+
if join_fields.is_a?(Enumerable) && join_fields.size > 1
|
16
|
+
group_type = join_fields.map { |f| [f, first_schema.field_type[f]] }
|
17
|
+
description = [[:group, :tuple, group_type]]
|
18
|
+
else
|
19
|
+
description = [[:group, *join_fields]]
|
20
|
+
end
|
21
|
+
@sources.each do |source|
|
22
|
+
description << [source.alias.to_sym, Piglet::Schema::Bag.new(source.schema)]
|
23
|
+
end
|
24
|
+
Piglet::Schema::Tuple.parse(description)
|
25
|
+
end
|
11
26
|
|
12
27
|
def to_s
|
13
28
|
joins = @sources.map do |s|
|
@@ -8,6 +8,11 @@ module Piglet
|
|
8
8
|
@sources, @parallel = relations, options[:parallel]
|
9
9
|
end
|
10
10
|
|
11
|
+
def schema
|
12
|
+
schemas = @sources.map { |s| s.schema }
|
13
|
+
schemas.first.union(schemas[1..-1])
|
14
|
+
end
|
15
|
+
|
11
16
|
def to_s
|
12
17
|
str = "CROSS #{source_aliases.join(', ')}"
|
13
18
|
str << " PARALLEL #{@parallel}" if @parallel
|
@@ -6,6 +6,11 @@ module Piglet
|
|
6
6
|
def initialize(relation, field_expressions)
|
7
7
|
@sources, @field_expressions = [relation], [field_expressions].flatten
|
8
8
|
end
|
9
|
+
|
10
|
+
def schema
|
11
|
+
description = @field_expressions.map { |expr| [expr.name, expr.type] }
|
12
|
+
Piglet::Schema::Tuple.parse(description)
|
13
|
+
end
|
9
14
|
|
10
15
|
def to_s
|
11
16
|
"FOREACH #{@sources.first.alias} GENERATE #{field_expressions_string}"
|
@@ -7,6 +7,22 @@ module Piglet
|
|
7
7
|
options ||= {}
|
8
8
|
@sources, @grouping, @parallel = [relation], grouping, options[:parallel]
|
9
9
|
end
|
10
|
+
|
11
|
+
def schema
|
12
|
+
parent = @sources.first
|
13
|
+
parent_schema = parent.schema
|
14
|
+
if @grouping.size == 1
|
15
|
+
group_type = parent.schema.field_type(@grouping.first)
|
16
|
+
else
|
17
|
+
group_type = Piglet::Schema::Tuple.parse(
|
18
|
+
@grouping.map { |field| [field, parent_schema.field_type(field)] }
|
19
|
+
)
|
20
|
+
end
|
21
|
+
Piglet::Schema::Tuple.parse([
|
22
|
+
[:group, group_type],
|
23
|
+
[parent.alias.to_sym, Piglet::Schema::Bag.new(parent_schema)]
|
24
|
+
])
|
25
|
+
end
|
10
26
|
|
11
27
|
def to_s
|
12
28
|
str = "GROUP #{@sources.first.alias} BY "
|
data/lib/piglet/relation/join.rb
CHANGED
@@ -9,6 +9,11 @@ module Piglet
|
|
9
9
|
@using = description[:using]
|
10
10
|
@parallel = description[:parallel]
|
11
11
|
end
|
12
|
+
|
13
|
+
def schema
|
14
|
+
schemas = @sources.map { |s| s.schema }
|
15
|
+
schemas.first.union(schemas[1..-1])
|
16
|
+
end
|
12
17
|
|
13
18
|
def to_s
|
14
19
|
joins = @sources.map { |s| "#{s.alias} BY #{@join_fields[s]}" }.join(', ')
|
@@ -139,16 +139,31 @@ module Piglet
|
|
139
139
|
Union.new(*([self] + relations))
|
140
140
|
end
|
141
141
|
|
142
|
+
def field(name)
|
143
|
+
type = schema.field_type(name) rescue nil
|
144
|
+
Field::Reference.new(name, self, :type => type)
|
145
|
+
end
|
146
|
+
|
147
|
+
def schema
|
148
|
+
if @sources.nil?
|
149
|
+
raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema'
|
150
|
+
elsif @sources.size > 1
|
151
|
+
raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation'
|
152
|
+
else
|
153
|
+
@sources.first.schema
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
142
157
|
def method_missing(name, *args)
|
143
158
|
if name.to_s =~ /^\w+$/ && args.empty?
|
144
|
-
|
159
|
+
field(name)
|
145
160
|
else
|
146
161
|
super
|
147
162
|
end
|
148
163
|
end
|
149
164
|
|
150
165
|
def [](n)
|
151
|
-
|
166
|
+
field("\$#{n}")
|
152
167
|
end
|
153
168
|
|
154
169
|
def hash
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Piglet
|
2
|
+
module Schema
|
3
|
+
class Bag
|
4
|
+
def initialize(tuple)
|
5
|
+
@tuple = tuple
|
6
|
+
end
|
7
|
+
|
8
|
+
def field_names
|
9
|
+
@tuple.field_names
|
10
|
+
end
|
11
|
+
|
12
|
+
def field_type(name)
|
13
|
+
@tuple.field_type(name)
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_s
|
17
|
+
@tuple.to_s.sub(/^\((.*)\)$/, '{\1}')
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Piglet
|
2
|
+
module Schema
|
3
|
+
class Tuple
|
4
|
+
attr_reader :field_names
|
5
|
+
|
6
|
+
def initialize(field_names, type_map)
|
7
|
+
@field_names = [ ]
|
8
|
+
@field_names = field_names.dup if field_names
|
9
|
+
@type_map = { }
|
10
|
+
@type_map = type_map.dup if type_map
|
11
|
+
end
|
12
|
+
|
13
|
+
# Returns a new Tuple with a schema described by the specified array.
|
14
|
+
#
|
15
|
+
# The array will be interpreted as follows: each element defines a field,
|
16
|
+
# and a field can have an optional type. To define a typeless field simply
|
17
|
+
# use a symbol, to define a typed field use an array with two values: the
|
18
|
+
# first is the name and the second is the type.
|
19
|
+
#
|
20
|
+
# The type of a field can be one of the following:
|
21
|
+
# * <code>:int</code>
|
22
|
+
# * <code>:long</code>
|
23
|
+
# * <code>:float</code>
|
24
|
+
# * <code>:double</code>
|
25
|
+
# * <code>:chararray</code>
|
26
|
+
# * <code>:bytearray</code>
|
27
|
+
# * <code>:tuple</code> or Piglet::Schema::Tuple
|
28
|
+
# * <code>:bag</code> or Piglet::Schema::Bag
|
29
|
+
#
|
30
|
+
# If a type is not given it defaults to <code>:bytearray</code>. To define
|
31
|
+
# a tuple field either pass a Piglet::Schema::Tuple object as the type, or
|
32
|
+
# use <code>:tuple</code> and a third element, which is the schema of the
|
33
|
+
# tuple, e.g. <code>[[:a, :tuple, [:b, :c]]]</code>.
|
34
|
+
#
|
35
|
+
# Maps are currently not supported.
|
36
|
+
#
|
37
|
+
# Examples (Piglet schema description to the left with the Pig Latin
|
38
|
+
# schema definition to the right):
|
39
|
+
#
|
40
|
+
# [:a, :b, :c] # => (a:bytearray, b:bytearray, c:bytearray)
|
41
|
+
# [[:a, :chararray], [:b, :float]] # => (a:chararray, b:float)
|
42
|
+
# [[:a, Tuple.parse([:b, :c])]] # => (a:tuple (b:bytearray, c:bytearray))
|
43
|
+
# [[:a, :bag, [:b, :c]]] # => (a:bag {x:tuple (b:bytearray, c:bytearray)})
|
44
|
+
def self.parse(description)
|
45
|
+
field_names = [ ]
|
46
|
+
type_map = { }
|
47
|
+
index = 0
|
48
|
+
description.map do |component|
|
49
|
+
case component
|
50
|
+
when Enumerable
|
51
|
+
head = component.first
|
52
|
+
tail = component[1..-1]
|
53
|
+
case tail.first
|
54
|
+
when :tuple
|
55
|
+
type_map[head || index] = parse(*tail[1..-1])
|
56
|
+
when :bag
|
57
|
+
type_map[head || index] = Bag.new(parse(*tail[1..-1]))
|
58
|
+
else
|
59
|
+
type_map[head || index] = tail.first
|
60
|
+
end
|
61
|
+
field_names << head
|
62
|
+
else
|
63
|
+
type_map[component] = :bytearray
|
64
|
+
field_names << component
|
65
|
+
end
|
66
|
+
index += 1
|
67
|
+
end
|
68
|
+
Tuple.new(field_names, type_map)
|
69
|
+
end
|
70
|
+
|
71
|
+
def union(*tuples)
|
72
|
+
field_names = @field_names.dup
|
73
|
+
type_map = @type_map.dup
|
74
|
+
tuples.flatten.each do |tuple|
|
75
|
+
tuple.field_names.each do |f|
|
76
|
+
field_names << f
|
77
|
+
type_map[f] = tuple.field_type(f)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
Tuple.new(field_names, type_map)
|
81
|
+
end
|
82
|
+
|
83
|
+
def field_type(field_name)
|
84
|
+
if Integer === field_name
|
85
|
+
field_name = @field_names[field_name] || field_name
|
86
|
+
end
|
87
|
+
@type_map[field_name]
|
88
|
+
end
|
89
|
+
|
90
|
+
def to_s
|
91
|
+
field_declarations = @field_names.map do |field_name|
|
92
|
+
type = field_type(field_name)
|
93
|
+
type_str = case type
|
94
|
+
when Tuple
|
95
|
+
"tuple #{type}"
|
96
|
+
when Bag
|
97
|
+
"bag #{type}"
|
98
|
+
else
|
99
|
+
type.to_s
|
100
|
+
end
|
101
|
+
if field_name
|
102
|
+
"#{field_name}:#{type_str}"
|
103
|
+
else
|
104
|
+
type_str
|
105
|
+
end
|
106
|
+
end
|
107
|
+
"(#{field_declarations.join(', ')})"
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
describe BinaryConditional do
|
8
|
+
|
9
|
+
before do
|
10
|
+
@true_test = mock('test expression')
|
11
|
+
@true_test.stub!(:to_s).and_return('true')
|
12
|
+
@expressions = {}
|
13
|
+
[:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
|
14
|
+
@expressions[type] = mock("#{type} expression")
|
15
|
+
@expressions[type].extend Field
|
16
|
+
@expressions[type].stub!(:type).and_return(type)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '#type' do
|
21
|
+
it 'returns the type of the true expression' do
|
22
|
+
bincond = BinaryConditional.new(@true_test, @expressions[:int], @expressions[:float])
|
23
|
+
bincond.type.should == :int
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'returns int if the true expression is an Integer' do
|
27
|
+
bincond = BinaryConditional.new(@true_test, 3, @expressions[:float])
|
28
|
+
bincond.type.should == :int
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns float if the true expression is a Float' do
|
32
|
+
bincond = BinaryConditional.new(@true_test, 3.14, @expressions[:float])
|
33
|
+
bincond.type.should == :float
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'returns boolean if the true expression is true' do
|
37
|
+
bincond = BinaryConditional.new(@true_test, true, @expressions[:float])
|
38
|
+
bincond.type.should == :boolean
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'returns boolean if the true expression is false' do
|
42
|
+
bincond = BinaryConditional.new(@true_test, false, @expressions[:float])
|
43
|
+
bincond.type.should == :boolean
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
describe Field do
|
8
|
+
|
9
|
+
before do
|
10
|
+
@field = mock('field')
|
11
|
+
@field.extend Field
|
12
|
+
@expressions = {}
|
13
|
+
[:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
|
14
|
+
@expressions[type] = mock("#{type} expression")
|
15
|
+
@expressions[type].extend Field
|
16
|
+
@expressions[type].stub!(:type).and_return(type)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '#type' do
|
21
|
+
[:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
|
22
|
+
op_str = (op == :ne ? '!=' : op).to_s.upcase
|
23
|
+
|
24
|
+
it "knows that a #{op_str} expression always is of type boolean" do
|
25
|
+
(@field.send(op, @field)).type.should eql(:boolean)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'knows that % yields an integer' do
|
30
|
+
(@field % 5).type.should eql(:int)
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'knows that a call to IsEmpty is of type boolean' do
|
34
|
+
@field.empty?.type.should eql(:boolean)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'knows that the NOT operator yields a boolean' do
|
38
|
+
@field.not.type.should eql(:boolean)
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows that the "is null" operator yields a boolean' do
|
42
|
+
@field.null?.type.should eql(:boolean)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'knows that the "is not null" operator yields a boolean' do
|
46
|
+
@field.not_null?.type.should eql(:boolean)
|
47
|
+
end
|
48
|
+
|
49
|
+
[:int, :long, :float, :double, :chararray, :bytearray].each do |type|
|
50
|
+
it "knows that a cast to #{type} is of type #{type}" do
|
51
|
+
@field.cast(type).type.should eql(type)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'knows that a "matches" expression is always of type boolean' do
|
56
|
+
@field.matches(/hello world/).type.should eql(:boolean)
|
57
|
+
end
|
58
|
+
|
59
|
+
[:int, :long, :float, :double].each do |type|
|
60
|
+
it "knows that negating a #{type} yields a #{type}" do
|
61
|
+
@expressions[type].neg.type.should eql(type)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
[:+, :-, :*].each do |op|
|
66
|
+
it "knows that int #{op} int yields an int" do
|
67
|
+
(@expressions[:int].send(op, @expressions[:int])).type.should eql(:int)
|
68
|
+
end
|
69
|
+
|
70
|
+
it "knows that int #{op} long yields a long" do
|
71
|
+
(@expressions[:int].send(op, @expressions[:long])).type.should eql(:long)
|
72
|
+
end
|
73
|
+
|
74
|
+
it "knows that int #{op} float yields a float" do
|
75
|
+
(@expressions[:int].send(op, @expressions[:float])).type.should eql(:float)
|
76
|
+
end
|
77
|
+
|
78
|
+
it "knows that int #{op} double yields a double" do
|
79
|
+
(@expressions[:int].send(op, @expressions[:double])).type.should eql(:double)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
combos = {
|
84
|
+
[:int, :int] => :int,
|
85
|
+
[:int, :long] => :long,
|
86
|
+
[:int, :float] => :float,
|
87
|
+
[:int, :double] => :double,
|
88
|
+
[:long, :float] => :float,
|
89
|
+
[:long, :double] => :double
|
90
|
+
}
|
91
|
+
|
92
|
+
combos.each do |operands, result|
|
93
|
+
it "knows that #{operands[0]}/#{operands[1]} yields a #{result}" do
|
94
|
+
(@expressions[operands[0]] / @expressions[operands[1]]).type.should eql(result)
|
95
|
+
end
|
96
|
+
|
97
|
+
it "knows that #{operands[1]}/#{operands[0]} yields a #{result}" do
|
98
|
+
(@expressions[operands[1]] / @expressions[operands[0]]).type.should eql(result)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
describe InfixExpression do
|
8
|
+
|
9
|
+
before do
|
10
|
+
@expressions = {}
|
11
|
+
[:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
|
12
|
+
@expressions[type] = mock("#{type} expression")
|
13
|
+
@expressions[type].extend Field
|
14
|
+
@expressions[type].stub!(:type).and_return(type)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '#type' do
|
19
|
+
context 'specified' do
|
20
|
+
it 'returns the type specified in the options' do
|
21
|
+
expr = InfixExpression.new(@true_test, @expressions[:int], @expressions[:chararray], :type => :long)
|
22
|
+
expr.type.should == :long
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'inferred' do
|
27
|
+
it 'returns the type of the left expression if no other rules apply' do
|
28
|
+
expr = InfixExpression.new('x', @expressions[:chararray], @expressions[:bytearray])
|
29
|
+
expr.type.should == :chararray
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns double if the lefthand type is a double' do
|
33
|
+
expr = InfixExpression.new(@true_test, @expressions[:double], @expressions[:int])
|
34
|
+
expr.type.should == :double
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'returns double if the righthand type is a double' do
|
38
|
+
expr = InfixExpression.new(@true_test, @expressions[:float], @expressions[:double])
|
39
|
+
expr.type.should == :double
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'returns double when the other operand is of type long' do
|
43
|
+
expr = InfixExpression.new(@true_test, @expressions[:double], @expressions[:long])
|
44
|
+
expr.type.should == :double
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'returns float if one type is long and the other is a float' do
|
48
|
+
expr = InfixExpression.new(@true_test, @expressions[:long], @expressions[:float])
|
49
|
+
expr.type.should == :float
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'returns long if the lefthand type is long, and the righthand is an int' do
|
53
|
+
expr = InfixExpression.new(@true_test, @expressions[:long], @expressions[:int])
|
54
|
+
expr.type.should == :long
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'returns long if the righthand type is long, and the lefthand is an int' do
|
58
|
+
expr = InfixExpression.new(@true_test, @expressions[:int], @expressions[:long])
|
59
|
+
expr.type.should == :long
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'returns float if one operand is of type int and the other is a float' do
|
63
|
+
expr = InfixExpression.new(@true_test, @expressions[:int], @expressions[:float])
|
64
|
+
expr.type.should == :float
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|