piglet 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.rdoc +15 -0
- data/lib/piglet.rb +12 -4
- data/lib/piglet/field/binary_conditional.rb +4 -3
- data/lib/piglet/field/call_expression.rb +6 -6
- data/lib/piglet/field/field.rb +134 -0
- data/lib/piglet/field/infix_expression.rb +25 -2
- data/lib/piglet/field/literal.rb +19 -2
- data/lib/piglet/field/prefix_expression.rb +4 -2
- data/lib/piglet/field/reference.rb +8 -3
- data/lib/piglet/field/rename.rb +5 -3
- data/lib/piglet/field/suffix_expression.rb +4 -2
- data/lib/piglet/inout/load.rb +5 -0
- data/lib/piglet/interpreter.rb +2 -4
- data/lib/piglet/relation/cogroup.rb +15 -0
- data/lib/piglet/relation/cross.rb +5 -0
- data/lib/piglet/relation/foreach.rb +5 -0
- data/lib/piglet/relation/group.rb +16 -0
- data/lib/piglet/relation/join.rb +5 -0
- data/lib/piglet/relation/relation.rb +17 -2
- data/lib/piglet/relation/union.rb +1 -1
- data/lib/piglet/schema/bag.rb +21 -0
- data/lib/piglet/schema/tuple.rb +111 -0
- data/spec/piglet/field/binary_conditional_spec.rb +47 -0
- data/spec/piglet/field/field_spec.rb +103 -0
- data/spec/piglet/field/infix_expression_spec.rb +69 -0
- data/spec/piglet/field/literal_spec.rb +27 -0
- data/spec/piglet/field/reference_spec.rb +15 -1
- data/spec/piglet/interpreter_spec.rb +8 -395
- data/spec/piglet/relation/relation_spec.rb +4 -0
- data/spec/piglet/relation/union_spec.rb +37 -0
- data/spec/piglet/schema/tuple_spec.rb +121 -0
- data/spec/piglet_spec.rb +664 -0
- metadata +17 -3
- data/lib/piglet/field/operators.rb +0 -80
@@ -8,6 +8,21 @@ module Piglet
|
|
8
8
|
@sources = @join_fields.keys
|
9
9
|
@parallel = description[:parallel]
|
10
10
|
end
|
11
|
+
|
12
|
+
# Builds the schema of the relation produced by this COGROUP.
#
# The first field is <code>:group</code>; it is followed by one bag per
# source relation, named after the source's alias and wrapping that
# source's schema.
#
# Only the join fields of the first source are consulted for the type of
# <code>:group</code>: with more than one join field the group is a tuple
# of those fields (each with the type it has in the first source's schema),
# with a single join field the group has that field's type.
def schema
  first_schema = @sources.first.schema
  join_fields = @join_fields[@sources.first]
  if join_fields.is_a?(Enumerable) && join_fields.size > 1
    # field_type is a method taking the field name, not a hash-like reader
    group_type = join_fields.map { |f| [f, first_schema.field_type(f)] }
    description = [[:group, :tuple, group_type]]
  else
    # A single key may be given bare or wrapped in a one-element list; either
    # way the group takes the type of that field, not the field's name.
    single_field = join_fields.is_a?(Enumerable) ? join_fields.first : join_fields
    description = [[:group, first_schema.field_type(single_field)]]
  end
  @sources.each do |source|
    description << [source.alias.to_sym, Piglet::Schema::Bag.new(source.schema)]
  end
  Piglet::Schema::Tuple.parse(description)
end
|
11
26
|
|
12
27
|
def to_s
|
13
28
|
joins = @sources.map do |s|
|
@@ -8,6 +8,11 @@ module Piglet
|
|
8
8
|
@sources, @parallel = relations, options[:parallel]
|
9
9
|
end
|
10
10
|
|
11
|
+
# The schema of a CROSS is the schema of the first source extended, through
# Tuple#union, with the schemas of all the remaining sources in order.
def schema
  first_schema, *other_schemas = @sources.map { |source| source.schema }
  first_schema.union(other_schemas)
end
|
15
|
+
|
11
16
|
def to_s
|
12
17
|
str = "CROSS #{source_aliases.join(', ')}"
|
13
18
|
str << " PARALLEL #{@parallel}" if @parallel
|
@@ -6,6 +6,11 @@ module Piglet
|
|
6
6
|
def initialize(relation, field_expressions)
|
7
7
|
@sources, @field_expressions = [relation], [field_expressions].flatten
|
8
8
|
end
|
9
|
+
|
10
|
+
# The schema of a FOREACH has one field per generated expression, carrying
# the name and type reported by that expression.
def schema
  name_type_pairs = @field_expressions.map do |expression|
    [expression.name, expression.type]
  end
  Piglet::Schema::Tuple.parse(name_type_pairs)
end
|
9
14
|
|
10
15
|
def to_s
|
11
16
|
"FOREACH #{@sources.first.alias} GENERATE #{field_expressions_string}"
|
@@ -7,6 +7,22 @@ module Piglet
|
|
7
7
|
options ||= {}
|
8
8
|
@sources, @grouping, @parallel = [relation], grouping, options[:parallel]
|
9
9
|
end
|
10
|
+
|
11
|
+
# Builds the schema of the relation produced by this GROUP.
#
# The result has two fields: <code>:group</code>, and a bag named after the
# source relation's alias which wraps the source's schema. When grouping on
# exactly one field the group has that field's type, otherwise it is a tuple
# made up of the grouping fields and their types.
def schema
  parent = @sources.first
  # Derive the parent's schema once and reuse it for every lookup below.
  parent_schema = parent.schema
  group_type =
    if @grouping.size == 1
      parent_schema.field_type(@grouping.first)
    else
      Piglet::Schema::Tuple.parse(
        @grouping.map { |field| [field, parent_schema.field_type(field)] }
      )
    end
  Piglet::Schema::Tuple.parse([
    [:group, group_type],
    [parent.alias.to_sym, Piglet::Schema::Bag.new(parent_schema)]
  ])
end
|
10
26
|
|
11
27
|
def to_s
|
12
28
|
str = "GROUP #{@sources.first.alias} BY "
|
data/lib/piglet/relation/join.rb
CHANGED
@@ -9,6 +9,11 @@ module Piglet
|
|
9
9
|
@using = description[:using]
|
10
10
|
@parallel = description[:parallel]
|
11
11
|
end
|
12
|
+
|
13
|
+
# The schema of a JOIN is the union (Tuple#union) of the schema of the first
# source with the schemas of the other sources, in source order.
def schema
  source_schemas = @sources.map { |source| source.schema }
  leading = source_schemas.first
  trailing = source_schemas[1..-1]
  leading.union(trailing)
end
|
12
17
|
|
13
18
|
def to_s
|
14
19
|
joins = @sources.map { |s| "#{s.alias} BY #{@join_fields[s]}" }.join(', ')
|
@@ -139,16 +139,31 @@ module Piglet
|
|
139
139
|
Union.new(*([self] + relations))
|
140
140
|
end
|
141
141
|
|
142
|
+
# Returns a Field::Reference to the field called +name+ in this relation.
#
# The reference is given the type the relation's schema reports for the
# field. Determining the type is best effort: if the schema lookup raises a
# StandardError the reference is created with a nil type instead.
def field(name)
  type =
    begin
      schema.field_type(name)
    rescue
      nil
    end
  Field::Reference.new(name, self, :type => type)
end
|
146
|
+
|
147
|
+
# Returns the schema of this relation, which by default is the schema of its
# single source relation.
#
# Raises Piglet::Schema::SchemaError when there is no source relation to take
# the schema from (no sources at all, or an empty list of sources), and when
# there is more than one source so that the choice would be ambiguous.
def schema
  if @sources.nil? || @sources.empty?
    # An empty source list is treated like a missing one rather than failing
    # later with a NoMethodError on nil.
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema'
  elsif @sources.size > 1
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation'
  end
  @sources.first.schema
end
|
156
|
+
|
142
157
|
# Treats a call to an unknown method as a field reference when it is made
# without arguments and its name consists solely of word characters; any
# other unknown call is passed on to super.
def method_missing(name, *args)
  return field(name) if name.to_s =~ /^\w+$/ && args.empty?
  super
end
|
149
164
|
|
150
165
|
# Returns a reference to a field addressed by position: the field name used
# is a dollar sign followed by +n+.
def [](n)
  positional_name = "\$#{n}"
  field(positional_name)
end
|
153
168
|
|
154
169
|
def hash
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Piglet
  module Schema
    # Schema of a bag. A bag wraps the schema of the tuples it holds and
    # answers questions about fields by asking that tuple schema.
    class Bag
      # tuple:: the schema object describing the bag's tuples
      def initialize(tuple)
        @tuple = tuple
      end

      # The field names of the wrapped tuple schema.
      def field_names
        @tuple.field_names
      end

      # The type the wrapped tuple schema reports for the field +name+.
      def field_type(name)
        @tuple.field_type(name)
      end

      # The wrapped tuple's string form with the enclosing parentheses
      # replaced by curly braces.
      def to_s
        tuple_description = @tuple.to_s
        tuple_description.sub(/^\((.*)\)$/, '{\1}')
      end
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Piglet
  module Schema
    # Describes the fields of a tuple: their names, their order and their
    # types. Fields without a name (nil) are tracked by their position.
    class Tuple
      attr_reader :field_names

      # field_names:: the field names in order, nil for an anonymous field
      # type_map:: maps a field name (or, for an anonymous field, its
      #            position) to the field's type
      #
      # Both arguments are copied; passing nil gives an empty list/map.
      def initialize(field_names, type_map)
        @field_names = field_names ? field_names.dup : []
        @type_map = type_map ? type_map.dup : {}
      end

      # Returns a new Tuple with a schema described by the specified array.
      #
      # The array will be interpreted as follows: each element defines a field,
      # and a field can have an optional type. To define a typeless field simply
      # use a symbol, to define a typed field use an array with two values: the
      # first is the name and the second is the type.
      #
      # The type of a field can be one of the following:
      # * <code>:int</code>
      # * <code>:long</code>
      # * <code>:float</code>
      # * <code>:double</code>
      # * <code>:chararray</code>
      # * <code>:bytearray</code>
      # * <code>:tuple</code> or Piglet::Schema::Tuple
      # * <code>:bag</code> or Piglet::Schema::Bag
      #
      # If a type is not given it defaults to <code>:bytearray</code>. To define
      # a tuple field either pass a Piglet::Schema::Tuple object as the type, or
      # use <code>:tuple</code> and a third element, which is the schema of the
      # tuple, e.g. <code>[[:a, :tuple, [:b, :c]]]</code>.
      #
      # A field whose name is nil is anonymous; its type is remembered under
      # the field's position in the description.
      #
      # Maps are currently not supported.
      #
      # Examples (Piglet schema description to the left with the Pig Latin
      # schema definition to the right):
      #
      #   [:a, :b, :c]                     # => (a:bytearray, b:bytearray, c:bytearray)
      #   [[:a, :chararray], [:b, :float]] # => (a:chararray, b:float)
      #   [[:a, Tuple.parse([:b, :c])]]    # => (a:tuple (b:bytearray, c:bytearray))
      #   [[:a, :tuple, [:b, :c]]]         # => (a:tuple (b:bytearray, c:bytearray))
      #   [[:a, :bag, [:b, :c]]]           # => (a:bag {x:tuple (b:bytearray, c:bytearray)})
      def self.parse(description)
        field_names = []
        type_map = {}
        description.each_with_index do |component, index|
          case component
          when Enumerable
            head = component.first
            tail = component[1..-1]
            type_map[head || index] =
              case tail.first
              when :tuple
                parse(*tail[1..-1])
              when :bag
                Bag.new(parse(*tail[1..-1]))
              else
                tail.first
              end
            field_names << head
          else
            type_map[component] = :bytearray
            field_names << component
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns a new Tuple made up of the fields of this tuple followed by
      # the fields of each of the given tuples, in order. The tuples may be
      # passed individually, as arrays, or as a mix of both.
      #
      # When several tuples contain a field with the same name the name is
      # listed once per occurrence and the type of the last occurrence wins.
      def union(*tuples)
        field_names = @field_names.dup
        type_map = @type_map.dup
        tuples.flatten.each do |tuple|
          tuple.field_names.each_with_index do |name, position|
            field_names << name
            # An anonymous field is looked up by its position in the tuple it
            # comes from and stored under its position in the combined tuple.
            key = name || (field_names.size - 1)
            type_map[key] = tuple.field_type(name || position)
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns the type of a field, or nil if the field is not known.
      #
      # field_name:: either the name of the field or its position (Integer).
      #              A position is translated to the name of the field at that
      #              position; if that field is anonymous the position itself
      #              is used as the lookup key.
      def field_type(field_name)
        if Integer === field_name
          field_name = @field_names[field_name] || field_name
        end
        @type_map[field_name]
      end

      # Returns the schema as a comma separated list of field declarations
      # surrounded by parentheses. A named field is written as "name:type", an
      # anonymous field as just its type. Nested tuples and bags are prefixed
      # with "tuple" and "bag" respectively.
      def to_s
        field_declarations = @field_names.each_with_index.map do |field_name, index|
          # Anonymous fields have their type stored under their position
          type = field_type(field_name || index)
          type_str =
            case type
            when Tuple
              "tuple #{type}"
            when Bag
              "bag #{type}"
            else
              type.to_s
            end
          field_name ? "#{field_name}:#{type_str}" : type_str
        end
        "(#{field_declarations.join(', ')})"
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
# Specifies how a BinaryConditional determines its type from the expression
# used for the "true" branch.
describe BinaryConditional do

  before do
    @true_test = mock('test expression')
    @true_test.stub!(:to_s).and_return('true')
    # One mocked field expression per type, each reporting that type
    types = [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple]
    @expressions = types.inject({}) do |expressions, type|
      expression = mock("#{type} expression")
      expression.extend Field
      expression.stub!(:type).and_return(type)
      expressions[type] = expression
      expressions
    end
  end

  describe '#type' do
    it 'returns the type of the true expression' do
      bincond = BinaryConditional.new(@true_test, @expressions[:int], @expressions[:float])
      bincond.type.should == :int
    end

    # description, literal used as the true expression, expected type
    [
      ['returns int if the true expression is an Integer', 3, :int],
      ['returns float if the true expression is a Float', 3.14, :float],
      ['returns boolean if the true expression is true', true, :boolean],
      ['returns boolean if the true expression is false', false, :boolean]
    ].each do |description, true_value, expected_type|
      it description do
        bincond = BinaryConditional.new(@true_test, true_value, @expressions[:float])
        bincond.type.should == expected_type
      end
    end
  end

end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
# Specifies the types reported by the expressions that the Field operators
# and helper methods build.
describe Field do

  before do
    @field = mock('field')
    @field.extend Field
    # One mocked field expression per type, each reporting that type
    types = [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple]
    @expressions = types.inject({}) do |expressions, type|
      expression = mock("#{type} expression")
      expression.extend Field
      expression.stub!(:type).and_return(type)
      expressions[type] = expression
      expressions
    end
  end

  describe '#type' do
    [:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
      # :ne is the method name used for the != operator
      op_str = (op == :ne ? '!=' : op).to_s.upcase

      it "knows that a #{op_str} expression always is of type boolean" do
        comparison = @field.send(op, @field)
        comparison.type.should eql(:boolean)
      end
    end

    it 'knows that % yields an integer' do
      remainder = @field % 5
      remainder.type.should eql(:int)
    end

    # description, argument-less method that should yield a boolean
    [
      ['knows that a call to IsEmpty is of type boolean', :empty?],
      ['knows that the NOT operator yields a boolean', :not],
      ['knows that the "is null" operator yields a boolean', :null?],
      ['knows that the "is not null" operator yields a boolean', :not_null?]
    ].each do |description, predicate|
      it description do
        @field.send(predicate).type.should eql(:boolean)
      end
    end

    [:int, :long, :float, :double, :chararray, :bytearray].each do |type|
      it "knows that a cast to #{type} is of type #{type}" do
        @field.cast(type).type.should eql(type)
      end
    end

    it 'knows that a "matches" expression is always of type boolean' do
      @field.matches(/hello world/).type.should eql(:boolean)
    end

    [:int, :long, :float, :double].each do |type|
      it "knows that negating a #{type} yields a #{type}" do
        @expressions[type].neg.type.should eql(type)
      end
    end

    # right hand operand type together with the wording used for the result
    int_results = [[:int, 'an int'], [:long, 'a long'], [:float, 'a float'], [:double, 'a double']]

    [:+, :-, :*].each do |op|
      int_results.each do |other, result_in_words|
        it "knows that int #{op} #{other} yields #{result_in_words}" do
          (@expressions[:int].send(op, @expressions[other])).type.should eql(other)
        end
      end
    end

    combos = {
      [:int, :int] => :int,
      [:int, :long] => :long,
      [:int, :float] => :float,
      [:int, :double] => :double,
      [:long, :float] => :float,
      [:long, :double] => :double
    }

    # Division is checked with the operands in both orders
    combos.each do |(first, second), result|
      it "knows that #{first}/#{second} yields a #{result}" do
        (@expressions[first] / @expressions[second]).type.should eql(result)
      end

      it "knows that #{second}/#{first} yields a #{result}" do
        (@expressions[second] / @expressions[first]).type.should eql(result)
      end
    end
  end

end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
|
2
|
+
|
3
|
+
|
4
|
+
include Piglet::Field
|
5
|
+
|
6
|
+
|
7
|
+
# Specifies how an InfixExpression determines its type: either from the
# :type option or, when that is absent, from the types of its two operands.
describe InfixExpression do

  before do
    # An explicit operator is used so that no example depends on an instance
    # variable this spec never assigns.
    # NOTE(review): assumes type inference does not depend on which operator
    # is used; '+' follows the numeric promotion rules expected below.
    @operator = '+'
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
      @expressions[type] = mock("#{type} expression")
      @expressions[type].extend Field
      @expressions[type].stub!(:type).and_return(type)
    end
  end

  describe '#type' do
    context 'specified' do
      it 'returns the type specified in the options' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:chararray], :type => :long)
        expr.type.should == :long
      end
    end

    context 'inferred' do
      it 'returns the type of the left expression if no other rules apply' do
        expr = InfixExpression.new('x', @expressions[:chararray], @expressions[:bytearray])
        expr.type.should == :chararray
      end

      it 'returns double if the lefthand type is a double' do
        expr = InfixExpression.new(@operator, @expressions[:double], @expressions[:int])
        expr.type.should == :double
      end

      it 'returns double if the righthand type is a double' do
        expr = InfixExpression.new(@operator, @expressions[:float], @expressions[:double])
        expr.type.should == :double
      end

      it 'returns double when the other operand is of type long' do
        expr = InfixExpression.new(@operator, @expressions[:double], @expressions[:long])
        expr.type.should == :double
      end

      it 'returns float if one type is long and the other is a float' do
        expr = InfixExpression.new(@operator, @expressions[:long], @expressions[:float])
        expr.type.should == :float
      end

      it 'returns long if the lefthand type is long, and the righthand is an int' do
        expr = InfixExpression.new(@operator, @expressions[:long], @expressions[:int])
        expr.type.should == :long
      end

      it 'returns long if the righthand type is long, and the lefthand is an int' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:long])
        expr.type.should == :long
      end

      it 'returns float if one operand is of type int and the other is a float' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:float])
        expr.type.should == :float
      end
    end
  end

end
|