piglet 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,6 +8,21 @@ module Piglet
8
8
  @sources = @join_fields.keys
9
9
  @parallel = description[:parallel]
10
10
  end
11
+
12
# Returns the schema of the relation produced by this COGROUP as a
# Piglet::Schema::Tuple.
#
# The first field is named +group+. When the first source is joined on more
# than one field, +group+ is a tuple made up of those fields, typed as they
# are in the first source's schema. Otherwise +group+ takes the type that the
# single join field has in the first source's schema. After +group+ comes one
# bag per source, named after the source's alias and wrapping that source's
# schema.
def schema
  first_source = @sources.first
  first_schema = first_source.schema
  join_fields = @join_fields[first_source]
  if join_fields.is_a?(Enumerable) && join_fields.size > 1
    # field_type is a method that takes the field name; it has to be called
    # with the name, not indexed.
    group_type = join_fields.map { |f| [f, first_schema.field_type(f)] }
    description = [[:group, :tuple, group_type]]
  else
    # The single join field may be given bare or wrapped in a one-element
    # list. Its type (not its name) is the type of the group field.
    join_field = join_fields.is_a?(Enumerable) ? join_fields.first : join_fields
    description = [[:group, first_schema.field_type(join_field)]]
  end
  @sources.each do |source|
    description << [source.alias.to_sym, Piglet::Schema::Bag.new(source.schema)]
  end
  Piglet::Schema::Tuple.parse(description)
end
11
26
 
12
27
  def to_s
13
28
  joins = @sources.map do |s|
@@ -8,6 +8,11 @@ module Piglet
8
8
  @sources, @parallel = relations, options[:parallel]
9
9
  end
10
10
 
11
# Returns the schema of the crossed relation: the schema of the first source
# unioned with the schemas of all remaining sources, in source order.
def schema
  first_schema, *other_schemas = @sources.map { |source| source.schema }
  first_schema.union(other_schemas)
end
15
+
11
16
  def to_s
12
17
  str = "CROSS #{source_aliases.join(', ')}"
13
18
  str << " PARALLEL #{@parallel}" if @parallel
@@ -6,6 +6,11 @@ module Piglet
6
6
  def initialize(relation, field_expressions)
7
7
  @sources, @field_expressions = [relation], [field_expressions].flatten
8
8
  end
9
+
10
# Returns the schema of the generated relation: one field per field
# expression, taking its name and type from the expression.
def schema
  fields = @field_expressions.map do |expression|
    [expression.name, expression.type]
  end
  Piglet::Schema::Tuple.parse(fields)
end
9
14
 
10
15
  def to_s
11
16
  "FOREACH #{@sources.first.alias} GENERATE #{field_expressions_string}"
@@ -7,6 +7,22 @@ module Piglet
7
7
  options ||= {}
8
8
  @sources, @grouping, @parallel = [relation], grouping, options[:parallel]
9
9
  end
10
+
11
# Returns the schema of the grouped relation as a Piglet::Schema::Tuple with
# two fields: +group+ and a bag named after the source relation's alias that
# wraps the source's schema.
#
# When grouping on a single field, +group+ has that field's type; otherwise
# it is a tuple of the grouping fields with the types they have in the
# source's schema.
def schema
  parent = @sources.first
  # Computed once and reused below: asking the parent for its schema may walk
  # the whole chain of source relations.
  parent_schema = parent.schema
  if @grouping.size == 1
    group_type = parent_schema.field_type(@grouping.first)
  else
    group_type = Piglet::Schema::Tuple.parse(
      @grouping.map { |field| [field, parent_schema.field_type(field)] }
    )
  end
  Piglet::Schema::Tuple.parse([
    [:group, group_type],
    [parent.alias.to_sym, Piglet::Schema::Bag.new(parent_schema)]
  ])
end
10
26
 
11
27
  def to_s
12
28
  str = "GROUP #{@sources.first.alias} BY "
@@ -9,6 +9,11 @@ module Piglet
9
9
  @using = description[:using]
10
10
  @parallel = description[:parallel]
11
11
  end
12
+
13
# Returns the schema of the joined relation: the first source's schema with
# the schemas of the other sources appended, in source order.
def schema
  source_schemas = @sources.map { |source| source.schema }
  head = source_schemas.first
  head.union(source_schemas.drop(1))
end
12
17
 
13
18
  def to_s
14
19
  joins = @sources.map { |s| "#{s.alias} BY #{@join_fields[s]}" }.join(', ')
@@ -139,16 +139,31 @@ module Piglet
139
139
  Union.new(*([self] + relations))
140
140
  end
141
141
 
142
# Returns a Field::Reference to the field +name+ of this relation.
#
# The reference is given the type the relation's schema reports for the
# field. If the schema or the type cannot be determined (any StandardError),
# the type is nil.
def field(name)
  type = begin
    schema.field_type(name)
  rescue StandardError
    nil
  end
  Field::Reference.new(name, self, :type => type)
end
146
+
147
# Returns the schema of this relation, which by default is the schema of its
# single source relation.
#
# Raises Piglet::Schema::SchemaError when there is no source relation to
# take the schema from (the source list is nil or empty), or when there is
# more than one source so the choice would be ambiguous.
def schema
  # An empty source list is treated like a missing one; otherwise the lookup
  # below would fail with a NoMethodError on nil instead of a SchemaError.
  if @sources.nil? || @sources.empty?
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema'
  elsif @sources.size > 1
    raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation'
  else
    @sources.first.schema
  end
end
156
+
142
157
  def method_missing(name, *args)
143
158
  if name.to_s =~ /^\w+$/ && args.empty?
144
- Field::Reference.new(name, self)
159
+ field(name)
145
160
  else
146
161
  super
147
162
  end
148
163
  end
149
164
 
150
165
  def [](n)
151
- Field::Reference.new("\$#{n}", self)
166
+ field("\$#{n}")
152
167
  end
153
168
 
154
169
  def hash
@@ -4,7 +4,7 @@ module Piglet
4
4
  include Relation
5
5
 
6
6
  def initialize(*relations)
7
- @sources = relations
7
+ @sources = [relations].flatten
8
8
  end
9
9
 
10
10
  def to_s
@@ -0,0 +1,21 @@
1
module Piglet
  module Schema
    # Schema type that wraps a tuple schema: field lookups are passed on to
    # the wrapped tuple, and the string form is the tuple's with the outer
    # parentheses replaced by curly braces.
    class Bag
      # +tuple+ is the schema the bag wraps.
      def initialize(tuple)
        @tuple = tuple
      end

      # The field names of the wrapped tuple.
      def field_names
        @tuple.field_names
      end

      # The type the wrapped tuple reports for the field +name+.
      def field_type(name)
        @tuple.field_type(name)
      end

      # The wrapped tuple's string form with "(...)" turned into "{...}".
      def to_s
        tuple_string = @tuple.to_s
        tuple_string.sub(/^\((.*)\)$/, '{\1}')
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
module Piglet
  module Schema
    # An ordered list of fields, each with an optional name and a type.
    class Tuple
      attr_reader :field_names

      # Creates a tuple schema from a list of field names (+nil+ for an
      # unnamed field) and a hash that maps each field name to its type. The
      # type of an unnamed field is stored under the field's position.
      # Both arguments are copied; +nil+ is treated as empty.
      def initialize(field_names, type_map)
        @field_names = field_names ? field_names.dup : []
        @type_map = type_map ? type_map.dup : {}
      end

      # Returns a new Tuple with a schema described by the specified array.
      #
      # The array will be interpreted as follows: each element defines a field,
      # and a field can have an optional type. To define a typeless field simply
      # use a symbol, to define a typed field use an array with two values: the
      # first is the name and the second is the type.
      #
      # The type of a field can be one of the following:
      # * <code>:int</code>
      # * <code>:long</code>
      # * <code>:float</code>
      # * <code>:double</code>
      # * <code>:chararray</code>
      # * <code>:bytearray</code>
      # * <code>:tuple</code> or Piglet::Schema::Tuple
      # * <code>:bag</code> or Piglet::Schema::Bag
      #
      # If a type is not given it defaults to <code>:bytearray</code>. To define
      # a tuple field either pass a Piglet::Schema::Tuple object as the type, or
      # use <code>:tuple</code> and a third element, which is the schema of the
      # tuple, e.g. <code>[[:a, :tuple, [:b, :c]]]</code>. A bag field is
      # defined the same way with <code>:bag</code>.
      #
      # A field whose name is +nil+ is unnamed; its type is stored under its
      # position in the description.
      #
      # Maps are currently not supported.
      #
      # Examples (Piglet schema description to the left with the Pig Latin
      # schema definition to the right):
      #
      #   [:a, :b, :c]                      # => (a:bytearray, b:bytearray, c:bytearray)
      #   [[:a, :chararray], [:b, :float]]  # => (a:chararray, b:float)
      #   [[:a, Tuple.parse([:b, :c])]]     # => (a:tuple (b:bytearray, c:bytearray))
      #   [[:a, :bag, [:b, :c]]]            # => (a:bag {b:bytearray, c:bytearray})
      def self.parse(description)
        field_names = []
        type_map = {}
        description.each_with_index do |component, index|
          case component
          when Enumerable
            head = component.first
            tail = component[1..-1]
            # Unnamed fields are keyed by position so they can still be
            # looked up with field_type(index).
            key = head || index
            case tail.first
            when :tuple
              type_map[key] = parse(*tail[1..-1])
            when :bag
              type_map[key] = Bag.new(parse(*tail[1..-1]))
            else
              type_map[key] = tail.first
            end
            field_names << head
          else
            type_map[component || index] = :bytearray
            field_names << component
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns a new Tuple with this tuple's fields followed by the fields
      # of each of the given tuples (which may be passed as separate
      # arguments, as lists, or a mix), in order. When a field name occurs
      # more than once, the type from the last occurrence is used for it.
      def union(*tuples)
        field_names = @field_names.dup
        type_map = @type_map.dup
        tuples.flatten.each do |tuple|
          tuple.field_names.each_with_index do |name, position|
            field_names << name
            if name
              type_map[name] = tuple.field_type(name)
            else
              # An unnamed field has no name to look its type up by: fetch it
              # by its position in the other tuple and store it under its
              # position in the combined tuple.
              type_map[field_names.size - 1] = tuple.field_type(position)
            end
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns the type of a field, or +nil+ if the field is unknown.
      #
      # +field_name+ is either the name of the field or, when it is an
      # Integer, the position of the field in the tuple.
      def field_type(field_name)
        if Integer === field_name
          # Resolve a position to the field's name; unnamed fields have their
          # type stored under the position itself.
          field_name = @field_names[field_name] || field_name
        end
        @type_map[field_name]
      end

      # Returns the Pig Latin schema definition of this tuple, e.g.
      # "(a:chararray, b:float)". Unnamed fields are rendered as just their
      # type.
      def to_s
        field_declarations = []
        @field_names.each_with_index do |field_name, index|
          # The type of an unnamed field lives under its position; fall back
          # to the name lookup for tuples built directly with a nil key.
          type = if field_name
            field_type(field_name)
          else
            field_type(index) || field_type(field_name)
          end
          type_str = case type
          when Tuple
            "tuple #{type}"
          when Bag
            "bag #{type}"
          else
            type.to_s
          end
          field_declarations << (field_name ? "#{field_name}:#{type_str}" : type_str)
        end
        "(#{field_declarations.join(', ')})"
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
# Specs for the type a BinaryConditional reports, given the expression used
# for its "true" branch.
describe BinaryConditional do

  before do
    @true_test = mock('test expression')
    @true_test.stub!(:to_s).and_return('true')
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
      expression = mock("#{type} expression")
      expression.extend Field
      expression.stub!(:type).and_return(type)
      @expressions[type] = expression
    end
  end

  describe '#type' do
    it 'returns the type of the true expression' do
      bincond = BinaryConditional.new(@true_test, @expressions[:int], @expressions[:float])
      bincond.type.should == :int
    end

    # Ruby literals used as the true expression, with the type expected for
    # each of them.
    [
      [3,     :int,     'an Integer'],
      [3.14,  :float,   'a Float'],
      [true,  :boolean, 'true'],
      [false, :boolean, 'false']
    ].each do |literal, expected_type, label|
      it "returns #{expected_type} if the true expression is #{label}" do
        bincond = BinaryConditional.new(@true_test, literal, @expressions[:float])
        bincond.type.should == expected_type
      end
    end
  end

end
@@ -0,0 +1,103 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
# Specs for the types reported by the expressions that are built through the
# Field mixin.
describe Field do

  before do
    @field = mock('field')
    @field.extend Field
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
      expression = mock("#{type} expression")
      expression.extend Field
      expression.stub!(:type).and_return(type)
      @expressions[type] = expression
    end
  end

  describe '#type' do
    [:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |op|
      # :ne is shown as the operator it stands for; the rest are upcased.
      operator_label = op == :ne ? '!=' : op.to_s.upcase

      it "knows that a #{operator_label} expression always is of type boolean" do
        (@field.send(op, @field)).type.should eql(:boolean)
      end
    end

    it 'knows that % yields an integer' do
      (@field % 5).type.should eql(:int)
    end

    # Argument-less operations that always yield a boolean.
    [
      ['knows that a call to IsEmpty is of type boolean',        :empty?],
      ['knows that the NOT operator yields a boolean',           :not],
      ['knows that the "is null" operator yields a boolean',     :null?],
      ['knows that the "is not null" operator yields a boolean', :not_null?]
    ].each do |description, operation|
      it description do
        @field.send(operation).type.should eql(:boolean)
      end
    end

    [:int, :long, :float, :double, :chararray, :bytearray].each do |type|
      it "knows that a cast to #{type} is of type #{type}" do
        @field.cast(type).type.should eql(type)
      end
    end

    it 'knows that a "matches" expression is always of type boolean' do
      @field.matches(/hello world/).type.should eql(:boolean)
    end

    [:int, :long, :float, :double].each do |type|
      it "knows that negating a #{type} yields a #{type}" do
        @expressions[type].neg.type.should eql(type)
      end
    end

    # With an int on the left, the result takes the type of the right operand.
    right_operands = [
      [:int,    'an int'],
      [:long,   'a long'],
      [:float,  'a float'],
      [:double, 'a double']
    ]

    [:+, :-, :*].each do |op|
      right_operands.each do |right_type, result_phrase|
        it "knows that int #{op} #{right_type} yields #{result_phrase}" do
          (@expressions[:int].send(op, @expressions[right_type])).type.should eql(right_type)
        end
      end
    end

    combos = {
      [:int, :int] => :int,
      [:int, :long] => :long,
      [:int, :float] => :float,
      [:int, :double] => :double,
      [:long, :float] => :float,
      [:long, :double] => :double
    }

    # Division is checked in both operand orders for every combination.
    combos.each do |(first, second), expected|
      it "knows that #{first}/#{second} yields a #{expected}" do
        (@expressions[first] / @expressions[second]).type.should eql(expected)
      end

      it "knows that #{second}/#{first} yields a #{expected}" do
        (@expressions[second] / @expressions[first]).type.should eql(expected)
      end
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
# Specs for the type an InfixExpression reports, either given explicitly
# through the :type option or inferred from the types of its operands.
#
# NOTE(review): @true_test is passed as the first argument in most examples
# but is never assigned in this spec, so it is nil here -- confirm whether a
# real operator was intended.
describe InfixExpression do

  before do
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
      expression = mock("#{type} expression")
      expression.extend Field
      expression.stub!(:type).and_return(type)
      @expressions[type] = expression
    end
  end

  describe '#type' do
    context 'specified' do
      it 'returns the type specified in the options' do
        expr = InfixExpression.new(@true_test, @expressions[:int], @expressions[:chararray], :type => :long)
        expr.type.should == :long
      end
    end

    context 'inferred' do
      it 'returns the type of the left expression if no other rules apply' do
        expr = InfixExpression.new('x', @expressions[:chararray], @expressions[:bytearray])
        expr.type.should == :chararray
      end

      # description, left operand type, right operand type, expected type
      [
        ['returns double if the lefthand type is a double',                            :double, :int,    :double],
        ['returns double if the righthand type is a double',                           :float,  :double, :double],
        ['returns double when the other operand is of type long',                      :double, :long,   :double],
        ['returns float if one type is long and the other is a float',                 :long,   :float,  :float],
        ['returns long if the lefthand type is long, and the righthand is an int',     :long,   :int,    :long],
        ['returns long if the righthand type is long, and the lefthand is an int',     :int,    :long,   :long],
        ['returns float if one operand is of type int and the other is a float',       :int,    :float,  :float]
      ].each do |description, left_type, right_type, expected_type|
        it description do
          expr = InfixExpression.new(@true_test, @expressions[left_type], @expressions[right_type])
          expr.type.should == expected_type
        end
      end
    end
  end

end