piglet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,21 @@ module Piglet
8
8
  @sources = @join_fields.keys
9
9
  @parallel = description[:parallel]
10
10
  end
11
+
12
# Returns the schema of the relation produced by this COGROUP as a
# Piglet::Schema::Tuple.
#
# The first field is called +group+. When the first source is joined on
# more than one field, +group+ is a tuple of those fields, typed from the
# first source's schema; otherwise it has the type of the single join field.
# After +group+ comes one bag per source, named after the source's alias
# and wrapping that source's schema.
def schema
  first_schema = @sources.first.schema
  join_fields = @join_fields[@sources.first]
  if join_fields.is_a?(Enumerable) && join_fields.size > 1
    # field_type is a method and must be called with the field name
    # (square brackets would call it without arguments and then index the result).
    group_type = join_fields.map { |f| [f, first_schema.field_type(f)] }
    description = [[:group, :tuple, group_type]]
  else
    # A single join field may be given bare or wrapped in a one-element
    # collection; either way the group has that field's type, not its name.
    join_field = [join_fields].flatten.first
    description = [[:group, first_schema.field_type(join_field)]]
  end
  @sources.each do |source|
    description << [source.alias.to_sym, Piglet::Schema::Bag.new(source.schema)]
  end
  Piglet::Schema::Tuple.parse(description)
end
11
26
 
12
27
  def to_s
13
28
  joins = @sources.map do |s|
@@ -8,6 +8,11 @@ module Piglet
8
8
  @sources, @parallel = relations, options[:parallel]
9
9
  end
10
10
 
11
# The schema of a CROSS is the schema of the first source with the fields
# of every remaining source appended, in source order.
def schema
  first_schema, *remaining_schemas = @sources.map { |source| source.schema }
  first_schema.union(remaining_schemas)
end
15
+
11
16
  def to_s
12
17
  str = "CROSS #{source_aliases.join(', ')}"
13
18
  str << " PARALLEL #{@parallel}" if @parallel
@@ -6,6 +6,11 @@ module Piglet
6
6
  def initialize(relation, field_expressions)
7
7
  @sources, @field_expressions = [relation], [field_expressions].flatten
8
8
  end
9
+
10
# The schema of a FOREACH ... GENERATE has one field per generated
# expression, taking each field's name and type from the expression.
def schema
  Piglet::Schema::Tuple.parse(
    @field_expressions.map do |expression|
      [expression.name, expression.type]
    end
  )
end
9
14
 
10
15
  def to_s
11
16
  "FOREACH #{@sources.first.alias} GENERATE #{field_expressions_string}"
@@ -7,6 +7,22 @@ module Piglet
7
7
  options ||= {}
8
8
  @sources, @grouping, @parallel = [relation], grouping, options[:parallel]
9
9
  end
10
+
11
# Returns the schema of the relation produced by this GROUP as a
# Piglet::Schema::Tuple with two fields:
#
# * +group+, which has the type of the grouping field when grouping by a
#   single field, or is a tuple of the grouping fields otherwise
# * a bag named after the source relation's alias, wrapping its schema
def schema
  parent = @sources.first
  # Computed once and reused below; asking the parent for its schema again
  # would redo the whole derivation.
  parent_schema = parent.schema
  group_type =
    if @grouping.size == 1
      parent_schema.field_type(@grouping.first)
    else
      Piglet::Schema::Tuple.parse(
        @grouping.map { |field| [field, parent_schema.field_type(field)] }
      )
    end
  Piglet::Schema::Tuple.parse([
    [:group, group_type],
    [parent.alias.to_sym, Piglet::Schema::Bag.new(parent_schema)]
  ])
end
10
26
 
11
27
  def to_s
12
28
  str = "GROUP #{@sources.first.alias} BY "
@@ -9,6 +9,11 @@ module Piglet
9
9
  @using = description[:using]
10
10
  @parallel = description[:parallel]
11
11
  end
12
+
13
# The schema of a JOIN is the first source's schema followed by the fields
# of all the other sources, in source order.
def schema
  source_schemas = @sources.map { |source| source.schema }
  leading_schema = source_schemas.first
  trailing_schemas = source_schemas[1..-1]
  leading_schema.union(trailing_schemas)
end
12
17
 
13
18
  def to_s
14
19
  joins = @sources.map { |s| "#{s.alias} BY #{@join_fields[s]}" }.join(', ')
@@ -139,16 +139,31 @@ module Piglet
139
139
  Union.new(*([self] + relations))
140
140
  end
141
141
 
142
# Returns a reference to the field called +name+ in this relation.
#
# The reference is given the type that this relation's schema reports for
# the field. If the schema lookup raises any StandardError the reference is
# created with a nil type instead.
def field(name)
  field_type =
    begin
      schema.field_type(name)
    rescue StandardError
      nil
    end
  Field::Reference.new(name, self, :type => field_type)
end
146
+
147
# Default schema: a relation with exactly one source has the same schema as
# that source.
#
# Raises Piglet::Schema::SchemaError when there is no source list at all, or
# when there is more than one source, since the schema is then ambiguous.
def schema
  raise Piglet::Schema::SchemaError, 'Could not determine the schema since there was no source relation and this relation does not define its own schema' if @sources.nil?
  raise Piglet::Schema::SchemaError, 'Could not determine the schema since there were more than one source relation' if @sources.size > 1

  @sources.first.schema
end
156
+
142
157
  def method_missing(name, *args)
143
158
  if name.to_s =~ /^\w+$/ && args.empty?
144
- Field::Reference.new(name, self)
159
+ field(name)
145
160
  else
146
161
  super
147
162
  end
148
163
  end
149
164
 
150
165
  def [](n)
151
- Field::Reference.new("\$#{n}", self)
166
+ field("\$#{n}")
152
167
  end
153
168
 
154
169
  def hash
@@ -4,7 +4,7 @@ module Piglet
4
4
  include Relation
5
5
 
6
6
  def initialize(*relations)
7
- @sources = relations
7
+ @sources = [relations].flatten
8
8
  end
9
9
 
10
10
  def to_s
@@ -0,0 +1,21 @@
1
module Piglet
  module Schema
    # Schema type for a bag. A bag wraps the schema of the tuples it holds
    # and answers field queries by asking that tuple schema.
    class Bag
      # tuple - the schema object describing the bag's tuples
      def initialize(tuple)
        @tuple = tuple
      end

      # The field names of the wrapped tuple schema.
      def field_names
        @tuple.field_names
      end

      # The type of the named field, as reported by the wrapped tuple schema.
      def field_type(name)
        @tuple.field_type(name)
      end

      # The wrapped tuple's string form with the enclosing parentheses
      # replaced by curly braces.
      def to_s
        tuple_string = @tuple.to_s
        tuple_string.sub(/^\((.*)\)$/, '{\1}')
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
module Piglet
  module Schema
    # Schema of a tuple: an ordered list of field names plus a map from each
    # field to its type. A field without a name (nil in the name list) has
    # its type stored under its position in the tuple instead.
    class Tuple
      attr_reader :field_names

      # field_names - ordered list of field names (nil entries are unnamed)
      # type_map    - map from field name (or position, for unnamed fields)
      #               to type
      #
      # Both arguments are copied; nil is treated as empty.
      def initialize(field_names, type_map)
        @field_names = [ ]
        @field_names = field_names.dup if field_names
        @type_map = { }
        @type_map = type_map.dup if type_map
      end

      # Returns a new Tuple with a schema described by the specified array.
      #
      # The array will be interpreted as follows: each element defines a field,
      # and a field can have an optional type. To define a typeless field simply
      # use a symbol, to define a typed field use an array with two values: the
      # first is the name and the second is the type.
      #
      # The type of a field can be one of the following:
      # * <code>:int</code>
      # * <code>:long</code>
      # * <code>:float</code>
      # * <code>:double</code>
      # * <code>:chararray</code>
      # * <code>:bytearray</code>
      # * <code>:tuple</code> or Piglet::Schema::Tuple
      # * <code>:bag</code> or Piglet::Schema::Bag
      #
      # A field given as a bare symbol gets the type <code>:bytearray</code>.
      # To define a tuple field either pass a Piglet::Schema::Tuple object as
      # the type, or use <code>:tuple</code> and a third element, which is the
      # schema of the tuple, e.g. <code>[[:a, :tuple, [:b, :c]]]</code>. Bags
      # work the same way with <code>:bag</code>.
      #
      # A field whose name is nil is unnamed; its type is stored under its
      # position in the description.
      #
      # Maps are currently not supported.
      #
      # Examples (Piglet schema description to the left with the resulting
      # string representation to the right):
      #
      #   [:a, :b, :c]                     # => (a:bytearray, b:bytearray, c:bytearray)
      #   [[:a, :chararray], [:b, :float]] # => (a:chararray, b:float)
      #   [[:a, Tuple.parse([:b, :c])]]    # => (a:tuple (b:bytearray, c:bytearray))
      #   [[:a, :bag, [:b, :c]]]           # => (a:bag {b:bytearray, c:bytearray})
      def self.parse(description)
        field_names = [ ]
        type_map = { }
        description.each_with_index do |component, index|
          case component
          when Enumerable
            head = component.first
            tail = component[1..-1]
            # Unnamed fields (nil head) are keyed by their position.
            type_map[head || index] =
              case tail.first
              when :tuple
                parse(*tail[1..-1])
              when :bag
                Bag.new(parse(*tail[1..-1]))
              else
                tail.first
              end
            field_names << head
          else
            type_map[component] = :bytearray
            field_names << component
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns a new Tuple containing the fields of this tuple followed by
      # the fields of each of the given tuples, in order. The arguments may
      # be tuples or (nested) arrays of tuples.
      #
      # When the same field name occurs more than once the name is listed
      # once per occurrence and the last occurrence decides its type.
      def union(*tuples)
        field_names = @field_names.dup
        type_map = @type_map.dup
        tuples.flatten.each do |tuple|
          tuple.field_names.each_with_index do |name, index|
            # An unnamed field is looked up by its position in the tuple it
            # comes from and stored under its position in the combined tuple.
            type_map[name || field_names.size] = tuple.field_type(name || index)
            field_names << name
          end
        end
        Tuple.new(field_names, type_map)
      end

      # Returns the type of a field, or nil if the field is not known.
      #
      # field_name - a field name, or an Integer position. A position is
      #              translated to the name at that position; if that field
      #              is unnamed the position itself is used as the key.
      def field_type(field_name)
        if Integer === field_name
          field_name = @field_names[field_name] || field_name
        end
        @type_map[field_name]
      end

      # Returns the schema as a parenthesized, comma separated list of
      # <code>name:type</code> declarations. Unnamed fields are rendered as
      # just their type; nested tuples and bags are prefixed with "tuple"
      # and "bag" respectively.
      def to_s
        field_declarations = [ ]
        @field_names.each_with_index do |field_name, index|
          # Unnamed fields have their type stored under their position.
          type = field_type(field_name || index)
          type_str = case type
            when Tuple
              "tuple #{type}"
            when Bag
              "bag #{type}"
            else
              type.to_s
            end
          if field_name
            field_declarations << "#{field_name}:#{type_str}"
          else
            field_declarations << type_str
          end
        end
        "(#{field_declarations.join(', ')})"
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
describe BinaryConditional do

  # Every example gets a test expression that prints as "true" and one mock
  # expression per type, each reporting that type from #type.
  before do
    @true_test = mock('test expression')
    @true_test.stub!(:to_s).and_return('true')
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type_name|
      typed_expression = mock("#{type_name} expression")
      typed_expression.extend Field
      typed_expression.stub!(:type).and_return(type_name)
      @expressions[type_name] = typed_expression
    end
  end

  describe '#type' do
    it 'returns the type of the true expression' do
      conditional = BinaryConditional.new(@true_test, @expressions[:int], @expressions[:float])
      conditional.type.should == :int
    end

    it 'returns int if the true expression is an Integer' do
      conditional = BinaryConditional.new(@true_test, 3, @expressions[:float])
      conditional.type.should == :int
    end

    it 'returns float if the true expression is a Float' do
      conditional = BinaryConditional.new(@true_test, 3.14, @expressions[:float])
      conditional.type.should == :float
    end

    it 'returns boolean if the true expression is true' do
      conditional = BinaryConditional.new(@true_test, true, @expressions[:float])
      conditional.type.should == :boolean
    end

    it 'returns boolean if the true expression is false' do
      conditional = BinaryConditional.new(@true_test, false, @expressions[:float])
      conditional.type.should == :boolean
    end
  end

end
@@ -0,0 +1,103 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
describe Field do

  # Every example gets an untyped mock field plus one mock expression per
  # type, each reporting that type from #type.
  before do
    @field = mock('field')
    @field.extend Field
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type_name|
      typed_expression = mock("#{type_name} expression")
      typed_expression.extend Field
      typed_expression.stub!(:type).and_return(type_name)
      @expressions[type_name] = typed_expression
    end
  end

  describe '#type' do
    # Comparison and logical operators
    [:==, :ne, :<, :>, :<=, :>=, :and, :or].each do |operator|
      operator_label = (operator == :ne ? '!=' : operator).to_s.upcase

      it "knows that a #{operator_label} expression always is of type boolean" do
        comparison = @field.send(operator, @field)
        comparison.type.should eql(:boolean)
      end
    end

    it 'knows that % yields an integer' do
      remainder = @field % 5
      remainder.type.should eql(:int)
    end

    it 'knows that a call to IsEmpty is of type boolean' do
      @field.empty?.type.should eql(:boolean)
    end

    it 'knows that the NOT operator yields a boolean' do
      @field.not.type.should eql(:boolean)
    end

    it 'knows that the "is null" operator yields a boolean' do
      @field.null?.type.should eql(:boolean)
    end

    it 'knows that the "is not null" operator yields a boolean' do
      @field.not_null?.type.should eql(:boolean)
    end

    [:int, :long, :float, :double, :chararray, :bytearray].each do |target_type|
      it "knows that a cast to #{target_type} is of type #{target_type}" do
        @field.cast(target_type).type.should eql(target_type)
      end
    end

    it 'knows that a "matches" expression is always of type boolean' do
      @field.matches(/hello world/).type.should eql(:boolean)
    end

    [:int, :long, :float, :double].each do |numeric_type|
      it "knows that negating a #{numeric_type} yields a #{numeric_type}" do
        @expressions[numeric_type].neg.type.should eql(numeric_type)
      end
    end

    # Arithmetic with an int on the left: the result has the type of the
    # right operand. The second element is the type with its article, as
    # used in the example description.
    int_arithmetic_results = [
      [:int,    'an int'],
      [:long,   'a long'],
      [:float,  'a float'],
      [:double, 'a double']
    ]

    [:+, :-, :*].each do |operator|
      int_arithmetic_results.each do |right_type, described_result|
        it "knows that int #{operator} #{right_type} yields #{described_result}" do
          result = @expressions[:int].send(operator, @expressions[right_type])
          result.type.should eql(right_type)
        end
      end
    end

    # Division: operand type pairs mapped to the expected result type,
    # checked in both operand orders.
    division_results = {
      [:int, :int] => :int,
      [:int, :long] => :long,
      [:int, :float] => :float,
      [:int, :double] => :double,
      [:long, :float] => :float,
      [:long, :double] => :double
    }

    division_results.each do |operand_types, expected_type|
      first_type = operand_types[0]
      second_type = operand_types[1]

      it "knows that #{first_type}/#{second_type} yields a #{expected_type}" do
        quotient = @expressions[first_type] / @expressions[second_type]
        quotient.type.should eql(expected_type)
      end

      it "knows that #{second_type}/#{first_type} yields a #{expected_type}" do
        quotient = @expressions[second_type] / @expressions[first_type]
        quotient.type.should eql(expected_type)
      end
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+
4
+ include Piglet::Field
5
+
6
+
7
describe InfixExpression do

  # Every example gets an explicit operator and one mock expression per
  # type, each reporting that type from #type.
  before do
    # The operator must be set up here: an instance variable that is never
    # assigned silently evaluates to nil.
    # NOTE(review): assumes the inferred type depends only on the operand
    # types, not on which operator is used -- confirm against InfixExpression.
    @operator = '+'
    @expressions = {}
    [:int, :long, :float, :double, :chararray, :bytearray, :bag, :tuple].each do |type|
      @expressions[type] = mock("#{type} expression")
      @expressions[type].extend Field
      @expressions[type].stub!(:type).and_return(type)
    end
  end

  describe '#type' do
    context 'specified' do
      it 'returns the type specified in the options' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:chararray], :type => :long)
        expr.type.should == :long
      end
    end

    context 'inferred' do
      it 'returns the type of the left expression if no other rules apply' do
        expr = InfixExpression.new('x', @expressions[:chararray], @expressions[:bytearray])
        expr.type.should == :chararray
      end

      it 'returns double if the lefthand type is a double' do
        expr = InfixExpression.new(@operator, @expressions[:double], @expressions[:int])
        expr.type.should == :double
      end

      it 'returns double if the righthand type is a double' do
        expr = InfixExpression.new(@operator, @expressions[:float], @expressions[:double])
        expr.type.should == :double
      end

      it 'returns double when the other operand is of type long' do
        expr = InfixExpression.new(@operator, @expressions[:double], @expressions[:long])
        expr.type.should == :double
      end

      it 'returns float if one type is long and the other is a float' do
        expr = InfixExpression.new(@operator, @expressions[:long], @expressions[:float])
        expr.type.should == :float
      end

      it 'returns long if the lefthand type is long, and the righthand is an int' do
        expr = InfixExpression.new(@operator, @expressions[:long], @expressions[:int])
        expr.type.should == :long
      end

      it 'returns long if the righthand type is long, and the lefthand is an int' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:long])
        expr.type.should == :long
      end

      it 'returns float if one operand is of type int and the other is a float' do
        expr = InfixExpression.new(@operator, @expressions[:int], @expressions[:float])
        expr.type.should == :float
      end
    end
  end

end