piglet 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ doc
20
20
  pkg
21
21
 
22
22
  ## PROJECT::SPECIFIC
23
+ pig_*.log
data/README.rdoc CHANGED
@@ -209,6 +209,21 @@ and then use them just as any other operator:
209
209
 
210
210
  nifty, huh?
211
211
 
212
+ === Types & schemas
213
+
214
+ Piglet knows the schema of relations, so you can do something else that Pig lacks: introspection. This lets you do things like the following, which counts the unique values of each field in a relation:
215
+
216
+ relation = load('in', :schema => [:a, :b, :c])
217
+ relation.schema.field_names.each do |field|
218
+ grouped = relation.group(field)
219
+ counted = grouped.foreach { |r| [r[1].count] }
220
+ store(counted, "out-#{field}")
221
+ end
222
+
223
+ This feature obviously only works if you have specified a schema in the call to #load.
224
+
225
+ There are currently many limitations to this feature, so use it with caution. Since the schema support isn't completely reliable, Piglet does not enforce schemas, and it does not warn you if you try to access a field that doesn't exist. If it did, it would probably be more annoying and limiting than it would be worth.
226
+
212
227
  == Limitations
213
228
 
214
229
  The aim is to support most of Pig Latin, but currently there are some limitations.
data/lib/piglet.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  # :main: README.rdoc
2
2
  module Piglet # :nodoc:
3
- VERSION = '0.1.2'
3
+ VERSION = '0.2.0'
4
+
5
+ class PigletError < StandardError; end
6
+ class NotSupportedError < PigletError; end
4
7
 
5
8
  autoload :Interpreter, 'piglet/interpreter'
6
9
 
@@ -37,12 +40,17 @@ module Piglet # :nodoc:
37
40
  autoload :CallExpression, 'piglet/field/call_expression'
38
41
  autoload :InfixExpression, 'piglet/field/infix_expression'
39
42
  autoload :Literal, 'piglet/field/literal'
40
- autoload :Operators, 'piglet/field/operators'
43
+ autoload :Field, 'piglet/field/field'
41
44
  autoload :PrefixExpression, 'piglet/field/prefix_expression'
42
45
  autoload :Reference, 'piglet/field/reference'
43
46
  autoload :Rename, 'piglet/field/rename'
44
47
  autoload :SuffixExpression, 'piglet/field/suffix_expression'
45
48
  end
46
-
47
- class NotSupportedError < StandardError; end
49
+
50
+ module Schema
51
+ autoload :Bag, 'piglet/schema/bag'
52
+ autoload :Tuple, 'piglet/schema/tuple'
53
+
54
+ class SchemaError < PigletError; end
55
+ end
48
56
  end
@@ -1,12 +1,13 @@
1
1
  module Piglet
2
2
  module Field
3
- include Operators
4
-
5
3
  class BinaryConditional
4
+ include Field
5
+
6
6
  def initialize(test, if_true, if_false)
7
7
  @test, @if_true, @if_false = test, if_true, if_false
8
+ @type = expression_type(@if_true)
8
9
  end
9
-
10
+
10
11
  def to_s
11
12
  "(#{@test} ? #{@if_true} : #{@if_false})"
12
13
  end
@@ -1,20 +1,20 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class CallExpression # :nodoc:
4
- include Operators
4
+ include Field
5
5
 
6
- def initialize(name, inner_expression, options=nil)
6
+ def initialize(function_name, inner_expression, options=nil)
7
7
  options ||= {}
8
- @name, @inner_expression = name, inner_expression
9
- @new_name = options[:as]
8
+ @function_name, @inner_expression = function_name, inner_expression
9
+ @type = options[:type] || inner_expression.type
10
10
  end
11
-
11
+
12
12
  def simple?
13
13
  false
14
14
  end
15
15
 
16
16
  def to_s
17
- "#{@name}(#{@inner_expression})"
17
+ "#{@function_name}(#{@inner_expression})"
18
18
  end
19
19
  end
20
20
  end
@@ -0,0 +1,134 @@
1
+ module Piglet
2
+ module Field
3
+ module Field # :nodoc:
4
+ SYMBOLIC_OPERATORS = [:==, :>, :<, :>=, :<=, :%, :+, :-, :*, :/]
5
+ FUNCTIONS = [:avg, :count, :max, :min, :size, :sum, :tokenize]
6
+
7
+ attr_reader :name, :type
8
+
9
+ FUNCTIONS.each do |fun|
10
+ define_method(fun) do
11
+ CallExpression.new(fun.to_s.upcase, self, :type => function_return_type(fun, self.type))
12
+ end
13
+ end
14
+
15
+ def empty?
16
+ CallExpression.new('IsEmpty', self, :type => :boolean)
17
+ end
18
+
19
+ def diff(other)
20
+ raise NotSupportedError
21
+ end
22
+
23
+ def as(new_name)
24
+ Rename.new(new_name, self)
25
+ end
26
+
27
+ def not
28
+ PrefixExpression.new('NOT', self, true, :type => :boolean)
29
+ end
30
+
31
+ def null?
32
+ SuffixExpression.new('is null', self, :type => :boolean)
33
+ end
34
+
35
+ def not_null?
36
+ SuffixExpression.new('is not null', self, :type => :boolean)
37
+ end
38
+
39
+ def cast(type)
40
+ PrefixExpression.new("(#{type.to_s})", self, true, :type => type.to_sym)
41
+ end
42
+
43
+ def matches(pattern)
44
+ regex_options_pattern = /^\(\?.+?:(.*)\)$/
45
+ pattern = pattern.to_s.sub(regex_options_pattern, '\1') if pattern.is_a?(Regexp) && pattern.to_s =~ regex_options_pattern
46
+ InfixExpression.new('matches', self, "'#{pattern.to_s}'", :type => :boolean)
47
+ end
48
+
49
+ def neg
50
+ PrefixExpression.new('-', self, false, :type => self.type)
51
+ end
52
+
53
+ def ne(other)
54
+ InfixExpression.new('!=', self, other, :type => :boolean)
55
+ end
56
+
57
+ def and(other)
58
+ InfixExpression.new('AND', self, other, :type => :boolean)
59
+ end
60
+
61
+ def or(other)
62
+ InfixExpression.new('OR', self, other, :type => :boolean)
63
+ end
64
+
65
+ SYMBOLIC_OPERATORS.each do |op|
66
+ define_method(op) do |other|
67
+ InfixExpression.new(op.to_s, self, other, :type => symbolic_operator_return_type(op, self, other))
68
+ end
69
+ end
70
+
71
+ protected
72
+
73
+ def parenthesise(expr)
74
+ if expr.respond_to?(:simple?) && ! expr.simple?
75
+ "(#{expr})"
76
+ else
77
+ expr.to_s
78
+ end
79
+ end
80
+
81
+ def escape(str)
82
+ str.gsub(/("|'|\\)/) { |m| "\\#{$1}" }
83
+ end
84
+
85
+ def function_return_type(function_name, expression_type)
86
+ case function_name
87
+ when :avg, :sum
88
+ case expression_type
89
+ when :int, :long
90
+ :long
91
+ when :float, :double, :bytearray
92
+ :double
93
+ else
94
+ nil
95
+ end
96
+ when :count, :size
97
+ :long
98
+ when :max, :min
99
+ expression_type
100
+ when :tokenize
101
+ :bag
102
+ else
103
+ nil
104
+ end
105
+ end
106
+
107
+ def symbolic_operator_return_type(operator, left_expression, right_expression)
108
+ case operator
109
+ when :==, :>, :<, :>=, :<=
110
+ :boolean
111
+ when :%
112
+ :int
113
+ else # :+, :-, :*, :/
114
+ nil
115
+ end
116
+ end
117
+
118
+ def expression_type(expression)
119
+ case expression
120
+ when Field
121
+ expression.type
122
+ when Integer
123
+ :int
124
+ when Numeric
125
+ :float
126
+ when true, false
127
+ :boolean
128
+ else
129
+ nil
130
+ end
131
+ end
132
+ end
133
+ end
134
+ end
@@ -1,10 +1,16 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class InfixExpression # :nodoc:
4
- include Operators
4
+ include Field
5
5
 
6
- def initialize(operator, left_expression, right_expression)
6
+ def initialize(operator, left_expression, right_expression, options=nil)
7
+ options ||= {}
7
8
  @operator, @left_expression, @right_expression = operator, left_expression, right_expression
9
+ if options[:type]
10
+ @type = options[:type]
11
+ else
12
+ @type = determine_type(@left_expression, @right_expression)
13
+ end
8
14
  end
9
15
 
10
16
  def simple?
@@ -14,6 +20,23 @@ module Piglet
14
20
  def to_s
15
21
  "#{parenthesise(@left_expression)} #{@operator} #{parenthesise(@right_expression)}"
16
22
  end
23
+
24
+ private
25
+
26
+ def determine_type(left, right)
27
+ left_type = expression_type(left)
28
+ right_type = expression_type(right)
29
+
30
+ if left_type == :double || right_type == :double
31
+ :double
32
+ elsif left_type == :float || right_type == :float
33
+ :float
34
+ elsif left_type == :long || right_type == :long
35
+ :long
36
+ else
37
+ left_type
38
+ end
39
+ end
17
40
  end
18
41
  end
19
42
  end
@@ -1,10 +1,12 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class Literal
4
- include Operators
4
+ include Field
5
5
 
6
- def initialize(obj)
6
+ def initialize(obj, options=nil)
7
+ options ||= {}
7
8
  @obj = obj
9
+ @type = options[:type] || literal_type(obj)
8
10
  end
9
11
 
10
12
  def to_s
@@ -15,6 +17,21 @@ module Piglet
15
17
  "'#{escape(@obj.to_s)}'"
16
18
  end
17
19
  end
20
+
21
+ private
22
+
23
+ def literal_type(obj)
24
+ case obj
25
+ when String
26
+ :chararray
27
+ when Integer
28
+ :int
29
+ when Numeric
30
+ :double
31
+ else
32
+ :bytearray
33
+ end
34
+ end
18
35
  end
19
36
  end
20
37
  end
@@ -1,10 +1,12 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class PrefixExpression # :nodoc:
4
- include Operators
4
+ include Field
5
5
 
6
- def initialize(operator, expression, space_between=true)
6
+ def initialize(operator, expression, space_between=true, options=nil)
7
+ options ||= {}
7
8
  @operator, @expression, @space_between = operator, expression, space_between
9
+ @type = options[:type] || expression.type
8
10
  end
9
11
 
10
12
  def simple?
@@ -1,28 +1,33 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class Reference # :nodoc:
4
- include Operators
4
+ include Field
5
5
 
6
6
  def initialize(name, relation=nil, options=nil)
7
7
  options ||= {}
8
8
  @name, @parent = name, relation
9
9
  @explicit_ancestry = options[:explicit_ancestry] || false
10
+ @type = options[:type]
10
11
  end
11
12
 
12
13
  def simple?
13
14
  true
14
15
  end
15
16
 
17
+ def field(name)
18
+ Reference.new(name, self, :explicit_ancestry => true)
19
+ end
20
+
16
21
  def method_missing(name, *args)
17
22
  if name.to_s =~ /^\w+$/ && args.empty?
18
- Reference.new(name, self, :explicit_ancestry => true)
23
+ field(name)
19
24
  else
20
25
  super
21
26
  end
22
27
  end
23
28
 
24
29
  def [](n)
25
- Reference.new("\$#{n}", self, :explicit_ancestry => true)
30
+ field("\$#{n}")
26
31
  end
27
32
 
28
33
  def to_s
@@ -1,12 +1,14 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class Rename # :nodoc:
4
+ attr_reader :name, :type
5
+
4
6
  def initialize(new_name, field_expression)
5
- @new_name, @field_expression = new_name, field_expression
7
+ @name, @field_expression, @type = new_name, field_expression, field_expression.type
6
8
  end
7
-
9
+
8
10
  def to_s
9
- "#{@field_expression} AS #{@new_name}"
11
+ "#{@field_expression} AS #{@name}"
10
12
  end
11
13
  end
12
14
  end
@@ -1,10 +1,12 @@
1
1
  module Piglet
2
2
  module Field
3
3
  class SuffixExpression # :nodoc:
4
- include Operators
4
+ include Field
5
5
 
6
- def initialize(operator, expression)
6
+ def initialize(operator, expression, options=nil)
7
+ options ||= {}
7
8
  @operator, @expression = operator, expression
9
+ @type = options[:type] || expression.type
8
10
  end
9
11
 
10
12
  def simple?
@@ -1,6 +1,7 @@
1
1
  module Piglet
2
2
  module Inout
3
3
  class Load # :nodoc:
4
+ include Piglet::Relation::Relation
4
5
  include StorageTypes
5
6
 
6
7
  def initialize(path, options={})
@@ -8,6 +9,10 @@ module Piglet
8
9
  @path, @using, @schema = path, options[:using], options[:schema]
9
10
  end
10
11
 
12
+ def schema
13
+ Piglet::Schema::Tuple.parse(@schema) if @schema
14
+ end
15
+
11
16
  def to_s
12
17
  str = "LOAD '#{@path}'"
13
18
  str << " USING #{resolve_load_store_function(@using)}" if @using
@@ -27,7 +27,6 @@ module Piglet
27
27
  unless store.relation.nil?
28
28
  assignments(store.relation, handled_relations).each do |assignment|
29
29
  statements << assignment
30
- handled_relations << assignment.target
31
30
  end
32
31
  end
33
32
  statements << store
@@ -52,9 +51,7 @@ module Piglet
52
51
  # NOTE: the syntax load('path', :schema => {:a => :chararray, :b => :int})
53
52
  # would be nice, but the order of the keys can't be guaranteed in Ruby 1.8.
54
53
  def load(path, options={})
55
- load = Inout::Load.new(path, options)
56
- load.extend Piglet::Relation::Relation
57
- load
54
+ Inout::Load.new(path, options)
58
55
  end
59
56
 
60
57
  # STORE
@@ -116,6 +113,7 @@ module Piglet
116
113
  def assignments(relation, ignore_set)
117
114
  return [] if ignore_set.include?(relation)
118
115
  assignment = Assignment.new(relation)
116
+ ignore_set << relation
119
117
  if relation.sources
120
118
  (relation.sources.map { |source| assignments(source, ignore_set) } + [assignment]).flatten
121
119
  else