wukong 0.1.4 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/INSTALL.textile +89 -0
  2. data/README.textile +41 -74
  3. data/docpages/INSTALL.textile +94 -0
  4. data/{doc → docpages}/LICENSE.textile +0 -0
  5. data/{doc → docpages}/README-wulign.textile +6 -0
  6. data/docpages/UsingWukong-part1-get_ready.textile +17 -0
  7. data/{doc/overview.textile → docpages/UsingWukong-part2-ThinkingBigData.textile} +8 -24
  8. data/{doc → docpages}/UsingWukong-part3-parsing.textile +8 -2
  9. data/docpages/_config.yml +39 -0
  10. data/{doc/tips.textile → docpages/bigdata-tips.textile} +71 -44
  11. data/{doc → docpages}/code/api_response_example.txt +0 -0
  12. data/{doc → docpages}/code/parser_skeleton.rb +0 -0
  13. data/{doc/intro_to_map_reduce → docpages/diagrams}/MapReduceDiagram.graffle +0 -0
  14. data/docpages/favicon.ico +0 -0
  15. data/docpages/gem.css +16 -0
  16. data/docpages/hadoop-tips.textile +83 -0
  17. data/docpages/index.textile +90 -0
  18. data/docpages/intro.textile +8 -0
  19. data/docpages/moreinfo.textile +174 -0
  20. data/docpages/news.html +24 -0
  21. data/{doc → docpages}/pig/PigLatinExpressionsList.txt +0 -0
  22. data/{doc → docpages}/pig/PigLatinReferenceManual.html +0 -0
  23. data/{doc → docpages}/pig/PigLatinReferenceManual.txt +0 -0
  24. data/docpages/tutorial.textile +283 -0
  25. data/docpages/usage.textile +195 -0
  26. data/docpages/wutils.textile +263 -0
  27. data/wukong.gemspec +80 -50
  28. metadata +87 -54
  29. data/doc/INSTALL.textile +0 -41
  30. data/doc/README-tutorial.textile +0 -163
  31. data/doc/README-wutils.textile +0 -128
  32. data/doc/TODO.textile +0 -61
  33. data/doc/UsingWukong-part1-setup.textile +0 -2
  34. data/doc/UsingWukong-part2-scraping.textile +0 -2
  35. data/doc/hadoop-nfs.textile +0 -51
  36. data/doc/hadoop-setup.textile +0 -29
  37. data/doc/index.textile +0 -124
  38. data/doc/links.textile +0 -42
  39. data/doc/usage.textile +0 -102
  40. data/doc/utils.textile +0 -48
  41. data/examples/and_pig/sample_queries.rb +0 -128
  42. data/lib/wukong/and_pig.rb +0 -62
  43. data/lib/wukong/and_pig/README.textile +0 -12
  44. data/lib/wukong/and_pig/as.rb +0 -37
  45. data/lib/wukong/and_pig/data_types.rb +0 -30
  46. data/lib/wukong/and_pig/functions.rb +0 -50
  47. data/lib/wukong/and_pig/generate.rb +0 -85
  48. data/lib/wukong/and_pig/generate/variable_inflections.rb +0 -82
  49. data/lib/wukong/and_pig/junk.rb +0 -51
  50. data/lib/wukong/and_pig/operators.rb +0 -8
  51. data/lib/wukong/and_pig/operators/compound.rb +0 -29
  52. data/lib/wukong/and_pig/operators/evaluators.rb +0 -7
  53. data/lib/wukong/and_pig/operators/execution.rb +0 -15
  54. data/lib/wukong/and_pig/operators/file_methods.rb +0 -29
  55. data/lib/wukong/and_pig/operators/foreach.rb +0 -98
  56. data/lib/wukong/and_pig/operators/groupies.rb +0 -212
  57. data/lib/wukong/and_pig/operators/load_store.rb +0 -65
  58. data/lib/wukong/and_pig/operators/meta.rb +0 -42
  59. data/lib/wukong/and_pig/operators/relational.rb +0 -129
  60. data/lib/wukong/and_pig/pig_struct.rb +0 -48
  61. data/lib/wukong/and_pig/pig_var.rb +0 -95
  62. data/lib/wukong/and_pig/symbol.rb +0 -29
  63. data/lib/wukong/and_pig/utils.rb +0 -0
@@ -1,129 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # == RelationalOperators
3
- #
4
- # GROUP, COGROUP, JOIN see groupies.rb
5
- # CROSS see
6
-
7
- # distinct
8
- # filter
9
- # limit
10
- # order
11
- # split
12
- # union
13
-
14
- #
15
- # stream
16
- # load
17
- # store
18
- #
19
- module Wukong
20
- module AndPig
21
- class PigVar
22
-
23
- # ===========================================================================
24
- #
25
- # Options
26
- #
27
- def self.parallelize! str, options
28
- str << " PARALLEL #{options[:parallel]}" if options[:parallel]
29
- end
30
-
31
- # ===========================================================================
32
- #
33
- # DISTINCT
34
- #
35
- def distinct lval, options={}
36
- self.class.distinct lval, self, options
37
- end
38
-
39
- def self.distinct lval, rel, options={ }
40
- cmd_str = rel.relationize
41
- parallelize! cmd_str, options
42
- simple_operation lval, rel, :distinct, cmd_str
43
- end
44
-
45
- # ===========================================================================
46
- #
47
- # FILTER
48
- #
49
- def filter by_str
50
- new_in_chain klass, "FILTER #{relation} BY #{by_str}"
51
- end
52
- def self.filter lval, rel, by_str
53
- simple_operation lval, rel, "FILTER", "#{rel.relation} BY #{by_str}"
54
- end
55
-
56
- # ===========================================================================
57
- #
58
- # LIMIT
59
- #
60
- def limit n
61
- new_in_chain klass, "LIMIT #{relation} #{n}"
62
- end
63
-
64
- # ===========================================================================
65
- #
66
- # ORDER
67
- #
68
- # alias = ORDER alias BY { * [ASC|DESC] |
69
- # field_alias [ASC|DESC] [, field_alias [ASC|DESC] …]
70
- # } [PARALLEL n];
71
- #
72
- def order cmd_str, options={}
73
- result = new_in_chain klass, "ORDER #{relation} BY #{cmd_str}"
74
- parallelize! result.cmd, options
75
- result
76
- end
77
-
78
- # ===========================================================================
79
- #
80
- # SPLIT
81
- #
82
- # SPLIT alias INTO alias IF expression, alias IF expression [, alias IF expression …];
83
- #
84
- #
85
- def split relation_tests={}
86
- split_str = relation_tests.map do |out_rel, test|
87
- "#{out_rel} IF #{test}"
88
- end.join(", ")
89
- new_in_chain klass, "SPLIT #{relation} INTO #{split_str}"
90
- end
91
-
92
- # ===========================================================================
93
- #
94
- # CROSS
95
- #
96
- def cross *relations
97
- options = relations.extract_options!
98
- raise CrossArgumentError unless relations.length >= 1
99
- relations_str = [self, *relations].map(&:relation).join(", ")
100
- result = new_in_chain relations.first.klass, "CROSS #{relations_str}"
101
- parallelize! result.cmd, options
102
- result
103
- end
104
-
105
- # ===========================================================================
106
- #
107
- # UNION
108
- #
109
- # def self.union *relations
110
- # raise UnionArgumentError unless relations.length >= 2
111
- # new_in_chain relations.first.klass, "UNION #{relations}"
112
- # end
113
-
114
- # UNION as method
115
- def union lval, *relations
116
- self.class.union lval, [self]+relations
117
- end
118
-
119
- def self.union lval, *relations
120
- raise UnionArgumentError unless relations.length >= 2
121
- relations_str = relations.map(&:relation).join(", ")
122
- simple_operation lval, relations.first, :union, relations_str
123
- end
124
-
125
- end
126
- CrossArgumentError = ArgumentError.new("CROSS requires at least two relations. Heh heh: relations.")
127
- UnionArgumentError = ArgumentError.new("UNION requires at least two relations. Heh heh: relations.")
128
- end
129
- end
@@ -1,48 +0,0 @@
1
- module Wukong
2
- module PigStructMethods
3
- module ClassMethods
4
- #
5
- # Pig type string --
6
- # the pig type strings for each sub-element.
7
- #
8
- def typify has_rsrc=nil
9
- vars_str = members.zip(mtypes).map do |attr, mtype|
10
- "%s: %s" % [attr, mtype.typify]
11
- end
12
- vars_str = ["rsrc: chararray"] + vars_str if has_rsrc
13
- "(#{vars_str.join(', ')})"
14
- end
15
-
16
- #
17
- #
18
- #
19
- def pig_load rel, *args
20
- Wukong::AndPig::PigVar.pig_load rel, self, *args
21
- end
22
-
23
- #
24
- # Returns type for a fieldspec
25
- #
26
- def field_type field
27
- case field
28
- when Symbol then members_types[field]
29
- # when Array
30
- # if field.length > 1 then members_types[field.first].field_type(field[1..-1])
31
- # else field_type field.first
32
- # end
33
- end
34
- end
35
-
36
- end
37
- def self.included base
38
- base.extend ClassMethods
39
- end
40
- end
41
- end
42
-
43
- Struct.class_eval do
44
- include Wukong::PigStructMethods
45
- def self.mtypes
46
- members
47
- end
48
- end
@@ -1,95 +0,0 @@
1
- module Wukong
2
- module AndPig
3
-
4
- #
5
- # Make a PigVar understand the struct it describes
6
- #
7
- class PigVar
8
- attr_accessor :klass, :name, :cmd
9
- cattr_accessor :working_dir ; self.working_dir = '.'
10
- def initialize klass, name, cmd
11
- self.klass = klass
12
- self.name = name
13
- self.cmd = cmd
14
- end
15
-
16
- # Sugar for PigVar.new_relation
17
- def self.[]= name, *args
18
- set name, *args
19
- end
20
- # Sugar for PigVar.new_relation
21
- def self.[] name
22
- PIG_SYMBOLS[name]
23
- end
24
-
25
- # extract a field from an alias
26
- def _ field
27
- as_name = [name, field].join("_").to_sym
28
- AS["#{relationize}.(#{field})", as_name, Bag.new([field, field_type(field)]), nil, :skip_type]
29
- end
30
-
31
-
32
- def self.set name, rval
33
- PIG_SYMBOLS[name] = rval
34
- rval.name = name
35
- emit_setter rval.relation, rval
36
- end
37
-
38
- def relation
39
- name.relationize
40
- end
41
- alias_method :relationize, :relation
42
-
43
- #
44
- # Create a name for a new anonymous relation
45
- #
46
- def self.anon slug
47
- idx = (Wukong::AndPig.anon_var_idx += 1)
48
- "anon_#{slug}_#{idx}_".to_sym
49
- end
50
- # Create a name building off this one
51
- def anon
52
- slug = name.to_s.gsub(/^anon_/,'').gsub(/_\d+_$/,'')
53
- self.class.anon slug
54
- end
55
-
56
- #
57
- def new_in_chain lval, l_klass, l_cmd
58
- rval = self.class.new l_klass, lval, l_cmd
59
- self.class.set lval, rval
60
- end
61
-
62
- # Delegate to klass
63
- def field_type *args
64
- self.klass.field_type *args
65
- end
66
-
67
- # Fields in this relation
68
- def fields
69
- klass.members.map(&:to_sym)
70
- end
71
-
72
- #
73
- # Side-effect free operation
74
- #
75
- def simple_operation op
76
- self.class.emit "#{op.to_s.upcase} #{relation}"
77
- self
78
- end
79
-
80
- def self.simple_operation lval, rel, op, r_str
81
- cmd = "%-8s %s" % [op.to_s.upcase, r_str]
82
- rval = new(rel.klass, lval, cmd)
83
- set lval, rval
84
- end
85
-
86
- def self.simple_declaration op, r_str
87
- cmd = "%-8s %s" % [op.to_s.upcase, r_str]
88
- emit cmd
89
- end
90
-
91
- end
92
- end
93
- end
94
-
95
-
@@ -1,29 +0,0 @@
1
- module Wukong
2
- module AndPig
3
- PIG_SYMBOLS = { }
4
- mattr_accessor :anon_var_idx
5
- self.anon_var_idx = 0
6
- end
7
- end
8
-
9
-
10
- Symbol.class_eval do
11
- def << relation
12
- case
13
- when relation.is_a?(Wukong::AndPig::PigVar)
14
- Wukong::AndPig::PigVar.new_relation(self, relation)
15
- when relation.is_a?(Symbol) && (pig_var = Wukong::AndPig::PIG_SYMBOLS[relation])
16
- Wukong::AndPig::PigVar.new_relation(self, pig_var)
17
- else raise "Don't know how to pigify RHS #{relation.inspect}"
18
- end
19
- end
20
-
21
- def method_missing method, *args
22
- pig_var = Wukong::AndPig::PIG_SYMBOLS[self]
23
- if pig_var && pig_var.respond_to?(method)
24
- pig_var.send(method, *args)
25
- else
26
- super method, *args
27
- end
28
- end
29
- end
File without changes