wukong 1.4.7 → 1.4.9
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.textile +9 -0
- data/README.textile +1 -1
- data/bin/hdp-bzip +28 -0
- data/bin/hdp-mkdir +1 -1
- data/bin/hdp-stream-flat +3 -2
- data/bin/wu-lign +32 -18
- data/docpages/pig/cookbook.html +481 -0
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +1103 -0
- data/docpages/pig/piglatin_ref2.html +14340 -0
- data/docpages/pig/setup.html +505 -0
- data/docpages/pig/skin/basic.css +166 -0
- data/docpages/pig/skin/breadcrumbs.js +237 -0
- data/docpages/pig/skin/fontsize.js +166 -0
- data/docpages/pig/skin/getBlank.js +40 -0
- data/docpages/pig/skin/getMenu.js +45 -0
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +54 -0
- data/docpages/pig/skin/profile.css +181 -0
- data/docpages/pig/skin/screen.css +587 -0
- data/docpages/pig/tutorial.html +1059 -0
- data/docpages/pig/udf.html +1509 -0
- data/examples/keystore/conditional_outputter_example.rb +70 -0
- data/examples/{graph → network_graph}/adjacency_list.rb +0 -0
- data/examples/{graph → network_graph}/breadth_first_search.rb +0 -0
- data/examples/{graph → network_graph}/gen_2paths.rb +0 -0
- data/examples/{graph → network_graph}/gen_multi_edge.rb +0 -0
- data/examples/{graph → network_graph}/gen_symmetric_links.rb +0 -0
- data/examples/pagerank/run_pagerank.sh +10 -8
- data/examples/{apache_log_parser.rb → server_logs/apache_log_parser.rb} +0 -0
- data/examples/stupidly_simple_filter.rb +43 -0
- data/lib/wukong/extensions/hash.rb +13 -0
- data/lib/wukong/extensions/hash_like.rb +7 -0
- data/lib/wukong/keystore/cassandra_conditional_outputter.rb +122 -0
- data/lib/wukong/script.rb +27 -22
- data/lib/wukong/script/hadoop_command.rb +5 -3
- data/lib/wukong/streamer/accumulating_reducer.rb +2 -1
- data/wukong.gemspec +64 -26
- metadata +89 -31
- data/docpages/pig/PigLatinReferenceManual.html +0 -19134
- data/examples/foo.rb +0 -9
- data/examples/package-local.rb +0 -100
- data/examples/package.rb +0 -96
- data/examples/run_all.sh +0 -47
@@ -0,0 +1,505 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
5
|
+
<meta content="Apache Forrest" name="Generator">
|
6
|
+
<meta name="Forrest-version" content="0.8">
|
7
|
+
<meta name="Forrest-skin-name" content="pelt">
|
8
|
+
<title>Pig Setup</title>
|
9
|
+
<link type="text/css" href="skin/basic.css" rel="stylesheet">
|
10
|
+
<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
|
11
|
+
<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
|
12
|
+
<link type="text/css" href="skin/profile.css" rel="stylesheet">
|
13
|
+
<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
|
14
|
+
<link rel="shortcut icon" href="">
|
15
|
+
</head>
|
16
|
+
<body onload="init()">
|
17
|
+
<script type="text/javascript">ndeSetTextSize();</script>
|
18
|
+
<div id="top">
|
19
|
+
<!--+
|
20
|
+
|breadtrail
|
21
|
+
+-->
|
22
|
+
<div class="breadtrail">
|
23
|
+
<a href="http://www.apache.org/">Apache</a> > <a href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/pig/">Pig</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
|
24
|
+
</div>
|
25
|
+
<!--+
|
26
|
+
|header
|
27
|
+
+-->
|
28
|
+
<div class="header">
|
29
|
+
<!--+
|
30
|
+
|start group logo
|
31
|
+
+-->
|
32
|
+
<div class="grouplogo">
|
33
|
+
<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
|
34
|
+
</div>
|
35
|
+
<!--+
|
36
|
+
|end group logo
|
37
|
+
+-->
|
38
|
+
<!--+
|
39
|
+
|start Project Logo
|
40
|
+
+-->
|
41
|
+
<div class="projectlogo">
|
42
|
+
<a href="http://hadoop.apache.org/pig/"><img class="logoImage" alt="Pig" src="images/pig-logo.gif" title="A platform for analyzing large datasets."></a>
|
43
|
+
</div>
|
44
|
+
<!--+
|
45
|
+
|end Project Logo
|
46
|
+
+-->
|
47
|
+
<!--+
|
48
|
+
|start Search
|
49
|
+
+-->
|
50
|
+
<div class="searchbox">
|
51
|
+
<form action="http://www.google.com/search" method="get" class="roundtopsmall">
|
52
|
+
<input value="" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">
|
53
|
+
<input name="Search" value="Search" type="submit">
|
54
|
+
</form>
|
55
|
+
</div>
|
56
|
+
<!--+
|
57
|
+
|end search
|
58
|
+
+-->
|
59
|
+
<!--+
|
60
|
+
|start Tabs
|
61
|
+
+-->
|
62
|
+
<ul id="tabs">
|
63
|
+
<li>
|
64
|
+
<a class="unselected" href="http://hadoop.apache.org/pig/">Project</a>
|
65
|
+
</li>
|
66
|
+
<li>
|
67
|
+
<a class="unselected" href="http://wiki.apache.org/pig/">Wiki</a>
|
68
|
+
</li>
|
69
|
+
<li class="current">
|
70
|
+
<a class="selected" href="index.html">Pig 0.7.0 Documentation</a>
|
71
|
+
</li>
|
72
|
+
</ul>
|
73
|
+
<!--+
|
74
|
+
|end Tabs
|
75
|
+
+-->
|
76
|
+
</div>
|
77
|
+
</div>
|
78
|
+
<div id="main">
|
79
|
+
<div id="publishedStrip">
|
80
|
+
<!--+
|
81
|
+
|start Subtabs
|
82
|
+
+-->
|
83
|
+
<div id="level2tabs"></div>
|
84
|
+
<!--+
|
85
|
+
|end Endtabs
|
86
|
+
+-->
|
87
|
+
<script type="text/javascript"><!--
|
88
|
+
document.write("Last Published: " + document.lastModified);
|
89
|
+
// --></script>
|
90
|
+
</div>
|
91
|
+
<!--+
|
92
|
+
|breadtrail
|
93
|
+
+-->
|
94
|
+
<div class="breadtrail">
|
95
|
+
|
96
|
+
|
97
|
+
</div>
|
98
|
+
<!--+
|
99
|
+
|start Menu, mainarea
|
100
|
+
+-->
|
101
|
+
<!--+
|
102
|
+
|start Menu
|
103
|
+
+-->
|
104
|
+
<div id="menu">
|
105
|
+
<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Pig</div>
|
106
|
+
<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
|
107
|
+
<div class="menuitem">
|
108
|
+
<a href="index.html">Overview</a>
|
109
|
+
</div>
|
110
|
+
<div class="menupage">
|
111
|
+
<div class="menupagetitle">Setup</div>
|
112
|
+
</div>
|
113
|
+
<div class="menuitem">
|
114
|
+
<a href="tutorial.html">Tutorial</a>
|
115
|
+
</div>
|
116
|
+
<div class="menuitem">
|
117
|
+
<a href="piglatin_ref1.html">Pig Latin 1</a>
|
118
|
+
</div>
|
119
|
+
<div class="menuitem">
|
120
|
+
<a href="piglatin_ref2.html">Pig Latin 2</a>
|
121
|
+
</div>
|
122
|
+
<div class="menuitem">
|
123
|
+
<a href="cookbook.html">Cookbook</a>
|
124
|
+
</div>
|
125
|
+
<div class="menuitem">
|
126
|
+
<a href="udf.html">UDFs</a>
|
127
|
+
</div>
|
128
|
+
</div>
|
129
|
+
<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Zebra</div>
|
130
|
+
<div id="menu_1.2" class="menuitemgroup">
|
131
|
+
<div class="menuitem">
|
132
|
+
<a href="zebra_overview.html">Zebra Overview </a>
|
133
|
+
</div>
|
134
|
+
<div class="menuitem">
|
135
|
+
<a href="zebra_users.html">Zebra Users </a>
|
136
|
+
</div>
|
137
|
+
<div class="menuitem">
|
138
|
+
<a href="zebra_reference.html">Zebra Reference </a>
|
139
|
+
</div>
|
140
|
+
<div class="menuitem">
|
141
|
+
<a href="zebra_mapreduce.html">Zebra MapReduce </a>
|
142
|
+
</div>
|
143
|
+
<div class="menuitem">
|
144
|
+
<a href="zebra_pig.html">Zebra Pig </a>
|
145
|
+
</div>
|
146
|
+
<div class="menuitem">
|
147
|
+
<a href="zebra_stream.html">Zebra Streaming </a>
|
148
|
+
</div>
|
149
|
+
</div>
|
150
|
+
<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Miscellaneous</div>
|
151
|
+
<div id="menu_1.3" class="menuitemgroup">
|
152
|
+
<div class="menuitem">
|
153
|
+
<a href="api/">API Docs</a>
|
154
|
+
</div>
|
155
|
+
<div class="menuitem">
|
156
|
+
<a href="http://wiki.apache.org/pig/">Wiki</a>
|
157
|
+
</div>
|
158
|
+
<div class="menuitem">
|
159
|
+
<a href="http://wiki.apache.org/pig/FAQ">FAQ</a>
|
160
|
+
</div>
|
161
|
+
<div class="menuitem">
|
162
|
+
<a href="http://hadoop.apache.org/pig/releases.html">Release Notes</a>
|
163
|
+
</div>
|
164
|
+
</div>
|
165
|
+
<div id="credit"></div>
|
166
|
+
<div id="roundbottom">
|
167
|
+
<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
|
168
|
+
<!--+
|
169
|
+
|alternative credits
|
170
|
+
+-->
|
171
|
+
<div id="credit2"></div>
|
172
|
+
</div>
|
173
|
+
<!--+
|
174
|
+
|end Menu
|
175
|
+
+-->
|
176
|
+
<!--+
|
177
|
+
|start content
|
178
|
+
+-->
|
179
|
+
<div id="content">
|
180
|
+
<div title="Portable Document Format" class="pdflink">
|
181
|
+
<a class="dida" href="setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
|
182
|
+
PDF</a>
|
183
|
+
</div>
|
184
|
+
<h1>Pig Setup</h1>
|
185
|
+
<div id="minitoc-area">
|
186
|
+
<ul class="minitoc">
|
187
|
+
<li>
|
188
|
+
<a href="#Overview">Overview</a>
|
189
|
+
<ul class="minitoc">
|
190
|
+
<li>
|
191
|
+
<a href="#req">Requirements</a>
|
192
|
+
</li>
|
193
|
+
</ul>
|
194
|
+
</li>
|
195
|
+
<li>
|
196
|
+
<a href="#Beginning+Pig">Beginning Pig</a>
|
197
|
+
<ul class="minitoc">
|
198
|
+
<li>
|
199
|
+
<a href="#Download+Pig">Download Pig</a>
|
200
|
+
</li>
|
201
|
+
<li>
|
202
|
+
<a href="#Run+Modes">Run Modes</a>
|
203
|
+
</li>
|
204
|
+
<li>
|
205
|
+
<a href="#Grunt+Shell">Grunt Shell</a>
|
206
|
+
</li>
|
207
|
+
<li>
|
208
|
+
<a href="#Script+Files">Script Files</a>
|
209
|
+
</li>
|
210
|
+
</ul>
|
211
|
+
</li>
|
212
|
+
<li>
|
213
|
+
<a href="#Advanced+Pig">Advanced Pig</a>
|
214
|
+
<ul class="minitoc">
|
215
|
+
<li>
|
216
|
+
<a href="#Build+Pig">Build Pig</a>
|
217
|
+
</li>
|
218
|
+
<li>
|
219
|
+
<a href="#Environment+Variables+and+Properties">Environment Variables and Properties</a>
|
220
|
+
</li>
|
221
|
+
<li>
|
222
|
+
<a href="#Run+Modes-N1012B">Run Modes</a>
|
223
|
+
</li>
|
224
|
+
<li>
|
225
|
+
<a href="#Embedded+Programs">Embedded Programs</a>
|
226
|
+
</li>
|
227
|
+
</ul>
|
228
|
+
</li>
|
229
|
+
<li>
|
230
|
+
<a href="#Sample+Code">Sample Code</a>
|
231
|
+
</li>
|
232
|
+
</ul>
|
233
|
+
</div>
|
234
|
+
|
235
|
+
|
236
|
+
<a name="N1000D"></a><a name="Overview"></a>
|
237
|
+
<h2 class="h3">Overview</h2>
|
238
|
+
<div class="section">
|
239
|
+
<a name="N10013"></a><a name="req"></a>
|
240
|
+
<h3 class="h4">Requirements</h3>
|
241
|
+
<p>
|
242
|
+
<strong>Unix</strong> and <strong>Windows</strong> users need the following:</p>
|
243
|
+
<ol>
|
244
|
+
|
245
|
+
<li>
|
246
|
+
<strong>Hadoop 0.20.2</strong> - <a href="http://hadoop.apache.org/common/releases.html">http://hadoop.apache.org/common/releases.html</a>
|
247
|
+
</li>
|
248
|
+
|
249
|
+
<li>
|
250
|
+
<strong>Java 1.6</strong> - <a href="http://java.sun.com/javase/downloads/index.jsp">http://java.sun.com/javase/downloads/index.jsp</a> (set JAVA_HOME to the root of your Java installation)</li>
|
251
|
+
|
252
|
+
<li>
|
253
|
+
<strong>Ant 1.7</strong> - <a href="http://ant.apache.org/">http://ant.apache.org/</a> (optional, for builds) </li>
|
254
|
+
|
255
|
+
<li>
|
256
|
+
<strong>JUnit 4.5</strong> - <a href="http://junit.sourceforge.net/">http://junit.sourceforge.net/</a> (optional, for unit tests) </li>
|
257
|
+
|
258
|
+
</ol>
|
259
|
+
<p>
|
260
|
+
<strong>Windows</strong> users need to install Cygwin and the Perl package: <a href="http://www.cygwin.com/"> http://www.cygwin.com/</a>
|
261
|
+
</p>
|
262
|
+
</div>
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
<a name="N10055"></a><a name="Beginning+Pig"></a>
|
267
|
+
<h2 class="h3">Beginning Pig</h2>
|
268
|
+
<div class="section">
|
269
|
+
<a name="N1005B"></a><a name="Download+Pig"></a>
|
270
|
+
<h3 class="h4">Download Pig</h3>
|
271
|
+
<p>To get a Pig distribution, download a recent stable release from one of the Apache Download Mirrors (see <a href="http://hadoop.apache.org/pig/releases.html"> Pig Releases</a>).</p>
|
272
|
+
<p>Unpack the downloaded Pig distribution. The Pig script is located in the bin directory (/pig-n.n.n/bin/pig).</p>
|
273
|
+
<p>Add /pig-n.n.n/bin to your path. Use export (bash,sh,ksh) or setenv (tcsh,csh). For example: </p>
|
274
|
+
<pre class="code">
|
275
|
+
$ export PATH=/<my-path-to-pig>/pig-n.n.n/bin:$PATH
|
276
|
+
</pre>
|
277
|
+
<p>Try the following command, to get a list of Pig commands: </p>
|
278
|
+
<pre class="code">
|
279
|
+
$ pig -help
|
280
|
+
</pre>
|
281
|
+
<p>Try the following command, to start the Grunt shell:</p>
|
282
|
+
<pre class="code">
|
283
|
+
$ pig
|
284
|
+
</pre>
|
285
|
+
<a name="N10081"></a><a name="Run+Modes"></a>
|
286
|
+
<h3 class="h4">Run Modes</h3>
|
287
|
+
<p>Pig has two run modes or exectypes: </p>
|
288
|
+
<ul>
|
289
|
+
|
290
|
+
<li>
|
291
|
+
<p> Local Mode - To run Pig in local mode, you need access to a single machine. </p>
|
292
|
+
</li>
|
293
|
+
|
294
|
+
<li>
|
295
|
+
<p> Mapreduce Mode - To run Pig in mapreduce mode, you need access to a Hadoop cluster and HDFS installation.
|
296
|
+
Pig will automatically allocate and deallocate a 15-node cluster.</p>
|
297
|
+
</li>
|
298
|
+
|
299
|
+
</ul>
|
300
|
+
<p>You can run the Grunt shell, Pig scripts, or embedded programs using either mode.</p>
|
301
|
+
<a name="N10099"></a><a name="Grunt+Shell"></a>
|
302
|
+
<h3 class="h4">Grunt Shell</h3>
|
303
|
+
<p>Use Pig's interactive shell, Grunt, to enter pig commands manually. See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file used in the example.</p>
|
304
|
+
<p>You can also run or execute script files from the Grunt shell.
|
305
|
+
See the <a href="piglatin_ref2.html#run">run</a> and <a href="piglatin_ref2.html#exec">exec</a> commands. </p>
|
306
|
+
<p>
|
307
|
+
<strong>Local Mode</strong>
|
308
|
+
</p>
|
309
|
+
<pre class="code">
|
310
|
+
$ pig -x local
|
311
|
+
</pre>
|
312
|
+
<p>
|
313
|
+
<strong>Mapreduce Mode</strong>
|
314
|
+
</p>
|
315
|
+
<pre class="code">
|
316
|
+
$ pig
|
317
|
+
or
|
318
|
+
$ pig -x mapreduce
|
319
|
+
</pre>
|
320
|
+
<p>For either mode, the Grunt shell is invoked and you can enter commands at the prompt. The results are displayed to your terminal screen (if DUMP is used) or to a file (if STORE is used).
|
321
|
+
</p>
|
322
|
+
<pre class="code">
|
323
|
+
grunt> A = load 'passwd' using PigStorage(':');
|
324
|
+
grunt> B = foreach A generate $0 as id;
|
325
|
+
grunt> dump B;
|
326
|
+
grunt> store B;
|
327
|
+
</pre>
|
328
|
+
<a name="N100CA"></a><a name="Script+Files"></a>
|
329
|
+
<h3 class="h4">Script Files</h3>
|
330
|
+
<p>Use script files to run Pig commands as batch jobs. See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file and the script file (id.pig) used in the example.</p>
|
331
|
+
<p>
|
332
|
+
<strong>Local Mode</strong>
|
333
|
+
</p>
|
334
|
+
<pre class="code">
|
335
|
+
$ pig -x local id.pig
|
336
|
+
</pre>
|
337
|
+
<p>
|
338
|
+
<strong>Mapreduce Mode</strong>
|
339
|
+
</p>
|
340
|
+
<pre class="code">
|
341
|
+
$ pig id.pig
|
342
|
+
or
|
343
|
+
$ pig -x mapreduce id.pig
|
344
|
+
</pre>
|
345
|
+
<p>For either mode, the Pig Latin statements are executed and the results are displayed to your terminal screen (if DUMP is used) or to a file (if STORE is used).</p>
|
346
|
+
</div>
|
347
|
+
|
348
|
+
|
349
|
+
|
350
|
+
<a name="N100ED"></a><a name="Advanced+Pig"></a>
|
351
|
+
<h2 class="h3">Advanced Pig</h2>
|
352
|
+
<div class="section">
|
353
|
+
<a name="N100F3"></a><a name="Build+Pig"></a>
|
354
|
+
<h3 class="h4">Build Pig</h3>
|
355
|
+
<p>To build pig, do the following:</p>
|
356
|
+
<ol>
|
357
|
+
|
358
|
+
<li> Check out the Pig code from SVN: <em>svn co http://svn.apache.org/repos/asf/hadoop/pig/trunk</em>. </li>
|
359
|
+
|
360
|
+
<li> Build the code from the top directory: <em>ant</em>. If the build is successful, you should see the <em>pig.jar</em> created in that directory. </li>
|
361
|
+
|
362
|
+
<li> Validate your <em>pig.jar</em> by running a unit test: <em>ant test</em>
|
363
|
+
</li>
|
364
|
+
|
365
|
+
</ol>
|
366
|
+
<a name="N10117"></a><a name="Environment+Variables+and+Properties"></a>
|
367
|
+
<h3 class="h4">Environment Variables and Properties</h3>
|
368
|
+
<p>See <a href="setup.html#Download+Pig">Download Pig</a>.</p>
|
369
|
+
<p>The Pig environment variables are described in the Pig script file, located in the /pig-n.n.n/bin directory.</p>
|
370
|
+
<p>The Pig properties file, pig.properties, is located in the /pig-n.n.n/conf directory. You can specify an alternate location using the PIG_CONF_DIR environment variable.</p>
|
371
|
+
<a name="N1012B"></a><a name="Run+Modes-N1012B"></a>
|
372
|
+
<h3 class="h4">Run Modes</h3>
|
373
|
+
<p>See <a href="setup.html#Run+Modes">Run Modes</a>. </p>
|
374
|
+
<a name="N10139"></a><a name="Embedded+Programs"></a>
|
375
|
+
<h3 class="h4">Embedded Programs</h3>
|
376
|
+
<p>Use the embedded option to embed Pig commands in a host language and run the program.
|
377
|
+
See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file and java files (idlocal.java, idmapreduce.java) used in the examples.</p>
|
378
|
+
<p>
|
379
|
+
<strong>Local Mode</strong>
|
380
|
+
</p>
|
381
|
+
<p>From your current working directory, compile the program: </p>
|
382
|
+
<pre class="code">
|
383
|
+
$ javac -cp pig.jar idlocal.java
|
384
|
+
</pre>
|
385
|
+
<p>Note: idlocal.class is written to your current working directory. Include “.” in the class path when you run the program. </p>
|
386
|
+
<p>From your current working directory, run the program:
|
387
|
+
</p>
|
388
|
+
<pre class="code">
|
389
|
+
Unix: $ java -cp pig.jar:. idlocal
|
390
|
+
Cygwin: $ java -cp '.;pig.jar' idlocal
|
391
|
+
</pre>
|
392
|
+
<p>To view the results, check the output file, id.out. </p>
|
393
|
+
<p>
|
394
|
+
<strong>Mapreduce Mode</strong>
|
395
|
+
</p>
|
396
|
+
<p>Point $HADOOPDIR to the directory that contains the hadoop-site.xml file. Example:
|
397
|
+
</p>
|
398
|
+
<pre class="code">
|
399
|
+
$ export HADOOPDIR=/yourHADOOPsite/conf
|
400
|
+
</pre>
|
401
|
+
<p>From your current working directory, compile the program:
|
402
|
+
</p>
|
403
|
+
<pre class="code">
|
404
|
+
$ javac -cp pig.jar idmapreduce.java
|
405
|
+
</pre>
|
406
|
+
<p>Note: idmapreduce.class is written to your current working directory. Include “.” in the class path when you run the program. </p>
|
407
|
+
<p>From your current working directory, run the program:
|
408
|
+
</p>
|
409
|
+
<pre class="code">
|
410
|
+
Unix: $ java -cp pig.jar:.:$HADOOPDIR idmapreduce
|
411
|
+
Cygwin: $ java -cp ".;pig.jar;$HADOOPDIR" idmapreduce
|
412
|
+
</pre>
|
413
|
+
<p>To view the results, check the idout directory on your Hadoop system. </p>
|
414
|
+
</div>
|
415
|
+
|
416
|
+
|
417
|
+
|
418
|
+
<a name="N1017F"></a><a name="Sample+Code"></a>
|
419
|
+
<h2 class="h3">Sample Code</h2>
|
420
|
+
<div class="section">
|
421
|
+
<p>The sample code is based on Pig Latin statements that extract all user IDs from the /etc/passwd file. </p>
|
422
|
+
<p>Copy the /etc/passwd file to your local working directory.</p>
|
423
|
+
<p>
|
424
|
+
<strong>id.pig</strong>
|
425
|
+
</p>
|
426
|
+
<p>For the Grunt Shell and script files. </p>
|
427
|
+
<pre class="code">
|
428
|
+
A = load 'passwd' using PigStorage(':');
|
429
|
+
B = foreach A generate $0 as id;
|
430
|
+
dump B;
|
431
|
+
store B into 'id.out';
|
432
|
+
</pre>
|
433
|
+
<p>
|
434
|
+
<strong>idlocal.java</strong>
|
435
|
+
</p>
|
436
|
+
<p>For embedded programs. </p>
|
437
|
+
<pre class="code">
|
438
|
+
import java.io.IOException;
|
439
|
+
import org.apache.pig.PigServer;
|
440
|
+
public class idlocal{
|
441
|
+
public static void main(String[] args) {
|
442
|
+
try {
|
443
|
+
PigServer pigServer = new PigServer("local");
|
444
|
+
runIdQuery(pigServer, "passwd");
|
445
|
+
}
|
446
|
+
catch(Exception e) {
|
447
|
+
}
|
448
|
+
}
|
449
|
+
public static void runIdQuery(PigServer pigServer, String inputFile) throws IOException {
|
450
|
+
pigServer.registerQuery("A = load '" + inputFile + "' using PigStorage(':');");
|
451
|
+
pigServer.registerQuery("B = foreach A generate $0 as id;");
|
452
|
+
pigServer.store("B", "id.out");
|
453
|
+
}
|
454
|
+
}
|
455
|
+
</pre>
|
456
|
+
<p>
|
457
|
+
<strong>idmapreduce.java</strong>
|
458
|
+
</p>
|
459
|
+
<p>For embedded programs. </p>
|
460
|
+
<pre class="code">
|
461
|
+
import java.io.IOException;
|
462
|
+
import org.apache.pig.PigServer;
|
463
|
+
public class idmapreduce{
|
464
|
+
public static void main(String[] args) {
|
465
|
+
try {
|
466
|
+
PigServer pigServer = new PigServer("mapreduce");
|
467
|
+
runIdQuery(pigServer, "passwd");
|
468
|
+
}
|
469
|
+
catch(Exception e) {
|
470
|
+
}
|
471
|
+
}
|
472
|
+
public static void runIdQuery(PigServer pigServer, String inputFile) throws IOException {
|
473
|
+
pigServer.registerQuery("A = load '" + inputFile + "' using PigStorage(':');");
|
474
|
+
pigServer.registerQuery("B = foreach A generate $0 as id;");
|
475
|
+
pigServer.store("B", "idout");
|
476
|
+
}
|
477
|
+
}
|
478
|
+
</pre>
|
479
|
+
</div>
|
480
|
+
|
481
|
+
</div>
|
482
|
+
<!--+
|
483
|
+
|end content
|
484
|
+
+-->
|
485
|
+
<div class="clearboth"> </div>
|
486
|
+
</div>
|
487
|
+
<div id="footer">
|
488
|
+
<!--+
|
489
|
+
|start bottomstrip
|
490
|
+
+-->
|
491
|
+
<div class="lastmodified">
|
492
|
+
<script type="text/javascript"><!--
|
493
|
+
document.write("Last Published: " + document.lastModified);
|
494
|
+
// --></script>
|
495
|
+
</div>
|
496
|
+
<div class="copyright">
|
497
|
+
Copyright ©
|
498
|
+
2007-2010 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
|
499
|
+
</div>
|
500
|
+
<!--+
|
501
|
+
|end bottomstrip
|
502
|
+
+-->
|
503
|
+
</div>
|
504
|
+
</body>
|
505
|
+
</html>
|