wukong 1.4.7 → 1.4.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/CHANGELOG.textile +9 -0
  2. data/README.textile +1 -1
  3. data/bin/hdp-bzip +28 -0
  4. data/bin/hdp-mkdir +1 -1
  5. data/bin/hdp-stream-flat +3 -2
  6. data/bin/wu-lign +32 -18
  7. data/docpages/pig/cookbook.html +481 -0
  8. data/docpages/pig/images/hadoop-logo.jpg +0 -0
  9. data/docpages/pig/images/instruction_arrow.png +0 -0
  10. data/docpages/pig/images/pig-logo.gif +0 -0
  11. data/docpages/pig/piglatin_ref1.html +1103 -0
  12. data/docpages/pig/piglatin_ref2.html +14340 -0
  13. data/docpages/pig/setup.html +505 -0
  14. data/docpages/pig/skin/basic.css +166 -0
  15. data/docpages/pig/skin/breadcrumbs.js +237 -0
  16. data/docpages/pig/skin/fontsize.js +166 -0
  17. data/docpages/pig/skin/getBlank.js +40 -0
  18. data/docpages/pig/skin/getMenu.js +45 -0
  19. data/docpages/pig/skin/images/chapter.gif +0 -0
  20. data/docpages/pig/skin/images/chapter_open.gif +0 -0
  21. data/docpages/pig/skin/images/current.gif +0 -0
  22. data/docpages/pig/skin/images/external-link.gif +0 -0
  23. data/docpages/pig/skin/images/header_white_line.gif +0 -0
  24. data/docpages/pig/skin/images/page.gif +0 -0
  25. data/docpages/pig/skin/images/pdfdoc.gif +0 -0
  26. data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  27. data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  28. data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  29. data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  30. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  31. data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  32. data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  33. data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  34. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  35. data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  36. data/docpages/pig/skin/print.css +54 -0
  37. data/docpages/pig/skin/profile.css +181 -0
  38. data/docpages/pig/skin/screen.css +587 -0
  39. data/docpages/pig/tutorial.html +1059 -0
  40. data/docpages/pig/udf.html +1509 -0
  41. data/examples/keystore/conditional_outputter_example.rb +70 -0
  42. data/examples/{graph → network_graph}/adjacency_list.rb +0 -0
  43. data/examples/{graph → network_graph}/breadth_first_search.rb +0 -0
  44. data/examples/{graph → network_graph}/gen_2paths.rb +0 -0
  45. data/examples/{graph → network_graph}/gen_multi_edge.rb +0 -0
  46. data/examples/{graph → network_graph}/gen_symmetric_links.rb +0 -0
  47. data/examples/pagerank/run_pagerank.sh +10 -8
  48. data/examples/{apache_log_parser.rb → server_logs/apache_log_parser.rb} +0 -0
  49. data/examples/stupidly_simple_filter.rb +43 -0
  50. data/lib/wukong/extensions/hash.rb +13 -0
  51. data/lib/wukong/extensions/hash_like.rb +7 -0
  52. data/lib/wukong/keystore/cassandra_conditional_outputter.rb +122 -0
  53. data/lib/wukong/script.rb +27 -22
  54. data/lib/wukong/script/hadoop_command.rb +5 -3
  55. data/lib/wukong/streamer/accumulating_reducer.rb +2 -1
  56. data/wukong.gemspec +64 -26
  57. metadata +89 -31
  58. data/docpages/pig/PigLatinReferenceManual.html +0 -19134
  59. data/examples/foo.rb +0 -9
  60. data/examples/package-local.rb +0 -100
  61. data/examples/package.rb +0 -96
  62. data/examples/run_all.sh +0 -47
@@ -0,0 +1,505 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta content="Apache Forrest" name="Generator">
6
+ <meta name="Forrest-version" content="0.8">
7
+ <meta name="Forrest-skin-name" content="pelt">
8
+ <title>Pig Setup</title>
9
+ <link type="text/css" href="skin/basic.css" rel="stylesheet">
10
+ <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11
+ <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12
+ <link type="text/css" href="skin/profile.css" rel="stylesheet">
13
+ <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14
+ <link rel="shortcut icon" href="">
15
+ </head>
16
+ <body onload="init()">
17
+ <script type="text/javascript">ndeSetTextSize();</script>
18
+ <div id="top">
19
+ <!--+
20
+ |breadtrail
21
+ +-->
22
+ <div class="breadtrail">
23
+ <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/pig/">Pig</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24
+ </div>
25
+ <!--+
26
+ |header
27
+ +-->
28
+ <div class="header">
29
+ <!--+
30
+ |start group logo
31
+ +-->
32
+ <div class="grouplogo">
33
+ <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34
+ </div>
35
+ <!--+
36
+ |end group logo
37
+ +-->
38
+ <!--+
39
+ |start Project Logo
40
+ +-->
41
+ <div class="projectlogo">
42
+ <a href="http://hadoop.apache.org/pig/"><img class="logoImage" alt="Pig" src="images/pig-logo.gif" title="A platform for analyzing large datasets."></a>
43
+ </div>
44
+ <!--+
45
+ |end Project Logo
46
+ +-->
47
+ <!--+
48
+ |start Search
49
+ +-->
50
+ <div class="searchbox">
51
+ <form action="http://www.google.com/search" method="get" class="roundtopsmall">
52
+ <input value="" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
53
+ <input name="Search" value="Search" type="submit">
54
+ </form>
55
+ </div>
56
+ <!--+
57
+ |end search
58
+ +-->
59
+ <!--+
60
+ |start Tabs
61
+ +-->
62
+ <ul id="tabs">
63
+ <li>
64
+ <a class="unselected" href="http://hadoop.apache.org/pig/">Project</a>
65
+ </li>
66
+ <li>
67
+ <a class="unselected" href="http://wiki.apache.org/pig/">Wiki</a>
68
+ </li>
69
+ <li class="current">
70
+ <a class="selected" href="index.html">Pig 0.7.0 Documentation</a>
71
+ </li>
72
+ </ul>
73
+ <!--+
74
+ |end Tabs
75
+ +-->
76
+ </div>
77
+ </div>
78
+ <div id="main">
79
+ <div id="publishedStrip">
80
+ <!--+
81
+ |start Subtabs
82
+ +-->
83
+ <div id="level2tabs"></div>
84
+ <!--+
85
+ |end Endtabs
86
+ +-->
87
+ <script type="text/javascript"><!--
88
+ document.write("Last Published: " + document.lastModified);
89
+ // --></script>
90
+ </div>
91
+ <!--+
92
+ |breadtrail
93
+ +-->
94
+ <div class="breadtrail">
95
+
96
+ &nbsp;
97
+ </div>
98
+ <!--+
99
+ |start Menu, mainarea
100
+ +-->
101
+ <!--+
102
+ |start Menu
103
+ +-->
104
+ <div id="menu">
105
+ <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Pig</div>
106
+ <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
107
+ <div class="menuitem">
108
+ <a href="index.html">Overview</a>
109
+ </div>
110
+ <div class="menupage">
111
+ <div class="menupagetitle">Setup</div>
112
+ </div>
113
+ <div class="menuitem">
114
+ <a href="tutorial.html">Tutorial</a>
115
+ </div>
116
+ <div class="menuitem">
117
+ <a href="piglatin_ref1.html">Pig Latin 1</a>
118
+ </div>
119
+ <div class="menuitem">
120
+ <a href="piglatin_ref2.html">Pig Latin 2</a>
121
+ </div>
122
+ <div class="menuitem">
123
+ <a href="cookbook.html">Cookbook</a>
124
+ </div>
125
+ <div class="menuitem">
126
+ <a href="udf.html">UDFs</a>
127
+ </div>
128
+ </div>
129
+ <div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Zebra</div>
130
+ <div id="menu_1.2" class="menuitemgroup">
131
+ <div class="menuitem">
132
+ <a href="zebra_overview.html">Zebra Overview </a>
133
+ </div>
134
+ <div class="menuitem">
135
+ <a href="zebra_users.html">Zebra Users </a>
136
+ </div>
137
+ <div class="menuitem">
138
+ <a href="zebra_reference.html">Zebra Reference </a>
139
+ </div>
140
+ <div class="menuitem">
141
+ <a href="zebra_mapreduce.html">Zebra MapReduce </a>
142
+ </div>
143
+ <div class="menuitem">
144
+ <a href="zebra_pig.html">Zebra Pig </a>
145
+ </div>
146
+ <div class="menuitem">
147
+ <a href="zebra_stream.html">Zebra Streaming </a>
148
+ </div>
149
+ </div>
150
+ <div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Miscellaneous</div>
151
+ <div id="menu_1.3" class="menuitemgroup">
152
+ <div class="menuitem">
153
+ <a href="api/">API Docs</a>
154
+ </div>
155
+ <div class="menuitem">
156
+ <a href="http://wiki.apache.org/pig/">Wiki</a>
157
+ </div>
158
+ <div class="menuitem">
159
+ <a href="http://wiki.apache.org/pig/FAQ">FAQ</a>
160
+ </div>
161
+ <div class="menuitem">
162
+ <a href="http://hadoop.apache.org/pig/releases.html">Release Notes</a>
163
+ </div>
164
+ </div>
165
+ <div id="credit"></div>
166
+ <div id="roundbottom">
167
+ <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
168
+ <!--+
169
+ |alternative credits
170
+ +-->
171
+ <div id="credit2"></div>
172
+ </div>
173
+ <!--+
174
+ |end Menu
175
+ +-->
176
+ <!--+
177
+ |start content
178
+ +-->
179
+ <div id="content">
180
+ <div title="Portable Document Format" class="pdflink">
181
+ <a class="dida" href="setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
182
+ PDF</a>
183
+ </div>
184
+ <h1>Pig Setup</h1>
185
+ <div id="minitoc-area">
186
+ <ul class="minitoc">
187
+ <li>
188
+ <a href="#Overview">Overview</a>
189
+ <ul class="minitoc">
190
+ <li>
191
+ <a href="#req">Requirements</a>
192
+ </li>
193
+ </ul>
194
+ </li>
195
+ <li>
196
+ <a href="#Beginning+Pig">Beginning Pig</a>
197
+ <ul class="minitoc">
198
+ <li>
199
+ <a href="#Download+Pig">Download Pig</a>
200
+ </li>
201
+ <li>
202
+ <a href="#Run+Modes">Run Modes</a>
203
+ </li>
204
+ <li>
205
+ <a href="#Grunt+Shell">Grunt Shell</a>
206
+ </li>
207
+ <li>
208
+ <a href="#Script+Files">Script Files</a>
209
+ </li>
210
+ </ul>
211
+ </li>
212
+ <li>
213
+ <a href="#Advanced+Pig">Advanced Pig</a>
214
+ <ul class="minitoc">
215
+ <li>
216
+ <a href="#Build+Pig">Build Pig</a>
217
+ </li>
218
+ <li>
219
+ <a href="#Environment+Variables+and+Properties">Environment Variables and Properties</a>
220
+ </li>
221
+ <li>
222
+ <a href="#Run+Modes-N1012B">Run Modes</a>
223
+ </li>
224
+ <li>
225
+ <a href="#Embedded+Programs">Embedded Programs</a>
226
+ </li>
227
+ </ul>
228
+ </li>
229
+ <li>
230
+ <a href="#Sample+Code">Sample Code</a>
231
+ </li>
232
+ </ul>
233
+ </div>
234
+
235
+
236
+ <a name="N1000D"></a><a name="Overview"></a>
237
+ <h2 class="h3">Overview</h2>
238
+ <div class="section">
239
+ <a name="N10013"></a><a name="req"></a>
240
+ <h3 class="h4">Requirements</h3>
241
+ <p>
242
+ <strong>Unix</strong> and <strong>Windows</strong> users need the following:</p>
243
+ <ol>
244
+
245
+ <li>
246
+ <strong>Hadoop 0.20.2</strong> - <a href="http://hadoop.apache.org/common/releases.html">http://hadoop.apache.org/common/releases.html</a>
247
+ </li>
248
+
249
+ <li>
250
+ <strong>Java 1.6</strong> - <a href="http://java.sun.com/javase/downloads/index.jsp">http://java.sun.com/javase/downloads/index.jsp</a> (set JAVA_HOME to the root of your Java installation)</li>
251
+
252
+ <li>
253
+ <strong>Ant 1.7</strong> - <a href="http://ant.apache.org/">http://ant.apache.org/</a> (optional, for builds) </li>
254
+
255
+ <li>
256
+ <strong>JUnit 4.5</strong> - <a href="http://junit.sourceforge.net/">http://junit.sourceforge.net/</a> (optional, for unit tests) </li>
257
+
258
+ </ol>
259
+ <p>
260
+ <strong>Windows</strong> users need to install Cygwin and the Perl package: <a href="http://www.cygwin.com/"> http://www.cygwin.com/</a>
261
+ </p>
262
+ </div>
263
+
264
+
265
+
266
+ <a name="N10055"></a><a name="Beginning+Pig"></a>
267
+ <h2 class="h3">Beginning Pig</h2>
268
+ <div class="section">
269
+ <a name="N1005B"></a><a name="Download+Pig"></a>
270
+ <h3 class="h4">Download Pig</h3>
271
+ <p>To get a Pig distribution, download a recent stable release from one of the Apache Download Mirrors (see <a href="http://hadoop.apache.org/pig/releases.html"> Pig Releases</a>).</p>
272
+ <p>Unpack the downloaded Pig distribution. The Pig script is located in the bin directory (/pig-n.n.n/bin/pig).</p>
273
+ <p>Add /pig-n.n.n/bin to your path. Use export (bash,sh,ksh) or setenv (tcsh,csh). For example: </p>
274
+ <pre class="code">
275
+ $ export PATH=/&lt;my-path-to-pig&gt;/pig-n.n.n/bin:$PATH
276
+ </pre>
277
+ <p>Try the following command, to get a list of Pig commands: </p>
278
+ <pre class="code">
279
+ $ pig -help
280
+ </pre>
281
+ <p>Try the following command, to start the Grunt shell:</p>
282
+ <pre class="code">
283
+ $ pig
284
+ </pre>
285
+ <a name="N10081"></a><a name="Run+Modes"></a>
286
+ <h3 class="h4">Run Modes</h3>
287
+ <p>Pig has two run modes or exectypes: </p>
288
+ <ul>
289
+
290
+ <li>
291
+ <p> Local Mode - To run Pig in local mode, you need access to a single machine. </p>
292
+ </li>
293
+
294
+ <li>
295
+ <p> Mapreduce Mode - To run Pig in mapreduce mode, you need access to a Hadoop cluster and HDFS installation.
296
+ Pig will automatically allocate and deallocate a 15-node cluster.</p>
297
+ </li>
298
+
299
+ </ul>
300
+ <p>You can run the Grunt shell, Pig scripts, or embedded programs using either mode.</p>
301
+ <a name="N10099"></a><a name="Grunt+Shell"></a>
302
+ <h3 class="h4">Grunt Shell</h3>
303
+ <p>Use Pig's interactive shell, Grunt, to enter pig commands manually. See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file used in the example.</p>
304
+ <p>You can also run or execute script files from the Grunt shell.
305
+ See the <a href="piglatin_ref2.html#run">run</a> and <a href="piglatin_ref2.html#exec">exec</a> commands. </p>
306
+ <p>
307
+ <strong>Local Mode</strong>
308
+ </p>
309
+ <pre class="code">
310
+ $ pig -x local
311
+ </pre>
312
+ <p>
313
+ <strong>Mapreduce Mode</strong>
314
+ </p>
315
+ <pre class="code">
316
+ $ pig
317
+ or
318
+ $ pig -x mapreduce
319
+ </pre>
320
+ <p>For either mode, the Grunt shell is invoked and you can enter commands at the prompt. The results are displayed to your terminal screen (if DUMP is used) or to a file (if STORE is used).
321
+ </p>
322
+ <pre class="code">
323
+ grunt&gt; A = load 'passwd' using PigStorage(':');
324
+ grunt&gt; B = foreach A generate $0 as id;
325
+ grunt&gt; dump B;
326
+ grunt&gt; store B into 'id.out';
327
+ </pre>
328
+ <a name="N100CA"></a><a name="Script+Files"></a>
329
+ <h3 class="h4">Script Files</h3>
330
+ <p>Use script files to run Pig commands as batch jobs. See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file and the script file (id.pig) used in the example.</p>
331
+ <p>
332
+ <strong>Local Mode</strong>
333
+ </p>
334
+ <pre class="code">
335
+ $ pig -x local id.pig
336
+ </pre>
337
+ <p>
338
+ <strong>Mapreduce Mode</strong>
339
+ </p>
340
+ <pre class="code">
341
+ $ pig id.pig
342
+ or
343
+ $ pig -x mapreduce id.pig
344
+ </pre>
345
+ <p>For either mode, the Pig Latin statements are executed and the results are displayed to your terminal screen (if DUMP is used) or to a file (if STORE is used).</p>
346
+ </div>
347
+
348
+
349
+
350
+ <a name="N100ED"></a><a name="Advanced+Pig"></a>
351
+ <h2 class="h3">Advanced Pig</h2>
352
+ <div class="section">
353
+ <a name="N100F3"></a><a name="Build+Pig"></a>
354
+ <h3 class="h4">Build Pig</h3>
355
+ <p>To build pig, do the following:</p>
356
+ <ol>
357
+
358
+ <li> Check out the Pig code from SVN: <em>svn co http://svn.apache.org/repos/asf/hadoop/pig/trunk</em>. </li>
359
+
360
+ <li> Build the code from the top directory: <em>ant</em>. If the build is successful, you should see the <em>pig.jar</em> created in that directory. </li>
361
+
362
+ <li> Validate your <em>pig.jar</em> by running a unit test: <em>ant test</em>
363
+ </li>
364
+
365
+ </ol>
366
+ <a name="N10117"></a><a name="Environment+Variables+and+Properties"></a>
367
+ <h3 class="h4">Environment Variables and Properties</h3>
368
+ <p>See <a href="setup.html#Download+Pig">Download Pig</a>.</p>
369
+ <p>The Pig environment variables are described in the Pig script file, located in the /pig-n.n.n/bin directory.</p>
370
+ <p>The Pig properties file, pig.properties, is located in the /pig-n.n.n/conf directory. You can specify an alternate location using the PIG_CONF_DIR environment variable.</p>
371
+ <a name="N1012B"></a><a name="Run+Modes-N1012B"></a>
372
+ <h3 class="h4">Run Modes</h3>
373
+ <p>See <a href="setup.html#Run+Modes">Run Modes</a>. </p>
374
+ <a name="N10139"></a><a name="Embedded+Programs"></a>
375
+ <h3 class="h4">Embedded Programs</h3>
376
+ <p>Use the embedded option to embed Pig commands in a host language and run the program.
377
+ See the <a href="setup.html#Sample+Code">Sample Code</a> for instructions about the passwd file and java files (idlocal.java, idmapreduce.java) used in the examples.</p>
378
+ <p>
379
+ <strong>Local Mode</strong>
380
+ </p>
381
+ <p>From your current working directory, compile the program: </p>
382
+ <pre class="code">
383
+ $ javac -cp pig.jar idlocal.java
384
+ </pre>
385
+ <p>Note: idlocal.class is written to your current working directory. Include &ldquo;.&rdquo; in the class path when you run the program. </p>
386
+ <p>From your current working directory, run the program:
387
+ </p>
388
+ <pre class="code">
389
+ Unix: $ java -cp pig.jar:. idlocal
390
+ Cygwin: $ java -cp '.;pig.jar' idlocal
391
+ </pre>
392
+ <p>To view the results, check the output file, id.out. </p>
393
+ <p>
394
+ <strong>Mapreduce Mode</strong>
395
+ </p>
396
+ <p>Point $HADOOPDIR to the directory that contains the hadoop-site.xml file. Example:
397
+ </p>
398
+ <pre class="code">
399
+ $ export HADOOPDIR=/yourHADOOPsite/conf
400
+ </pre>
401
+ <p>From your current working directory, compile the program:
402
+ </p>
403
+ <pre class="code">
404
+ $ javac -cp pig.jar idmapreduce.java
405
+ </pre>
406
+ <p>Note: idmapreduce.class is written to your current working directory. Include &ldquo;.&rdquo; in the class path when you run the program. </p>
407
+ <p>From your current working directory, run the program:
408
+ </p>
409
+ <pre class="code">
410
+ Unix: $ java -cp pig.jar:.:$HADOOPDIR idmapreduce
411
+ Cygwin: $ java -cp ".;pig.jar;$HADOOPDIR" idmapreduce
412
+ </pre>
413
+ <p>To view the results, check the idout directory on your Hadoop system. </p>
414
+ </div>
415
+
416
+
417
+
418
+ <a name="N1017F"></a><a name="Sample+Code"></a>
419
+ <h2 class="h3">Sample Code</h2>
420
+ <div class="section">
421
+ <p>The sample code is based on Pig Latin statements that extract all user IDs from the /etc/passwd file. </p>
422
+ <p>Copy the /etc/passwd file to your local working directory.</p>
423
+ <p>
424
+ <strong>id.pig</strong>
425
+ </p>
426
+ <p>For the Grunt Shell and script files. </p>
427
+ <pre class="code">
428
+ A = load 'passwd' using PigStorage(':');
429
+ B = foreach A generate $0 as id;
430
+ dump B;
431
+ store B into 'id.out';
432
+ </pre>
433
+ <p>
434
+ <strong>idlocal.java</strong>
435
+ </p>
436
+ <p>For embedded programs. </p>
437
+ <pre class="code">
438
+ import java.io.IOException;
439
+ import org.apache.pig.PigServer;
440
+ public class idlocal{
441
+ public static void main(String[] args) {
442
+ try {
443
+ PigServer pigServer = new PigServer("local");
444
+ runIdQuery(pigServer, "passwd");
445
+ }
446
+ catch(Exception e) {
447
+ }
448
+ }
449
+ public static void runIdQuery(PigServer pigServer, String inputFile) throws IOException {
450
+ pigServer.registerQuery("A = load '" + inputFile + "' using PigStorage(':');");
451
+ pigServer.registerQuery("B = foreach A generate $0 as id;");
452
+ pigServer.store("B", "id.out");
453
+ }
454
+ }
455
+ </pre>
456
+ <p>
457
+ <strong>idmapreduce.java</strong>
458
+ </p>
459
+ <p>For embedded programs. </p>
460
+ <pre class="code">
461
+ import java.io.IOException;
462
+ import org.apache.pig.PigServer;
463
+ public class idmapreduce{
464
+ public static void main(String[] args) {
465
+ try {
466
+ PigServer pigServer = new PigServer("mapreduce");
467
+ runIdQuery(pigServer, "passwd");
468
+ }
469
+ catch(Exception e) {
470
+ }
471
+ }
472
+ public static void runIdQuery(PigServer pigServer, String inputFile) throws IOException {
473
+ pigServer.registerQuery("A = load '" + inputFile + "' using PigStorage(':');");
474
+ pigServer.registerQuery("B = foreach A generate $0 as id;");
475
+ pigServer.store("B", "idout");
476
+ }
477
+ }
478
+ </pre>
479
+ </div>
480
+
481
+ </div>
482
+ <!--+
483
+ |end content
484
+ +-->
485
+ <div class="clearboth">&nbsp;</div>
486
+ </div>
487
+ <div id="footer">
488
+ <!--+
489
+ |start bottomstrip
490
+ +-->
491
+ <div class="lastmodified">
492
+ <script type="text/javascript"><!--
493
+ document.write("Last Published: " + document.lastModified);
494
+ // --></script>
495
+ </div>
496
+ <div class="copyright">
497
+ Copyright &copy;
498
+ 2007-2010 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
499
+ </div>
500
+ <!--+
501
+ |end bottomstrip
502
+ +-->
503
+ </div>
504
+ </body>
505
+ </html>