ruby-hdfs-cdh4 0.0.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Alexander Staubo
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,49 @@
+ require 'mkmf'
+
+ # debug options
+ if enable_config 'debug'
+   puts 'enabling debug library build configuration.'
+
+   $CFLAGS = CONFIG['CFLAGS'].gsub(/\s\-O\d?\s/, ' -O0 ').
+     gsub(/\s?\-g\w*\s/, ' -ggdb3 ')
+   CONFIG['LDSHARED'] = CONFIG['LDSHARED'].gsub(/\s\-s(\s|\z)/, ' ')
+ end
+
+ # set up environment
+ ENV['HADOOP_ENV'] ||= '/etc/hadoop/conf/hadoop-env.sh'
+
+ # java home
+ ENV['JAVA_HOME'] ||= case
+ when File.readable?(ENV['HADOOP_ENV'])
+   puts 'JAVA_HOME is not set, attempting to read value from ' + ENV['HADOOP_ENV']
+
+   File.read(ENV['HADOOP_ENV']).split("\n").find do |line|
+     line.include? 'JAVA_HOME=' and not line.start_with? '#'
+   end.split('=').last
+ else # check common locations
+   java_locations = ['/usr/java/default'] # todo: add more locations to check
+
+   puts 'JAVA_HOME is not set, checking common locations: ' + java_locations.join(',')
+
+   java_locations.find do |path|
+     File.directory?(path) and File.exist? File.join path, 'bin/java'
+   end
+ end
+ abort 'unable to find value for JAVA_HOME' unless ENV['JAVA_HOME']
+
+ # libjvm
+ ENV['JAVA_LIB'] ||= File.dirname Dir.glob(File.join(ENV['JAVA_HOME'], '**', 'libjvm.so')).first
+ abort 'unable to find value for JAVA_LIB or detect location of libjvm.so' unless ENV['JAVA_LIB']
+
+ # java include paths
+ Dir.glob(File.join(ENV['JAVA_HOME'], 'include', '**', '.')).map {|path| File.dirname path}.each do |include_path|
+   $INCFLAGS << [' -I', include_path].join
+ end
+
+ dir_config 'hdfs'
+ ['jvm', 'hdfs'].each do |lib|
+   find_library lib, nil, ENV['JAVA_LIB']
+ end
+
+ have_library 'c', 'main'
+ create_makefile 'hdfs'
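For orientation, a hypothetical session driving this extconf.rb by hand; the driver script, the paths, and the --enable-debug flag are illustrative (the defaults are the ones the script above falls back to), not requirements of the gem:

    # build_hdfs.rb -- hypothetical driver, not part of the gem.
    ENV['HADOOP_ENV'] ||= '/etc/hadoop/conf/hadoop-env.sh'  # parsed for JAVA_HOME
    ENV['JAVA_HOME']  ||= '/usr/java/default'               # skips the detection logic
    system('ruby extconf.rb --enable-debug') or abort 'extconf failed'
    system('make') or abort 'make failed'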
@@ -0,0 +1,935 @@
+ #include "ruby.h"
+ #include "hdfs.h"
+
+ #include <assert.h>
+ #include <string.h>
+ #include <ctype.h>
+ #include <math.h>
+
+ static VALUE m_hadoop;
+ static VALUE m_dfs;
+ static VALUE c_file;
+ static VALUE c_file_info;
+ static VALUE c_file_system;
+ static VALUE c_file_info_file;
+ static VALUE c_file_info_directory;
+ static VALUE e_dfs_exception;
+ static VALUE e_file_error;
+ static VALUE e_could_not_open;
+ static VALUE e_does_not_exist;
+
+ static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
+ static const int16_t HDFS_DEFAULT_REPLICATION = 3;
+ static const short HDFS_DEFAULT_MODE = 0644;
+ static const char* HDFS_DEFAULT_HOST = "localhost";
+ static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
+ static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
+ static const int HDFS_DEFAULT_PORT = 8020;
+
+ /*
+  * Data structs
+  */
+
+ typedef struct FSData {
+   hdfsFS fs;
+ } FSData;
+
+ typedef struct FileData {
+   hdfsFS fs;
+   hdfsFile file;
+ } FileData;
+
+ typedef struct FileInfo {
+   char* mName;         /* the name of the file */
+   tTime mLastMod;      /* the last modification time for the file in seconds */
+   tOffset mSize;       /* the size of the file in bytes */
+   short mReplication;  /* the count of replicas */
+   tOffset mBlockSize;  /* the block size for the file */
+   char* mOwner;        /* the owner of the file */
+   char* mGroup;        /* the group associated with the file */
+   short mPermissions;  /* the permissions associated with the file */
+   tTime mLastAccess;   /* the last access time for the file in seconds */
+ } FileInfo;
+
+ /*
+  * Methods called upon freeing of objects.
+  */
+
+ void free_fs_data(FSData* data) {
+   if (data && data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+ }
+
+ void free_file_data(FileData* data) {
+   if (data && data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+ }
+
+ void free_file_info(FileInfo* file_info) {
+   if (file_info) {
+     free(file_info->mName);
+     free(file_info->mOwner);
+     free(file_info->mGroup);
+     free(file_info);
+   }
+ }
+
+ /*
+  * Helper functions
+  */
+
+ // Borrowed from:
+ // http://www.programiz.com/c-programming/examples/octal-decimal-convert
+ /* Converts a decimal-formatted integer to an octal-formatted integer. */
+ int decimal_octal(int n) {
+   int rem, i=1, octal=0;
+   while (n != 0) {
+     rem = n % 8;
+     n /= 8;
+     octal += rem * i;
+     i *= 10;
+   }
+   return octal;
+ }
+
+ /* Converts an octal-formatted integer to a decimal-formatted integer. */
+ int octal_decimal(int n) {
+   int decimal=0, i=0, rem;
+   while (n != 0) {
+     rem = n % 10;
+     n /= 10;
+     decimal += rem * pow(8, i);
+     ++i;
+   }
+   return decimal;
+ }
+
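These two helpers shuttle permission bits between their decimal and octal spellings; the round trip is easy to sanity-check from Ruby with plain integer formatting, which computes the same thing:

    # 0644 in octal is 420 in decimal, so decimal_octal(420) returns 644
    # and octal_decimal(644) returns 420.
    420.to_s(8)    # => "644"
    '644'.to_i(8)  # => 420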
+ /*
+  * Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
+  * object.
+  */
+ VALUE wrap_hdfsFileInfo(hdfsFileInfo* info) {
+   // Creates a FileInfo struct, populates it with information from the
+   // supplied hdfsFileInfo struct.
+   FileInfo* file_info = ALLOC_N(FileInfo, 1);
+   file_info->mName = strdup(info->mName);
+   file_info->mLastMod = info->mLastMod;
+   file_info->mSize = info->mSize;
+   file_info->mReplication = info->mReplication;
+   file_info->mBlockSize = info->mBlockSize;
+   file_info->mOwner = strdup(info->mOwner);
+   file_info->mGroup = strdup(info->mGroup);
+   file_info->mPermissions = info->mPermissions;
+   file_info->mLastAccess = info->mLastAccess;
+   // Assigns the FileInfo::File or FileInfo::Directory class based upon the
+   // type of the file.
+   switch(info->mKind) {
+     case kObjectKindDirectory:
+       return Data_Wrap_Struct(c_file_info_directory, NULL, free_file_info,
+           file_info);
+     case kObjectKindFile:
+       return Data_Wrap_Struct(c_file_info_file, NULL, free_file_info,
+           file_info);
+     default:
+       // info->mName is a plain char*, so it is passed to rb_raise directly.
+       rb_raise(e_dfs_exception, "File was not a file or directory: %s",
+           info->mName);
+   }
+   return Qnil;
+ }
+
+ /*
+  * File system interface
+  */
+
+ VALUE HDFS_File_System_alloc(VALUE klass) {
+   FSData* data = ALLOC_N(FSData, 1);
+   data->fs = NULL;
+   VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
+   return instance;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.new(host='localhost', port=8020) -> hdfs
+  *
+  * Creates a new HDFS client connection. A nil host or port falls back to
+  * the compiled-in defaults, localhost and 8020.
+  */
+ VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   data->fs = hdfsConnect(
+       RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
+       RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
+   return self;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.disconnect -> nil
+  *
+  * Disconnects the client connection.
+  */
+ VALUE HDFS_File_System_disconnect(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   if (data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+   return Qnil;
+ }
+
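A minimal connection sketch against the two methods above; the feature name here follows create_makefile 'hdfs' in extconf.rb, and passing nil for host and port falls back to localhost:8020:

    require 'hdfs'

    dfs = Hadoop::DFS::FileSystem.new(nil, nil)  # connects to localhost:8020
    # ... issue filesystem calls against dfs ...
    dfs.disconnect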
+ /**
+  * call-seq:
+  *    hdfs.delete(path, recursive=false) -> success
+  *
+  * Deletes the file at the supplied path, recursively if specified. Returns
+  * True if successful, False if unsuccessful.
+  */
+ VALUE HDFS_File_System_delete(VALUE self, VALUE path, VALUE recursive) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsDelete(data->fs, RSTRING_PTR(path),
+       (recursive == Qtrue) ? 1 : HDFS_DEFAULT_RECURSIVE_DELETE);
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.rename(from_path, to_path) -> success
+  *
+  * Renames the file at the supplied path to the file at the destination path.
+  * Returns True if successful, False if unsuccessful.
+  */
+ VALUE HDFS_File_System_rename(VALUE self, VALUE from_path, VALUE to_path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsRename(data->fs, RSTRING_PTR(from_path), RSTRING_PTR(to_path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.exist?(path) -> file_existence
+  *
+  * Checks if a file exists at the supplied path. If file exists, returns True;
+  * if not, returns False.
+  */
+ VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsExists(data->fs, RSTRING_PTR(path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.create_directory(path) -> success
+  *
+  * Creates a directory at the supplied path. If successful, returns True;
+  * if not, returns False.
+  */
+ VALUE HDFS_File_System_create_directory(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsCreateDirectory(data->fs, RSTRING_PTR(path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
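Taken together, the four methods above cover basic namespace manipulation; a sketch with illustrative paths:

    dfs.create_directory('/tmp/example')        # => true on success
    dfs.exist?('/tmp/example')                  # => true
    dfs.rename('/tmp/example', '/tmp/renamed')  # => true
    dfs.delete('/tmp/renamed', true)            # recursive delete => true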
+ /**
+  * call-seq:
+  *    hdfs.list_directory(path) -> file_infos
+  *
+  * Lists the directory at the supplied path, returning an Array of
+  * Hadoop::DFS::FileInfo objects. If the directory does not exist, raises
+  * a DoesNotExistError.
+  */
+ VALUE HDFS_File_System_list_directory(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   VALUE file_infos = rb_ary_new();
+   int num_files = 0;
+   hdfsFileInfo* infos = hdfsListDirectory(data->fs, RSTRING_PTR(path),
+       &num_files);
+   if (infos == NULL) {
+     rb_raise(e_does_not_exist, "Directory does not exist: %s",
+         RSTRING_PTR(path));
+     return Qnil;
+   }
+   int i;
+   for (i = 0; i < num_files; i++) {
+     hdfsFileInfo* cur_info = infos + i;
+     rb_ary_push(file_infos, wrap_hdfsFileInfo(cur_info));
+   }
+   hdfsFreeFileInfo(infos, num_files);
+   return file_infos;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.stat(path) -> file_info
+  *
+  * Stats the file or directory at the supplied path, returning a
+  * Hadoop::DFS::FileInfo object corresponding to it. If the file or directory
+  * does not exist, raises a DoesNotExistError.
+  */
+ VALUE HDFS_File_System_stat(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   hdfsFileInfo* info = hdfsGetPathInfo(data->fs, RSTRING_PTR(path));
+   if (info == NULL) {
+     rb_raise(e_does_not_exist, "File does not exist: %s", RSTRING_PTR(path));
+     return Qnil;
+   }
+   VALUE file_info = wrap_hdfsFileInfo(info);
+   hdfsFreeFileInfo(info, 1);
+   return file_info;
+ }
+
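Both methods raise DoesNotExistError rather than returning nil for a missing path, so a defensive listing might look like this sketch:

    begin
      dfs.list_directory('/user').each do |info|
        kind = info.is_directory? ? 'dir' : 'file'
        puts "#{info.name} (#{kind}, #{info.size} bytes)"
      end
    rescue Hadoop::DFS::DoesNotExistError => e
      warn e.message
    end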
+ /**
+  * call-seq:
+  *    hdfs.set_replication(path, replication) -> success
+  *
+  * Sets the replication factor of the supplied path to the supplied number of
+  * nodes. Returns True if successful; False if not.
+  */
+ VALUE HDFS_File_System_set_replication(VALUE self, VALUE path, VALUE replication) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsSetReplication(data->fs, RSTRING_PTR(path),
+       RTEST(replication) ? NUM2INT(replication) : HDFS_DEFAULT_REPLICATION);
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.cd(path) -> success
+  *
+  * Changes the current working directory to the supplied path. Returns True if
+  * successful; False if not.
+  */
+ VALUE HDFS_File_System_cd(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsSetWorkingDirectory(data->fs, RSTRING_PTR(path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.cwd -> current_directory
+  *
+  * Returns the current working directory as a String.
+  */
+ VALUE HDFS_File_System_cwd(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   char* cur_dir = (char *) malloc(
+       sizeof(char) * HDFS_DEFAULT_PATH_STRING_LENGTH);
+   hdfsGetWorkingDirectory(data->fs, cur_dir,
+       HDFS_DEFAULT_PATH_STRING_LENGTH);
+   VALUE ruby_cur_dir = rb_str_new2(cur_dir);
+   free(cur_dir);
+   return ruby_cur_dir;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.chgrp(path, group) -> success
+  *
+  * Changes the group of the supplied path. Returns True if successful; False
+  * if not.
+  */
+ VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsChown(data->fs, RSTRING_PTR(path), NULL,
+       RSTRING_PTR(group));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.chmod(path, mode) -> retval
+  *
+  * Changes the mode of the supplied path. The mode is given as an
+  * octal-formatted Integer. Returns True if successful; False if not.
+  */
+ VALUE HDFS_File_System_chmod(VALUE self, VALUE path, VALUE mode) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   // Parenthesized so the short cast applies to the selected value, not just
+   // to RTEST(mode).
+   int success = hdfsChmod(data->fs, RSTRING_PTR(path),
+       (short) (RTEST(mode) ? octal_decimal(NUM2INT(mode)) : HDFS_DEFAULT_MODE));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.chown(path, owner) -> retval
+  *
+  * Changes the owner of the supplied path. Returns True if successful; False
+  * if not.
+  */
+ VALUE HDFS_File_System_chown(VALUE self, VALUE path, VALUE owner) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int success = hdfsChown(data->fs, RSTRING_PTR(path), RSTRING_PTR(owner),
+       NULL);
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
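Since chmod runs its argument through octal_decimal, the mode is written as an octal-formatted integer such as 644, not as a Ruby octal literal like 0644. A sketch of the three ownership calls:

    dfs.chmod('/tmp/example', 644)       # rw-r--r--
    dfs.chown('/tmp/example', 'hdfs')    # changes owner only; group untouched
    dfs.chgrp('/tmp/example', 'hadoop')  # changes group only; owner untouched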
+ /**
+  * call-seq:
+  *    hdfs.copy(from_path, to_path, to_fs=nil) -> retval
+  *
+  * Copies the file at HDFS location from_path to HDFS location to_path. If
+  * to_fs is specified, copies to this HDFS over the current HDFS. If
+  * successful, returns true; otherwise, returns false.
+  */
+ VALUE HDFS_File_System_copy(VALUE self, VALUE from_path, VALUE to_path, VALUE to_fs) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   hdfsFS destFS = data->fs;
+   if (RTEST(to_fs)) {
+     if (CLASS_OF(to_fs) == c_file_system) {
+       FSData* destFSData = NULL;
+       Data_Get_Struct(to_fs, FSData, destFSData);
+       destFS = destFSData->fs;
+     } else {
+       rb_raise(rb_eArgError, "to_fs must be of type Hadoop::DFS::FileSystem");
+       return Qnil;
+     }
+   }
+   // The destination path is to_path, not the to_fs argument.
+   int success = hdfsCopy(data->fs, RSTRING_PTR(from_path), destFS,
+       RSTRING_PTR(to_path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.move(from_path, to_path, to_fs=nil) -> retval
+  *
+  * Moves the file at HDFS location from_path to HDFS location to_path. If
+  * to_fs is specified, moves to this HDFS over the current HDFS. If
+  * successful, returns true; otherwise, returns false.
+  */
+ VALUE HDFS_File_System_move(VALUE self, VALUE from_path, VALUE to_path, VALUE to_fs) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   hdfsFS destFS = data->fs;
+   if (RTEST(to_fs)) {
+     if (CLASS_OF(to_fs) == c_file_system) {
+       FSData* destFSData = NULL;
+       Data_Get_Struct(to_fs, FSData, destFSData);
+       destFS = destFSData->fs;
+     } else {
+       rb_raise(rb_eArgError, "to_fs must be of type Hadoop::DFS::FileSystem");
+       return Qnil;
+     }
+   }
+   int success = hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
+       RSTRING_PTR(to_path));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
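Both copy and move accept an optional destination FileSystem, so a cross-cluster transfer is just a second connection; the backup host below is hypothetical:

    backup = Hadoop::DFS::FileSystem.new('backup-namenode', 8020)
    dfs.copy('/data/events.log', '/data/events.log', backup)  # to the other cluster
    dfs.move('/data/old.log', '/archive/old.log', nil)        # nil => same cluster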
+ /**
+  * call-seq:
+  *    hdfs.capacity -> retval
+  *
+  * Returns the capacity of this HDFS file system in bytes, raising a
+  * DFSException if this was unsuccessful.
+  */
+ VALUE HDFS_File_System_capacity(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   long capacity = hdfsGetCapacity(data->fs);
+   if (capacity < 0) {
+     rb_raise(e_dfs_exception, "Error while retrieving capacity");
+     return Qnil;
+   }
+   return LONG2NUM(capacity);
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.default_block_size -> retval
+  *
+  * Returns the default block size of this HDFS file system in bytes, raising a
+  * DFSException if this was unsuccessful.
+  */
+ VALUE HDFS_File_System_default_block_size(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   long block_size = hdfsGetDefaultBlockSize(data->fs);
+   if (block_size < 0) {
+     rb_raise(e_dfs_exception, "Error while retrieving default block size");
+     return Qnil;
+   }
+   return LONG2NUM(block_size);
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.default_block_size_at_path(path) -> default_block_size
+  *
+  * Returns the default block size at the supplied HDFS path, raising a
+  * DFSException if this was unsuccessful.
+  */
+ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   long block_size = hdfsGetDefaultBlockSizeAtPath(data->fs, RSTRING_PTR(path));
+   if (block_size < 0) {
+     rb_raise(e_dfs_exception,
+         "Error while retrieving default block size at path: %s", RSTRING_PTR(path));
+     return Qnil;
+   }
+   return LONG2NUM(block_size);
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.used -> retval
+  *
+  * Returns the bytes currently in use by this filesystem, raising a
+  * DFSException if unsuccessful.
+  */
+ VALUE HDFS_File_System_used(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   long used = hdfsGetUsed(data->fs);
+   if (used < 0) {
+     rb_raise(e_dfs_exception, "Error while retrieving used capacity");
+     return Qnil;
+   }
+   return LONG2NUM(used);
+ }
+
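A sketch of a utilization report built from the accessors above; each of them raises DFSException on failure rather than returning a sentinel value:

    total = dfs.capacity
    used  = dfs.used
    printf("%.1f%% of %d bytes used (default block size %d)\n",
           100.0 * used / total, total, dfs.default_block_size)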
+ /**
+  * call-seq:
+  *    hdfs.utime(path, modified_time=nil, access_time=nil) -> retval
+  *
+  * Changes the last modified and/or last access time in seconds since the Unix
+  * epoch for the supplied file. Returns true if successful; false if not.
+  */
+ VALUE HDFS_File_System_utime(VALUE self, VALUE path, VALUE modified_time, VALUE access_time) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   // Parenthesized so each tTime cast applies to the selected value; -1
+   // leaves the corresponding timestamp unchanged.
+   int success = hdfsUtime(data->fs, RSTRING_PTR(path),
+       (tTime) (RTEST(modified_time) ? NUM2INT(modified_time) : -1),
+       (tTime) (RTEST(access_time) ? NUM2INT(access_time) : -1));
+   return success == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.open(path, mode, options = {}) -> file
+  *
+  * Opens a file. If the file cannot be opened, raises a CouldNotOpenFileError;
+  * otherwise, returns a Hadoop::DFS::File object corresponding to the file.
+  */
+ VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+
+   int flags = 0;
+   if (strcmp("r", StringValuePtr(mode)) == 0) {
+     flags = O_RDONLY;
+   } else if (strcmp("w", StringValuePtr(mode)) == 0) {
+     flags = O_WRONLY;
+   } else {
+     rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
+     return Qnil;
+   }
+   // Looks up the option symbols directly instead of eval'ing them.
+   VALUE r_buffer_size = rb_hash_aref(options, ID2SYM(rb_intern("buffer_size")));
+   VALUE r_replication = rb_hash_aref(options, ID2SYM(rb_intern("replication")));
+   VALUE r_block_size = rb_hash_aref(options, ID2SYM(rb_intern("block_size")));
+   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
+       RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
+       RTEST(r_replication) ? NUM2INT(r_replication) : 0,
+       RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+   if (file == NULL) {
+     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
+     return Qnil;
+   }
+
+   FileData* file_data = ALLOC_N(FileData, 1);
+   file_data->fs = data->fs;
+   file_data->file = file;
+   VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
+   return file_instance;
+ }
+
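Note that open is registered with an arity of 3, so the options Hash must always be supplied, even if empty. A write-then-read sketch with illustrative paths:

    file = dfs.open('/tmp/greeting.txt', 'w', :block_size => 134217728)
    file.write('hello from ruby-hdfs')
    file.close

    file = dfs.open('/tmp/greeting.txt', 'r', {})
    puts file.read(1024)
    file.close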
+ /*
+  * File interface
+  */
+
+ /**
+  * call-seq:
+  *    file.read(length) -> retval
+  *
+  * Reads the number of bytes specified by length from the current file object,
+  * returning the bytes read as a String. If this fails, raises a
+  * FileError.
+  */
+ VALUE HDFS_File_read(VALUE self, VALUE length) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   // length is a Ruby VALUE; convert it before sizing the buffer.
+   int len = NUM2INT(length);
+   char* buffer = ALLOC_N(char, len);
+   MEMZERO(buffer, char, len);
+   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, len);
+   if (bytes_read == -1) {
+     xfree(buffer);
+     rb_raise(e_file_error, "Failed to read data");
+   }
+   // Builds the String from the actual byte count so embedded NULs survive.
+   VALUE bytes = rb_tainted_str_new(buffer, bytes_read);
+   xfree(buffer);
+   return bytes;
+ }
+
+ /**
+  * call-seq:
+  *    file.write(bytes) -> num_bytes_written
+  *
+  * Writes the string specified by bytes to the current file object, returning
+  * the number of bytes written as an Integer. If this fails, raises a FileError.
+  */
+ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
+   if (bytes_written == -1) {
+     rb_raise(e_file_error, "Failed to write data");
+   }
+   return INT2NUM(bytes_written);
+ }
+
+ /**
+  * call-seq:
+  *    file.tell -> current_position
+  *
+  * Returns the current byte position in the file as an Integer. If this fails,
+  * raises a FileError.
+  */
+ VALUE HDFS_File_tell(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tSize offset = hdfsTell(data->fs, data->file);
+   if (offset == -1) {
+     rb_raise(e_file_error, "Failed to read position");
+   }
+   return INT2NUM(offset);
+ }
+
+ /**
+  * call-seq:
+  *    file.seek(offset) -> success
+  *
+  * Seeks the file pointer to the supplied offset. If this fails, raises a
+  * FileError.
+  */
+ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
+   if (result != 0) {
+     rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
+   }
+   return Qtrue;
+ }
+
+ /**
+  * call-seq:
+  *    file.flush -> success
+  *
+  * Flushes all buffers currently being written to this file. When this
+  * finishes, new readers will see the data written. If this fails, raises a
+  * FileError.
+  */
+ VALUE HDFS_File_flush(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsFlush(data->fs, data->file);
+   if (result != 0) {
+     rb_raise(e_file_error, "Flush failed");
+   }
+   return Qtrue;
+ }
+
+ /**
+  * call-seq:
+  *    file.available -> available_bytes
+  *
+  * Returns the number of bytes that can be read from this file without
+  * blocking. If this fails, raises a FileError.
+  */
+ VALUE HDFS_File_available(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsAvailable(data->fs, data->file);
+   if (result == -1) {
+     rb_raise(e_file_error, "Failed to get available data");
+   }
+   return INT2NUM(result);
+ }
+
+ /**
+  * call-seq:
+  *    file.close -> success
+  *
+  * Closes the current file. If this fails, raises a FileError.
+  */
+ VALUE HDFS_File_close(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   if (data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+   return Qtrue;
+ }
+
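The remaining File methods mirror Ruby's IO: tell and seek for positioning, available for how much can be read without blocking, and flush to publish written data to new readers. A positioning sketch:

    file = dfs.open('/tmp/greeting.txt', 'r', {})
    file.seek(6)                    # skip 'hello '
    file.tell                       # => 6
    puts file.read(file.available)  # read whatever is immediately available
    file.close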
+ /**
+  * HDFS File Info interface
+  */
+
+ /**
+  * call-seq:
+  *    file_info.block_size -> retval
+  *
+  * Returns the block size of the file described by this object.
+  */
+ VALUE HDFS_File_Info_block_size(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return INT2NUM(file_info->mBlockSize);
+ }
+
+ /**
+  * call-seq:
+  *    file_info.group -> retval
+  *
+  * Returns the group of the file described by this object.
+  */
+ VALUE HDFS_File_Info_group(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return rb_str_new(file_info->mGroup, strlen(file_info->mGroup));
+ }
+
+ /**
+  * call-seq:
+  *    file_info.is_directory? -> retval
+  *
+  * Returns True if the file described by this object is a directory; otherwise,
+  * returns False.
+  */
+ VALUE HDFS_File_Info_is_directory(VALUE self) {
+   return Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    file_info.is_file? -> retval
+  *
+  * Returns True if the file described by this object is a file; otherwise,
+  * returns False.
+  */
+ VALUE HDFS_File_Info_is_file(VALUE self) {
+   return Qfalse;
+ }
+
+ VALUE HDFS_File_Info_Directory_is_directory(VALUE self) {
+   return Qtrue;
+ }
+
+ VALUE HDFS_File_Info_File_is_file(VALUE self) {
+   return Qtrue;
+ }
+
+ /**
+  * call-seq:
+  *    file_info.last_access -> retval
+  *
+  * Returns the time of last access as an Integer representing seconds since the
+  * epoch.
+  */
+ VALUE HDFS_File_Info_last_access(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return LONG2NUM(file_info->mLastAccess);
+ }
+
+ /**
+  * call-seq:
+  *    file_info.last_modified -> retval
+  *
+  * Returns the time of last modification as an Integer representing seconds
+  * since the epoch for the file.
+  */
+ VALUE HDFS_File_Info_last_modified(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return LONG2NUM(file_info->mLastMod);
+ }
+
+ /**
+  * call-seq:
+  *    file_info.mode -> retval
+  *
+  * Returns the permissions of the file as an octal-formatted Integer.
+  */
+ VALUE HDFS_File_Info_mode(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return INT2NUM(decimal_octal(file_info->mPermissions));
+ }
+
+ /**
+  * call-seq:
+  *    file_info.name -> retval
+  *
+  * Returns the name of the file as a String.
+  */
+ VALUE HDFS_File_Info_name(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return rb_str_new(file_info->mName, strlen(file_info->mName));
+ }
+
+ /**
+  * call-seq:
+  *    file_info.owner -> retval
+  *
+  * Returns the owner of the file as a String.
+  */
+ VALUE HDFS_File_Info_owner(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return rb_str_new(file_info->mOwner, strlen(file_info->mOwner));
+ }
+
+ /**
+  * call-seq:
+  *    file_info.replication -> retval
+  *
+  * Returns the replication factor of the file as an Integer.
+  */
+ VALUE HDFS_File_Info_replication(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return INT2NUM(file_info->mReplication);
+ }
+
+ /**
+  * call-seq:
+  *    file_info.size -> retval
+  *
+  * Returns the size of the file as an Integer.
+  */
+ VALUE HDFS_File_Info_size(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   return LONG2NUM(file_info->mSize);
+ }
+
+ /**
+  * call-seq:
+  *    file_info.to_s -> retval
+  *
+  * Returns a human-readable representation of a Hadoop::DFS::FileInfo object
+  * as a String.
+  */
+ VALUE HDFS_File_Info_to_s(VALUE self) {
+   FileInfo* file_info = NULL;
+   Data_Get_Struct(self, FileInfo, file_info);
+   // Introspects current class, returns it as a String.
+   VALUE class_string = rb_funcall(
+       rb_funcall(self, rb_intern("class"), 0),
+       rb_intern("to_s"), 0);
+   char* output;
+   VALUE string_value = rb_str_new2("");
+   // If asprintf was successful, creates a Ruby String; output is only
+   // defined (and freed) on success.
+   if (asprintf(&output, "#<%s: %s, mode=%d, owner=%s, group=%s>",
+           RSTRING_PTR(class_string), file_info->mName,
+           decimal_octal(file_info->mPermissions), file_info->mOwner,
+           file_info->mGroup) >= 0) {
+     string_value = rb_str_new(output, strlen(output));
+     free(output);
+   }
+   return string_value;
+ }
+
+ /*
+  * Extension initialization
+  */
+
+ void Init_hdfs() {
+   m_hadoop = rb_define_module("Hadoop");
+   m_dfs = rb_define_module_under(m_hadoop, "DFS");
+
+   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
+   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
+   rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
+   rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+   rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
+   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 2);
+   rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
+   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
+   rb_define_method(c_file_system, "create_directory", HDFS_File_System_create_directory, 1);
+   rb_define_method(c_file_system, "list_directory", HDFS_File_System_list_directory, 1);
+   rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
+   rb_define_method(c_file_system, "set_replication", HDFS_File_System_set_replication, 2);
+   rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
+   rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
+   rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
+   rb_define_method(c_file_system, "chmod", HDFS_File_System_chmod, 2);
+   rb_define_method(c_file_system, "chown", HDFS_File_System_chown, 2);
+   rb_define_method(c_file_system, "copy", HDFS_File_System_copy, 3);
+   rb_define_method(c_file_system, "capacity", HDFS_File_System_capacity, 0);
+   rb_define_method(c_file_system, "default_block_size",
+       HDFS_File_System_default_block_size, 0);
+   rb_define_method(c_file_system, "default_block_size_at_path",
+       HDFS_File_System_default_block_size_at_path, 1);
+   // HDFS_File_System_move takes three arguments, matching copy.
+   rb_define_method(c_file_system, "move", HDFS_File_System_move, 3);
+   rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
+   rb_define_method(c_file_system, "utime", HDFS_File_System_utime, 3);
+
+   c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
+   rb_define_method(c_file, "read", HDFS_File_read, 1);
+   rb_define_method(c_file, "write", HDFS_File_write, 1);
+   rb_define_method(c_file, "<<", HDFS_File_write, 1);
+   rb_define_method(c_file, "seek", HDFS_File_seek, 1);
+   rb_define_method(c_file, "tell", HDFS_File_tell, 0);
+   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
+   rb_define_method(c_file, "available", HDFS_File_available, 0);
+   rb_define_method(c_file, "close", HDFS_File_close, 0);
+
+   c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
+   rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
+   rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
+   rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory, 0);
+   rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
+   rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
+   rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified, 0);
+   rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
+   rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
+   rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
+   rb_define_method(c_file_info, "replication", HDFS_File_Info_replication, 0);
+   rb_define_method(c_file_info, "size", HDFS_File_Info_size, 0);
+   rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
+
+   c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
+   rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file, 0);
+
+   c_file_info_directory = rb_define_class_under(c_file_info, "Directory", c_file_info);
+   rb_define_method(c_file_info_directory, "is_directory?", HDFS_File_Info_Directory_is_directory, 0);
+
+   e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
+   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
+   e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
+   e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError", e_file_error);
+ }
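Putting the pieces together, a hypothetical end-to-end session against the classes and exception hierarchy registered above; host and paths are illustrative:

    require 'hdfs'

    dfs = Hadoop::DFS::FileSystem.new('namenode.example.com', 8020)
    begin
      file = dfs.open('/tmp/demo.txt', 'w', {})
      file << "one line\n"   # '<<' is an alias for write
      file.close
      puts dfs.stat('/tmp/demo.txt').to_s
    rescue Hadoop::DFS::DFSException => e
      warn "HDFS operation failed: #{e.message}"
    ensure
      dfs.disconnect
    end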