ruby-hdfs-cdh4 0.0.1

data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Alexander Staubo
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,49 @@
+ require 'mkmf'
+
+ # debug options
+ if enable_config 'debug'
+   puts 'enabling debug library build configuration.'
+
+   $CFLAGS = CONFIG['CFLAGS'].gsub(/\s\-O\d?\s/, ' -O0 ').
+                              gsub(/\s?\-g\w*\s/, ' -ggdb3 ')
+   CONFIG['LDSHARED'] = CONFIG['LDSHARED'].gsub(/\s\-s(\s|\z)/, ' ')
+ end
+
+ # set up environment
+ ENV['HADOOP_ENV'] ||= '/etc/hadoop/conf/hadoop-env.sh'
+
+ # java home
+ ENV['JAVA_HOME'] ||= case
+ when File.readable?(ENV['HADOOP_ENV'])
+   puts 'JAVA_HOME is not set, attempting to read value from ' + ENV['HADOOP_ENV']
+
+   # nil when hadoop-env.sh contains no uncommented JAVA_HOME line; the abort
+   # below then reports the problem instead of a NoMethodError here
+   java_home_line = File.read(ENV['HADOOP_ENV']).split("\n").find do |line|
+     line.include? 'JAVA_HOME=' and not line.start_with? '#'
+   end
+   java_home_line && java_home_line.split('=').last
+ else # check common locations
+   java_locations = ['/usr/java/default'] # todo: add more locations to check
+
+   puts 'JAVA_HOME is not set, checking common locations: ' + java_locations.join(',')
+
+   java_locations.find do |path|
+     File.directory?(path) and File.exist? File.join path, 'bin/java'
+   end
+ end
+ abort 'unable to find value for JAVA_HOME' unless ENV['JAVA_HOME']
+
+ # libjvm (skip the dirname call when no libjvm.so is found, so the abort below fires)
+ libjvm = Dir.glob(File.join(ENV['JAVA_HOME'], '**', 'libjvm.so')).first
+ ENV['JAVA_LIB'] ||= File.dirname(libjvm) if libjvm
+ abort 'unable to find value for JAVA_LIB or detect location of libjvm.so' unless ENV['JAVA_LIB']
+
+ # java include paths
+ Dir.glob(File.join(ENV['JAVA_HOME'], 'include', '**', '.')).map {|path| File.dirname path}.each do |include_path|
+   $INCFLAGS << [' -I', include_path].join
+ end
+
+ dir_config 'hdfs'
+ ['jvm', 'hdfs'].each do |lib|
+   find_library lib, nil, ENV['JAVA_LIB']
+ end
+
+ have_library 'c', 'main'
+ create_makefile 'hdfs'
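
Once this extconf.rb has generated the Makefile and the extension has compiled, the shared object is loaded like any other C extension. A minimal sketch, assuming a reachable HDFS namenode (the nil arguments fall back to the localhost/8020 defaults hard-coded in the C source that follows):

    require 'hdfs'                               # the compiled extension built by this extconf.rb
    dfs = Hadoop::DFS::FileSystem.new(nil, nil)  # nil host/port use the built-in defaults
    puts dfs.capacity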
@@ -0,0 +1,935 @@
1
+ #include "ruby.h"
2
+ #include "hdfs.h"
3
+
4
+ #include <assert.h>
5
+ #include <string.h>
6
+ #include <ctype.h>
7
+ #include <math.h>
8
+
9
+ static VALUE m_hadoop;
10
+ static VALUE m_dfs;
11
+ static VALUE c_file;
12
+ static VALUE c_file_info;
13
+ static VALUE c_file_system;
14
+ static VALUE c_file_info_file;
15
+ static VALUE c_file_info_directory;
16
+ static VALUE e_dfs_exception;
17
+ static VALUE e_file_error;
18
+ static VALUE e_could_not_open;
19
+ static VALUE e_does_not_exist;
20
+
21
+ static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
22
+ static const int16_t HDFS_DEFAULT_REPLICATION = 3;
23
+ static const short HDFS_DEFAULT_MODE = 0644;
24
+ static const char* HDFS_DEFAULT_HOST = "localhost";
25
+ static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
26
+ static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
27
+ static const int HDFS_DEFAULT_PORT = 8020;
28
+
29
+ /*
30
+ * Data structs
31
+ */
32
+
33
+ typedef struct FSData {
34
+ hdfsFS fs;
35
+ } FSData;
36
+
37
+ typedef struct FileData {
38
+ hdfsFS fs;
39
+ hdfsFile file;
40
+ } FileData;
41
+
42
+ typedef struct FileInfo {
43
+ char* mName; /* the name of the file */
44
+ tTime mLastMod; /* the last modification time for the file in seconds */
45
+ tOffset mSize; /* the size of the file in bytes */
46
+ short mReplication; /* the count of replicas */
47
+ tOffset mBlockSize; /* the block size for the file */
48
+ char* mOwner; /* the owner of the file */
49
+ char* mGroup; /* the group associated with the file */
50
+ short mPermissions; /* the permissions associated with the file */
51
+ tTime mLastAccess; /* the last access time for the file in seconds */
52
+ } FileInfo;
53
+
54
+ /*
55
+ * Methods called upon freeing of objects.
56
+ */
57
+
58
+ void free_fs_data(FSData* data) {
59
+ if (data && data->fs != NULL) {
60
+ hdfsDisconnect(data->fs);
61
+ data->fs = NULL;
62
+ }
63
+ }
64
+
65
+ void free_file_data(FileData* data) {
66
+ if (data && data->file != NULL) {
67
+ hdfsCloseFile(data->fs, data->file);
68
+ data->file = NULL;
69
+ }
70
+ }
71
+
72
+ void free_file_info(FileInfo* file_info) {
73
+ if (file_info) {
74
+ free(file_info->mName);
75
+ free(file_info->mOwner);
76
+ free(file_info->mGroup);
77
+ free(file_info);
78
+ }
79
+ }
80
+
81
+ /*
82
+ * Helper functions
83
+ */
84
+
85
+ // Borrowed from:
86
+ // http://www.programiz.com/c-programming/examples/octal-decimal-convert
87
+ /* Converts a decimal-formatted integer to an octal-formatted integer. */
88
+ int decimal_octal(int n) {
89
+ int rem, i=1, octal=0;
90
+ while (n != 0) {
91
+ rem = n % 8;
92
+ n /= 8;
93
+ octal += rem * i;
94
+ i *= 10;
95
+ }
96
+ return octal;
97
+ }
98
+
99
+ /* Converts an octal-formatted integer to a decimal-formatted integer. */
100
+ int octal_decimal(int n) {
101
+ int decimal=0, i=0, rem;
102
+ while (n != 0) {
103
+ rem = n % 10;
104
+ n /= 10;
105
+ decimal += rem * pow(8, i);
106
+ ++i;
107
+ }
108
+ return decimal;
109
+ }
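
As a worked check of the two helpers above: the POSIX mode 0644 is 420 in decimal, so decimal_octal(420) returns 644 and octal_decimal(644) returns 420. A Ruby equivalent, for illustration only:

    # hypothetical one-liners mirroring the C helpers above
    420.to_s(8).to_i   # => 644  (decimal_octal)
    '644'.to_i(8)      # => 420  (octal_decimal)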
110
+
111
+ /*
112
+ * Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
113
+ * object.
114
+ */
115
+ VALUE wrap_hdfsFileInfo(hdfsFileInfo* info) {
116
+ // Creates a FileInfo struct, populates it with information from the
117
+ // supplied hdfsFileInfo struct.
118
+ FileInfo* file_info = ALLOC_N(FileInfo, 1);
119
+ file_info->mName = strdup(info->mName);
120
+ file_info->mLastMod = info->mLastMod;
121
+ file_info->mSize = info->mSize;
122
+ file_info->mReplication = info->mReplication;
123
+ file_info->mBlockSize = info->mBlockSize;
124
+ file_info->mOwner = strdup(info->mOwner);
125
+ file_info->mGroup = strdup(info->mGroup);
126
+ file_info->mPermissions = info->mPermissions;
127
+ file_info->mLastAccess = info->mLastAccess;
128
+ // Assigns FileInfo::File or FileInfo::Directory class based upon the type of
129
+ // the file.
130
+ switch(info->mKind) {
131
+ case kObjectKindDirectory:
132
+ return Data_Wrap_Struct(c_file_info_directory, NULL, free_file_info,
133
+ file_info);
134
+ case kObjectKindFile:
135
+ return Data_Wrap_Struct(c_file_info_file, NULL, free_file_info,
136
+ file_info);
137
+ default:
138
+ rb_raise(e_dfs_exception, "File was not a file or directory: %s",
+ info->mName);
140
+ }
141
+ return Qnil;
142
+ }
143
+
144
+ /*
145
+ * File system interface
146
+ */
147
+
148
+ VALUE HDFS_File_System_alloc(VALUE klass) {
149
+ FSData* data = ALLOC_N(FSData, 1);
150
+ data->fs = NULL;
151
+ VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
152
+ return instance;
153
+ }
154
+
155
+ /**
156
+ * call-seq:
157
+ * Hadoop::DFS::FileSystem.new(host, port) -> hdfs
158
+ *
159
+ * Creates a new HDFS client connection.
160
+ */
161
+ VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
162
+ FSData* data = NULL;
163
+ Data_Get_Struct(self, FSData, data);
164
+ data->fs = hdfsConnect(
165
+ RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
166
+ RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
167
+ return self;
168
+ }
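
A hedged sketch of the connection lifecycle defined by the methods above; the hostname is illustrative, and passing nil for either argument falls back to the localhost/8020 defaults declared earlier in this file:

    require 'hdfs'
    dfs = Hadoop::DFS::FileSystem.new('namenode.example.com', 8020)
    # ... use dfs ...
    dfs.disconnect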
169
+
170
+ /**
171
+ * call-seq:
172
+ * hdfs.disconnect -> nil
173
+ *
174
+ * Disconnects the client connection.
175
+ */
176
+ VALUE HDFS_File_System_disconnect(VALUE self) {
177
+ FSData* data = NULL;
178
+ Data_Get_Struct(self, FSData, data);
179
+ if (data->fs != NULL) {
180
+ hdfsDisconnect(data->fs);
181
+ data->fs = NULL;
182
+ }
183
+ return Qnil;
184
+ }
185
+
186
+ /**
187
+ * call-seq:
188
+ * hdfs.delete(path, recursive=false) -> success
189
+ *
190
+ * Deletes the file at the supplied path, recursively if specified. Returns
191
+ * True if successful, False if unsuccessful.
192
+ */
193
+ VALUE HDFS_File_System_delete(VALUE self, VALUE path, VALUE recursive) {
194
+ FSData* data = NULL;
195
+ Data_Get_Struct(self, FSData, data);
196
+ int success = hdfsDelete(data->fs, RSTRING_PTR(path),
197
+ (recursive == Qtrue) ? 1 : HDFS_DEFAULT_RECURSIVE_DELETE);
198
+ return success == 0 ? Qtrue : Qfalse;
199
+ }
200
+
201
+ /**
202
+ * call-seq:
203
+ * hdfs.rename(from_path, to_path) -> success
204
+ *
205
+ * Renames the file at the supplied path to the file at the destination path.
206
+ * Returns True if successful, False if unsuccessful.
207
+ */
208
+ VALUE HDFS_File_System_rename(VALUE self, VALUE from_path, VALUE to_path) {
209
+ FSData* data = NULL;
210
+ Data_Get_Struct(self, FSData, data);
211
+ int success = hdfsRename(data->fs, RSTRING_PTR(from_path), RSTRING_PTR(to_path));
212
+ return success == 0 ? Qtrue : Qfalse;
213
+ }
214
+
215
+ /**
216
+ * call-seq:
217
+ * hdfs.exist?(path) -> file_existence
218
+ *
219
+ * Checks if a file exists at the supplied path. If file exists, returns True;
220
+ * if not, returns False.
221
+ */
222
+ VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
223
+ FSData* data = NULL;
224
+ Data_Get_Struct(self, FSData, data);
225
+ int success = hdfsExists(data->fs, RSTRING_PTR(path));
226
+ return success == 0 ? Qtrue : Qfalse;
227
+ }
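
Taken together, delete, rename and exist? (the method is registered under the name exist? in Init_hdfs below) cover basic path manipulation. A sketch, assuming dfs is a connected Hadoop::DFS::FileSystem and the paths are illustrative:

    dfs.rename('/tmp/report.csv', '/archive/report.csv') if dfs.exist?('/tmp/report.csv')
    dfs.delete('/tmp/scratch', true)   # second argument requests a recursive delete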
228
+
229
+ /**
230
+ * call-seq:
231
+ * hdfs.create_directory(path) -> success
232
+ *
233
+ * Creates a directory at the supplied path. Returns True if successful;
+ * False if not.
235
+ */
236
+ VALUE HDFS_File_System_create_directory(VALUE self, VALUE path) {
237
+ FSData* data = NULL;
238
+ Data_Get_Struct(self, FSData, data);
239
+ int success = hdfsCreateDirectory(data->fs, RSTRING_PTR(path));
240
+ return success == 0 ? Qtrue : Qfalse;
241
+ }
242
+
243
+ /**
244
+ * call-seq:
245
+ * hdfs.list_directory(path) -> file_infos
246
+ *
247
+ * Lists the directory at the supplied path, returning an Array of
248
+ * Hadoop::DFS::FileInfo objects. If the directory does not exist, raises
249
+ * a DoesNotExistError.
250
+ */
251
+ VALUE HDFS_File_System_list_directory(VALUE self, VALUE path) {
252
+ FSData* data = NULL;
253
+ Data_Get_Struct(self, FSData, data);
254
+ VALUE file_infos = rb_ary_new();
255
+ int num_files = 0;
256
+ hdfsFileInfo* infos = hdfsListDirectory(data->fs, RSTRING_PTR(path),
257
+ &num_files);
258
+ if (infos == NULL) {
259
+ rb_raise(e_does_not_exist, "Directory does not exist: %s",
260
+ RSTRING_PTR(path));
261
+ return Qnil;
262
+ }
263
+ int i;
264
+ for (i = 0; i < num_files; i++) {
265
+ hdfsFileInfo* cur_info = infos + i;
266
+ rb_ary_push(file_infos, wrap_hdfsFileInfo(cur_info));
267
+ }
268
+ hdfsFreeFileInfo(infos, num_files);
269
+ return file_infos;
270
+ }
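
A sketch of walking a listing with the FileInfo objects returned above (the directory name is illustrative; a missing path raises DoesNotExistError):

    dfs.list_directory('/user/hive').each do |info|
      kind = info.is_directory? ? 'dir ' : 'file'
      puts "#{kind} #{info.name} #{info.size} bytes, mode #{info.mode}"
    end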
271
+
272
+ /**
273
+ * call-seq:
274
+ * hdfs.stat(path) -> file_info
275
+ *
276
+ * Stats the file or directory at the supplied path, returning a
277
+ * Hadoop::DFS:FileInfo object corresponding to it. If the file or directory
278
+ * does not exist, raises a DoesNotExistError.
279
+ */
280
+ VALUE HDFS_File_System_stat(VALUE self, VALUE path) {
281
+ FSData* data = NULL;
282
+ Data_Get_Struct(self, FSData, data);
283
+ hdfsFileInfo* info = hdfsGetPathInfo(data->fs, RSTRING_PTR(path));
284
+ if (info == NULL) {
285
+ rb_raise(e_does_not_exist, "File does not exist: %s", RSTRING_PTR(path));
286
+ return Qnil;
287
+ }
288
+ VALUE file_info = wrap_hdfsFileInfo(info);
289
+ hdfsFreeFileInfo(info, 1);
290
+ return file_info;
291
+ }
292
+
293
+ /**
294
+ * call-seq:
295
+ * hdfs.set_replication(path, replication) -> success
296
+ *
297
+ * Sets the replication factor of the file at the supplied path to the given
+ * number of nodes. Returns True if successful; False if not.
299
+ */
300
+ VALUE HDFS_File_System_set_replication(VALUE self, VALUE path, VALUE replication) {
301
+ FSData* data = NULL;
302
+ Data_Get_Struct(self, FSData, data);
303
+ int success = hdfsSetReplication(data->fs, RSTRING_PTR(path),
304
+ RTEST(replication) ? NUM2INT(replication) : HDFS_DEFAULT_REPLICATION);
305
+ return success == 0 ? Qtrue : Qfalse;
306
+ }
307
+
308
+ /**
309
+ * call-seq:
310
+ * hdfs.cd(path) -> success
311
+ *
312
+ * Changes the current working directory to the supplied path. Returns True if
313
+ * successful; False if not.
314
+ */
315
+ VALUE HDFS_File_System_cd(VALUE self, VALUE path) {
316
+ FSData* data = NULL;
317
+ Data_Get_Struct(self, FSData, data);
318
+ int success = hdfsSetWorkingDirectory(data->fs, RSTRING_PTR(path));
319
+ return success == 0 ? Qtrue : Qfalse;
320
+ }
321
+
322
+ /**
323
+ * call-seq:
324
+ * hdfs.cwd -> current_working_directory
+ *
+ * Returns the current working directory of this filesystem as a String.
328
+ */
329
+ VALUE HDFS_File_System_cwd(VALUE self) {
330
+ FSData* data = NULL;
331
+ Data_Get_Struct(self, FSData, data);
332
+ char* cur_dir = (char *) malloc(
333
+ sizeof(char) * HDFS_DEFAULT_PATH_STRING_LENGTH);
334
+ hdfsGetWorkingDirectory(data->fs, cur_dir,
+ HDFS_DEFAULT_PATH_STRING_LENGTH);
336
+ VALUE ruby_cur_dir = rb_str_new2(cur_dir);
337
+ free(cur_dir);
338
+ return ruby_cur_dir;
339
+ }
340
+
341
+ /**
342
+ * call-seq:
343
+ * hdfs.chgrp(path, group) -> success
344
+ *
345
+ * Changes the group of the supplied path. Returns True if successful; False
346
+ * if not.
347
+ */
348
+ VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
349
+ FSData* data = NULL;
350
+ Data_Get_Struct(self, FSData, data);
351
+ int success = hdfsChown(data->fs, RSTRING_PTR(path), NULL,
352
+ RSTRING_PTR(group));
353
+ return success == 0 ? Qtrue : Qfalse;
354
+ }
355
+
356
+ /**
357
+ * call-seq:
358
+ * hdfs.chmod(path, mode) -> success
359
+ *
360
+ * Changes the mode of the supplied path. Returns True if successful; False
361
+ * if not.
362
+ */
363
+ VALUE HDFS_File_System_chmod(VALUE self, VALUE path, VALUE mode) {
364
+ FSData* data = NULL;
365
+ Data_Get_Struct(self, FSData, data);
366
+ int success = hdfsChmod(data->fs, RSTRING_PTR(path),
367
+ (short) (RTEST(mode) ? octal_decimal(NUM2INT(mode)) : HDFS_DEFAULT_MODE));
368
+ return success == 0 ? Qtrue : Qfalse;
369
+ }
370
+
371
+ /**
372
+ * call-seq:
373
+ * hdfs.chown(path, owner) -> retval
374
+ *
375
+ * Changes the owner of the supplied path. Returns True if successful; False
376
+ * if not.
377
+ */
378
+ VALUE HDFS_File_System_chown(VALUE self, VALUE path, VALUE owner) {
379
+ FSData* data = NULL;
380
+ Data_Get_Struct(self, FSData, data);
381
+ int success = hdfsChown(data->fs, RSTRING_PTR(path), RSTRING_PTR(owner),
382
+ NULL);
383
+ return success == 0 ? Qtrue : Qfalse;
384
+ }
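
A sketch of the ownership and permission calls above; chmod expects the mode written as octal digits (e.g. 644), which the extension converts with octal_decimal before handing it to libhdfs. The path, owner and group names are illustrative:

    dfs.chown('/data/events', 'etl')
    dfs.chgrp('/data/events', 'analytics')
    dfs.chmod('/data/events', 644)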
385
+
386
+ /**
387
+ * call-seq:
388
+ * hdfs.copy(from_path, to_path, to_fs=nil) -> retval
389
+ *
390
+ * Copies the file at HDFS location from_path to HDFS location to_path. If
391
+ * to_fs is specified, copies to that filesystem rather than the current one. If
392
+ * successful, returns true; otherwise, returns false.
393
+ */
394
+ VALUE HDFS_File_System_copy(VALUE self, VALUE from_path, VALUE to_path, VALUE to_fs) {
395
+ FSData* data = NULL;
396
+ Data_Get_Struct(self, FSData, data);
397
+ hdfsFS destFS = data->fs;
398
+ if (RTEST(to_fs)) {
399
+ if (CLASS_OF(to_fs) == c_file_system) {
400
+ FSData* destFSData = NULL;
401
+ Data_Get_Struct(to_fs, FSData, destFSData);
402
+ destFS = destFSData->fs;
403
+ } else {
404
+ rb_raise(rb_eArgError, "to_fs must be of type Hadoop::DFS::FileSystem");
405
+ return Qnil;
406
+ }
407
+ }
408
+ int success = hdfsCopy(data->fs, RSTRING_PTR(from_path), destFS,
409
+ RSTRING_PTR(to_path));
410
+ return success == 0 ? Qtrue : Qfalse;
411
+ }
412
+
413
+ /**
414
+ * call-seq:
415
+ * hdfs.move(from_path, to_path, to_fs=nil) -> retval
416
+ *
417
+ * Moves the file at HDFS location from_path to HDFS location to_path. If
418
+ * to_fs is specified, moves to that filesystem rather than the current one. If
419
+ * successful, returns true; otherwise, returns false.
420
+ */
421
+ VALUE HDFS_File_System_move(VALUE self, VALUE from_path, VALUE to_path, VALUE to_fs) {
422
+ FSData* data = NULL;
423
+ Data_Get_Struct(self, FSData, data);
424
+ hdfsFS destFS = data->fs;
425
+ if (RTEST(to_fs)) {
426
+ if (CLASS_OF(to_fs) == c_file_system) {
427
+ FSData* destFSData = NULL;
428
+ Data_Get_Struct(to_fs, FSData, destFSData);
429
+ destFS = destFSData->fs;
430
+ } else {
431
+ rb_raise(rb_eArgError, "to_fs must be of type Hadoop::DFS::FileSystem");
432
+ return Qnil;
433
+ }
434
+ }
435
+ int success = hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
436
+ RSTRING_PTR(to_path));
437
+ return success == 0 ? Qtrue : Qfalse;
438
+ }
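
A sketch of copy and move: when the third argument is nil the operation stays on the current cluster, and when it is another Hadoop::DFS::FileSystem the destination path is created there (hostnames and paths are illustrative):

    backup = Hadoop::DFS::FileSystem.new('backup-namenode.example.com', 8020)
    dfs.copy('/data/events/2013-05-01.log', '/data/events/2013-05-01.log', backup)
    dfs.move('/incoming/2013-05-01.log', '/data/events/2013-05-01.log', nil)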
439
+
440
+ /**
441
+ * call-seq:
442
+ * hdfs.capacity -> retval
443
+ *
444
+ * Returns the capacity of this HDFS file system in bytes, raising a
445
+ * DFSException if this was unsuccessful.
446
+ */
447
+ VALUE HDFS_File_System_capacity(VALUE self) {
448
+ FSData* data = NULL;
449
+ Data_Get_Struct(self, FSData, data);
450
+ long capacity = hdfsGetCapacity(data->fs);
451
+ if (capacity < 0) {
452
+ rb_raise(e_dfs_exception, "Error while retrieving capacity");
453
+ return Qnil;
454
+ }
455
+ return LONG2NUM(capacity);
456
+ }
457
+
458
+ /**
459
+ * call-seq:
460
+ * hdfs.default_block_size -> retval
461
+ *
462
+ * Returns the default block size of this HDFS file system in bytes, raising a
463
+ * DFSException if this was unsuccessful.
464
+ */
465
+ VALUE HDFS_File_System_default_block_size(VALUE self) {
466
+ FSData* data = NULL;
467
+ Data_Get_Struct(self, FSData, data);
468
+ long block_size = hdfsGetDefaultBlockSize(data->fs);
469
+ if (block_size < 0) {
470
+ rb_raise(e_dfs_exception, "Error while retrieving default block size");
471
+ return Qnil;
472
+ }
473
+ return LONG2NUM(block_size);
474
+ }
475
+
476
+ /**
477
+ * call-seq:
478
+ * hdfs.default_block_size_at_path(path) -> default_block_size
479
+ *
480
+ * Returns the default block size at the supplied HDFS path, raising a
481
+ * DFSException if this was unsuccessful.
482
+ */
483
+ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
484
+ FSData* data = NULL;
485
+ Data_Get_Struct(self, FSData, data);
486
+ long block_size = hdfsGetDefaultBlockSizeAtPath(data->fs, RSTRING_PTR(path));
487
+ if (block_size < 0) {
488
+ rb_raise(e_dfs_exception,
489
+ "Error while retrieving default block size at path: %s", RSTRING_PTR(path));
490
+ return Qnil;
491
+ }
492
+ return LONG2NUM(block_size);
493
+ }
494
+
495
+ /**
496
+ * call-seq:
497
+ * hdfs.used -> retval
498
+ *
499
+ * Returns the bytes currently in use by this filesystem, raising a
500
+ * DFSException if unsuccessful.
501
+ */
502
+ VALUE HDFS_File_System_used(VALUE self) {
503
+ FSData* data = NULL;
504
+ Data_Get_Struct(self, FSData, data);
505
+ long used = hdfsGetUsed(data->fs);
506
+ if (used < 0) {
507
+ rb_raise(e_dfs_exception, "Error while retrieving used capacity");
508
+ return Qnil;
509
+ }
510
+ return LONG2NUM(used);
511
+ }
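
The three accessors above report cluster-wide figures in bytes. A sketch of a simple usage report (the formatting is purely illustrative):

    used, total = dfs.used, dfs.capacity
    puts "block size: #{dfs.default_block_size} bytes"
    puts format('usage: %.1f%% of %d bytes', 100.0 * used / total, total)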
512
+
513
+ /**
514
+ * call-seq:
515
+ * hdfs.utime(path, modified_time=nil, access_time=nil) -> retval
516
+ *
517
+ * Changes the last modified and/or last access time in seconds since the Unix
518
+ * epoch for the supplied file. Returns true if successful; false if not.
519
+ */
520
+ VALUE HDFS_File_System_utime(VALUE self, VALUE path, VALUE modified_time, VALUE access_time) {
521
+ FSData* data = NULL;
522
+ Data_Get_Struct(self, FSData, data);
523
+ int success = hdfsUtime(data->fs, RSTRING_PTR(path),
524
+ (tTime) (RTEST(modified_time) ? NUM2INT(modified_time) : -1),
+ (tTime) (RTEST(access_time) ? NUM2INT(access_time) : -1));
526
+ return success == 0 ? Qtrue : Qfalse;
527
+ }
528
+
529
+ /**
530
+ * call-seq:
531
+ * hdfs.open(path, mode, options = {}) -> file
532
+ *
533
+ * Opens a file. If the file cannot be opened, raises a CouldNotOpenError;
534
+ * otherwise, returns a Hadoop::DFS::File object corresponding to the file.
535
+ */
536
+ VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
537
+ FSData* data = NULL;
538
+ Data_Get_Struct(self, FSData, data);
539
+
540
+ int flags = 0;
541
+ if (strcmp("r", StringValuePtr(mode)) == 0) {
542
+ flags = O_RDONLY;
543
+ } else if (strcmp("w", StringValuePtr(mode)) == 0) {
544
+ flags = O_WRONLY;
545
+ } else {
546
+ rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
547
+ return Qnil;
548
+ }
549
+ VALUE r_buffer_size = rb_hash_aref(options, ID2SYM(rb_intern("buffer_size")));
+ VALUE r_replication = rb_hash_aref(options, ID2SYM(rb_intern("replication")));
+ VALUE r_block_size = rb_hash_aref(options, ID2SYM(rb_intern("block_size")));
552
+ hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
553
+ RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
554
+ RTEST(r_replication) ? NUM2INT(r_replication) : 0,
555
+ RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
556
+ if (file == NULL) {
557
+ rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
558
+ return Qnil;
559
+ }
560
+
561
+ FileData* file_data = ALLOC_N(FileData, 1);
562
+ file_data->fs = data->fs;
563
+ file_data->file = file;
564
+ VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
565
+ return file_instance;
566
+ }
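
A sketch of writing and then reading a file through the open call above. All three arguments are required by the C method (pass an empty hash when no options are needed), and the recognised option keys are the :buffer_size, :replication and :block_size symbols it looks up; the path and contents are illustrative:

    file = dfs.open('/tmp/greeting.txt', 'w', {})
    file.write("hello from ruby-hdfs-cdh4\n")
    file.flush
    file.close

    file = dfs.open('/tmp/greeting.txt', 'r', {})
    puts file.read(file.available)
    file.close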
567
+
568
+ /*
569
+ * File interface
570
+ */
571
+
572
+ /**
573
+ * call-seq:
574
+ * file.read(length) -> retval
575
+ *
576
+ * Reads the number of bytes specified by length from the current file object,
577
+ * returning the bytes read as a String. If this fails, raises a
578
+ * FileError.
579
+ */
580
+ VALUE HDFS_File_read(VALUE self, VALUE length) {
581
+ FileData* data = NULL;
582
+ Data_Get_Struct(self, FileData, data);
583
+ // length arrives as a Ruby Integer; convert it before using it as a byte count
+ int buffer_length = NUM2INT(length);
+ char* buffer = ALLOC_N(char, buffer_length);
+ MEMZERO(buffer, char, buffer_length);
+ tSize bytes_read = hdfsRead(data->fs, data->file, buffer, buffer_length);
+ if (bytes_read == -1) {
+ xfree(buffer);
+ rb_raise(e_file_error, "Failed to read data");
+ }
+ // build the String from the byte count so data containing NULs is preserved
+ VALUE bytes = rb_tainted_str_new(buffer, bytes_read);
+ xfree(buffer);
+ return bytes;
590
+ }
591
+
592
+ /**
593
+ * call-seq:
594
+ * file.write(bytes) -> num_bytes_written
595
+ *
596
+ * Writes the string specified by bytes to the current file object, returning
597
+ * the number of bytes written as an Integer. If this fails, raises a FileError.
598
+ */
599
+ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
600
+ FileData* data = NULL;
601
+ Data_Get_Struct(self, FileData, data);
602
+ tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
603
+ if (bytes_written == -1) {
604
+ rb_raise(e_file_error, "Failed to write data");
605
+ }
606
+ return INT2NUM(bytes_written);
607
+ }
608
+
609
+ /**
610
+ * call-seq:
611
+ * file.tell -> current_position
612
+ *
613
+ * Returns the current byte position in the file as an Integer. If this fails,
614
+ * raises a FileError.
615
+ */
616
+ VALUE HDFS_File_tell(VALUE self) {
617
+ FileData* data = NULL;
618
+ Data_Get_Struct(self, FileData, data);
619
+ tOffset offset = hdfsTell(data->fs, data->file);
620
+ if (offset == -1) {
621
+ rb_raise(e_file_error, "Failed to read position");
622
+ }
623
+ return LONG2NUM(offset);
624
+ }
625
+
626
+ /**
627
+ * call-seq:
628
+ * file.seek(offset) -> success
629
+ *
630
+ * Seeks the file pointer to the supplied offset. If this fails, raises a
631
+ * FileError.
632
+ */
633
+ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
634
+ FileData* data = NULL;
635
+ Data_Get_Struct(self, FileData, data);
636
+ int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
637
+ if (result != 0) {
638
+ rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
639
+ }
640
+ return Qtrue;
641
+ }
642
+
643
+ /**
644
+ * call-seq:
645
+ * file.flush -> success
646
+ *
647
+ * Flushes all buffers currently being written to this file. When this
648
+ * finishes, new readers will see the data written. If this fails, raises a
649
+ * FileError.
650
+ */
651
+ VALUE HDFS_File_flush(VALUE self) {
652
+ FileData* data = NULL;
653
+ Data_Get_Struct(self, FileData, data);
654
+ int result = hdfsFlush(data->fs, data->file);
655
+ if (result != 0) {
656
+ rb_raise(e_file_error, "Flush failed");
657
+ }
658
+ return Qtrue;
659
+ }
660
+
661
+ /**
662
+ * call-seq:
663
+ * file.available -> available_bytes
664
+ *
665
+ * Returns the number of bytes that can be read from this file without
666
+ * blocking. If this fails, raises a FileError.
667
+ */
668
+ VALUE HDFS_File_available(VALUE self) {
669
+ FileData* data = NULL;
670
+ Data_Get_Struct(self, FileData, data);
671
+ int result = hdfsAvailable(data->fs, data->file);
672
+ if (result == -1) {
673
+ rb_raise(e_file_error, "Failed to get available data");
674
+ }
675
+ return INT2NUM(result);
676
+ }
677
+
678
+ /**
679
+ * call-seq:
680
+ * file.close -> success
681
+ *
682
+ * Closes the current file. If this fails, raises a FileError.
683
+ */
684
+ VALUE HDFS_File_close(VALUE self) {
685
+ FileData* data = NULL;
686
+ Data_Get_Struct(self, FileData, data);
687
+ if (data->file != NULL) {
688
+ hdfsCloseFile(data->fs, data->file);
689
+ data->file = NULL;
690
+ }
691
+ return Qtrue;
692
+ }
693
+
694
+ /**
695
+ * HDFS File Info interface
696
+ */
697
+
698
+ /**
699
+ * call-seq:
700
+ * file_info.block_size -> retval
701
+ *
702
+ * Returns the block size of the file described by this object.
703
+ */
704
+ VALUE HDFS_File_Info_block_size(VALUE self) {
705
+ FileInfo* file_info = NULL;
706
+ Data_Get_Struct(self, FileInfo, file_info);
707
+ return INT2NUM(file_info->mBlockSize);
708
+ }
709
+
710
+ /**
711
+ * call-seq:
712
+ * file_info.group -> retval
713
+ *
714
+ * Returns the group of the file described by this object.
715
+ */
716
+ VALUE HDFS_File_Info_group(VALUE self) {
717
+ FileInfo* file_info = NULL;
718
+ Data_Get_Struct(self, FileInfo, file_info);
719
+ return rb_str_new(file_info->mGroup, strlen(file_info->mGroup));
720
+ }
721
+
722
+ /**
723
+ * call-seq:
724
+ * file_info.is_directory? -> retval
725
+ *
726
+ * Returns True if the file described by this object is a directory; otherwise,
727
+ * returns False.
728
+ */
729
+ VALUE HDFS_File_Info_is_directory(VALUE self) {
730
+ return Qfalse;
731
+ }
732
+
733
+ /**
734
+ * call-seq:
735
+ * file_info.is_file? -> retval
736
+ *
737
+ * Returns True if the file described by this object is a file; otherwise,
738
+ * returns False.
739
+ */
740
+ VALUE HDFS_File_Info_is_file(VALUE self) {
741
+ return Qfalse;
742
+ }
743
+
744
+ VALUE HDFS_File_Info_Directory_is_directory(VALUE self) {
745
+ return Qtrue;
746
+ }
747
+
748
+ VALUE HDFS_File_Info_File_is_file(VALUE self) {
749
+ return Qtrue;
750
+ }
751
+
752
+ /**
753
+ * call-seq:
754
+ * file_info.last_access -> retval
755
+ *
756
+ * Returns the time of last access as an Integer representing seconds since the
757
+ * epoch.
758
+ */
759
+ VALUE HDFS_File_Info_last_access(VALUE self) {
760
+ FileInfo* file_info = NULL;
761
+ Data_Get_Struct(self, FileInfo, file_info);
762
+ return LONG2NUM(file_info->mLastAccess);
763
+ }
764
+
765
+ /**
766
+ * call-seq:
767
+ * file_info.last_modified -> retval
768
+ *
769
+ * Returns the time of last modification as an Integer representing seconds
770
+ * since the epoch for the file
771
+ */
772
+ VALUE HDFS_File_Info_last_modified(VALUE self) {
773
+ FileInfo* file_info = NULL;
774
+ Data_Get_Struct(self, FileInfo, file_info);
775
+ return LONG2NUM(file_info->mLastMod);
776
+ }
777
+
778
+ /**
779
+ * call-seq:
780
+ * file_info.mode -> retval
+ *
+ * Returns the permissions of the file as an octal-formatted Integer
+ * (for example, 644).
784
+ */
785
+ VALUE HDFS_File_Info_mode(VALUE self) {
786
+ FileInfo* file_info = NULL;
787
+ Data_Get_Struct(self, FileInfo, file_info);
788
+ return INT2NUM(decimal_octal(file_info->mPermissions));
789
+ }
790
+
791
+ /**
792
+ * call-seq:
793
+ * file_info.name -> retval
794
+ *
795
+ * Returns the name of the file as a String.
796
+ */
797
+ VALUE HDFS_File_Info_name(VALUE self) {
798
+ FileInfo* file_info = NULL;
799
+ Data_Get_Struct(self, FileInfo, file_info);
800
+ return rb_str_new(file_info->mName, strlen(file_info->mName));
801
+ }
802
+
803
+ /**
804
+ * call-seq:
805
+ * file_info.owner -> retval
806
+ *
807
+ * Returns the owner of the file as a String.
808
+ */
809
+ VALUE HDFS_File_Info_owner(VALUE self) {
810
+ FileInfo* file_info = NULL;
811
+ Data_Get_Struct(self, FileInfo, file_info);
812
+ return rb_str_new(file_info->mOwner, strlen(file_info->mOwner));
813
+ }
814
+
815
+ /**
816
+ * call-seq:
817
+ * file_info.replication -> retval
818
+ *
819
+ * Returns the replication factor of the file as an Integer.
820
+ */
821
+ VALUE HDFS_File_Info_replication(VALUE self) {
822
+ FileInfo* file_info = NULL;
823
+ Data_Get_Struct(self, FileInfo, file_info);
824
+ return INT2NUM(file_info->mReplication);
825
+ }
826
+
827
+ /**
828
+ * call-seq:
829
+ * file_info.size -> retval
830
+ *
831
+ * Returns the size of the file as an Integer.
832
+ */
833
+ VALUE HDFS_File_Info_size(VALUE self) {
834
+ FileInfo* file_info = NULL;
835
+ Data_Get_Struct(self, FileInfo, file_info);
836
+ return LONG2NUM(file_info->mSize);
837
+ }
838
+
839
+ /**
840
+ * call-seq:
841
+ * file_info.to_s -> retval
842
+ *
843
+ * Returns a human-readable representation of a Hadoop::DFS::FileInfo object
844
+ * as a String.
845
+ */
846
+ VALUE HDFS_File_Info_to_s(VALUE self) {
847
+ FileInfo* file_info = NULL;
848
+ Data_Get_Struct(self, FileInfo, file_info);
849
+ // Introspects current class, returns it as a String.
850
+ VALUE class_string = rb_funcall(
851
+ rb_funcall(self, rb_intern("class"), 0),
852
+ rb_intern("to_s"), 0);
853
+ char* output;
854
+ VALUE string_value = rb_str_new2("");
855
+ // If asprintf was successful, creates a Ruby String.
856
+ if (asprintf(&output, "#<%s: %s, mode=%d, owner=%s, group=%s>",
857
+ RSTRING_PTR(class_string), file_info->mName,
858
+ decimal_octal(file_info->mPermissions), file_info->mOwner,
859
+ file_info->mGroup) >= 0) {
860
+ string_value = rb_str_new(output, strlen(output));
+ // output is only defined (and only needs freeing) when asprintf succeeded
+ free(output);
+ }
863
+ return string_value;
864
+ }
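
A sketch of the per-file accessors defined above, reached through stat (the path is illustrative):

    info = dfs.stat('/tmp/greeting.txt')
    puts info.to_s      # e.g. #<Hadoop::DFS::FileInfo::File: /tmp/greeting.txt, ...>
    puts info.owner, info.group, info.replication, info.last_modified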
865
+
866
+ /*
867
+ * Extension initialization
868
+ */
869
+
870
+ void Init_hdfs() {
871
+ m_hadoop = rb_define_module("Hadoop");
872
+ m_dfs = rb_define_module_under(m_hadoop, "DFS");
873
+
874
+ c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
875
+ rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
876
+ rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
877
+ rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
878
+ rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
879
+ rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 2);
880
+ rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
881
+ rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
882
+ rb_define_method(c_file_system, "create_directory", HDFS_File_System_create_directory, 1);
883
+ rb_define_method(c_file_system, "list_directory", HDFS_File_System_list_directory, 1);
884
+ rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
885
+ rb_define_method(c_file_system, "set_replication", HDFS_File_System_set_replication, 2);
886
+ rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
887
+ rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
888
+ rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
889
+ rb_define_method(c_file_system, "chmod", HDFS_File_System_chmod, 2);
890
+ rb_define_method(c_file_system, "chown", HDFS_File_System_chown, 2);
891
+ rb_define_method(c_file_system, "copy", HDFS_File_System_copy, 3);
892
+ rb_define_method(c_file_system, "capacity", HDFS_File_System_capacity, 0);
893
+ rb_define_method(c_file_system, "default_block_size",
894
+ HDFS_File_System_default_block_size, 0);
895
+ rb_define_method(c_file_system, "default_block_size_at_path",
896
+ HDFS_File_System_default_block_size_at_path, 1);
897
+ rb_define_method(c_file_system, "move", HDFS_File_System_move, 3);
898
+ rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
899
+ rb_define_method(c_file_system, "utime", HDFS_File_System_utime, 3);
900
+
901
+ c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
902
+ rb_define_method(c_file, "read", HDFS_File_read, 1);
903
+ rb_define_method(c_file, "write", HDFS_File_write, 1);
904
+ rb_define_method(c_file, "<<", HDFS_File_write, 1);
905
+ rb_define_method(c_file, "seek", HDFS_File_seek, 1);
906
+ rb_define_method(c_file, "tell", HDFS_File_tell, 0);
907
+ rb_define_method(c_file, "flush", HDFS_File_flush, 0);
908
+ rb_define_method(c_file, "available", HDFS_File_available, 0);
909
+ rb_define_method(c_file, "close", HDFS_File_close, 0);
910
+
911
+ c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
912
+ rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
913
+ rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
914
+ rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory, 0);
915
+ rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
916
+ rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
917
+ rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified, 0);
918
+ rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
919
+ rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
920
+ rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
921
+ rb_define_method(c_file_info, "replication", HDFS_File_Info_replication, 0);
922
+ rb_define_method(c_file_info, "size", HDFS_File_Info_size, 0);
923
+ rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
924
+
925
+ c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
926
+ rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file, 0);
927
+
928
+ c_file_info_directory = rb_define_class_under(c_file_info, "Directory", c_file_info);
929
+ rb_define_method(c_file_info_directory, "is_directory?", HDFS_File_Info_Directory_is_directory, 0);
930
+
931
+ e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
932
+ e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
933
+ e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
934
+ e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError", e_file_error);
935
+ }
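
Because DoesNotExistError and CouldNotOpenFileError both descend from FileError, and FileError from DFSException, callers can rescue at whichever granularity they need. A closing sketch (paths illustrative):

    begin
      dfs.stat('/no/such/path')
    rescue Hadoop::DFS::DoesNotExistError => e
      warn "missing: #{e.message}"
    rescue Hadoop::DFS::DFSException => e
      warn "HDFS problem: #{e.message}"
    end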