ruby-hdfs-cdh4 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/hdfs/hdfs.c +149 -41
- metadata +3 -3
data/ext/hdfs/hdfs.c
CHANGED
@@ -19,13 +19,13 @@ static VALUE e_file_error;
|
|
19
19
|
static VALUE e_could_not_open;
|
20
20
|
static VALUE e_does_not_exist;
|
21
21
|
|
22
|
-
static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
|
23
22
|
static const int16_t HDFS_DEFAULT_REPLICATION = 3;
|
24
23
|
static const short HDFS_DEFAULT_MODE = 0644;
|
25
24
|
static const char* HDFS_DEFAULT_HOST = "0.0.0.0";
|
26
25
|
static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
|
27
26
|
static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
|
28
27
|
static const int HDFS_DEFAULT_PORT = 8020;
|
28
|
+
static const char* HDFS_DEFAULT_USER = NULL;
|
29
29
|
|
30
30
|
/*
|
31
31
|
* Data structs
|
@@ -109,6 +109,12 @@ int octal_decimal(int n) {
|
|
109
109
|
return decimal;
|
110
110
|
}
|
111
111
|
|
112
|
+
/*
 * Guard for operations that need a live file handle: returns quietly when
 * data->file is set, and raises the file error exception with the message
 * "File is closed" when it is NULL.
 */
void ensure_file_open(FileData* data) {
  if (data->file != NULL) {
    return;
  }
  rb_raise(e_file_error, "File is closed");
}
|
117
|
+
|
112
118
|
/*
|
113
119
|
* Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
|
114
120
|
* object.
|
@@ -157,14 +163,17 @@ VALUE HDFS_File_System_alloc(VALUE klass) {
|
|
157
163
|
* call-seq:
|
158
164
|
* hdfs.new(options={}) -> hdfs
|
159
165
|
*
|
160
|
-
* Creates a new HDFS client connection, returning a new
|
166
|
+
* Creates a new HDFS client connection, configured by options, returning a new
|
161
167
|
* Hadoop::DFS::FileSystem object if successful. If this fails, raises a
|
162
168
|
* ConnectError.
|
163
169
|
*
|
164
|
-
*
|
165
|
-
*
|
166
|
-
*
|
167
|
-
*
|
170
|
+
* options can have the following keys:
|
171
|
+
*
|
172
|
+
* * *local*: whether to use the local filesystem instead of HDFS
|
173
|
+
* (default: false)
|
174
|
+
* * *host*: hostname or IP address of a Hadoop NameNode (default: '0.0.0.0')
|
175
|
+
* * *port*: port through which to connect to Hadoop NameNode (default: 8020)
|
176
|
+
* * *user*: user to connect to filesystem as (default: current user)
|
168
177
|
*/
|
169
178
|
VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
|
170
179
|
VALUE options;
|
@@ -177,26 +186,23 @@ VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
|
|
177
186
|
options = rb_hash_new();
|
178
187
|
}
|
179
188
|
|
189
|
+
VALUE r_user = rb_hash_aref(options, rb_eval_string(":user"));
|
190
|
+
char* hdfs_user = RTEST(r_user) ? RSTRING_PTR(r_user) :
|
191
|
+
(char*) HDFS_DEFAULT_USER;
|
192
|
+
|
180
193
|
VALUE r_local = rb_hash_aref(options, rb_eval_string(":local"));
|
181
194
|
if (r_local == Qtrue) {
|
182
|
-
data->fs =
|
195
|
+
data->fs = hdfsConnectAsUser(NULL, 0, hdfs_user);
|
183
196
|
} else {
|
184
197
|
VALUE r_host = rb_hash_aref(options, rb_eval_string(":host"));
|
185
198
|
VALUE r_port = rb_hash_aref(options, rb_eval_string(":port"));
|
186
199
|
|
187
200
|
// Sets default values for host and port if not supplied by user.
|
188
|
-
char* hdfs_host = (
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
}
|
194
|
-
|
195
|
-
if (RTEST(r_port)) {
|
196
|
-
hdfs_port = NUM2INT(r_port);
|
197
|
-
}
|
198
|
-
|
199
|
-
data->fs = hdfsConnect(hdfs_host, hdfs_port);
|
201
|
+
char* hdfs_host = RTEST(r_host) ? RSTRING_PTR(r_host) :
|
202
|
+
(char*) HDFS_DEFAULT_HOST;
|
203
|
+
int hdfs_port = RTEST(r_port) ? NUM2INT(r_port) :
|
204
|
+
HDFS_DEFAULT_PORT;
|
205
|
+
data->fs = hdfsConnectAsUser(hdfs_host, hdfs_port, hdfs_user);
|
200
206
|
}
|
201
207
|
|
202
208
|
if (data->fs == NULL) {
|
@@ -424,7 +430,7 @@ VALUE HDFS_File_System_cwd(VALUE self) {
|
|
424
430
|
VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
|
425
431
|
FSData* data = NULL;
|
426
432
|
Data_Get_Struct(self, FSData, data);
|
427
|
-
if (
|
433
|
+
if (hdfsChown(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
|
428
434
|
rb_raise(e_dfs_exception, "Failed to chgrp path: %s to group: %s",
|
429
435
|
RSTRING_PTR(path), RSTRING_PTR(group));
|
430
436
|
return Qnil;
|
@@ -538,7 +544,8 @@ VALUE HDFS_File_System_move(int argc, VALUE* argv, VALUE self) {
|
|
538
544
|
}
|
539
545
|
if (hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
|
540
546
|
RSTRING_PTR(to_path)) < 0) {
|
541
|
-
rb_raise(e_dfs_exception, "Error while
|
547
|
+
rb_raise(e_dfs_exception, "Error while moving path: %s to path: %s",
|
548
|
+
RSTRING_PTR(from_path), RSTRING_PTR(to_path));
|
542
549
|
return Qnil;
|
543
550
|
}
|
544
551
|
return Qtrue;
|
@@ -599,6 +606,39 @@ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
|
|
599
606
|
return LONG2NUM(block_size);
|
600
607
|
}
|
601
608
|
|
609
|
+
/**
|
610
|
+
* call-seq:
|
611
|
+
* hdfs.get_hosts(path, start, length) -> retval
|
612
|
+
*
|
613
|
+
* Returns the hostnames of the DataNodes which serve the portion of the file
|
614
|
+
* between the provided start and length bytes. Raises a DFSException if this
|
615
|
+
* fails.
|
616
|
+
*/
|
617
|
+
VALUE HDFS_File_System_get_hosts(VALUE self, VALUE path, VALUE start,
|
618
|
+
VALUE length) {
|
619
|
+
FSData* data = NULL;
|
620
|
+
Data_Get_Struct(self, FSData, data);
|
621
|
+
char*** hosts = hdfsGetHosts(data->fs, RSTRING_PTR(path), NUM2LONG(start),
|
622
|
+
NUM2LONG(length));
|
623
|
+
if (hosts == NULL) {
|
624
|
+
rb_raise(e_dfs_exception,
|
625
|
+
"Error while retrieving hosts at path: %s, start: %lu, length: %lu",
|
626
|
+
RSTRING_PTR(path), NUM2LONG(start), NUM2LONG(length));
|
627
|
+
return Qnil;
|
628
|
+
}
|
629
|
+
// Builds a Ruby Array object out of the hosts reported by HDFS.
|
630
|
+
VALUE hosts_array = rb_ary_new();
|
631
|
+
size_t i, j;
|
632
|
+
for (i = 0; hosts[i]; i++) {
|
633
|
+
VALUE cur_block_hosts = rb_ary_new();
|
634
|
+
for (j = 0; hosts[i][j]; j++) {
|
635
|
+
rb_ary_push(cur_block_hosts, rb_str_new2(hosts[i][j]));
|
636
|
+
}
|
637
|
+
rb_ary_push(hosts_array, cur_block_hosts);
|
638
|
+
}
|
639
|
+
return hosts_array;
|
640
|
+
}
|
641
|
+
|
602
642
|
/**
|
603
643
|
* call-seq:
|
604
644
|
* hdfs.used -> retval
|
@@ -655,8 +695,18 @@ VALUE HDFS_File_System_utime(int argc, VALUE* argv, VALUE self) {
|
|
655
695
|
* call-seq:
|
656
696
|
* hdfs.open(path, mode='r', options={}) -> file
|
657
697
|
*
|
658
|
-
* Opens a file. If the file cannot be
|
659
|
-
* otherwise, returns a Hadoop::DFS::File
|
698
|
+
* Opens a file using the supplied mode and options. If the file cannot be
|
699
|
+
* opened, raises a CouldNotOpenError; otherwise, returns a Hadoop::DFS::File
|
700
|
+
* object corresponding to the file.
|
701
|
+
*
|
702
|
+
* options can have the following keys:
|
703
|
+
*
|
704
|
+
* * *buffer_size*: size in bytes of buffer to use for file accesses
|
705
|
+
* (default: default buffer size as configured by HDFS)
|
706
|
+
* * *replication*: the number of nodes this file should be replicated against
|
707
|
+
* (default: default replication as configured by HDFS)
|
708
|
+
* * *block_size*: the HDFS block size in bytes to use for this file
|
709
|
+
* (default: default block size as configured by HDFS)
|
660
710
|
*/
|
661
711
|
VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
|
662
712
|
VALUE path, mode, options;
|
@@ -684,7 +734,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
|
|
684
734
|
hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
|
685
735
|
RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
|
686
736
|
RTEST(r_replication) ? NUM2INT(r_replication) : 0,
|
687
|
-
RTEST(r_block_size) ? NUM2INT(r_block_size) :
|
737
|
+
RTEST(r_block_size) ? NUM2INT(r_block_size) : 0);
|
688
738
|
if (file == NULL) {
|
689
739
|
rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
|
690
740
|
return Qnil;
|
@@ -711,6 +761,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
|
|
711
761
|
VALUE HDFS_File_read(VALUE self, VALUE length) {
|
712
762
|
FileData* data = NULL;
|
713
763
|
Data_Get_Struct(self, FileData, data);
|
764
|
+
ensure_file_open(data);
|
714
765
|
char* buffer = ALLOC_N(char, length);
|
715
766
|
MEMZERO(buffer, char, length);
|
716
767
|
tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
|
@@ -730,6 +781,7 @@ VALUE HDFS_File_read(VALUE self, VALUE length) {
|
|
730
781
|
VALUE HDFS_File_write(VALUE self, VALUE bytes) {
|
731
782
|
FileData* data = NULL;
|
732
783
|
Data_Get_Struct(self, FileData, data);
|
784
|
+
ensure_file_open(data);
|
733
785
|
tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
|
734
786
|
if (bytes_written == -1) {
|
735
787
|
rb_raise(e_file_error, "Failed to write data");
|
@@ -747,6 +799,7 @@ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
|
|
747
799
|
VALUE HDFS_File_tell(VALUE self) {
|
748
800
|
FileData* data = NULL;
|
749
801
|
Data_Get_Struct(self, FileData, data);
|
802
|
+
ensure_file_open(data);
|
750
803
|
tSize offset = hdfsTell(data->fs, data->file);
|
751
804
|
if (offset == -1) {
|
752
805
|
rb_raise(e_file_error, "Failed to read position");
|
@@ -764,6 +817,7 @@ VALUE HDFS_File_tell(VALUE self) {
|
|
764
817
|
VALUE HDFS_File_seek(VALUE self, VALUE offset) {
|
765
818
|
FileData* data = NULL;
|
766
819
|
Data_Get_Struct(self, FileData, data);
|
820
|
+
ensure_file_open(data);
|
767
821
|
int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
|
768
822
|
if (result != 0) {
|
769
823
|
rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
|
@@ -782,6 +836,7 @@ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
|
|
782
836
|
VALUE HDFS_File_flush(VALUE self) {
|
783
837
|
FileData* data = NULL;
|
784
838
|
Data_Get_Struct(self, FileData, data);
|
839
|
+
ensure_file_open(data);
|
785
840
|
int result = hdfsFlush(data->fs, data->file);
|
786
841
|
if (result != 0) {
|
787
842
|
rb_raise(e_file_error, "Flush failed");
|
@@ -799,6 +854,7 @@ VALUE HDFS_File_flush(VALUE self) {
|
|
799
854
|
VALUE HDFS_File_available(VALUE self) {
|
800
855
|
FileData* data = NULL;
|
801
856
|
Data_Get_Struct(self, FileData, data);
|
857
|
+
ensure_file_open(data);
|
802
858
|
int result = hdfsAvailable(data->fs, data->file);
|
803
859
|
if (result == -1) {
|
804
860
|
rb_raise(e_file_error, "Failed to get available data");
|
@@ -816,12 +872,47 @@ VALUE HDFS_File_close(VALUE self) {
|
|
816
872
|
FileData* data = NULL;
|
817
873
|
Data_Get_Struct(self, FileData, data);
|
818
874
|
if (data->file != NULL) {
|
819
|
-
hdfsCloseFile(data->fs, data->file)
|
875
|
+
if (hdfsCloseFile(data->fs, data->file) < 0) {
|
876
|
+
rb_raise(e_file_error, "Could not close file");
|
877
|
+
return Qnil;
|
878
|
+
}
|
820
879
|
data->file = NULL;
|
821
880
|
}
|
822
881
|
return Qtrue;
|
823
882
|
}
|
824
883
|
|
884
|
+
/**
|
885
|
+
* call-seq:
|
886
|
+
* file.read_open -> open_for_read
|
887
|
+
*
|
888
|
+
* Returns True if this file is open for reading; otherwise returns False.
|
889
|
+
*/
|
890
|
+
VALUE HDFS_File_read_open(VALUE self) {
|
891
|
+
FileData* data = NULL;
|
892
|
+
Data_Get_Struct(self, FileData, data);
|
893
|
+
if (data->file) {
|
894
|
+
return hdfsFileIsOpenForRead(data->file) ? Qtrue : Qfalse;
|
895
|
+
} else {
|
896
|
+
return Qfalse;
|
897
|
+
}
|
898
|
+
}
|
899
|
+
|
900
|
+
/**
|
901
|
+
* call-seq:
|
902
|
+
* file.write_open -> open_for_write
|
903
|
+
*
|
904
|
+
* Returns True if this file is open for writing; otherwise returns False.
|
905
|
+
*/
|
906
|
+
VALUE HDFS_File_write_open(VALUE self) {
|
907
|
+
FileData* data = NULL;
|
908
|
+
Data_Get_Struct(self, FileData, data);
|
909
|
+
if (data->file) {
|
910
|
+
return hdfsFileIsOpenForWrite(data->file) ? Qtrue : Qfalse;
|
911
|
+
} else {
|
912
|
+
return Qfalse;
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
825
916
|
/**
|
826
917
|
* HDFS File Info interface
|
827
918
|
*/
|
@@ -1004,16 +1095,21 @@ void Init_hdfs() {
|
|
1004
1095
|
|
1005
1096
|
c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
|
1006
1097
|
rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
|
1007
|
-
rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
|
1008
|
-
|
1098
|
+
rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
|
1099
|
+
-1);
|
1100
|
+
rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect,
|
1101
|
+
0);
|
1009
1102
|
rb_define_method(c_file_system, "open", HDFS_File_System_open, -1);
|
1010
1103
|
rb_define_method(c_file_system, "delete", HDFS_File_System_delete, -1);
|
1011
1104
|
rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
|
1012
1105
|
rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
|
1013
|
-
rb_define_method(c_file_system, "create_directory",
|
1014
|
-
|
1106
|
+
rb_define_method(c_file_system, "create_directory",
|
1107
|
+
HDFS_File_System_create_directory, 1);
|
1108
|
+
rb_define_method(c_file_system, "list_directory",
|
1109
|
+
HDFS_File_System_list_directory, 1);
|
1015
1110
|
rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
|
1016
|
-
rb_define_method(c_file_system, "set_replication",
|
1111
|
+
rb_define_method(c_file_system, "set_replication",
|
1112
|
+
HDFS_File_System_set_replication, -1);
|
1017
1113
|
rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
|
1018
1114
|
rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
|
1019
1115
|
rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
|
@@ -1025,6 +1121,7 @@ void Init_hdfs() {
|
|
1025
1121
|
HDFS_File_System_default_block_size, 0);
|
1026
1122
|
rb_define_method(c_file_system, "default_block_size_at_path",
|
1027
1123
|
HDFS_File_System_default_block_size_at_path, 1);
|
1124
|
+
rb_define_method(c_file_system, "get_hosts", HDFS_File_System_get_hosts, 3);
|
1028
1125
|
rb_define_method(c_file_system, "move", HDFS_File_System_move, -1);
|
1029
1126
|
rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
|
1030
1127
|
rb_define_method(c_file_system, "utime", HDFS_File_System_utime, -1);
|
@@ -1038,14 +1135,18 @@ void Init_hdfs() {
|
|
1038
1135
|
rb_define_method(c_file, "flush", HDFS_File_flush, 0);
|
1039
1136
|
rb_define_method(c_file, "available", HDFS_File_available, 0);
|
1040
1137
|
rb_define_method(c_file, "close", HDFS_File_close, 0);
|
1138
|
+
rb_define_method(c_file, "read_open?", HDFS_File_read_open, 0);
|
1139
|
+
rb_define_method(c_file, "write_open?", HDFS_File_write_open, 0);
|
1041
1140
|
|
1042
1141
|
c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
|
1043
1142
|
rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
|
1044
1143
|
rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
|
1045
|
-
rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
|
1144
|
+
rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
|
1145
|
+
0);
|
1046
1146
|
rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
|
1047
1147
|
rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
|
1048
|
-
rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
|
1148
|
+
rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
|
1149
|
+
0);
|
1049
1150
|
rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
|
1050
1151
|
rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
|
1051
1152
|
rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
|
@@ -1054,14 +1155,21 @@ void Init_hdfs() {
|
|
1054
1155
|
rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
|
1055
1156
|
|
1056
1157
|
c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
|
1057
|
-
rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1158
|
+
rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
|
1159
|
+
0);
|
1160
|
+
|
1161
|
+
c_file_info_directory = rb_define_class_under(c_file_info, "Directory",
|
1162
|
+
c_file_info);
|
1163
|
+
rb_define_method(c_file_info_directory, "is_directory?",
|
1164
|
+
HDFS_File_Info_Directory_is_directory, 0);
|
1165
|
+
|
1166
|
+
e_dfs_exception = rb_define_class_under(m_dfs, "DFSException",
|
1167
|
+
rb_eStandardError);
|
1168
|
+
e_connect_error = rb_define_class_under(m_dfs, "ConnectError",
|
1169
|
+
e_dfs_exception);
|
1064
1170
|
e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
|
1065
|
-
e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
|
1066
|
-
|
1171
|
+
e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
|
1172
|
+
e_file_error);
|
1173
|
+
e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError",
|
1174
|
+
e_file_error);
|
1067
1175
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-hdfs-cdh4
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2013-05-
|
14
|
+
date: 2013-05-25 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: ruby hadoop libhdfs client with support for cdh4
|
17
17
|
email:
|
@@ -48,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
48
|
version: 1.8.10
|
49
49
|
requirements: []
|
50
50
|
rubyforge_project:
|
51
|
-
rubygems_version: 1.8.
|
51
|
+
rubygems_version: 1.8.24
|
52
52
|
signing_key:
|
53
53
|
specification_version: 3
|
54
54
|
summary: ruby hadoop libhdfs client with support for cdh4
|