ruby-hdfs-cdh4 0.0.4 → 0.0.5
- data/ext/hdfs/hdfs.c +149 -41
- metadata +3 -3
data/ext/hdfs/hdfs.c
CHANGED
@@ -19,13 +19,13 @@ static VALUE e_file_error;
 static VALUE e_could_not_open;
 static VALUE e_does_not_exist;
 
-static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
 static const int16_t HDFS_DEFAULT_REPLICATION = 3;
 static const short HDFS_DEFAULT_MODE = 0644;
 static const char* HDFS_DEFAULT_HOST = "0.0.0.0";
 static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
 static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
 static const int HDFS_DEFAULT_PORT = 8020;
+static const char* HDFS_DEFAULT_USER = NULL;
 
 /*
  * Data structs
@@ -109,6 +109,12 @@ int octal_decimal(int n) {
   return decimal;
 }
 
+void ensure_file_open(FileData* data) {
+  if (data->file == NULL) {
+    rb_raise(e_file_error, "File is closed");
+  }
+}
+
 /*
  * Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
  * object.
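
With this guard in place, every Hadoop::DFS::File operation that follows (read, write, tell, seek, flush, available — see the hunks below) raises Hadoop::DFS::FileError instead of dereferencing a NULL handle once the file has been closed. A minimal sketch of the resulting behavior, assuming the extension loads via require 'hdfs' and using placeholder host and path names:

    require 'hdfs'  # assumed require name for this extension

    dfs  = Hadoop::DFS::FileSystem.new(host: 'namenode.example.com')
    file = dfs.open('/tmp/example.log', 'r')
    file.close

    begin
      file.read(1024)            # now guarded by ensure_file_open
    rescue Hadoop::DFS::FileError => e
      puts e.message             # => "File is closed"
    end
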
@@ -157,14 +163,17 @@ VALUE HDFS_File_System_alloc(VALUE klass) {
  * call-seq:
  *    hdfs.new(options={}) -> hdfs
  *
- * Creates a new HDFS client connection, returning a new
+ * Creates a new HDFS client connection, configured by options, returning a new
  * Hadoop::DFS::FileSystem object if successful.  If this fails, raises a
  * ConnectError.
  *
- *
- *
- *
- *
+ * options can have the following keys:
+ *
+ * * *local*: whether to use the local filesystem instead of HDFS
+ *   (default: false)
+ * * *host*: hostname or IP address of a Hadoop NameNode (default: '0.0.0.0')
+ * * *port*: port through which to connect to Hadoop NameNode (default: 8020)
+ * * *user*: user to connect to filesystem as (default: current user)
  */
 VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
   VALUE options;
@@ -177,26 +186,23 @@ VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
     options = rb_hash_new();
   }
 
+  VALUE r_user = rb_hash_aref(options, rb_eval_string(":user"));
+  char* hdfs_user = RTEST(r_user) ? RSTRING_PTR(r_user) :
+      (char*) HDFS_DEFAULT_USER;
+
   VALUE r_local = rb_hash_aref(options, rb_eval_string(":local"));
   if (r_local == Qtrue) {
-    data->fs =
+    data->fs = hdfsConnectAsUser(NULL, 0, hdfs_user);
   } else {
     VALUE r_host = rb_hash_aref(options, rb_eval_string(":host"));
     VALUE r_port = rb_hash_aref(options, rb_eval_string(":port"));
 
     // Sets default values for host and port if not supplied by user.
-    char* hdfs_host = (
-
-
-
-
-    }
-
-    if (RTEST(r_port)) {
-      hdfs_port = NUM2INT(r_port);
-    }
-
-    data->fs = hdfsConnect(hdfs_host, hdfs_port);
+    char* hdfs_host = RTEST(r_host) ? RSTRING_PTR(r_host) :
+        (char*) HDFS_DEFAULT_HOST;
+    int hdfs_port = RTEST(r_port) ? NUM2INT(r_port) :
+        HDFS_DEFAULT_PORT;
+    data->fs = hdfsConnectAsUser(hdfs_host, hdfs_port, hdfs_user);
  }
 
   if (data->fs == NULL) {
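
Both branches now connect via hdfsConnectAsUser, so the new :user option applies to local and remote connections alike; the NULL default (HDFS_DEFAULT_USER) preserves the old connect-as-current-user behavior. A usage sketch with placeholder host and user names:

    # Remote connection as an explicit user; omitted keys fall back to
    # the defaults documented above.
    dfs = Hadoop::DFS::FileSystem.new(host: '10.0.0.1', port: 8020,
                                      user: 'hdfs')

    # Local filesystem, still honoring :user.
    local_fs = Hadoop::DFS::FileSystem.new(local: true, user: 'hdfs')
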
@@ -424,7 +430,7 @@ VALUE HDFS_File_System_cwd(VALUE self) {
 VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
   FSData* data = NULL;
   Data_Get_Struct(self, FSData, data);
-  if (
+  if (hdfsChown(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
     rb_raise(e_dfs_exception, "Failed to chgrp path: %s to group: %s",
         RSTRING_PTR(path), RSTRING_PTR(group));
     return Qnil;
@@ -538,7 +544,8 @@ VALUE HDFS_File_System_move(int argc, VALUE* argv, VALUE self) {
   }
   if (hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
       RSTRING_PTR(to_path)) < 0) {
-    rb_raise(e_dfs_exception, "Error while
+    rb_raise(e_dfs_exception, "Error while moving path: %s to path: %s",
+        RSTRING_PTR(from_path), RSTRING_PTR(to_path));
     return Qnil;
   }
   return Qtrue;
@@ -599,6 +606,39 @@ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
   return LONG2NUM(block_size);
 }
 
+/**
+ * call-seq:
+ *    hdfs.get_hosts(path, start, length) -> retval
+ *
+ * Returns the hostnames of the DataNodes which serve the portion of the file
+ * between the provided start and length bytes.  Raises a DFSException if this
+ * fails.
+ */
+VALUE HDFS_File_System_get_hosts(VALUE self, VALUE path, VALUE start,
+    VALUE length) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  char*** hosts = hdfsGetHosts(data->fs, RSTRING_PTR(path), NUM2LONG(start),
+      NUM2LONG(length));
+  if (hosts == NULL) {
+    rb_raise(e_dfs_exception,
+        "Error while retrieving hosts at path: %s, start: %lu, length: %lu",
+        RSTRING_PTR(path), NUM2LONG(start), NUM2LONG(length));
+    return Qnil;
+  }
+  // Builds a Ruby Array object out of the hosts reported by HDFS.
+  VALUE hosts_array = rb_ary_new();
+  size_t i, j;
+  for (i = 0; hosts[i]; i++) {
+    VALUE cur_block_hosts = rb_ary_new();
+    for (j = 0; hosts[i][j]; j++) {
+      rb_ary_push(cur_block_hosts, rb_str_new2(hosts[i][j]));
+    }
+    rb_ary_push(hosts_array, cur_block_hosts);
+  }
+  return hosts_array;
+}
+
 /**
  * call-seq:
  *    hdfs.used -> retval
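
hdfsGetHosts yields a NULL-terminated list of NULL-terminated host arrays, one per block in the requested byte range, which the loop above converts into a nested Ruby array of strings. A hypothetical call (path, sizes, and hostnames invented):

    # DataNodes serving the first 256 MB of a file, grouped by block.
    hosts = dfs.get_hosts('/data/events.log', 0, 268_435_456)
    # => [["dn1.example.com", "dn2.example.com", "dn3.example.com"],
    #     ["dn2.example.com", "dn4.example.com", "dn5.example.com"]]
    hosts.each_with_index do |block_hosts, i|
      puts "block #{i}: #{block_hosts.join(', ')}"
    end
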
@@ -655,8 +695,18 @@ VALUE HDFS_File_System_utime(int argc, VALUE* argv, VALUE self) {
  * call-seq:
  *    hdfs.open(path, mode='r', options={}) -> file
  *
- * Opens a file.  If the file cannot be
- * otherwise, returns a Hadoop::DFS::File
+ * Opens a file using the supplied mode and options.  If the file cannot be
+ * opened, raises a CouldNotOpenError; otherwise, returns a Hadoop::DFS::File
+ * object corresponding to the file.
+ *
+ * options can have the following keys:
+ *
+ * * *buffer_size*: size in bytes of buffer to use for file accesses
+ *   (default: default buffer size as configured by HDFS)
+ * * *replication*: the number of nodes this file should be replicated against
+ *   (default: default replication as configured by HDFS)
+ * * *block_size*: the HDFS block size in bytes to use for this file
+ *   (default: default block size as configured by HDFS)
  */
 VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   VALUE path, mode, options;
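
Each option maps one-to-one onto an hdfsOpenFile argument, and (as the next hunk shows) an omitted key is passed to libhdfs as 0, which selects the configured default. A sketch with illustrative values only:

    out = dfs.open('/tmp/out.bin', 'w',
                   buffer_size: 4096,          # bytes
                   replication: 3,             # copies
                   block_size:  134_217_728)   # 128 MB
    out.write('hello')
    out.flush
    out.close
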
@@ -684,7 +734,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
       RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
       RTEST(r_replication) ? NUM2INT(r_replication) : 0,
-      RTEST(r_block_size) ? NUM2INT(r_block_size) :
+      RTEST(r_block_size) ? NUM2INT(r_block_size) : 0);
   if (file == NULL) {
     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
     return Qnil;
@@ -711,6 +761,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
 VALUE HDFS_File_read(VALUE self, VALUE length) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   char* buffer = ALLOC_N(char, length);
   MEMZERO(buffer, char, length);
   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
@@ -730,6 +781,7 @@ VALUE HDFS_File_read(VALUE self, VALUE length) {
 VALUE HDFS_File_write(VALUE self, VALUE bytes) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
   if (bytes_written == -1) {
     rb_raise(e_file_error, "Failed to write data");
@@ -747,6 +799,7 @@ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
 VALUE HDFS_File_tell(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize offset = hdfsTell(data->fs, data->file);
   if (offset == -1) {
     rb_raise(e_file_error, "Failed to read position");
@@ -764,6 +817,7 @@ VALUE HDFS_File_tell(VALUE self) {
 VALUE HDFS_File_seek(VALUE self, VALUE offset) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
   if (result != 0) {
     rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
@@ -782,6 +836,7 @@ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
 VALUE HDFS_File_flush(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsFlush(data->fs, data->file);
   if (result != 0) {
     rb_raise(e_file_error, "Flush failed");
@@ -799,6 +854,7 @@ VALUE HDFS_File_flush(VALUE self) {
 VALUE HDFS_File_available(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsAvailable(data->fs, data->file);
   if (result == -1) {
     rb_raise(e_file_error, "Failed to get available data");
@@ -816,12 +872,47 @@ VALUE HDFS_File_close(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
   if (data->file != NULL) {
-    hdfsCloseFile(data->fs, data->file)
+    if (hdfsCloseFile(data->fs, data->file) < 0) {
+      rb_raise(e_file_error, "Could not close file");
+      return Qnil;
+    }
     data->file = NULL;
   }
   return Qtrue;
 }
 
+/**
+ * call-seq:
+ *    file.read_open -> open_for_read
+ *
+ * Returns True if this file is open for reading; otherwise returns False.
+ */
+VALUE HDFS_File_read_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForRead(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
+/**
+ * call-seq:
+ *    file.write_open -> open_for_write
+ *
+ * Returns True if this file is open for writing; otherwise returns False.
+ */
+VALUE HDFS_File_write_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForWrite(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
 /**
  * HDFS File Info interface
  */
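
These predicates are registered in Ruby as read_open? and write_open? (see the Init_hdfs hunk below); the bare read_open/write_open names in the call-seq lines are only RDoc return-value labels. A hypothetical session against a placeholder path:

    file = dfs.open('/tmp/example.log', 'r')
    file.read_open?    # => true
    file.write_open?   # => false
    file.close
    file.read_open?    # => false (a closed handle short-circuits to false)
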
@@ -1004,16 +1095,21 @@ void Init_hdfs() {
 
   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
-  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
-
+  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
+      -1);
+  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect,
+      0);
   rb_define_method(c_file_system, "open", HDFS_File_System_open, -1);
   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, -1);
   rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
-  rb_define_method(c_file_system, "create_directory",
-
+  rb_define_method(c_file_system, "create_directory",
+      HDFS_File_System_create_directory, 1);
+  rb_define_method(c_file_system, "list_directory",
+      HDFS_File_System_list_directory, 1);
   rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
-  rb_define_method(c_file_system, "set_replication",
+  rb_define_method(c_file_system, "set_replication",
+      HDFS_File_System_set_replication, -1);
   rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
   rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
   rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
@@ -1025,6 +1121,7 @@ void Init_hdfs() {
       HDFS_File_System_default_block_size, 0);
   rb_define_method(c_file_system, "default_block_size_at_path",
       HDFS_File_System_default_block_size_at_path, 1);
+  rb_define_method(c_file_system, "get_hosts", HDFS_File_System_get_hosts, 3);
   rb_define_method(c_file_system, "move", HDFS_File_System_move, -1);
   rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
   rb_define_method(c_file_system, "utime", HDFS_File_System_utime, -1);
@@ -1038,14 +1135,18 @@ void Init_hdfs() {
   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
   rb_define_method(c_file, "available", HDFS_File_available, 0);
   rb_define_method(c_file, "close", HDFS_File_close, 0);
+  rb_define_method(c_file, "read_open?", HDFS_File_read_open, 0);
+  rb_define_method(c_file, "write_open?", HDFS_File_write_open, 0);
 
   c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
   rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
   rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
-  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
+  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
+      0);
   rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
   rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
-  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
+  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
+      0);
   rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
   rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
   rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
@@ -1054,14 +1155,21 @@ void Init_hdfs() {
   rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
 
   c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
-  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
-
-
-
-
-
-
+  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
+      0);
+
+  c_file_info_directory = rb_define_class_under(c_file_info, "Directory",
+      c_file_info);
+  rb_define_method(c_file_info_directory, "is_directory?",
+      HDFS_File_Info_Directory_is_directory, 0);
+
+  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException",
+      rb_eStandardError);
+  e_connect_error = rb_define_class_under(m_dfs, "ConnectError",
+      e_dfs_exception);
   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
-  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
-
+  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
+      e_file_error);
+  e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError",
+      e_file_error);
 }
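
The registrations above complete the exception hierarchy: DFSException < StandardError, with ConnectError and FileError beneath it, and CouldNotOpenFileError and DoesNotExistError beneath FileError. A single rescue of the base class therefore catches every error the gem raises; a sketch with a placeholder path:

    begin
      dfs.open('/no/such/file', 'r')
    rescue Hadoop::DFS::CouldNotOpenFileError => e
      warn "open failed: #{e.message}"
    rescue Hadoop::DFS::DFSException => e
      warn "other DFS error: #{e.message}"
    end
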
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-hdfs-cdh4
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.5
 prerelease:
 platform: ruby
 authors:
@@ -11,7 +11,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-
+date: 2013-05-25 00:00:00.000000000 Z
 dependencies: []
 description: ruby hadoop libhdfs client with support for cdh4
 email:
@@ -48,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: 1.8.10
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.
+rubygems_version: 1.8.24
 specification_version: 3
 summary: ruby hadoop libhdfs client with support for cdh4