ruby-hdfs-cdh4 0.0.4 → 0.0.5

Files changed (2)
  1. data/ext/hdfs/hdfs.c +149 -41
  2. metadata +3 -3
data/ext/hdfs/hdfs.c CHANGED
@@ -19,13 +19,13 @@ static VALUE e_file_error;
 static VALUE e_could_not_open;
 static VALUE e_does_not_exist;
 
-static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
 static const int16_t HDFS_DEFAULT_REPLICATION = 3;
 static const short HDFS_DEFAULT_MODE = 0644;
 static const char* HDFS_DEFAULT_HOST = "0.0.0.0";
 static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
 static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
 static const int HDFS_DEFAULT_PORT = 8020;
+static const char* HDFS_DEFAULT_USER = NULL;
 
 /*
  * Data structs
@@ -109,6 +109,12 @@ int octal_decimal(int n) {
   return decimal;
 }
 
+void ensure_file_open(FileData* data) {
+  if (data->file == NULL) {
+    rb_raise(e_file_error, "File is closed");
+  }
+}
+
 /*
  * Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
  * object.
@@ -157,14 +163,17 @@ VALUE HDFS_File_System_alloc(VALUE klass) {
  * call-seq:
  *    hdfs.new(options={}) -> hdfs
  *
- * Creates a new HDFS client connection, returning a new
+ * Creates a new HDFS client connection, configured by options, returning a new
  * Hadoop::DFS::FileSystem object if successful. If this fails, raises a
  * ConnectError.
  *
- * Options:
- *   :local
- *   :host
- *   :port
+ * options can have the following keys:
+ *
+ * * *local*: whether to use the local filesystem instead of HDFS
+ *   (default: false)
+ * * *host*: hostname or IP address of a Hadoop NameNode (default: '0.0.0.0')
+ * * *port*: port through which to connect to Hadoop NameNode (default: 8020)
+ * * *user*: user to connect to filesystem as (default: current user)
  */
 VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
   VALUE options;
@@ -177,26 +186,23 @@ VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
     options = rb_hash_new();
   }
 
+  VALUE r_user = rb_hash_aref(options, rb_eval_string(":user"));
+  char* hdfs_user = RTEST(r_user) ? RSTRING_PTR(r_user) :
+      (char*) HDFS_DEFAULT_USER;
+
   VALUE r_local = rb_hash_aref(options, rb_eval_string(":local"));
   if (r_local == Qtrue) {
-    data->fs = hdfsConnect(NULL, 0);
+    data->fs = hdfsConnectAsUser(NULL, 0, hdfs_user);
   } else {
     VALUE r_host = rb_hash_aref(options, rb_eval_string(":host"));
     VALUE r_port = rb_hash_aref(options, rb_eval_string(":port"));
 
     // Sets default values for host and port if not supplied by user.
-    char* hdfs_host = (char*) HDFS_DEFAULT_HOST;
-    int hdfs_port = HDFS_DEFAULT_PORT;
-
-    if (RTEST(r_host)) {
-      hdfs_host = RSTRING_PTR(r_host);
-    }
-
-    if (RTEST(r_port)) {
-      hdfs_port = NUM2INT(r_port);
-    }
-
-    data->fs = hdfsConnect(hdfs_host, hdfs_port);
+    char* hdfs_host = RTEST(r_host) ? RSTRING_PTR(r_host) :
+        (char*) HDFS_DEFAULT_HOST;
+    int hdfs_port = RTEST(r_port) ? NUM2INT(r_port) :
+        HDFS_DEFAULT_PORT;
+    data->fs = hdfsConnectAsUser(hdfs_host, hdfs_port, hdfs_user);
   }
 
   if (data->fs == NULL) {
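
For context, a minimal libhdfs sketch (not part of this gem) of the call the new :user option feeds into; the host, port, and user name here are illustrative, and a NULL user falls back to the current process user, matching HDFS_DEFAULT_USER above:

    #include <hdfs.h>
    #include <stdio.h>

    int main(void) {
      // Connect to the NameNode as the "hdfs" user; hdfsConnectAsUser
      // replaces the plain hdfsConnect call used before this change.
      hdfsFS fs = hdfsConnectAsUser("0.0.0.0", 8020, "hdfs");
      if (fs == NULL) {
        fprintf(stderr, "could not connect\n");
        return 1;
      }
      hdfsDisconnect(fs);
      return 0;
    }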
@@ -424,7 +430,7 @@ VALUE HDFS_File_System_cwd(VALUE self) {
 VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
   FSData* data = NULL;
   Data_Get_Struct(self, FSData, data);
-  if (hdfsChgrp(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
+  if (hdfsChown(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
     rb_raise(e_dfs_exception, "Failed to chgrp path: %s to group: %s",
         RSTRING_PTR(path), RSTRING_PTR(group));
     return Qnil;
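
Worth noting on the hunk above: libhdfs's hdfsChown leaves the owner unchanged when passed a NULL owner, so delegating chgrp to it changes only the group. A minimal sketch, assuming fs is an established connection and the path and group are illustrative:

    // Group-only change: NULL owner means "keep the current owner".
    if (hdfsChown(fs, "/tmp/example", NULL, "hadoop") < 0) {
      fprintf(stderr, "failed to change group\n");
    }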
@@ -538,7 +544,8 @@ VALUE HDFS_File_System_move(int argc, VALUE* argv, VALUE self) {
   }
   if (hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
       RSTRING_PTR(to_path)) < 0) {
-    rb_raise(e_dfs_exception, "Error while retrieving capacity");
+    rb_raise(e_dfs_exception, "Error while moving path: %s to path: %s",
+        RSTRING_PTR(from_path), RSTRING_PTR(to_path));
     return Qnil;
   }
   return Qtrue;
@@ -599,6 +606,39 @@ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
   return LONG2NUM(block_size);
 }
 
+/**
+ * call-seq:
+ *    hdfs.get_hosts(path, start, length) -> retval
+ *
+ * Returns the hostnames of the DataNodes which serve the portion of the file
+ * between the provided start and length bytes. Raises a DFSException if this
+ * fails.
+ */
+VALUE HDFS_File_System_get_hosts(VALUE self, VALUE path, VALUE start,
+    VALUE length) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  char*** hosts = hdfsGetHosts(data->fs, RSTRING_PTR(path), NUM2LONG(start),
+      NUM2LONG(length));
+  if (hosts == NULL) {
+    rb_raise(e_dfs_exception,
+        "Error while retrieving hosts at path: %s, start: %lu, length: %lu",
+        RSTRING_PTR(path), NUM2LONG(start), NUM2LONG(length));
+    return Qnil;
+  }
+  // Builds a Ruby Array object out of the hosts reported by HDFS.
+  VALUE hosts_array = rb_ary_new();
+  size_t i, j;
+  for (i = 0; hosts[i]; i++) {
+    VALUE cur_block_hosts = rb_ary_new();
+    for (j = 0; hosts[i][j]; j++) {
+      rb_ary_push(cur_block_hosts, rb_str_new2(hosts[i][j]));
+    }
+    rb_ary_push(hosts_array, cur_block_hosts);
+  }
+  return hosts_array;
+}
+
 /**
  * call-seq:
  *    hdfs.used -> retval
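
A sketch of the underlying hdfsGetHosts call that the new get_hosts binding wraps, assuming fs is an established connection and the path and byte range are illustrative. libhdfs returns a NULL-terminated array of NULL-terminated host arrays, which callers are expected to release with hdfsFreeHosts:

    char*** hosts = hdfsGetHosts(fs, "/tmp/example", 0, 134217728);
    if (hosts != NULL) {
      int i, j;
      // One inner array per block, one entry per replica location.
      for (i = 0; hosts[i]; i++) {
        for (j = 0; hosts[i][j]; j++) {
          printf("block %d, replica %d: %s\n", i, j, hosts[i][j]);
        }
      }
      hdfsFreeHosts(hosts);
    }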
@@ -655,8 +695,18 @@ VALUE HDFS_File_System_utime(int argc, VALUE* argv, VALUE self) {
  * call-seq:
  *    hdfs.open(path, mode='r', options={}) -> file
  *
- * Opens a file. If the file cannot be opened, raises a CouldNotOpenError;
- * otherwise, returns a Hadoop::DFS::File object corresponding to the file.
+ * Opens a file using the supplied mode and options. If the file cannot be
+ * opened, raises a CouldNotOpenError; otherwise, returns a Hadoop::DFS::File
+ * object corresponding to the file.
+ *
+ * options can have the following keys:
+ *
+ * * *buffer_size*: size in bytes of buffer to use for file accesses
+ *   (default: default buffer size as configured by HDFS)
+ * * *replication*: the number of nodes this file should be replicated against
+ *   (default: default replication as configured by HDFS)
+ * * *block_size*: the HDFS block size in bytes to use for this file
+ *   (default: default block size as configured by HDFS)
  */
 VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   VALUE path, mode, options;
@@ -684,7 +734,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
       RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
       RTEST(r_replication) ? NUM2INT(r_replication) : 0,
-      RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+      RTEST(r_block_size) ? NUM2INT(r_block_size) : 0);
   if (file == NULL) {
     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
     return Qnil;
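
The hunk above also explains why the hard-coded HDFS_DEFAULT_BLOCK_SIZE constant could be dropped: libhdfs treats 0 for buffer size, replication, or block size as "use the value configured on the cluster". A minimal sketch, assuming fs is an established connection and <fcntl.h> is included for the open flags:

    // All three trailing zeros defer to the server-side defaults.
    hdfsFile file = hdfsOpenFile(fs, "/tmp/example", O_WRONLY, 0, 0, 0);
    if (file != NULL) {
      hdfsCloseFile(fs, file);
    }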
@@ -711,6 +761,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
 VALUE HDFS_File_read(VALUE self, VALUE length) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   char* buffer = ALLOC_N(char, length);
   MEMZERO(buffer, char, length);
   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
@@ -730,6 +781,7 @@ VALUE HDFS_File_read(VALUE self, VALUE length) {
 VALUE HDFS_File_write(VALUE self, VALUE bytes) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
   if (bytes_written == -1) {
     rb_raise(e_file_error, "Failed to write data");
@@ -747,6 +799,7 @@ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
 VALUE HDFS_File_tell(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize offset = hdfsTell(data->fs, data->file);
   if (offset == -1) {
     rb_raise(e_file_error, "Failed to read position");
@@ -764,6 +817,7 @@ VALUE HDFS_File_tell(VALUE self) {
 VALUE HDFS_File_seek(VALUE self, VALUE offset) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
   if (result != 0) {
     rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
@@ -782,6 +836,7 @@ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
 VALUE HDFS_File_flush(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsFlush(data->fs, data->file);
   if (result != 0) {
     rb_raise(e_file_error, "Flush failed");
@@ -799,6 +854,7 @@ VALUE HDFS_File_flush(VALUE self) {
 VALUE HDFS_File_available(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsAvailable(data->fs, data->file);
   if (result == -1) {
     rb_raise(e_file_error, "Failed to get available data");
@@ -816,12 +872,47 @@ VALUE HDFS_File_close(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
   if (data->file != NULL) {
-    hdfsCloseFile(data->fs, data->file);
+    if (hdfsCloseFile(data->fs, data->file) < 0) {
+      rb_raise(e_file_error, "Could not close file");
+      return Qnil;
+    }
     data->file = NULL;
   }
   return Qtrue;
 }
 
+/**
+ * call-seq:
+ *    file.read_open -> open_for_read
+ *
+ * Returns True if this file is open for reading; otherwise returns False.
+ */
+VALUE HDFS_File_read_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForRead(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
+/**
+ * call-seq:
+ *    file.write_open -> open_for_write
+ *
+ * Returns True if this file is open for writing; otherwise returns False.
+ */
+VALUE HDFS_File_write_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForWrite(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
 /**
  * HDFS File Info interface
  */
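
A sketch of the libhdfs predicates behind the new read_open?/write_open? methods, assuming fs is an established connection and the path is illustrative; both return nonzero when the handle was opened with the corresponding mode:

    hdfsFile file = hdfsOpenFile(fs, "/tmp/example", O_RDONLY, 0, 0, 0);
    if (file != NULL) {
      printf("readable: %d, writable: %d\n",
          hdfsFileIsOpenForRead(file), hdfsFileIsOpenForWrite(file));
      hdfsCloseFile(fs, file);
    }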
@@ -1004,16 +1095,21 @@ void Init_hdfs() {
 
   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
-  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, -1);
-  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
+      -1);
+  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect,
+      0);
   rb_define_method(c_file_system, "open", HDFS_File_System_open, -1);
   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, -1);
   rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
-  rb_define_method(c_file_system, "create_directory", HDFS_File_System_create_directory, 1);
-  rb_define_method(c_file_system, "list_directory", HDFS_File_System_list_directory, 1);
+  rb_define_method(c_file_system, "create_directory",
+      HDFS_File_System_create_directory, 1);
+  rb_define_method(c_file_system, "list_directory",
+      HDFS_File_System_list_directory, 1);
   rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
-  rb_define_method(c_file_system, "set_replication", HDFS_File_System_set_replication, -1);
+  rb_define_method(c_file_system, "set_replication",
+      HDFS_File_System_set_replication, -1);
   rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
   rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
   rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
@@ -1025,6 +1121,7 @@ void Init_hdfs() {
       HDFS_File_System_default_block_size, 0);
   rb_define_method(c_file_system, "default_block_size_at_path",
       HDFS_File_System_default_block_size_at_path, 1);
+  rb_define_method(c_file_system, "get_hosts", HDFS_File_System_get_hosts, 3);
   rb_define_method(c_file_system, "move", HDFS_File_System_move, -1);
   rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
   rb_define_method(c_file_system, "utime", HDFS_File_System_utime, -1);
@@ -1038,14 +1135,18 @@ void Init_hdfs() {
   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
   rb_define_method(c_file, "available", HDFS_File_available, 0);
   rb_define_method(c_file, "close", HDFS_File_close, 0);
+  rb_define_method(c_file, "read_open?", HDFS_File_read_open, 0);
+  rb_define_method(c_file, "write_open?", HDFS_File_write_open, 0);
 
   c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
   rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
   rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
-  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory, 0);
+  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
+      0);
   rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
   rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
-  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified, 0);
+  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
+      0);
   rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
   rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
   rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
@@ -1054,14 +1155,21 @@ void Init_hdfs() {
   rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
 
   c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
-  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file, 0);
-
-  c_file_info_directory = rb_define_class_under(c_file_info, "Directory", c_file_info);
-  rb_define_method(c_file_info_directory, "is_directory?", HDFS_File_Info_Directory_is_directory, 0);
-
-  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
-  e_connect_error = rb_define_class_under(m_dfs, "ConnectError", e_dfs_exception);
+  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
+      0);
+
+  c_file_info_directory = rb_define_class_under(c_file_info, "Directory",
+      c_file_info);
+  rb_define_method(c_file_info_directory, "is_directory?",
+      HDFS_File_Info_Directory_is_directory, 0);
+
+  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException",
+      rb_eStandardError);
+  e_connect_error = rb_define_class_under(m_dfs, "ConnectError",
+      e_dfs_exception);
   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
-  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
-  e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError", e_file_error);
+  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
+      e_file_error);
+  e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError",
+      e_file_error);
 }
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-hdfs-cdh4
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.5
 prerelease:
 platform: ruby
 authors:
@@ -11,7 +11,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-22 00:00:00.000000000 Z
+date: 2013-05-25 00:00:00.000000000 Z
 dependencies: []
 description: ruby hadoop libhdfs client with support for cdh4
 email:
@@ -48,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: 1.8.10
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.23
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: ruby hadoop libhdfs client with support for cdh4