ruby-hdfs-cdh4 0.0.4 → 0.0.5

This diff shows the changes between two publicly released versions of this package, as they appear in its public registry. It is provided for informational purposes only.
Files changed (2)
  1. data/ext/hdfs/hdfs.c +149 -41
  2. metadata +3 -3
data/ext/hdfs/hdfs.c CHANGED
@@ -19,13 +19,13 @@ static VALUE e_file_error;
 static VALUE e_could_not_open;
 static VALUE e_does_not_exist;
 
-static const int32_t HDFS_DEFAULT_BLOCK_SIZE = 134217728;
 static const int16_t HDFS_DEFAULT_REPLICATION = 3;
 static const short HDFS_DEFAULT_MODE = 0644;
 static const char* HDFS_DEFAULT_HOST = "0.0.0.0";
 static const int HDFS_DEFAULT_RECURSIVE_DELETE = 0;
 static const int HDFS_DEFAULT_PATH_STRING_LENGTH = 1024;
 static const int HDFS_DEFAULT_PORT = 8020;
+static const char* HDFS_DEFAULT_USER = NULL;
 
 /*
  * Data structs
@@ -109,6 +109,12 @@ int octal_decimal(int n) {
   return decimal;
 }
 
+void ensure_file_open(FileData* data) {
+  if (data->file == NULL) {
+    rb_raise(e_file_error, "File is closed");
+  }
+}
+
 /*
  * Copies an hdfsFileInfo struct into a Hadoop::DFS::FileInfo derivative
  * object.
@@ -157,14 +163,17 @@ VALUE HDFS_File_System_alloc(VALUE klass) {
  * call-seq:
  *    hdfs.new(options={}) -> hdfs
  *
- * Creates a new HDFS client connection, returning a new
+ * Creates a new HDFS client connection, configured by options, returning a new
  * Hadoop::DFS::FileSystem object if successful. If this fails, raises a
  * ConnectError.
  *
- * Options:
- * :local
- * :host
- * :port
+ * options can have the following keys:
+ *
+ * * *local*: whether to use the local filesystem instead of HDFS
+ *   (default: false)
+ * * *host*: hostname or IP address of a Hadoop NameNode (default: '0.0.0.0')
+ * * *port*: port through which to connect to Hadoop NameNode (default: 8020)
+ * * *user*: user to connect to filesystem as (default: current user)
  */
 VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
   VALUE options;
@@ -177,26 +186,23 @@ VALUE HDFS_File_System_initialize(int argc, VALUE* argv, VALUE self) {
     options = rb_hash_new();
   }
 
+  VALUE r_user = rb_hash_aref(options, rb_eval_string(":user"));
+  char* hdfs_user = RTEST(r_user) ? RSTRING_PTR(r_user) :
+    (char*) HDFS_DEFAULT_USER;
+
   VALUE r_local = rb_hash_aref(options, rb_eval_string(":local"));
   if (r_local == Qtrue) {
-    data->fs = hdfsConnect(NULL, 0);
+    data->fs = hdfsConnectAsUser(NULL, 0, hdfs_user);
   } else {
     VALUE r_host = rb_hash_aref(options, rb_eval_string(":host"));
     VALUE r_port = rb_hash_aref(options, rb_eval_string(":port"));
 
     // Sets default values for host and port if not supplied by user.
-    char* hdfs_host = (char*) HDFS_DEFAULT_HOST;
-    int hdfs_port = HDFS_DEFAULT_PORT;
-
-    if (RTEST(r_host)) {
-      hdfs_host = RSTRING_PTR(r_host);
-    }
-
-    if (RTEST(r_port)) {
-      hdfs_port = NUM2INT(r_port);
-    }
-
-    data->fs = hdfsConnect(hdfs_host, hdfs_port);
+    char* hdfs_host = RTEST(r_host) ? RSTRING_PTR(r_host) :
+      (char*) HDFS_DEFAULT_HOST;
+    int hdfs_port = RTEST(r_port) ? NUM2INT(r_port) :
+      HDFS_DEFAULT_PORT;
+    data->fs = hdfsConnectAsUser(hdfs_host, hdfs_port, hdfs_user);
   }
 
   if (data->fs == NULL) {
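
Note: a minimal usage sketch of the new :user option from the Ruby side. The require name and the hostname/user values are assumptions for illustration, not values from this package:

    require 'hdfs'  # assumed require name for this gem's native extension

    # Connect to the NameNode as an explicit user; omitting :user passes
    # HDFS_DEFAULT_USER (NULL) to hdfsConnectAsUser, i.e. the current user.
    fs = Hadoop::DFS::FileSystem.new(:host => 'namenode.example.com',
                                     :port => 8020, :user => 'hdfs')

    # With :local => true, host and port are ignored and the local
    # filesystem is used instead of HDFS.
    local_fs = Hadoop::DFS::FileSystem.new(:local => true)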
@@ -424,7 +430,7 @@ VALUE HDFS_File_System_cwd(VALUE self) {
 VALUE HDFS_File_System_chgrp(VALUE self, VALUE path, VALUE group) {
   FSData* data = NULL;
   Data_Get_Struct(self, FSData, data);
-  if (hdfsChgrp(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
+  if (hdfsChown(data->fs, RSTRING_PTR(path), NULL, RSTRING_PTR(group)) < 0) {
     rb_raise(e_dfs_exception, "Failed to chgrp path: %s to group: %s",
       RSTRING_PTR(path), RSTRING_PTR(group));
     return Qnil;
@@ -538,7 +544,8 @@ VALUE HDFS_File_System_move(int argc, VALUE* argv, VALUE self) {
   }
   if (hdfsMove(data->fs, RSTRING_PTR(from_path), destFS,
       RSTRING_PTR(to_path)) < 0) {
-    rb_raise(e_dfs_exception, "Error while retrieving capacity");
+    rb_raise(e_dfs_exception, "Error while moving path: %s to path: %s",
+      RSTRING_PTR(from_path), RSTRING_PTR(to_path));
     return Qnil;
   }
   return Qtrue;
@@ -599,6 +606,39 @@ VALUE HDFS_File_System_default_block_size_at_path(VALUE self, VALUE path) {
   return LONG2NUM(block_size);
 }
 
+/**
+ * call-seq:
+ *    hdfs.get_hosts(path, start, length) -> retval
+ *
+ * Returns the hostnames of the DataNodes which serve the portion of the file
+ * between the provided start and length bytes. Raises a DFSException if this
+ * fails.
+ */
+VALUE HDFS_File_System_get_hosts(VALUE self, VALUE path, VALUE start,
+    VALUE length) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  char*** hosts = hdfsGetHosts(data->fs, RSTRING_PTR(path), NUM2LONG(start),
+    NUM2LONG(length));
+  if (hosts == NULL) {
+    rb_raise(e_dfs_exception,
+      "Error while retrieving hosts at path: %s, start: %lu, length: %lu",
+      RSTRING_PTR(path), NUM2LONG(start), NUM2LONG(length));
+    return Qnil;
+  }
+  // Builds a Ruby Array object out of the hosts reported by HDFS.
+  VALUE hosts_array = rb_ary_new();
+  size_t i, j;
+  for (i = 0; hosts[i]; i++) {
+    VALUE cur_block_hosts = rb_ary_new();
+    for (j = 0; hosts[i][j]; j++) {
+      rb_ary_push(cur_block_hosts, rb_str_new2(hosts[i][j]));
+    }
+    rb_ary_push(hosts_array, cur_block_hosts);
+  }
+  return hosts_array;
+}
+
 /**
  * call-seq:
  *    hdfs.used -> retval
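
Note: a sketch of calling the new get_hosts binding from Ruby; the path and byte range are hypothetical, and the nested-array shape follows the hosts_array construction above:

    # One inner Array per block in the requested byte range, each listing
    # the hostnames of the DataNodes serving that block.
    hosts = fs.get_hosts('/user/hdfs/big.log', 0, 134217728)
    hosts.each_with_index do |block_hosts, i|
      puts "block #{i}: #{block_hosts.join(', ')}"
    end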
@@ -655,8 +695,18 @@ VALUE HDFS_File_System_utime(int argc, VALUE* argv, VALUE self) {
  * call-seq:
  *    hdfs.open(path, mode='r', options={}) -> file
  *
- * Opens a file. If the file cannot be opened, raises a CouldNotOpenError;
- * otherwise, returns a Hadoop::DFS::File object corresponding to the file.
+ * Opens a file using the supplied mode and options. If the file cannot be
+ * opened, raises a CouldNotOpenError; otherwise, returns a Hadoop::DFS::File
+ * object corresponding to the file.
+ *
+ * options can have the following keys:
+ *
+ * * *buffer_size*: size in bytes of buffer to use for file accesses
+ *   (default: default buffer size as configured by HDFS)
+ * * *replication*: the number of nodes this file should be replicated against
+ *   (default: default replication as configured by HDFS)
+ * * *block_size*: the HDFS block size in bytes to use for this file
+ *   (default: default block size as configured by HDFS)
  */
 VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   VALUE path, mode, options;
@@ -684,7 +734,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
     RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
     RTEST(r_replication) ? NUM2INT(r_replication) : 0,
-    RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+    RTEST(r_block_size) ? NUM2INT(r_block_size) : 0);
   if (file == NULL) {
     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
     return Qnil;
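
Note: with this change, an unset option is passed to hdfsOpenFile as 0, deferring to the cluster-configured default instead of a hardcoded 128 MB block size. A hedged sketch of the Ruby call (path and option values are placeholders):

    # All three option keys are optional; any key left out is passed to
    # libhdfs as 0, meaning "use the value configured on the cluster".
    file = fs.open('/user/hdfs/data.log', 'w',
                   :replication => 2, :block_size => 134217728)
    file.write("hello from ruby-hdfs-cdh4\n")
    file.close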
@@ -711,6 +761,7 @@ VALUE HDFS_File_System_open(int argc, VALUE* argv, VALUE self) {
 VALUE HDFS_File_read(VALUE self, VALUE length) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   char* buffer = ALLOC_N(char, length);
   MEMZERO(buffer, char, length);
   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
@@ -730,6 +781,7 @@ VALUE HDFS_File_read(VALUE self, VALUE length) {
 VALUE HDFS_File_write(VALUE self, VALUE bytes) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
   if (bytes_written == -1) {
     rb_raise(e_file_error, "Failed to write data");
@@ -747,6 +799,7 @@ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
 VALUE HDFS_File_tell(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   tSize offset = hdfsTell(data->fs, data->file);
   if (offset == -1) {
     rb_raise(e_file_error, "Failed to read position");
@@ -764,6 +817,7 @@ VALUE HDFS_File_tell(VALUE self) {
 VALUE HDFS_File_seek(VALUE self, VALUE offset) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
   if (result != 0) {
     rb_raise(e_file_error, "Failed to seek to position %d", NUM2INT(offset));
@@ -782,6 +836,7 @@ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
 VALUE HDFS_File_flush(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsFlush(data->fs, data->file);
   if (result != 0) {
     rb_raise(e_file_error, "Flush failed");
@@ -799,6 +854,7 @@ VALUE HDFS_File_flush(VALUE self) {
 VALUE HDFS_File_available(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
+  ensure_file_open(data);
   int result = hdfsAvailable(data->fs, data->file);
   if (result == -1) {
     rb_raise(e_file_error, "Failed to get available data");
@@ -816,12 +872,47 @@ VALUE HDFS_File_close(VALUE self) {
   FileData* data = NULL;
   Data_Get_Struct(self, FileData, data);
   if (data->file != NULL) {
-    hdfsCloseFile(data->fs, data->file);
+    if (hdfsCloseFile(data->fs, data->file) < 0) {
+      rb_raise(e_file_error, "Could not close file");
+      return Qnil;
+    }
     data->file = NULL;
   }
   return Qtrue;
 }
 
+/**
+ * call-seq:
+ *    file.read_open -> open_for_read
+ *
+ * Returns True if this file is open for reading; otherwise returns False.
+ */
+VALUE HDFS_File_read_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForRead(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
+/**
+ * call-seq:
+ *    file.write_open -> open_for_write
+ *
+ * Returns True if this file is open for writing; otherwise returns False.
+ */
+VALUE HDFS_File_write_open(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file) {
+    return hdfsFileIsOpenForWrite(data->file) ? Qtrue : Qfalse;
+  } else {
+    return Qfalse;
+  }
+}
+
 /**
  * HDFS File Info interface
  */
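
Note: a sketch of the new state predicates together with the stricter close and the ensure_file_open guard (paths are placeholders):

    file = fs.open('/user/hdfs/data.log', 'r')
    file.read_open?   # => true
    file.write_open?  # => false

    file.close        # => true; raises FileError if hdfsCloseFile fails
    file.read_open?   # => false (a nil handle short-circuits to false)
    file.read(1024)   # raises Hadoop::DFS::FileError, "File is closed"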
@@ -1004,16 +1095,21 @@ void Init_hdfs() {
 
   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
-  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, -1);
-  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize,
+    -1);
+  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect,
+    0);
   rb_define_method(c_file_system, "open", HDFS_File_System_open, -1);
   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, -1);
   rb_define_method(c_file_system, "rename", HDFS_File_System_rename, 2);
   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
-  rb_define_method(c_file_system, "create_directory", HDFS_File_System_create_directory, 1);
-  rb_define_method(c_file_system, "list_directory", HDFS_File_System_list_directory, 1);
+  rb_define_method(c_file_system, "create_directory",
+    HDFS_File_System_create_directory, 1);
+  rb_define_method(c_file_system, "list_directory",
+    HDFS_File_System_list_directory, 1);
   rb_define_method(c_file_system, "stat", HDFS_File_System_stat, 1);
-  rb_define_method(c_file_system, "set_replication", HDFS_File_System_set_replication, -1);
+  rb_define_method(c_file_system, "set_replication",
+    HDFS_File_System_set_replication, -1);
   rb_define_method(c_file_system, "cd", HDFS_File_System_cd, 1);
   rb_define_method(c_file_system, "cwd", HDFS_File_System_cwd, 0);
   rb_define_method(c_file_system, "chgrp", HDFS_File_System_chgrp, 2);
@@ -1025,6 +1121,7 @@ void Init_hdfs() {
     HDFS_File_System_default_block_size, 0);
   rb_define_method(c_file_system, "default_block_size_at_path",
     HDFS_File_System_default_block_size_at_path, 1);
+  rb_define_method(c_file_system, "get_hosts", HDFS_File_System_get_hosts, 3);
   rb_define_method(c_file_system, "move", HDFS_File_System_move, -1);
   rb_define_method(c_file_system, "used", HDFS_File_System_used, 0);
   rb_define_method(c_file_system, "utime", HDFS_File_System_utime, -1);
@@ -1038,14 +1135,18 @@ void Init_hdfs() {
   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
   rb_define_method(c_file, "available", HDFS_File_available, 0);
   rb_define_method(c_file, "close", HDFS_File_close, 0);
+  rb_define_method(c_file, "read_open?", HDFS_File_read_open, 0);
+  rb_define_method(c_file, "write_open?", HDFS_File_write_open, 0);
 
   c_file_info = rb_define_class_under(m_dfs, "FileInfo", rb_cObject);
   rb_define_method(c_file_info, "block_size", HDFS_File_Info_block_size, 0);
   rb_define_method(c_file_info, "group", HDFS_File_Info_group, 0);
-  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory, 0);
+  rb_define_method(c_file_info, "is_directory?", HDFS_File_Info_is_directory,
+    0);
   rb_define_method(c_file_info, "is_file?", HDFS_File_Info_is_file, 0);
   rb_define_method(c_file_info, "last_access", HDFS_File_Info_last_access, 0);
-  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified, 0);
+  rb_define_method(c_file_info, "last_modified", HDFS_File_Info_last_modified,
+    0);
   rb_define_method(c_file_info, "mode", HDFS_File_Info_mode, 0);
   rb_define_method(c_file_info, "name", HDFS_File_Info_name, 0);
   rb_define_method(c_file_info, "owner", HDFS_File_Info_owner, 0);
@@ -1054,14 +1155,21 @@ void Init_hdfs() {
   rb_define_method(c_file_info, "to_s", HDFS_File_Info_to_s, 0);
 
   c_file_info_file = rb_define_class_under(c_file_info, "File", c_file_info);
-  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file, 0);
-
-  c_file_info_directory = rb_define_class_under(c_file_info, "Directory", c_file_info);
-  rb_define_method(c_file_info_directory, "is_directory?", HDFS_File_Info_Directory_is_directory, 0);
-
-  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
-  e_connect_error = rb_define_class_under(m_dfs, "ConnectError", e_dfs_exception);
+  rb_define_method(c_file_info_file, "is_file?", HDFS_File_Info_File_is_file,
+    0);
+
+  c_file_info_directory = rb_define_class_under(c_file_info, "Directory",
+    c_file_info);
+  rb_define_method(c_file_info_directory, "is_directory?",
+    HDFS_File_Info_Directory_is_directory, 0);
+
+  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException",
+    rb_eStandardError);
+  e_connect_error = rb_define_class_under(m_dfs, "ConnectError",
+    e_dfs_exception);
   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
-  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
-  e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError", e_file_error);
+  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError",
+    e_file_error);
+  e_does_not_exist = rb_define_class_under(m_dfs, "DoesNotExistError",
+    e_file_error);
 }
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-hdfs-cdh4
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.5
 prerelease:
 platform: ruby
 authors:
@@ -11,7 +11,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-22 00:00:00.000000000 Z
+date: 2013-05-25 00:00:00.000000000 Z
 dependencies: []
 description: ruby hadoop libhdfs client with support for cdh4
 email:
@@ -48,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: 1.8.10
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.23
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: ruby hadoop libhdfs client with support for cdh4