ruby-hdfs 0.1.0

data/.document ADDED
@@ -0,0 +1,5 @@
+ README.rdoc
+ lib/**/*.rb
+ bin/*
+ features/**/*.feature
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
+ # MacOS X
+ .DS_Store
+
+ # Emacs
+ *~
+ \#*
+ .\#*
+
+ # Vim
+ *.swp
+
+ # General
+ coverage
+ rdoc
+ pkg
+
+ # Project-specific
+ *.bak
+ /ext/hdfs/Makefile
+ /ext/hdfs/*.log
+ *.o
+ *.so
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Alexander Staubo
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,59 @@
+ == Hadoop DFS (HDFS) bindings for Ruby
+
+ This library provides native C bindings to Hadoop's libhdfs, for interacting with Hadoop DFS.
+
+ === Requirements
+
+ You will need:
+
+ * Java JDK and JRE (yes, both). The build file will attempt to find them for you.
+ * Hadoop's libhdfs. On Ubuntu/Debian you will need libhdfs0 and libhdfs0-dev.
+ * Hadoop Core and DFS libraries.
+
+ === Installation
+
+ Install from gems. Note that you will need to provide JAVA_HOME so the compiler can find the
+ required libraries.
+
+ The installation will attempt to discover the location of the libraries, but if it fails,
+ you can try setting the environment variable JAVA_LIB to the library path of the JDK/JRE.
+
+ Installing with a specific Java JDK:
+
+   sudo env JAVA_HOME=/usr/lib/jvm/java-6-openjdk gem install ruby-hdfs
+
+ === Using
+
+ The library also depends on an installation of Hadoop DFS. The Cloudera distribution of
+ Hadoop is pretty good:
+
+   http://www.cloudera.com/distribution
+
+ Sample classpath setup (yes, welcome to JAR hell):
+
+   export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/hadoop-0.18.3-6cloudera0.3.0-core.jar
+   for jarfile in /usr/lib/hadoop/lib/*.jar; do
+     export CLASSPATH=$CLASSPATH:$jarfile
+   done
+
+ Wait, there's more. You will also need libjvm.so in your library path, which comes with
+ the JRE. This might work:
+
+   export LD_LIBRARY_PATH=/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server
+
+ === Known issues
+
+ libhdfs will sometimes throw exceptions, which are printed instead of being caught by Ruby.
+ This is annoying but harmless.
+
+ === Building from source
+
+ To build from source:
+
+   rake compile
+
+ On completion, the compiled extension will be available in ext/hdfs.
+
+ == Copyright
+
+ Copyright (c) 2010 Alexander Staubo. See LICENSE for details.
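
The README stops short of showing the Ruby API itself, so here is a minimal usage sketch
based on the classes defined in ext/hdfs/hdfs.c further down. The namenode host, port and
paths are illustrative assumptions, and the require line assumes the compiled hdfs
extension is on the load path:

  require 'hdfs'   # the compiled C extension (ext/hdfs/hdfs.so); assumed to be on $LOAD_PATH

  dfs = Hadoop::DFS::FileSystem.new("localhost", 9000)

  # Write a file, then read it back.
  file = dfs.open("/tmp/hello.txt", "w", {})
  file.write("Hello from ruby-hdfs\n")
  file.flush
  file.close

  file = dfs.open("/tmp/hello.txt", "r", {})
  puts file.read(1024)
  file.close

  dfs.disconnect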
data/Rakefile ADDED
@@ -0,0 +1,58 @@
+ require 'rubygems'
+ require 'rake'
+ require 'rake/extensiontask' # From rake-compiler gem
+
+ begin
+   require 'jeweler'
+   Jeweler::Tasks.new do |gem|
+     gem.name = "ruby-hdfs"
+     gem.summary = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+     gem.description = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+     gem.email = "alex@bengler.no"
+     gem.homepage = "http://github.com/alexstaubo/ruby-hdfs"
+     gem.authors = ["Alexander Staubo"]
+     gem.extensions = ["ext/hdfs/extconf.rb"]
+     gem.require_paths = ["lib"]
+     # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+   end
+   Jeweler::GemcutterTasks.new
+ rescue LoadError
+   puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+ end
+
+ Rake::ExtensionTask.new('hdfs') do |ext|
+ end
+
+ require 'rake/testtask'
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.pattern = 'test/**/test_*.rb'
+   test.verbose = true
+ end
+
+ begin
+   require 'rcov/rcovtask'
+   Rcov::RcovTask.new do |test|
+     test.libs << 'test'
+     test.pattern = 'test/**/test_*.rb'
+     test.verbose = true
+   end
+ rescue LoadError
+   task :rcov do
+     abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+   end
+ end
+
+ task :test => :check_dependencies
+
+ task :default => :test
+
+ require 'rake/rdoctask'
+ Rake::RDocTask.new do |rdoc|
+   version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+   rdoc.rdoc_dir = 'rdoc'
+   rdoc.title = "ruby-hdfs #{version}"
+   rdoc.rdoc_files.include('README*')
+   rdoc.rdoc_files.include('lib/**/*.rb')
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.1.0
data/ext/hdfs/extconf.rb ADDED
@@ -0,0 +1,49 @@
+ require 'mkmf'
+
+ java_home = ENV["JAVA_HOME"]
+ unless java_home
+   %w(
+     /usr/lib/jvm/java-6-openjdk
+   ).each do |path|
+     if File.directory?(path)
+       java_home = path
+       $stderr << "Warning: Automatically guessed #{path} as Java home, might not be correct.\n"
+     end
+   end
+   abort("JAVA_HOME needs to be defined.") unless java_home
+ end
+ puts("Java home: #{java_home}")
+
+ java_lib_path = ENV["JAVA_LIB"]
+ unless java_lib_path
+   libjvm = "libjvm.so"
+   [
+     "#{java_home}/lib",
+     "#{java_home}/lib/*/client",
+     "#{java_home}/lib/*/server",
+     "#{java_home}/jre/lib",
+     "#{java_home}/jre/lib/*/client",
+     "#{java_home}/jre/lib/*/server"
+   ].each do |glob|
+     Dir.glob(glob).each do |path|
+       if File.exist?(File.join(path, libjvm))
+         java_lib_path ||= path
+         break
+       end
+     end
+   end
+   abort("Could not determine Java library path (need #{libjvm})") unless java_lib_path
+ end
+ puts("Java library path: #{java_lib_path}")
+
+ java_include_paths = Dir.glob("#{java_home}/include/**/.").map { |s| s.gsub(/\/\.$/, '') }
+ puts("Java include paths: #{java_include_paths.join(', ')}")
+ java_include_paths.each do |path|
+   $INCFLAGS << " -I#{path}"
+ end
+
+ dir_config("hdfs")
+ find_library("jvm", nil, java_lib_path)
+ find_library("hdfs", nil, java_lib_path)
+ have_library("c", "main")
+ create_makefile("hdfs")
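
When gem install fails to locate the JVM, the discovery logic above can be exercised on
its own. A hypothetical debugging snippet that mirrors extconf.rb's globbing (the
JAVA_HOME fallback is only an example, not a value the gem prescribes):

  # Hypothetical pre-flight check mirroring extconf.rb's libjvm.so search.
  java_home = ENV["JAVA_HOME"] || "/usr/lib/jvm/java-6-openjdk"
  globs = ["#{java_home}/lib", "#{java_home}/lib/*/client", "#{java_home}/lib/*/server",
           "#{java_home}/jre/lib", "#{java_home}/jre/lib/*/client", "#{java_home}/jre/lib/*/server"]
  dirs = globs.map { |glob| Dir.glob(glob) }.flatten
  hits = dirs.select { |dir| File.exist?(File.join(dir, "libjvm.so")) }
  puts hits.empty? ? "libjvm.so not found; set JAVA_LIB explicitly" : "Candidates: #{hits.join(', ')}"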
data/ext/hdfs/hdfs.c ADDED
@@ -0,0 +1,247 @@
+ #include "ruby.h"
+ #include "hdfs.h"
+
+ #include <assert.h>
+ #include <string.h>
+ #include <ctype.h>
+
+ // Override flags in hdfs.h
+ #define HDFS_O_RDONLY 0
+ #define HDFS_O_WRONLY 2
+
+ static VALUE m_hadoop;
+ static VALUE m_dfs;
+ static VALUE c_file_system;
+ static VALUE c_file;
+ static VALUE e_dfs_exception;
+ static VALUE e_file_error;
+ static VALUE e_could_not_open;
+
+ static const tSize HDFS_DEFAULT_BLOCK_SIZE = 134217728;
+ static const char* HDFS_DEFAULT_HOST = "localhost";
+ static const int HDFS_DEFAULT_PORT = 9000;
+
+ /*
+  * Data structs
+  */
+
+ typedef struct FSData {
+   hdfsFS fs;
+ } FSData;
+
+ typedef struct FileData {
+   hdfsFS fs;
+   hdfsFile file;
+ } FileData;
+
+ void free_fs_data(FSData* data) {
+   if (data && data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+   if (data) xfree(data);  /* release the wrapper struct allocated in HDFS_File_System_alloc */
+ }
+
+ void free_file_data(FileData* data) {
+   if (data && data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+   if (data) xfree(data);  /* release the wrapper struct allocated in HDFS_File_System_open */
+ }
+
+ /*
+  * File system interface
+  */
+
+ VALUE HDFS_File_System_alloc(VALUE klass) {
+   FSData* data = ALLOC_N(FSData, 1);
+   data->fs = NULL;
+   VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
+   return instance;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.new -> hdfs
+  *
+  * Creates a new HDFS client connection.
+  */
+ VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   data->fs = hdfsConnect(
+     RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
+     RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
+   return self;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.disconnect -> nil
+  *
+  * Disconnects the client connection.
+  */
+ VALUE HDFS_File_System_disconnect(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   if (data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+   return Qnil;
+ }
+
+ VALUE HDFS_File_System_delete(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int value = hdfsDelete(data->fs, RSTRING_PTR(path));
+   return value == 0 ? Qtrue : Qfalse;
+ }
+
+ VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int value = hdfsExists(data->fs, RSTRING_PTR(path));
+   return value == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *    hdfs.open -> file
+  *
+  * Opens a file.
+  */
+ VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+
+   int flags = 0;
+   if (strcmp("r", StringValueCStr(mode)) == 0) {
+     flags = HDFS_O_RDONLY;
+   } else if (strcmp("w", StringValueCStr(mode)) == 0) {
+     flags = HDFS_O_WRONLY;
+   } else {
+     rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
+     return Qnil;
+   }
+   VALUE r_buffer_size = rb_hash_aref(options, ID2SYM(rb_intern("buffer_size")));
+   VALUE r_replication = rb_hash_aref(options, ID2SYM(rb_intern("replication")));
+   VALUE r_block_size = rb_hash_aref(options, ID2SYM(rb_intern("block_size")));
+   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
+     RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
+     RTEST(r_replication) ? NUM2INT(r_replication) : 0,
+     RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+   if (file == NULL) {
+     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
+     return Qnil;
+   }
+
+   FileData* file_data = ALLOC_N(FileData, 1);
+   file_data->fs = data->fs;
+   file_data->file = file;
+   VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
+   return file_instance;
+ }
+
+ /*
+  * File interface
+  */
+
+ VALUE HDFS_File_read(VALUE self, VALUE length) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int len = NUM2INT(length);
+   char* buffer = ALLOC_N(char, len);
+   MEMZERO(buffer, char, len);
+   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, len);
+   if (bytes_read == -1) {
+     xfree(buffer);
+     rb_raise(e_file_error, "Failed to read data");
+   }
+   VALUE result = rb_tainted_str_new(buffer, bytes_read);
+   xfree(buffer);
+   return result;
+ }
+
+ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
+   if (bytes_written == -1) {
+     rb_raise(e_file_error, "Failed to write data");
+   }
+   return INT2NUM(bytes_written);
+ }
+
+ VALUE HDFS_File_tell(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tOffset offset = hdfsTell(data->fs, data->file);
+   if (offset == -1) {
+     rb_raise(e_file_error, "Failed to read position");
+   }
+   return LL2NUM(offset);
+ }
+
+ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsSeek(data->fs, data->file, NUM2LL(offset));
+   return result == 0 ? Qtrue : Qfalse;
+ }
+
+ VALUE HDFS_File_flush(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsFlush(data->fs, data->file);
+   if (result != 0) {
+     rb_raise(e_file_error, "Flush failed");
+   }
+   return Qnil;
+ }
+
+ VALUE HDFS_File_available(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsAvailable(data->fs, data->file);
+   if (result == -1) {
+     rb_raise(e_file_error, "Failed to get available data");
+   }
+   return INT2NUM(result);
+ }
+
+ VALUE HDFS_File_close(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   if (data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+   return Qnil;
+ }
+
+ /*
+  * Extension initialization
+  */
+
+ void Init_hdfs() {
+   m_hadoop = rb_define_module("Hadoop");
+   m_dfs = rb_define_module_under(m_hadoop, "DFS");
+
+   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
+   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
+   rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
+   rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+   rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
+   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 1);
+   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
+
+   c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
+   rb_define_method(c_file, "read", HDFS_File_read, 1);
+   rb_define_method(c_file, "write", HDFS_File_write, 1);
+   rb_define_method(c_file, "<<", HDFS_File_write, 1);
+   rb_define_method(c_file, "seek", HDFS_File_seek, 1);
+   rb_define_method(c_file, "tell", HDFS_File_tell, 0);
+   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
+   rb_define_method(c_file, "available", HDFS_File_available, 0);
+   rb_define_method(c_file, "close", HDFS_File_close, 0);
+
+   e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
+   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
+   e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
+ }
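
A note on the binding above: FileSystem#open takes an options hash whose :buffer_size,
:replication and :block_size entries are passed straight to hdfsOpenFile, and failures
are raised as the exception classes registered in Init_hdfs. A hedged Ruby sketch (the
path and sizes are made up for illustration):

  dfs = Hadoop::DFS::FileSystem.new(nil, nil)   # nil, nil falls back to localhost:9000

  begin
    file = dfs.open("/logs/app.log", "r", :buffer_size => 4096)
    puts file.read(4096)
    file.close
  rescue Hadoop::DFS::CouldNotOpenFileError => e
    warn "open failed: #{e.message}"
  rescue Hadoop::DFS::FileError => e
    warn "I/O failed: #{e.message}"
  ensure
    dfs.disconnect
  end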
data/ext/hdfs/hdfs.h ADDED
@@ -0,0 +1,414 @@
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #ifndef LIBHDFS_HDFS_H
+ #define LIBHDFS_HDFS_H
+
+ #include <sys/types.h>
+ #include <sys/stat.h>
+
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <string.h>
+ #include <stdlib.h>
+ #include <time.h>
+ #include <errno.h>
+
+ #include <jni.h>
+
+ #ifndef O_RDONLY
+ #define O_RDONLY 1
+ #endif
+
+ #ifndef O_WRONLY
+ #define O_WRONLY 2
+ #endif
+
+ #ifndef EINTERNAL
+ #define EINTERNAL 255
+ #endif
+
+
+ /** All APIs set errno to meaningful values */
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /**
+  * Some utility decls used in libhdfs.
+  */
+
+ typedef int32_t  tSize;   /// size of data for read/write io ops
+ typedef time_t   tTime;   /// time type
+ typedef int64_t  tOffset; /// offset within the file
+ typedef uint16_t tPort;   /// port
+ typedef enum tObjectKind {
+     kObjectKindFile = 'F',
+     kObjectKindDirectory = 'D',
+ } tObjectKind;
+
+
+ /**
+  * The C reflection of org.apache.org.hadoop.FileSystem .
+  */
+ typedef void* hdfsFS;
+
+
+ /**
+  * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .
+  */
+ enum hdfsStreamType
+ {
+     UNINITIALIZED = 0,
+     INPUT = 1,
+     OUTPUT = 2,
+ };
+
+
+ /**
+  * The 'file-handle' to a file in hdfs.
+  */
+ struct hdfsFile_internal {
+     void* file;
+     enum hdfsStreamType type;
+ };
+ typedef struct hdfsFile_internal* hdfsFile;
+
+
+ /**
+  * hdfsConnect - Connect to a hdfs file system.
+  * Connect to the hdfs.
+  * @param host A string containing either a host name, or an ip address
+  * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
+  * you want to connect to local filesystem. 'host' should be passed as
+  * 'default' (and port as 0) to use the 'configured' filesystem
+  * (hadoop-site/hadoop-default.xml).
+  * @param port The port on which the server is listening.
+  * @return Returns a handle to the filesystem or NULL on error.
+  */
+ hdfsFS hdfsConnect(const char* host, tPort port);
+
+
+ /**
+  * hdfsDisconnect - Disconnect from the hdfs file system.
+  * Disconnect from hdfs.
+  * @param fs The configured filesystem handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsDisconnect(hdfsFS fs);
+
+
+ /**
+  * hdfsOpenFile - Open a hdfs file in given mode.
+  * @param fs The configured filesystem handle.
+  * @param path The full path to the file.
+  * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
+  * @param bufferSize Size of buffer for read/write - pass 0 if you want
+  * to use the default configured values.
+  * @param replication Block replication - pass 0 if you want to use
+  * the default configured values.
+  * @param blocksize Size of block - pass 0 if you want to use the
+  * default configured values.
+  * @return Returns the handle to the open file or NULL on error.
+  */
+ hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
+                       int bufferSize, short replication, tSize blocksize);
+
+
+ /**
+  * hdfsCloseFile - Close an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCloseFile(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsExists - Checks if a given path exists on the filesystem
+  * @param fs The configured filesystem handle.
+  * @param path The path to look for
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsExists(hdfsFS fs, const char *path);
+
+
+ /**
+  * hdfsSeek - Seek to given offset in file.
+  * This works only for files opened in read-only mode.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param desiredPos Offset into the file to seek into.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos);
+
+
+ /**
+  * hdfsTell - Get the current offset in the file, in bytes.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Current offset, -1 on error.
+  */
+ tOffset hdfsTell(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsRead - Read data from an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param buffer The buffer to copy read bytes into.
+  * @param length The length of the buffer.
+  * @return Returns the number of bytes actually read, possibly less
+  * than length; -1 on error.
+  */
+ tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
+
+
+ /**
+  * hdfsPread - Positional read of data from an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param position Position from which to read
+  * @param buffer The buffer to copy read bytes into.
+  * @param length The length of the buffer.
+  * @return Returns the number of bytes actually read, possibly less than
+  * length; -1 on error.
+  */
+ tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
+                 void* buffer, tSize length);
+
+
+ /**
+  * hdfsWrite - Write data into an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param buffer The data.
+  * @param length The no. of bytes to write.
+  * @return Returns the number of bytes written, -1 on error.
+  */
+ tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
+                 tSize length);
+
+
+ /**
+  * hdfsFlush - Flush the data.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsFlush(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsAvailable - Number of bytes that can be read from this
+  * input stream without blocking.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns available bytes; -1 on error.
+  */
+ int hdfsAvailable(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsCopy - Copy file from one filesystem to another.
+  * @param srcFS The handle to source filesystem.
+  * @param src The path of source file.
+  * @param dstFS The handle to destination filesystem.
+  * @param dst The path of destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+ /**
+  * hdfsMove - Move file from one filesystem to another.
+  * @param srcFS The handle to source filesystem.
+  * @param src The path of source file.
+  * @param dstFS The handle to destination filesystem.
+  * @param dst The path of destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+ /**
+  * hdfsDelete - Delete file.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsDelete(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsRename - Rename file.
+  * @param fs The configured filesystem handle.
+  * @param oldPath The path of the source file.
+  * @param newPath The path of the destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
+
+
+ /**
+  * hdfsGetWorkingDirectory - Get the current working directory for
+  * the given filesystem.
+  * @param fs The configured filesystem handle.
+  * @param buffer The user-buffer to copy path of cwd into.
+  * @param bufferSize The length of user-buffer.
+  * @return Returns buffer, NULL on error.
+  */
+ char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
+
+
+ /**
+  * hdfsSetWorkingDirectory - Set the working directory. All relative
+  * paths will be resolved relative to it.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the new 'cwd'.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsCreateDirectory - Make the given file and all non-existent
+  * parents into directories.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the directory.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCreateDirectory(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsSetReplication - Set the replication of the specified
+  * file to the supplied value
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
+
+
+ /**
+  * hdfsFileInfo - Information about a file/directory.
+  */
+ typedef struct {
+     tObjectKind mKind;   /* file or directory */
+     char *mName;         /* the name of the file */
+     tTime mLastMod;      /* the last modification time for the file */
+     tOffset mSize;       /* the size of the file in bytes */
+     short mReplication;  /* the count of replicas */
+     tOffset mBlockSize;  /* the block size for the file */
+ } hdfsFileInfo;
+
+
+ /**
+  * hdfsListDirectory - Get list of files/directories for a given
+  * directory-path. hdfsFreeFileInfo should be called to deallocate memory.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the directory.
+  * @param numEntries Set to the number of files/directories in path.
+  * @return Returns a dynamically-allocated array of hdfsFileInfo
+  * objects; NULL on error.
+  */
+ hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
+                                 int *numEntries);
+
+
+ /**
+  * hdfsGetPathInfo - Get information about a path as a (dynamically
+  * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
+  * called when the pointer is no longer needed.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns a dynamically-allocated hdfsFileInfo object;
+  * NULL on error.
+  */
+ hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields)
+  * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+  * objects.
+  * @param numEntries The size of the array.
+  */
+ void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
+
+
+ /**
+  * hdfsGetHosts - Get hostnames where a particular block (determined by
+  * pos & blocksize) of a file is stored. The last element in the array
+  * is NULL. Due to replication, a single block could be present on
+  * multiple hosts.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @param start The start of the block.
+  * @param length The length of the block.
+  * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
+  * NULL on error.
+  */
+ char*** hdfsGetHosts(hdfsFS fs, const char* path,
+                      tOffset start, tOffset length);
+
+
+ /**
+  * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
+  * @param blockHosts The dynamically-allocated 2-d array of blocks-hosts
+  * returned by hdfsGetHosts.
+  */
+ void hdfsFreeHosts(char ***blockHosts);
+
+
+ /**
+  * hdfsGetDefaultBlockSize - Get the optimum blocksize.
+  * @param fs The configured filesystem handle.
+  * @return Returns the blocksize; -1 on error.
+  */
+ tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
+
+
+ /**
+  * hdfsGetCapacity - Return the raw capacity of the filesystem.
+  * @param fs The configured filesystem handle.
+  * @return Returns the raw-capacity; -1 on error.
+  */
+ tOffset hdfsGetCapacity(hdfsFS fs);
+
+
+ /**
+  * hdfsGetUsed - Return the total raw size of all files in the filesystem.
+  * @param fs The configured filesystem handle.
+  * @return Returns the total-size; -1 on error.
+  */
+ tOffset hdfsGetUsed(hdfsFS fs);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /*LIBHDFS_HDFS_H*/
+
+ /**
+  * vim: ts=4: sw=4: et
+  */
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
+ require 'rubygems'
+ require 'test/unit'
+ require 'shoulda'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'ruby-hdfs'
+
+ class Test::Unit::TestCase
+ end
data/test/test_ruby-hdfs.rb ADDED
@@ -0,0 +1,7 @@
+ require 'helper'
+
+ class TestRubyHdfs < Test::Unit::TestCase
+   should "probably rename this file and start testing for real" do
+     flunk "hey buddy, you should probably rename this file and start testing for real"
+   end
+ end
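
The shipped test is just the jeweler placeholder. A real test written against the
extension might look roughly like the sketch below; it assumes a namenode reachable on
localhost:9000 and a writable /tmp path, neither of which the gem guarantees:

  require 'helper'

  # Hypothetical shoulda test exercising the FileSystem and File bindings.
  class TestFileSystem < Test::Unit::TestCase
    should "round-trip a small file through HDFS" do
      dfs = Hadoop::DFS::FileSystem.new("localhost", 9000)
      file = dfs.open("/tmp/ruby-hdfs-test", "w", {})
      file.write("hello")
      file.close
      file = dfs.open("/tmp/ruby-hdfs-test", "r", {})
      assert_equal "hello", file.read(5)
      file.close
      assert dfs.delete("/tmp/ruby-hdfs-test")
      dfs.disconnect
    end
  end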
metadata ADDED
@@ -0,0 +1,74 @@
+ --- !ruby/object:Gem::Specification
+ name: ruby-hdfs
+ version: !ruby/object:Gem::Version
+   prerelease: false
+   segments:
+   - 0
+   - 1
+   - 0
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Alexander Staubo
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2010-03-04 00:00:00 +01:00
+ default_executable:
+ dependencies: []
+
+ description: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+ email: alex@bengler.no
+ executables: []
+
+ extensions:
+ - ext/hdfs/extconf.rb
+ extra_rdoc_files:
+ - LICENSE
+ - README.rdoc
+ files:
+ - .document
+ - .gitignore
+ - LICENSE
+ - README.rdoc
+ - Rakefile
+ - VERSION
+ - ext/hdfs/extconf.rb
+ - ext/hdfs/hdfs.c
+ - ext/hdfs/hdfs.h
+ - test/helper.rb
+ - test/test_ruby-hdfs.rb
+ has_rdoc: true
+ homepage: http://github.com/alexstaubo/ruby-hdfs
+ licenses: []
+
+ post_install_message:
+ rdoc_options:
+ - --charset=UTF-8
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.6
+ signing_key:
+ specification_version: 3
+ summary: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+ test_files:
+ - test/helper.rb
+ - test/test_ruby-hdfs.rb