ruby-hdfs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
+ README.rdoc
+ lib/**/*.rb
+ bin/*
+ features/**/*.feature
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
+ # MacOS X
+ .DS_Store
+
+ # Emacs
+ *~
+ \#*
+ .\#*
+
+ # Vim
+ *.swp
+
+ # General
+ coverage
+ rdoc
+ pkg
+
+ # Project-specific
+ *.bak
+ /ext/hdfs/Makefile
+ /ext/hdfs/*.log
+ *.o
+ *.so
data/LICENSE ADDED
@@ -0,0 +1,20 @@
+ Copyright (c) 2010 Alexander Staubo
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,59 @@
+ == Hadoop DFS (HDFS) bindings for Ruby
+
+ This library provides native C bindings to Hadoop's libhdfs, for interacting with Hadoop DFS.
+
+ === Requirements
+
+ You will need:
+
+ * Java JDK and JRE (yes, both). The build file will attempt to find them for you.
+ * Hadoop's libhdfs. On Ubuntu/Debian you will need libhdfs0 and libhdfs0-dev.
+ * Hadoop Core and DFS libraries.
+
+ === Installation
+
+ Install from RubyGems. Note that you will need to provide JAVA_HOME so the compiler can find the
+ required libraries.
+
+ The installation will attempt to discover the location of the libraries, but if it fails,
+ you can try setting the environment variable JAVA_LIB to the library path of the JDK/JRE.
+
+ Installing with a specific Java JDK:
+
+   sudo env JAVA_HOME=/usr/lib/jvm/java-6-openjdk gem install ruby-hdfs
+
+ === Using
+
+ The library also depends on an installation of Hadoop DFS. The Cloudera distribution of
+ Hadoop is pretty good:
+
+   http://www.cloudera.com/distribution
+
+ Sample classpath setup (yes, welcome to JAR hell):
+
+   export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/hadoop-0.18.3-6cloudera0.3.0-core.jar
+   for jarfile in /usr/lib/hadoop/lib/*.jar; do
+     export CLASSPATH=$CLASSPATH:$jarfile
+   done
+
+ Wait, there's more. You will also need libjvm.so in your library path, which comes with
+ the JRE. This might work:
+
+   export LD_LIBRARY_PATH=/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server
+
+ === Known issues
+
+ libhdfs will sometimes throw Java exceptions, which are printed to the console rather than
+ being caught and raised in Ruby. This is annoying but harmless.
+
+ === Building from source
+
+ To build from source:
+
+   rake compile
+
+ On completion, the compiled extension will be available in ext/hdfs.
+
+ == Copyright
+
+ Copyright (c) 2010 Alexander Staubo. See LICENSE for details.
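
For orientation, here is a minimal usage sketch of the Ruby API that the extension defines. The class and method names are taken from ext/hdfs/hdfs.c further down in this diff; the namenode host, port, and path are placeholders, and a running HDFS plus the CLASSPATH/LD_LIBRARY_PATH setup described above are assumed:

  require 'hdfs'   # the compiled extension (create_makefile("hdfs")); defines Hadoop::DFS

  dfs = Hadoop::DFS::FileSystem.new("localhost", 9000)   # nil, nil would fall back to localhost:9000

  # Write a file, then read it back.
  file = dfs.open("/tmp/hello.txt", "w", {})
  file.write("hello from ruby-hdfs\n")
  file.flush
  file.close

  file = dfs.open("/tmp/hello.txt", "r", {})
  puts file.read(1024)
  file.close

  dfs.exist?("/tmp/hello.txt")   # => true
  dfs.delete("/tmp/hello.txt")
  dfs.disconnect

The mode must be "r" or "w", and the options hash accepts :buffer_size, :replication and :block_size (pass an empty hash for the defaults).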
data/Rakefile ADDED
@@ -0,0 +1,58 @@
+ require 'rubygems'
+ require 'rake'
+ require 'rake/extensiontask' # From rake-compiler gem
+
+ begin
+   require 'jeweler'
+   Jeweler::Tasks.new do |gem|
+     gem.name = "ruby-hdfs"
+     gem.summary = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+     gem.description = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+     gem.email = "alex@bengler.no"
+     gem.homepage = "http://github.com/alexstaubo/ruby-hdfs"
+     gem.authors = ["Alexander Staubo"]
+     gem.extensions = ["ext/hdfs/extconf.rb"]
+     gem.require_paths = ["lib"]
+     # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+   end
+   Jeweler::GemcutterTasks.new
+ rescue LoadError
+   puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+ end
+
+ Rake::ExtensionTask.new('hdfs') do |ext|
+ end
+
+ require 'rake/testtask'
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.pattern = 'test/**/test_*.rb'
+   test.verbose = true
+ end
+
+ begin
+   require 'rcov/rcovtask'
+   Rcov::RcovTask.new do |test|
+     test.libs << 'test'
+     test.pattern = 'test/**/test_*.rb'
+     test.verbose = true
+   end
+ rescue LoadError
+   task :rcov do
+     abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+   end
+ end
+
+ task :test => :check_dependencies
+
+ task :default => :test
+
+ require 'rake/rdoctask'
+ Rake::RDocTask.new do |rdoc|
+   version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+   rdoc.rdoc_dir = 'rdoc'
+   rdoc.title = "ruby-hdfs #{version}"
+   rdoc.rdoc_files.include('README*')
+   rdoc.rdoc_files.include('lib/**/*.rb')
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
+ 0.1.0
data/ext/hdfs/extconf.rb ADDED
@@ -0,0 +1,49 @@
+ require 'mkmf'
+
+ java_home = ENV["JAVA_HOME"]
+ unless java_home
+   %w(
+     /usr/lib/jvm/java-6-openjdk
+   ).each do |path|
+     if File.directory?(path)
+       java_home = path
+       $stderr << "Warning: Automatically guessed #{path} as Java home, might not be correct.\n"
+     end
+   end
+   abort("JAVA_HOME needs to be defined.") unless java_home
+ end
+ puts("Java home: #{java_home}")
+
+ java_lib_path = ENV["JAVA_LIB"]
+ unless java_lib_path
+   libjvm = "libjvm.so"
+   [
+     "#{java_home}/lib",
+     "#{java_home}/lib/*/client",
+     "#{java_home}/lib/*/server",
+     "#{java_home}/jre/lib",
+     "#{java_home}/jre/lib/*/client",
+     "#{java_home}/jre/lib/*/server"
+   ].each do |glob|
+     Dir.glob(glob).each do |path|
+       if File.exist?(File.join(path, libjvm))
+         java_lib_path ||= path
+         break
+       end
+     end
+   end
+   abort("Could not determine Java library path (need #{libjvm})") unless java_lib_path
+ end
+ puts("Java library path: #{java_lib_path}")
+
+ java_include_paths = Dir.glob("#{java_home}/include/**/.").map { |s| s.gsub(/\/\.$/, '') }
+ puts("Java include paths: #{java_include_paths.join(', ')}")
+ java_include_paths.each do |path|
+   $INCFLAGS << " -I#{path}"
+ end
+
+ dir_config("hdfs")
+ find_library("jvm", nil, java_lib_path)
+ find_library("hdfs", nil, java_lib_path)
+ have_library("c", "main")
+ create_makefile("hdfs")
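
The extconf above guesses JAVA_HOME when it is unset and then globs a handful of standard JDK/JRE directories for libjvm.so. As an illustrative convenience (not part of the gem), the same search can be run standalone to sanity-check the environment before `gem install ruby-hdfs`:

  java_home = ENV["JAVA_HOME"] or abort("Set JAVA_HOME first")
  # Same candidate directories extconf.rb searches for libjvm.so.
  globs = [
    "#{java_home}/lib", "#{java_home}/lib/*/client", "#{java_home}/lib/*/server",
    "#{java_home}/jre/lib", "#{java_home}/jre/lib/*/client", "#{java_home}/jre/lib/*/server"
  ]
  dir = globs.map { |g| Dir.glob(g) }.flatten.find { |d| File.exist?(File.join(d, "libjvm.so")) }
  if dir
    puts "libjvm.so found in #{dir} (pass this as JAVA_LIB if the build cannot find it)"
  else
    puts "libjvm.so not found under #{java_home}; set JAVA_LIB explicitly"
  end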
data/ext/hdfs/hdfs.c ADDED
@@ -0,0 +1,247 @@
+ #include "ruby.h"
+ #include "hdfs.h"
+
+ #include <assert.h>
+ #include <string.h>
+ #include <ctype.h>
+
+ // Override flags in hdfs.h
+ #define HDFS_O_RDONLY 0
+ #define HDFS_O_WRONLY 2
+
+ static VALUE m_hadoop;
+ static VALUE m_dfs;
+ static VALUE c_file_system;
+ static VALUE c_file;
+ static VALUE e_dfs_exception;
+ static VALUE e_file_error;
+ static VALUE e_could_not_open;
+
+ static const int HDFS_DEFAULT_BLOCK_SIZE = 134217728;
+ static const char* HDFS_DEFAULT_HOST = "localhost";
+ static const int HDFS_DEFAULT_PORT = 9000;
+
+ /*
+  * Data structs
+  */
+
+ typedef struct FSData {
+   hdfsFS fs;
+ } FSData;
+
+ typedef struct FileData {
+   hdfsFS fs;
+   hdfsFile file;
+ } FileData;
+
+ void free_fs_data(FSData* data) {
+   if (data && data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+ }
+
+ void free_file_data(FileData* data) {
+   if (data && data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+ }
+
+ /*
+  * File system interface
+  */
+
+ VALUE HDFS_File_System_alloc(VALUE klass) {
+   FSData* data = ALLOC_N(FSData, 1);
+   data->fs = NULL;
+   VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
+   return instance;
+ }
+
+ /**
+  * call-seq:
+  *   hdfs.new -> hdfs
+  *
+  * Creates a new HDFS client connection.
+  */
+ VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   data->fs = hdfsConnect(
+     RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
+     RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
+   return self;
+ }
+
+ /**
+  * call-seq:
+  *   hdfs.disconnect -> nil
+  *
+  * Disconnects the client connection.
+  */
+ VALUE HDFS_File_System_disconnect(VALUE self) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   if (data->fs != NULL) {
+     hdfsDisconnect(data->fs);
+     data->fs = NULL;
+   }
+   return Qnil;
+ }
+
+ VALUE HDFS_File_System_delete(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int value = hdfsDelete(data->fs, RSTRING_PTR(path));
+   return value == 0 ? Qtrue : Qfalse;
+ }
+
+ VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+   int value = hdfsExists(data->fs, RSTRING_PTR(path));
+   return value == 0 ? Qtrue : Qfalse;
+ }
+
+ /**
+  * call-seq:
+  *   hdfs.open -> file
+  *
+  * Opens a file.
+  */
+ VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
+   FSData* data = NULL;
+   Data_Get_Struct(self, FSData, data);
+
+   int flags = 0;
+   if (strcmp("r", STR2CSTR(mode)) == 0) {
+     flags = HDFS_O_RDONLY;
+   } else if (strcmp("w", STR2CSTR(mode)) == 0) {
+     flags = HDFS_O_WRONLY;
+   } else {
+     rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
+     return Qnil;
+   }
+   VALUE r_buffer_size = rb_hash_aref(options, rb_eval_string(":buffer_size"));
+   VALUE r_replication = rb_hash_aref(options, rb_eval_string(":replication"));
+   VALUE r_block_size = rb_hash_aref(options, rb_eval_string(":block_size"));
+   hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
+     RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
+     RTEST(r_replication) ? NUM2INT(r_replication) : 0,
+     RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+   if (file == NULL) {
+     rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
+     return Qnil;
+   }
+
+   FileData* file_data = ALLOC_N(FileData, 1);
+   file_data->fs = data->fs;
+   file_data->file = file;
+   VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
+   return file_instance;
+ }
+
+ /*
+  * File interface
+  */
+
+ VALUE HDFS_File_read(VALUE self, VALUE length) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   char* buffer = ALLOC_N(char, NUM2INT(length) + 1);
+   MEMZERO(buffer, char, NUM2INT(length) + 1);
+   tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
+   if (bytes_read == -1) {
+     rb_raise(e_file_error, "Failed to read data");
+   }
+   return rb_tainted_str_new2(buffer);
+ }
+
+ VALUE HDFS_File_write(VALUE self, VALUE bytes) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
+   if (bytes_written == -1) {
+     rb_raise(e_file_error, "Failed to write data");
+   }
+   return INT2NUM(bytes_written);
+ }
+
+ VALUE HDFS_File_tell(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   tSize offset = hdfsTell(data->fs, data->file);
+   if (offset == -1) {
+     rb_raise(e_file_error, "Failed to read position");
+   }
+   return INT2NUM(offset);
+ }
+
+ VALUE HDFS_File_seek(VALUE self, VALUE offset) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
+   return result == 0 ? Qtrue : Qfalse;
+ }
+
+ VALUE HDFS_File_flush(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsFlush(data->fs, data->file);
+   if (result != 0) {
+     rb_raise(e_file_error, "Flush failed");
+   }
+   return Qnil;
+ }
+
+ VALUE HDFS_File_available(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   int result = hdfsAvailable(data->fs, data->file);
+   if (result == -1) {
+     rb_raise(e_file_error, "Failed to get available data");
+   }
+   return INT2NUM(result);
+ }
+
+ VALUE HDFS_File_close(VALUE self) {
+   FileData* data = NULL;
+   Data_Get_Struct(self, FileData, data);
+   if (data->file != NULL) {
+     hdfsCloseFile(data->fs, data->file);
+     data->file = NULL;
+   }
+   return Qnil;
+ }
+
+ /*
+  * Extension initialization
+  */
+
+ void Init_hdfs() {
+   m_hadoop = rb_define_module("Hadoop");
+   m_dfs = rb_define_module_under(m_hadoop, "DFS");
+
+   c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
+   rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
+   rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
+   rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+   rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
+   rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 1);
+   rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
+
+   c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
+   rb_define_method(c_file, "read", HDFS_File_read, 1);
+   rb_define_method(c_file, "write", HDFS_File_write, 1);
+   rb_define_method(c_file, "<<", HDFS_File_write, 1);
+   rb_define_method(c_file, "seek", HDFS_File_seek, 1);
+   rb_define_method(c_file, "tell", HDFS_File_tell, 0);
+   rb_define_method(c_file, "flush", HDFS_File_flush, 0);
+   rb_define_method(c_file, "available", HDFS_File_available, 0);
+   rb_define_method(c_file, "close", HDFS_File_close, 0);
+
+   e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
+   e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
+   e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
+ }
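
Init_hdfs registers an exception hierarchy (Hadoop::DFS::DFSException < StandardError, FileError < DFSException, CouldNotOpenFileError < FileError), but only open, read, write, tell, flush and available actually raise; delete, exist? and seek return true or false. A hedged sketch of error handling against these classes (the path and connection details are placeholders):

  begin
    dfs  = Hadoop::DFS::FileSystem.new(nil, nil)   # nil host/port => localhost:9000 defaults
    file = dfs.open("/no/such/file", "r", {})
    data = file.read(4096)
  rescue Hadoop::DFS::CouldNotOpenFileError => e
    warn "open failed: #{e.message}"
  rescue Hadoop::DFS::DFSException => e
    warn "DFS error: #{e.message}"
  ensure
    dfs.disconnect if dfs
  end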
data/ext/hdfs/hdfs.h ADDED
@@ -0,0 +1,414 @@
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements. See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership. The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License. You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #ifndef LIBHDFS_HDFS_H
+ #define LIBHDFS_HDFS_H
+
+ #include <sys/types.h>
+ #include <sys/stat.h>
+
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <string.h>
+ #include <stdlib.h>
+ #include <time.h>
+ #include <errno.h>
+
+ #include <jni.h>
+
+ #ifndef O_RDONLY
+ #define O_RDONLY 1
+ #endif
+
+ #ifndef O_WRONLY
+ #define O_WRONLY 2
+ #endif
+
+ #ifndef EINTERNAL
+ #define EINTERNAL 255
+ #endif
+
+
+ /** All APIs set errno to meaningful values */
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ /**
+  * Some utility decls used in libhdfs.
+  */
+
+ typedef int32_t tSize; /// size of data for read/write io ops
+ typedef time_t tTime; /// time type
+ typedef int64_t tOffset;/// offset within the file
+ typedef uint16_t tPort; /// port
+ typedef enum tObjectKind {
+     kObjectKindFile = 'F',
+     kObjectKindDirectory = 'D',
+ } tObjectKind;
+
+
+ /**
+  * The C reflection of org.apache.hadoop.FileSystem .
+  */
+ typedef void* hdfsFS;
+
+
+ /**
+  * The C equivalent of org.apache.hadoop.FSData(Input|Output)Stream .
+  */
+ enum hdfsStreamType
+ {
+     UNINITIALIZED = 0,
+     INPUT = 1,
+     OUTPUT = 2,
+ };
+
+
+ /**
+  * The 'file-handle' to a file in hdfs.
+  */
+ struct hdfsFile_internal {
+     void* file;
+     enum hdfsStreamType type;
+ };
+ typedef struct hdfsFile_internal* hdfsFile;
+
+
+ /**
+  * hdfsConnect - Connect to a hdfs file system.
+  * Connect to the hdfs.
+  * @param host A string containing either a host name, or an ip address
+  * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
+  * you want to connect to local filesystem. 'host' should be passed as
+  * 'default' (and port as 0) to use the 'configured' filesystem
+  * (hadoop-site/hadoop-default.xml).
+  * @param port The port on which the server is listening.
+  * @return Returns a handle to the filesystem or NULL on error.
+  */
+ hdfsFS hdfsConnect(const char* host, tPort port);
+
+
+ /**
+  * hdfsDisconnect - Disconnect from the hdfs file system.
+  * Disconnect from hdfs.
+  * @param fs The configured filesystem handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsDisconnect(hdfsFS fs);
+
+
+ /**
+  * hdfsOpenFile - Open a hdfs file in given mode.
+  * @param fs The configured filesystem handle.
+  * @param path The full path to the file.
+  * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
+  * @param bufferSize Size of buffer for read/write - pass 0 if you want
+  * to use the default configured values.
+  * @param replication Block replication - pass 0 if you want to use
+  * the default configured values.
+  * @param blocksize Size of block - pass 0 if you want to use the
+  * default configured values.
+  * @return Returns the handle to the open file or NULL on error.
+  */
+ hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
+                       int bufferSize, short replication, tSize blocksize);
+
+
+ /**
+  * hdfsCloseFile - Close an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCloseFile(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsExists - Checks if a given path exists on the filesystem
+  * @param fs The configured filesystem handle.
+  * @param path The path to look for
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsExists(hdfsFS fs, const char *path);
+
+
+ /**
+  * hdfsSeek - Seek to given offset in file.
+  * This works only for files opened in read-only mode.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param desiredPos Offset into the file to seek into.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos);
+
+
+ /**
+  * hdfsTell - Get the current offset in the file, in bytes.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Current offset, -1 on error.
+  */
+ tOffset hdfsTell(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsRead - Read data from an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param buffer The buffer to copy read bytes into.
+  * @param length The length of the buffer.
+  * @return Returns the number of bytes actually read, possibly less
+  * than length; -1 on error.
+  */
+ tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
+
+
+ /**
+  * hdfsPread - Positional read of data from an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param position Position from which to read
+  * @param buffer The buffer to copy read bytes into.
+  * @param length The length of the buffer.
+  * @return Returns the number of bytes actually read, possibly less
+  * than length; -1 on error.
+  */
+ tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
+                 void* buffer, tSize length);
+
+
+ /**
+  * hdfsWrite - Write data into an open file.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @param buffer The data.
+  * @param length The no. of bytes to write.
+  * @return Returns the number of bytes written, -1 on error.
+  */
+ tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
+                 tSize length);
+
+
+ /**
+  * hdfsFlush - Flush the data.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsFlush(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsAvailable - Number of bytes that can be read from this
+  * input stream without blocking.
+  * @param fs The configured filesystem handle.
+  * @param file The file handle.
+  * @return Returns available bytes; -1 on error.
+  */
+ int hdfsAvailable(hdfsFS fs, hdfsFile file);
+
+
+ /**
+  * hdfsCopy - Copy file from one filesystem to another.
+  * @param srcFS The handle to source filesystem.
+  * @param src The path of source file.
+  * @param dstFS The handle to destination filesystem.
+  * @param dst The path of destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+ /**
+  * hdfsMove - Move file from one filesystem to another.
+  * @param srcFS The handle to source filesystem.
+  * @param src The path of source file.
+  * @param dstFS The handle to destination filesystem.
+  * @param dst The path of destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+ /**
+  * hdfsDelete - Delete file.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsDelete(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsRename - Rename file.
+  * @param fs The configured filesystem handle.
+  * @param oldPath The path of the source file.
+  * @param newPath The path of the destination file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
+
+
+ /**
+  * hdfsGetWorkingDirectory - Get the current working directory for
+  * the given filesystem.
+  * @param fs The configured filesystem handle.
+  * @param buffer The user-buffer to copy path of cwd into.
+  * @param bufferSize The length of user-buffer.
+  * @return Returns buffer, NULL on error.
+  */
+ char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
+
+
+ /**
+  * hdfsSetWorkingDirectory - Set the working directory. All relative
+  * paths will be resolved relative to it.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the new 'cwd'.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsCreateDirectory - Make the given file and all non-existent
+  * parents into directories.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the directory.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsCreateDirectory(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsSetReplication - Set the replication of the specified
+  * file to the supplied value
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns 0 on success, -1 on error.
+  */
+ int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
+
+
+ /**
+  * hdfsFileInfo - Information about a file/directory.
+  */
+ typedef struct {
+     tObjectKind mKind;   /* file or directory */
+     char *mName;         /* the name of the file */
+     tTime mLastMod;      /* the last modification time for the file*/
+     tOffset mSize;       /* the size of the file in bytes */
+     short mReplication;  /* the count of replicas */
+     tOffset mBlockSize;  /* the block size for the file */
+ } hdfsFileInfo;
+
+
+ /**
+  * hdfsListDirectory - Get list of files/directories for a given
+  * directory-path. hdfsFreeFileInfo should be called to deallocate memory.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the directory.
+  * @param numEntries Set to the number of files/directories in path.
+  * @return Returns a dynamically-allocated array of hdfsFileInfo
+  * objects; NULL on error.
+  */
+ hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
+                                 int *numEntries);
+
+
+ /**
+  * hdfsGetPathInfo - Get information about a path as a (dynamically
+  * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
+  * called when the pointer is no longer needed.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @return Returns a dynamically-allocated hdfsFileInfo object;
+  * NULL on error.
+  */
+ hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
+
+
+ /**
+  * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields)
+  * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+  * objects.
+  * @param numEntries The size of the array.
+  */
+ void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
+
+
+ /**
+  * hdfsGetHosts - Get hostnames where a particular block (determined by
+  * pos & blocksize) of a file is stored. The last element in the array
+  * is NULL. Due to replication, a single block could be present on
+  * multiple hosts.
+  * @param fs The configured filesystem handle.
+  * @param path The path of the file.
+  * @param start The start of the block.
+  * @param length The length of the block.
+  * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
+  * NULL on error.
+  */
+ char*** hdfsGetHosts(hdfsFS fs, const char* path,
+                      tOffset start, tOffset length);
+
+
+ /**
+  * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
+  * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+  * objects.
+  * @param numEntries The size of the array.
+  */
+ void hdfsFreeHosts(char ***blockHosts);
+
+
+ /**
+  * hdfsGetDefaultBlockSize - Get the optimum blocksize.
+  * @param fs The configured filesystem handle.
+  * @return Returns the blocksize; -1 on error.
+  */
+ tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
+
+
+ /**
+  * hdfsGetCapacity - Return the raw capacity of the filesystem.
+  * @param fs The configured filesystem handle.
+  * @return Returns the raw-capacity; -1 on error.
+  */
+ tOffset hdfsGetCapacity(hdfsFS fs);
+
+
+ /**
+  * hdfsGetUsed - Return the total raw size of all files in the filesystem.
+  * @param fs The configured filesystem handle.
+  * @return Returns the total-size; -1 on error.
+  */
+ tOffset hdfsGetUsed(hdfsFS fs);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /*LIBHDFS_HDFS_H*/
+
+ /**
+  * vim: ts=4: sw=4: et
+  */
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
+ require 'rubygems'
+ require 'test/unit'
+ require 'shoulda'
+
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
+ require 'ruby-hdfs'
+
+ class Test::Unit::TestCase
+ end
data/test/test_ruby-hdfs.rb ADDED
@@ -0,0 +1,7 @@
+ require 'helper'
+
+ class TestRubyHdfs < Test::Unit::TestCase
+   should "probably rename this file and start testing for real" do
+     flunk "hey buddy, you should probably rename this file and start testing for real"
+   end
+ end
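
The generated test above is just Jeweler's placeholder. As a sketch of what a real test might look like, using the shoulda helpers already required in test/helper.rb (it assumes a namenode reachable at localhost:9000 and the environment described in README.rdoc):

  require 'helper'

  class TestFileRoundTrip < Test::Unit::TestCase
    context "a connected file system" do
      setup do
        @dfs  = Hadoop::DFS::FileSystem.new("localhost", 9000)
        @path = "/tmp/ruby-hdfs-test-#{Process.pid}"
      end

      teardown do
        @dfs.delete(@path) if @dfs.exist?(@path)
        @dfs.disconnect
      end

      should "write and read back a string" do
        file = @dfs.open(@path, "w", {})
        file.write("round trip")
        file.close

        file = @dfs.open(@path, "r", {})
        assert_equal "round trip", file.read(1024)
        file.close
      end
    end
  end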
metadata ADDED
@@ -0,0 +1,74 @@
+ --- !ruby/object:Gem::Specification
+ name: ruby-hdfs
+ version: !ruby/object:Gem::Version
+   prerelease: false
+   segments:
+   - 0
+   - 1
+   - 0
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Alexander Staubo
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2010-03-04 00:00:00 +01:00
+ default_executable:
+ dependencies: []
+
+ description: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+ email: alex@bengler.no
+ executables: []
+
+ extensions:
+ - ext/hdfs/extconf.rb
+ extra_rdoc_files:
+ - LICENSE
+ - README.rdoc
+ files:
+ - .document
+ - .gitignore
+ - LICENSE
+ - README.rdoc
+ - Rakefile
+ - VERSION
+ - ext/hdfs/extconf.rb
+ - ext/hdfs/hdfs.c
+ - ext/hdfs/hdfs.h
+ - test/helper.rb
+ - test/test_ruby-hdfs.rb
+ has_rdoc: true
+ homepage: http://github.com/alexstaubo/ruby-hdfs
+ licenses: []
+
+ post_install_message:
+ rdoc_options:
+ - --charset=UTF-8
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       segments:
+       - 0
+       version: "0"
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.6
+ signing_key:
+ specification_version: 3
+ summary: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+ test_files:
+ - test/helper.rb
+ - test/test_ruby-hdfs.rb