ruby-hdfs 0.1.0
- data/.document +5 -0
- data/.gitignore +22 -0
- data/LICENSE +20 -0
- data/README.rdoc +59 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/ext/hdfs/extconf.rb +49 -0
- data/ext/hdfs/hdfs.c +247 -0
- data/ext/hdfs/hdfs.h +414 -0
- data/test/helper.rb +10 -0
- data/test/test_ruby-hdfs.rb +7 -0
- metadata +74 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
+Copyright (c) 2010 Alexander Staubo
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
+== Hadoop DFS (HDFS) bindings for Ruby
+
+This library provides native C bindings to Hadoop's libhdfs, for interacting with Hadoop DFS.
+
+=== Requirements
+
+You will need:
+
+* Java JDK and JRE (yes, both). The build file will attempt to find them for you.
+* Hadoop's libhdfs. On Ubuntu/Debian you will need libhdfs0 and libhdfs0-dev.
+* Hadoop Core and DFS libraries.
+
+=== Installation
+
+Install from RubyGems. Note that you will need to provide JAVA_HOME so the compiler can find the
+required libraries.
+
+The installation will attempt to discover the location of the libraries, but if it fails,
+you can try setting the environment variable JAVA_LIB to the library path of the JDK/JRE.
+
+Installing with a specific Java JDK:
+
+  sudo env JAVA_HOME=/usr/lib/jvm/java-6-openjdk gem install ruby-hdfs
+
+=== Using
+
+The library also depends on an installation of Hadoop DFS. The Cloudera distribution of
+Hadoop is pretty good:
+
+  http://www.cloudera.com/distribution
+
+Sample classpath setup (yes, welcome to JAR hell):
+
+  export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/hadoop-0.18.3-6cloudera0.3.0-core.jar
+  for jarfile in /usr/lib/hadoop/lib/*.jar; do
+    export CLASSPATH=$CLASSPATH:$jarfile
+  done
+
+Wait, there's more. You will also need libjvm.so in your library path, which comes with
+the JRE. This might work:
+
+  export LD_LIBRARY_PATH=/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server
+
+=== Known issues
+
+libhdfs will sometimes throw exceptions, which are printed to the console instead of being caught by Ruby.
+This is annoying but harmless.
+
+=== Building from source
+
+To build from source:
+
+  rake compile
+
+On completion, the compiled extension will be available in ext/hdfs.
+
+== Copyright
+
+Copyright (c) 2010 Alexander Staubo. See LICENSE for details.
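The README stops short of showing the Ruby API itself. Based on the classes registered in ext/hdfs/hdfs.c below (Hadoop::DFS::FileSystem and Hadoop::DFS::File), a minimal usage sketch could look like the following; the namenode host and port, the paths, and the read size are placeholders, and the empty options hash leaves the libhdfs defaults in effect:

  require 'hdfs'   # the compiled extension built from ext/hdfs

  dfs = Hadoop::DFS::FileSystem.new("localhost", 9000)

  # write a file, then read it back
  file = dfs.open("/tmp/hello.txt", "w", {})
  file << "hello from ruby-hdfs"
  file.flush
  file.close

  file = dfs.open("/tmp/hello.txt", "r", {})
  puts file.read(1024)
  file.close

  dfs.delete("/tmp/hello.txt") if dfs.exist?("/tmp/hello.txt")
  dfs.disconnect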
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
+require 'rubygems'
+require 'rake'
+require 'rake/extensiontask' # From rake-compiler gem
+
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "ruby-hdfs"
+    gem.summary = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+    gem.description = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+    gem.email = "alex@bengler.no"
+    gem.homepage = "http://github.com/alexstaubo/ruby-hdfs"
+    gem.authors = ["Alexander Staubo"]
+    gem.extensions = ["ext/hdfs/extconf.rb"]
+    gem.require_paths = ["lib"]
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+
+Rake::ExtensionTask.new('hdfs') do |ext|
+end
+
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |test|
+    test.libs << 'test'
+    test.pattern = 'test/**/test_*.rb'
+    test.verbose = true
+  end
+rescue LoadError
+  task :rcov do
+    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  end
+end
+
+task :test => :check_dependencies
+
+task :default => :test
+
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "ruby-hdfs #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end
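The Rake::ExtensionTask block above is left empty, so rake-compiler uses its defaults for an extension named 'hdfs' (sources under ext/hdfs, the built library copied into lib). If those defaults ever needed overriding, a sketch like the following should work; the settings shown are illustrative assumptions, not part of this Rakefile:

  Rake::ExtensionTask.new('hdfs') do |ext|
    ext.ext_dir = 'ext/hdfs'   # directory containing extconf.rb and hdfs.c
    ext.lib_dir = 'lib'        # destination for the compiled hdfs shared object
  end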
data/VERSION
ADDED
@@ -0,0 +1 @@
+0.1.0
data/ext/hdfs/extconf.rb
ADDED
@@ -0,0 +1,49 @@
+require 'mkmf'
+
+java_home = ENV["JAVA_HOME"]
+unless java_home
+  %w(
+    /usr/lib/jvm/java-6-openjdk
+  ).each do |path|
+    if File.directory?(path)
+      java_home = path
+      $stderr << "Warning: Automatically guessed #{path} as Java home, might not be correct.\n"
+    end
+  end
+  abort("JAVA_HOME needs to be defined.") unless java_home
+end
+puts("Java home: #{java_home}")
+
+java_lib_path = ENV["JAVA_LIB"]
+unless java_lib_path
+  libjvm = "libjvm.so"
+  [
+    "#{java_home}/lib",
+    "#{java_home}/lib/*/client",
+    "#{java_home}/lib/*/server",
+    "#{java_home}/jre/lib",
+    "#{java_home}/jre/lib/*/client",
+    "#{java_home}/jre/lib/*/server"
+  ].each do |glob|
+    Dir.glob(glob).each do |path|
+      if File.exist?(File.join(path, libjvm))
+        java_lib_path ||= path
+        break
+      end
+    end
+  end
+  abort("Could not determine Java library path (need #{libjvm})") unless java_lib_path
+end
+puts("Java library path: #{java_lib_path}")
+
+java_include_paths = Dir.glob("#{java_home}/include/**/.").map { |s| s.gsub(/\/\.$/, '') }
+puts("Java include paths: #{java_include_paths.join(', ')}")
+java_include_paths.each do |path|
+  $INCFLAGS << " -I#{path}"
+end
+
+dir_config("hdfs")
+find_library("jvm", nil, java_lib_path)
+find_library("hdfs", nil, java_lib_path)
+have_library("c", "main")
+create_makefile("hdfs")
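extconf.rb reads two environment variables: JAVA_HOME, used to locate the JNI headers and libjvm.so, and JAVA_LIB as an explicit override for the directory containing libjvm.so. A sketch of building the extension by hand with those variables set; the paths are examples for an OpenJDK 6 install, not something the gem mandates:

  # build ext/hdfs manually; adjust the JDK paths to your system
  ENV["JAVA_HOME"] = "/usr/lib/jvm/java-6-openjdk"
  ENV["JAVA_LIB"]  = "/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server"
  Dir.chdir("ext/hdfs") do
    system("ruby extconf.rb") or abort("extconf.rb failed")
    system("make")            or abort("make failed")
  end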
data/ext/hdfs/hdfs.c
ADDED
@@ -0,0 +1,247 @@
+#include "ruby.h"
+#include "hdfs.h"
+
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+
+// Override flag in hdfs.h
+#define HDFS_O_RDONLY 0
+#define HDFS_O_WRONLY 2
+
+static VALUE m_hadoop;
+static VALUE m_dfs;
+static VALUE c_file_system;
+static VALUE c_file;
+static VALUE e_dfs_exception;
+static VALUE e_file_error;
+static VALUE e_could_not_open;
+
+static const int HDFS_DEFAULT_BLOCK_SIZE = 134217728;
+static const char* HDFS_DEFAULT_HOST = "localhost";
+static const int HDFS_DEFAULT_PORT = 9000;
+
+/*
+ * Data structs
+ */
+
+typedef struct FSData {
+  hdfsFS fs;
+} FSData;
+
+typedef struct FileData {
+  hdfsFS fs;
+  hdfsFile file;
+} FileData;
+
+void free_fs_data(FSData* data) {
+  if (data && data->fs != NULL) {
+    hdfsDisconnect(data->fs);
+    data->fs = NULL;
+  }
+}
+
+void free_file_data(FileData* data) {
+  if (data && data->file != NULL) {
+    hdfsCloseFile(data->fs, data->file);
+    data->file = NULL;
+  }
+}
+
+/*
+ * File system interface
+ */
+
+VALUE HDFS_File_System_alloc(VALUE klass) {
+  FSData* data = ALLOC_N(FSData, 1);
+  data->fs = NULL;
+  VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
+  return instance;
+}
+
+/**
+ * call-seq:
+ *   hdfs.new -> hdfs
+ *
+ * Creates a new HDFS client connection.
+ */
+VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  data->fs = hdfsConnect(
+    RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
+    RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
+  return self;
+}
+
+/**
+ * call-seq:
+ *   hdfs.disconnect -> nil
+ *
+ * Disconnects the client connection.
+ */
+VALUE HDFS_File_System_disconnect(VALUE self) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  if (data->fs != NULL) {
+    hdfsDisconnect(data->fs);
+    data->fs = NULL;
+  }
+  return Qnil;
+}
+
+VALUE HDFS_File_System_delete(VALUE self, VALUE path) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  int value = hdfsDelete(data->fs, RSTRING_PTR(path));
+  return value == 0 ? Qtrue : Qfalse;
+}
+
+VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  int value = hdfsExists(data->fs, RSTRING_PTR(path));
+  return value == 0 ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq:
+ *   hdfs.open -> file
+ *
+ * Opens a file.
+ */
+VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+
+  int flags = 0;
+  if (strcmp("r", STR2CSTR(mode)) == 0) {
+    flags = HDFS_O_RDONLY;
+  } else if (strcmp("w", STR2CSTR(mode)) == 0) {
+    flags = HDFS_O_WRONLY;
+  } else {
+    rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
+    return Qnil;  /* not reached */
+  }
+  VALUE r_buffer_size = rb_hash_aref(options, ID2SYM(rb_intern("buffer_size")));
+  VALUE r_replication = rb_hash_aref(options, ID2SYM(rb_intern("replication")));
+  VALUE r_block_size = rb_hash_aref(options, ID2SYM(rb_intern("block_size")));
+  hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
+    RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
+    RTEST(r_replication) ? NUM2INT(r_replication) : 0,
+    RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+  if (file == NULL) {
+    rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
+    return Qnil;  /* not reached */
+  }
+
+  FileData* file_data = ALLOC_N(FileData, 1);
+  file_data->fs = data->fs;
+  file_data->file = file;
+  VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
+  return file_instance;
+}
+
+/*
+ * File interface
+ */
+
+VALUE HDFS_File_read(VALUE self, VALUE length) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int buffer_length = NUM2INT(length);
+  char* buffer = ALLOC_N(char, buffer_length);
+  MEMZERO(buffer, char, buffer_length);
+  tSize bytes_read = hdfsRead(data->fs, data->file, buffer, buffer_length);
+  if (bytes_read == -1) {
+    xfree(buffer);
+    rb_raise(e_file_error, "Failed to read data");
+  }
+  /* bytes_read may be shorter than requested; size the Ruby string accordingly */
+  VALUE result = rb_tainted_str_new(buffer, bytes_read);
+  xfree(buffer);
+  return result;
+}
+
+VALUE HDFS_File_write(VALUE self, VALUE bytes) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
+  if (bytes_written == -1) {
+    rb_raise(e_file_error, "Failed to write data");
+  }
+  return INT2NUM(bytes_written);
+}
+
+VALUE HDFS_File_tell(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  tOffset offset = hdfsTell(data->fs, data->file);
+  if (offset == -1) {
+    rb_raise(e_file_error, "Failed to read position");
+  }
+  return LL2NUM(offset);
+}
+
+VALUE HDFS_File_seek(VALUE self, VALUE offset) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsSeek(data->fs, data->file, NUM2LL(offset));
+  return result == 0 ? Qtrue : Qfalse;
+}
+
+VALUE HDFS_File_flush(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsFlush(data->fs, data->file);
+  if (result != 0) {
+    rb_raise(e_file_error, "Flush failed");
+  }
+  return Qnil;
+}
+
+VALUE HDFS_File_available(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsAvailable(data->fs, data->file);
+  if (result == -1) {
+    rb_raise(e_file_error, "Failed to get available data");
+  }
+  return INT2NUM(result);
+}
+
+VALUE HDFS_File_close(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file != NULL) {
+    hdfsCloseFile(data->fs, data->file);
+    data->file = NULL;
+  }
+  return Qnil;
+}
+
+/*
+ * Extension initialization
+ */
+
+void Init_hdfs() {
+  m_hadoop = rb_define_module("Hadoop");
+  m_dfs = rb_define_module_under(m_hadoop, "DFS");
+
+  c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
+  rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
+  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
+  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+  rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
+  rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 1);
+  rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
+
+  c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
+  rb_define_method(c_file, "read", HDFS_File_read, 1);
+  rb_define_method(c_file, "write", HDFS_File_write, 1);
+  rb_define_method(c_file, "<<", HDFS_File_write, 1);
+  rb_define_method(c_file, "seek", HDFS_File_seek, 1);
+  rb_define_method(c_file, "tell", HDFS_File_tell, 0);
+  rb_define_method(c_file, "flush", HDFS_File_flush, 0);
+  rb_define_method(c_file, "available", HDFS_File_available, 0);
+  rb_define_method(c_file, "close", HDFS_File_close, 0);
+
+  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
+  e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
+  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
+}
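HDFS_File_System_open above reads three optional keys from its options hash (:buffer_size, :replication, :block_size) and passes them straight through to hdfsOpenFile, falling back to the libhdfs defaults and the 134217728-byte (128 MB) block size when a key is absent. A small Ruby sketch of what that looks like from the caller's side; the path and values are placeholders:

  dfs = Hadoop::DFS::FileSystem.new(nil, nil)        # nil host/port fall back to localhost:9000
  file = dfs.open("/logs/events.log", "w",
                  :buffer_size => 4096,              # hdfsOpenFile bufferSize
                  :replication => 3,                 # hdfsOpenFile replication
                  :block_size  => 64 * 1024 * 1024)  # overrides the 128 MB default
  file.write("event data")
  file.close
  dfs.disconnect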
data/ext/hdfs/hdfs.h
ADDED
@@ -0,0 +1,414 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFS_HDFS_H
+#define LIBHDFS_HDFS_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <errno.h>
+
+#include <jni.h>
+
+#ifndef O_RDONLY
+#define O_RDONLY 1
+#endif
+
+#ifndef O_WRONLY
+#define O_WRONLY 2
+#endif
+
+#ifndef EINTERNAL
+#define EINTERNAL 255
+#endif
+
+
+/** All APIs set errno to meaningful values */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Some utility decls used in libhdfs.
+ */
+
+typedef int32_t  tSize;   /// size of data for read/write io ops
+typedef time_t   tTime;   /// time type
+typedef int64_t  tOffset; /// offset within the file
+typedef uint16_t tPort;   /// port
+typedef enum tObjectKind {
+    kObjectKindFile = 'F',
+    kObjectKindDirectory = 'D',
+} tObjectKind;
+
+
+/**
+ * The C reflection of org.apache.org.hadoop.FileSystem .
+ */
+typedef void* hdfsFS;
+
+
+/**
+ * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .
+ */
+enum hdfsStreamType
+{
+    UNINITIALIZED = 0,
+    INPUT = 1,
+    OUTPUT = 2,
+};
+
+
+/**
+ * The 'file-handle' to a file in hdfs.
+ */
+struct hdfsFile_internal {
+    void* file;
+    enum hdfsStreamType type;
+};
+typedef struct hdfsFile_internal* hdfsFile;
+
+
+/**
+ * hdfsConnect - Connect to a hdfs file system.
+ * Connect to the hdfs.
+ * @param host A string containing either a host name, or an ip address
+ * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
+ * you want to connect to local filesystem. 'host' should be passed as
+ * 'default' (and port as 0) to use the 'configured' filesystem
+ * (hadoop-site/hadoop-default.xml).
+ * @param port The port on which the server is listening.
+ * @return Returns a handle to the filesystem or NULL on error.
+ */
+hdfsFS hdfsConnect(const char* host, tPort port);
+
+
+/**
+ * hdfsDisconnect - Disconnect from the hdfs file system.
+ * Disconnect from hdfs.
+ * @param fs The configured filesystem handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsDisconnect(hdfsFS fs);
+
+
+/**
+ * hdfsOpenFile - Open a hdfs file in given mode.
+ * @param fs The configured filesystem handle.
+ * @param path The full path to the file.
+ * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
+ * @param bufferSize Size of buffer for read/write - pass 0 if you want
+ * to use the default configured values.
+ * @param replication Block replication - pass 0 if you want to use
+ * the default configured values.
+ * @param blocksize Size of block - pass 0 if you want to use the
+ * default configured values.
+ * @return Returns the handle to the open file or NULL on error.
+ */
+hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
+                      int bufferSize, short replication, tSize blocksize);
+
+
+/**
+ * hdfsCloseFile - Close an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCloseFile(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsExists - Checks if a given path exists on the filesystem
+ * @param fs The configured filesystem handle.
+ * @param path The path to look for
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsExists(hdfsFS fs, const char *path);
+
+
+/**
+ * hdfsSeek - Seek to given offset in file.
+ * This works only for files opened in read-only mode.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param desiredPos Offset into the file to seek into.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos);
+
+
+/**
+ * hdfsTell - Get the current offset in the file, in bytes.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Current offset, -1 on error.
+ */
+tOffset hdfsTell(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsRead - Read data from an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param buffer The buffer to copy read bytes into.
+ * @param length The length of the buffer.
+ * @return Returns the number of bytes actually read, possibly less
+ * than length; -1 on error.
+ */
+tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
+
+
+/**
+ * hdfsPread - Positional read of data from an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param position Position from which to read
+ * @param buffer The buffer to copy read bytes into.
+ * @param length The length of the buffer.
+ * @return Returns the number of bytes actually read, possibly less than
+ * length; -1 on error.
+ */
+tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
+                void* buffer, tSize length);
+
+
+/**
+ * hdfsWrite - Write data into an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param buffer The data.
+ * @param length The no. of bytes to write.
+ * @return Returns the number of bytes written, -1 on error.
+ */
+tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
+                tSize length);
+
+
+/**
+ * hdfsFlush - Flush the data.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsFlush(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsAvailable - Number of bytes that can be read from this
+ * input stream without blocking.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns available bytes; -1 on error.
+ */
+int hdfsAvailable(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsCopy - Copy file from one filesystem to another.
+ * @param srcFS The handle to source filesystem.
+ * @param src The path of source file.
+ * @param dstFS The handle to destination filesystem.
+ * @param dst The path of destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+/**
+ * hdfsMove - Move file from one filesystem to another.
+ * @param srcFS The handle to source filesystem.
+ * @param src The path of source file.
+ * @param dstFS The handle to destination filesystem.
+ * @param dst The path of destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+/**
+ * hdfsDelete - Delete file.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsDelete(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsRename - Rename file.
+ * @param fs The configured filesystem handle.
+ * @param oldPath The path of the source file.
+ * @param newPath The path of the destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
+
+
+/**
+ * hdfsGetWorkingDirectory - Get the current working directory for
+ * the given filesystem.
+ * @param fs The configured filesystem handle.
+ * @param buffer The user-buffer to copy path of cwd into.
+ * @param bufferSize The length of user-buffer.
+ * @return Returns buffer, NULL on error.
+ */
+char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
+
+
+/**
+ * hdfsSetWorkingDirectory - Set the working directory. All relative
+ * paths will be resolved relative to it.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the new 'cwd'.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsCreateDirectory - Make the given file and all non-existent
+ * parents into directories.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the directory.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCreateDirectory(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsSetReplication - Set the replication of the specified
+ * file to the supplied value
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
+
+
+/**
+ * hdfsFileInfo - Information about a file/directory.
+ */
+typedef struct {
+    tObjectKind mKind;   /* file or directory */
+    char *mName;         /* the name of the file */
+    tTime mLastMod;      /* the last modification time for the file */
+    tOffset mSize;       /* the size of the file in bytes */
+    short mReplication;  /* the count of replicas */
+    tOffset mBlockSize;  /* the block size for the file */
+} hdfsFileInfo;
+
+
+/**
+ * hdfsListDirectory - Get list of files/directories for a given
+ * directory-path. hdfsFreeFileInfo should be called to deallocate memory.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the directory.
+ * @param numEntries Set to the number of files/directories in path.
+ * @return Returns a dynamically-allocated array of hdfsFileInfo
+ * objects; NULL on error.
+ */
+hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
+                                int *numEntries);
+
+
+/**
+ * hdfsGetPathInfo - Get information about a path as a (dynamically
+ * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
+ * called when the pointer is no longer needed.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns a dynamically-allocated hdfsFileInfo object;
+ * NULL on error.
+ */
+hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields)
+ * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+ * objects.
+ * @param numEntries The size of the array.
+ */
+void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
+
+
+/**
+ * hdfsGetHosts - Get hostnames where a particular block (determined by
+ * pos & blocksize) of a file is stored. The last element in the array
+ * is NULL. Due to replication, a single block could be present on
+ * multiple hosts.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @param start The start of the block.
+ * @param length The length of the block.
+ * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
+ * NULL on error.
+ */
+char*** hdfsGetHosts(hdfsFS fs, const char* path,
+                     tOffset start, tOffset length);
+
+
+/**
+ * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
+ * @param blockHosts The array of blocks-hosts returned by hdfsGetHosts.
+ */
+void hdfsFreeHosts(char ***blockHosts);
+
+
+/**
+ * hdfsGetDefaultBlockSize - Get the optimum blocksize.
+ * @param fs The configured filesystem handle.
+ * @return Returns the blocksize; -1 on error.
+ */
+tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
+
+
+/**
+ * hdfsGetCapacity - Return the raw capacity of the filesystem.
+ * @param fs The configured filesystem handle.
+ * @return Returns the raw-capacity; -1 on error.
+ */
+tOffset hdfsGetCapacity(hdfsFS fs);
+
+
+/**
+ * hdfsGetUsed - Return the total raw size of all files in the filesystem.
+ * @param fs The configured filesystem handle.
+ * @return Returns the total-size; -1 on error.
+ */
+tOffset hdfsGetUsed(hdfsFS fs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*LIBHDFS_HDFS_H*/
+
+/**
+ * vim: ts=4: sw=4: et
+ */
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,74 @@
+--- !ruby/object:Gem::Specification
+name: ruby-hdfs
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- Alexander Staubo
+autorequire:
+bindir: bin
+cert_chain: []
+
+date: 2010-03-04 00:00:00 +01:00
+default_executable:
+dependencies: []
+
+description: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+email: alex@bengler.no
+executables: []
+
+extensions:
+- ext/hdfs/extconf.rb
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- ext/hdfs/extconf.rb
+- ext/hdfs/hdfs.c
+- ext/hdfs/hdfs.h
+- test/helper.rb
+- test/test_ruby-hdfs.rb
+has_rdoc: true
+homepage: http://github.com/alexstaubo/ruby-hdfs
+licenses: []
+
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+test_files:
+- test/helper.rb
+- test/test_ruby-hdfs.rb