ruby-hdfs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +22 -0
- data/LICENSE +20 -0
- data/README.rdoc +59 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/ext/hdfs/extconf.rb +49 -0
- data/ext/hdfs/hdfs.c +247 -0
- data/ext/hdfs/hdfs.h +414 -0
- data/test/helper.rb +10 -0
- data/test/test_ruby-hdfs.rb +7 -0
- metadata +74 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
+Copyright (c) 2010 Alexander Staubo
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc
ADDED
@@ -0,0 +1,59 @@
+== Hadoop DFS (HDFS) bindings for Ruby
+
+This library provides native C bindings to Hadoop's libhdfs, for interacting with Hadoop DFS.
+
+=== Requirements
+
+You will need:
+
+* Java JDK and JRE (yes, both). The build file will attempt to find it for you.
+* Hadoop's libhdfs. On Ubuntu/Debian you will need libhdfs0 and libhdfs0-dev.
+* Hadoop Core and DFS libraries.
+
+=== Installation
+
+Install from gems. Note that you will need to provide JAVA_HOME so the compiler can find the
+required libraries.
+
+The installation will attempt to discover the location of the libraries, but if it fails,
+you can try setting the environment variable JAVA_LIB to the library path of the JDK/JRE.
+
+Installing with a specific Java JDK:
+
+  sudo env JAVA_HOME=/usr/lib/jvm/java-6-openjdk gem install ruby-hdfs
+
+=== Using
+
+The library also depends on an installation of Hadoop DFS. The Cloudera distribution of
+Hadoop is pretty good:
+
+  http://www.cloudera.com/distribution
+
+Sample classpath setup (yes, welcome to JAR hell):
+
+  export CLASSPATH=$CLASSPATH:/usr/lib/hadoop/hadoop-0.18.3-6cloudera0.3.0-core.jar
+  for jarfile in /usr/lib/hadoop/lib/*.jar; do
+    export CLASSPATH=$CLASSPATH:$jarfile
+  done
+
+Wait, there's more. You will also need libjvm.so in your library path, which comes with
+the JRE. This might work:
+
+  export LD_LIBRARY_PATH=/usr/lib/jvm/java-6-openjdk/jre/lib/i386/server
+
+=== Known issues
+
+libhdfs will sometimes throw exceptions, which will be output instead of caught by Ruby.
+This is annoying but harmless.
+
+=== Building from source
+
+To build from source:
+
+  rake compile
+
+On completion, the compiled extension will be available in ext/hdfs.
+
+== Copyright
+
+Copyright (c) 2010 Alexander Staubo. See LICENSE for details.
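Editor's note: the README does not show the Ruby side of the API. Based on the classes and methods defined in ext/hdfs/hdfs.c below (Hadoop::DFS::FileSystem#initialize(host, port), #open(path, mode, options), #exist?, #delete, and the File read/write/close methods), a minimal usage sketch might look like the following; the host, port, and path are placeholders, not values from the package.

  require "hdfs"

  # Connect to the namenode; hdfs.c falls back to localhost:9000 when nil is passed.
  dfs = Hadoop::DFS::FileSystem.new("localhost", 9000)

  # Write a file. The options hash may carry :buffer_size, :replication and :block_size.
  file = dfs.open("/tmp/hello.txt", "w", {})
  file.write("hello from ruby-hdfs\n")
  file.close

  # Read it back.
  file = dfs.open("/tmp/hello.txt", "r", {})
  puts file.read(1024)
  file.close

  puts dfs.exist?("/tmp/hello.txt")   # => true
  dfs.delete("/tmp/hello.txt")
  dfs.disconnect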
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
+require 'rubygems'
+require 'rake'
+require 'rake/extensiontask' # From rake-compiler gem
+
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "ruby-hdfs"
+    gem.summary = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+    gem.description = %Q{Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.}
+    gem.email = "alex@bengler.no"
+    gem.homepage = "http://github.com/alexstaubo/ruby-hdfs"
+    gem.authors = ["Alexander Staubo"]
+    gem.extensions = ["ext/hdfs/extconf.rb"]
+    gem.require_paths = ["lib"]
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+
+Rake::ExtensionTask.new('hdfs') do |ext|
+end
+
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |test|
+    test.libs << 'test'
+    test.pattern = 'test/**/test_*.rb'
+    test.verbose = true
+  end
+rescue LoadError
+  task :rcov do
+    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  end
+end
+
+task :test => :check_dependencies
+
+task :default => :test
+
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "ruby-hdfs #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end
data/VERSION
ADDED
@@ -0,0 +1 @@
+0.1.0
data/ext/hdfs/extconf.rb
ADDED
@@ -0,0 +1,49 @@
+require 'mkmf'
+
+java_home = ENV["JAVA_HOME"]
+unless java_home
+  %w(
+    /usr/lib/jvm/java-6-openjdk
+  ).each do |path|
+    if File.directory?(path)
+      java_home = path
+      $stderr << "Warning: Automatically guessed #{path} as Java home, might not be correct.\n"
+    end
+  end
+  abort("JAVA_HOME needs to be defined.") unless java_home
+end
+puts("Java home: #{java_home}")
+
+java_lib_path = ENV["JAVA_LIB"]
+unless java_lib_path
+  libjvm = "libjvm.so"
+  [
+    "#{java_home}/lib",
+    "#{java_home}/lib/*/client",
+    "#{java_home}/lib/*/server",
+    "#{java_home}/jre/lib",
+    "#{java_home}/jre/lib/*/client",
+    "#{java_home}/jre/lib/*/server"
+  ].each do |glob|
+    Dir.glob(glob).each do |path|
+      if File.exist?(File.join(path, libjvm))
+        java_lib_path ||= path
+        break
+      end
+    end
+  end
+  abort("Could not determine Java library path (need #{libjvm})") unless java_lib_path
+end
+puts("Java library path: #{java_lib_path}")
+
+java_include_paths = Dir.glob("#{java_home}/include/**/.").map { |s| s.gsub(/\/\.$/, '') }
+puts("Java include paths: #{java_include_paths.join(', ')}")
+java_include_paths.each do |path|
+  $INCFLAGS << " -I#{path}"
+end
+
+dir_config("hdfs")
+find_library("jvm", nil, java_lib_path)
+find_library("hdfs", nil, java_lib_path)
+have_library("c", "main")
+create_makefile("hdfs")
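Editor's note: if the extension fails to build, it can help to check the same locations the extconf.rb above searches before running gem install. A small diagnostic sketch, not part of the gem, assuming JAVA_HOME is set; the globs mirror the ones in extconf.rb:

  # Print every directory from the extconf.rb search globs that contains libjvm.so.
  java_home = ENV.fetch("JAVA_HOME")
  globs = ["#{java_home}/lib", "#{java_home}/lib/*/client", "#{java_home}/lib/*/server",
           "#{java_home}/jre/lib", "#{java_home}/jre/lib/*/client", "#{java_home}/jre/lib/*/server"]
  globs.flat_map { |g| Dir.glob(g) }.each do |dir|
    puts dir if File.exist?(File.join(dir, "libjvm.so"))
  end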
data/ext/hdfs/hdfs.c
ADDED
@@ -0,0 +1,247 @@
+#include "ruby.h"
+#include "hdfs.h"
+
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+
+// Override flag in hdfs.h
+#define HDFS_O_RDONLY 0
+#define HDFS_O_WRONLY 2
+
+static VALUE m_hadoop;
+static VALUE m_dfs;
+static VALUE c_file_system;
+static VALUE c_file;
+static VALUE e_dfs_exception;
+static VALUE e_file_error;
+static VALUE e_could_not_open;
+
+static const HDFS_DEFAULT_BLOCK_SIZE = 134217728;
+static const char* HDFS_DEFAULT_HOST = "localhost";
+static const int HDFS_DEFAULT_PORT = 9000;
+
+/*
+ * Data structs
+ */
+
+typedef struct FSData {
+  hdfsFS fs;
+} FSData;
+
+typedef struct FileData {
+  hdfsFS fs;
+  hdfsFile file;
+} FileData;
+
+void free_fs_data(FSData* data) {
+  if (data && data->fs != NULL) {
+    hdfsDisconnect(data->fs);
+    data->fs = NULL;
+  }
+}
+
+void free_file_data(FileData* data) {
+  if (data && data->file != NULL) {
+    hdfsCloseFile(data->fs, data->file);
+    data->file = NULL;
+  }
+}
+
+/*
+ * File system interface
+ */
+
+VALUE HDFS_File_System_alloc(VALUE klass) {
+  FSData* data = ALLOC_N(FSData, 1);
+  data->fs = NULL;
+  VALUE instance = Data_Wrap_Struct(klass, NULL, free_fs_data, data);
+  return instance;
+}
+
+/**
+ * call-seq:
+ *   hdfs.new -> hdfs
+ *
+ * Creates a new HDFS client connection.
+ */
+VALUE HDFS_File_System_initialize(VALUE self, VALUE host, VALUE port) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  data->fs = hdfsConnect(
+    RTEST(host) ? RSTRING_PTR(host) : HDFS_DEFAULT_HOST,
+    RTEST(port) ? NUM2INT(port) : HDFS_DEFAULT_PORT);
+  return self;
+}
+
+/**
+ * call-seq:
+ *   hdfs.disconnect -> nil
+ *
+ * Disconnects the client connection.
+ */
+VALUE HDFS_File_System_disconnect(VALUE self) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  if (data->fs != NULL) {
+    hdfsDisconnect(data->fs);
+    data->fs = NULL;
+  }
+  return Qnil;
+}
+
+VALUE HDFS_File_System_delete(VALUE self, VALUE path) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  int value = hdfsDelete(data->fs, RSTRING_PTR(path));
+  return value == 0 ? Qtrue : Qfalse;
+}
+
+VALUE HDFS_File_System_exist(VALUE self, VALUE path) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+  int value = hdfsExists(data->fs, RSTRING_PTR(path));
+  return value == 0 ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq:
+ *   hdfs.open -> file
+ *
+ * Opens a file.
+ */
+VALUE HDFS_File_System_open(VALUE self, VALUE path, VALUE mode, VALUE options) {
+  FSData* data = NULL;
+  Data_Get_Struct(self, FSData, data);
+
+  int flags = 0;
+  if (strcmp("r", STR2CSTR(mode)) == 0) {
+    flags = HDFS_O_RDONLY;
+  } else if (strcmp("w", STR2CSTR(mode)) == 0) {
+    flags = HDFS_O_WRONLY;
+  } else {
+    rb_raise(rb_eArgError, "Mode must be 'r' or 'w'");
+    return;
+  }
+  VALUE r_buffer_size = rb_hash_aref(options, rb_eval_string(":buffer_size"));
+  VALUE r_replication = rb_hash_aref(options, rb_eval_string(":replication"));
+  VALUE r_block_size = rb_hash_aref(options, rb_eval_string(":block_size"));
+  hdfsFile file = hdfsOpenFile(data->fs, RSTRING_PTR(path), flags,
+    RTEST(r_buffer_size) ? NUM2INT(r_buffer_size) : 0,
+    RTEST(r_replication) ? NUM2INT(r_replication) : 0,
+    RTEST(r_block_size) ? NUM2INT(r_block_size) : HDFS_DEFAULT_BLOCK_SIZE);
+  if (file == NULL) {
+    rb_raise(e_could_not_open, "Could not open file %s", RSTRING_PTR(path));
+    return;
+  }
+
+  FileData* file_data = ALLOC_N(FileData, 1);
+  file_data->fs = data->fs;
+  file_data->file = file;
+  VALUE file_instance = Data_Wrap_Struct(c_file, NULL, free_file_data, file_data);
+  return file_instance;
+}
+
+/*
+ * File interface
+ */
+
+VALUE HDFS_File_read(VALUE self, VALUE length) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  char* buffer = ALLOC_N(char, length);
+  MEMZERO(buffer, char, length);
+  tSize bytes_read = hdfsRead(data->fs, data->file, buffer, NUM2INT(length));
+  if (bytes_read == -1) {
+    rb_raise(e_file_error, "Failed to read data");
+  }
+  return rb_tainted_str_new2(buffer);
+}
+
+VALUE HDFS_File_write(VALUE self, VALUE bytes) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  tSize bytes_written = hdfsWrite(data->fs, data->file, RSTRING_PTR(bytes), RSTRING_LEN(bytes));
+  if (bytes_written == -1) {
+    rb_raise(e_file_error, "Failed to write data");
+  }
+  return INT2NUM(bytes_written);
+}
+
+VALUE HDFS_File_tell(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  tSize offset = hdfsTell(data->fs, data->file);
+  if (offset == -1) {
+    rb_raise(e_file_error, "Failed to read position");
+  }
+  return INT2NUM(offset);
+}
+
+VALUE HDFS_File_seek(VALUE self, VALUE offset) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsSeek(data->fs, data->file, NUM2INT(offset));
+  return result == 0 ? Qtrue : Qfalse;
+}
+
+VALUE HDFS_File_flush(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsFlush(data->fs, data->file);
+  if (result != 0) {
+    rb_raise(e_file_error, "Flush failed");
+  }
+  return Qnil;
+}
+
+VALUE HDFS_File_available(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  int result = hdfsAvailable(data->fs, data->file);
+  if (result == -1) {
+    rb_raise(e_file_error, "Failed to get available data");
+  }
+  return INT2NUM(result);
+}
+
+VALUE HDFS_File_close(VALUE self) {
+  FileData* data = NULL;
+  Data_Get_Struct(self, FileData, data);
+  if (data->file != NULL) {
+    hdfsCloseFile(data->fs, data->file);
+    data->file = NULL;
+  }
+  return Qnil;
+}
+
+/*
+ * Extension initialization
+ */
+
+void Init_hdfs() {
+  m_hadoop = rb_define_module("Hadoop");
+  m_dfs = rb_define_module_under(m_hadoop, "DFS");
+
+  c_file_system = rb_define_class_under(m_dfs, "FileSystem", rb_cObject);
+  rb_define_alloc_func(c_file_system, HDFS_File_System_alloc);
+  rb_define_method(c_file_system, "initialize", HDFS_File_System_initialize, 2);
+  rb_define_method(c_file_system, "disconnect", HDFS_File_System_disconnect, 0);
+  rb_define_method(c_file_system, "open", HDFS_File_System_open, 3);
+  rb_define_method(c_file_system, "delete", HDFS_File_System_delete, 1);
+  rb_define_method(c_file_system, "exist?", HDFS_File_System_exist, 1);
+
+  c_file = rb_define_class_under(m_dfs, "File", rb_cObject);
+  rb_define_method(c_file, "read", HDFS_File_read, 1);
+  rb_define_method(c_file, "write", HDFS_File_write, 1);
+  rb_define_method(c_file, "<<", HDFS_File_write, 1);
+  rb_define_method(c_file, "seek", HDFS_File_seek, 1);
+  rb_define_method(c_file, "tell", HDFS_File_tell, 0);
+  rb_define_method(c_file, "flush", HDFS_File_flush, 0);
+  rb_define_method(c_file, "available", HDFS_File_available, 0);
+  rb_define_method(c_file, "close", HDFS_File_close, 0);
+
+  e_dfs_exception = rb_define_class_under(m_dfs, "DFSException", rb_eStandardError);
+  e_file_error = rb_define_class_under(m_dfs, "FileError", e_dfs_exception);
+  e_could_not_open = rb_define_class_under(m_dfs, "CouldNotOpenFileError", e_file_error);
+}
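Editor's note: the options hash accepted by FileSystem#open above is read with symbol keys (:buffer_size, :replication, :block_size), and failures surface through the Hadoop::DFS exception hierarchy defined in Init_hdfs. A hedged sketch of how that looks from Ruby; the path and sizes are illustrative, not taken from the package.

  require "hdfs"

  dfs = Hadoop::DFS::FileSystem.new(nil, nil)   # nil falls back to localhost:9000

  begin
    # :block_size and :replication are passed straight through to hdfsOpenFile.
    file = dfs.open("/tmp/big.log", "w", :block_size => 67108864, :replication => 2)
    file << "line 1\n"                          # << is an alias for write
    file.flush
    file.close
  rescue Hadoop::DFS::CouldNotOpenFileError => e
    warn "open failed: #{e.message}"
  rescue Hadoop::DFS::FileError => e
    warn "I/O failed: #{e.message}"
  end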
data/ext/hdfs/hdfs.h
ADDED
@@ -0,0 +1,414 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFS_HDFS_H
+#define LIBHDFS_HDFS_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <errno.h>
+
+#include <jni.h>
+
+#ifndef O_RDONLY
+#define O_RDONLY 1
+#endif
+
+#ifndef O_WRONLY
+#define O_WRONLY 2
+#endif
+
+#ifndef EINTERNAL
+#define EINTERNAL 255
+#endif
+
+
+/** All APIs set errno to meaningful values */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Some utility decls used in libhdfs.
+ */
+
+typedef int32_t tSize; /// size of data for read/write io ops
+typedef time_t tTime; /// time type
+typedef int64_t tOffset;/// offset within the file
+typedef uint16_t tPort; /// port
+typedef enum tObjectKind {
+    kObjectKindFile = 'F',
+    kObjectKindDirectory = 'D',
+} tObjectKind;
+
+
+/**
+ * The C reflection of org.apache.org.hadoop.FileSystem .
+ */
+typedef void* hdfsFS;
+
+
+/**
+ * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .
+ */
+enum hdfsStreamType
+{
+    UNINITIALIZED = 0,
+    INPUT = 1,
+    OUTPUT = 2,
+};
+
+
+/**
+ * The 'file-handle' to a file in hdfs.
+ */
+struct hdfsFile_internal {
+    void* file;
+    enum hdfsStreamType type;
+};
+typedef struct hdfsFile_internal* hdfsFile;
+
+
+/**
+ * hdfsConnect - Connect to a hdfs file system.
+ * Connect to the hdfs.
+ * @param host A string containing either a host name, or an ip address
+ * of the namenode of a hdfs cluster. 'host' should be passed as NULL if
+ * you want to connect to local filesystem. 'host' should be passed as
+ * 'default' (and port as 0) to used the 'configured' filesystem
+ * (hadoop-site/hadoop-default.xml).
+ * @param port The port on which the server is listening.
+ * @return Returns a handle to the filesystem or NULL on error.
+ */
+hdfsFS hdfsConnect(const char* host, tPort port);
+
+
+/**
+ * hdfsDisconnect - Disconnect from the hdfs file system.
+ * Disconnect from hdfs.
+ * @param fs The configured filesystem handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsDisconnect(hdfsFS fs);
+
+
+/**
+ * hdfsOpenFile - Open a hdfs file in given mode.
+ * @param fs The configured filesystem handle.
+ * @param path The full path to the file.
+ * @param flags Either O_RDONLY or O_WRONLY, for read-only or write-only.
+ * @param bufferSize Size of buffer for read/write - pass 0 if you want
+ * to use the default configured values.
+ * @param replication Block replication - pass 0 if you want to use
+ * the default configured values.
+ * @param blocksize Size of block - pass 0 if you want to use the
+ * default configured values.
+ * @return Returns the handle to the open file or NULL on error.
+ */
+hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
+                      int bufferSize, short replication, tSize blocksize);
+
+
+/**
+ * hdfsCloseFile - Close an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCloseFile(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsExists - Checks if a given path exsits on the filesystem
+ * @param fs The configured filesystem handle.
+ * @param path The path to look for
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsExists(hdfsFS fs, const char *path);
+
+
+/**
+ * hdfsSeek - Seek to given offset in file.
+ * This works only for files opened in read-only mode.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param desiredPos Offset into the file to seek into.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos);
+
+
+/**
+ * hdfsTell - Get the current offset in the file, in bytes.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Current offset, -1 on error.
+ */
+tOffset hdfsTell(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsRead - Read data from an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param buffer The buffer to copy read bytes into.
+ * @param length The length of the buffer.
+ * @return Returns the number of bytes actually read, possibly less
+ * than than length;-1 on error.
+ */
+tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);
+
+
+/**
+ * hdfsPread - Positional read of data from an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param position Position from which to read
+ * @param buffer The buffer to copy read bytes into.
+ * @param length The length of the buffer.
+ * @return Returns the number of bytes actually read, possibly less than
+ * than length;-1 on error.
+ */
+tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,
+                void* buffer, tSize length);
+
+
+/**
+ * hdfsWrite - Write data into an open file.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @param buffer The data.
+ * @param length The no. of bytes to write.
+ * @return Returns the number of bytes written, -1 on error.
+ */
+tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,
+                tSize length);
+
+
+/**
+ * hdfsWrite - Flush the data.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsFlush(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsAvailable - Number of bytes that can be read from this
+ * input stream without blocking.
+ * @param fs The configured filesystem handle.
+ * @param file The file handle.
+ * @return Returns available bytes; -1 on error.
+ */
+int hdfsAvailable(hdfsFS fs, hdfsFile file);
+
+
+/**
+ * hdfsCopy - Copy file from one filesystem to another.
+ * @param srcFS The handle to source filesystem.
+ * @param src The path of source file.
+ * @param dstFS The handle to destination filesystem.
+ * @param dst The path of destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+/**
+ * hdfsMove - Move file from one filesystem to another.
+ * @param srcFS The handle to source filesystem.
+ * @param src The path of source file.
+ * @param dstFS The handle to destination filesystem.
+ * @param dst The path of destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);
+
+
+/**
+ * hdfsDelete - Delete file.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsDelete(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsRename - Rename file.
+ * @param fs The configured filesystem handle.
+ * @param oldPath The path of the source file.
+ * @param newPath The path of the destination file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);
+
+
+/**
+ * hdfsGetWorkingDirectory - Get the current working directory for
+ * the given filesystem.
+ * @param fs The configured filesystem handle.
+ * @param buffer The user-buffer to copy path of cwd into.
+ * @param bufferSize The length of user-buffer.
+ * @return Returns buffer, NULL on error.
+ */
+char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);
+
+
+/**
+ * hdfsSetWorkingDirectory - Set the working directory. All relative
+ * paths will be resolved relative to it.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the new 'cwd'.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsCreateDirectory - Make the given file and all non-existent
+ * parents into directories.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the directory.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsCreateDirectory(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsSetReplication - Set the replication of the specified
+ * file to the supplied value
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns 0 on success, -1 on error.
+ */
+int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);
+
+
+/**
+ * hdfsFileInfo - Information about a file/directory.
+ */
+typedef struct {
+    tObjectKind mKind; /* file or directory */
+    char *mName; /* the name of the file */
+    tTime mLastMod; /* the last modification time for the file*/
+    tOffset mSize; /* the size of the file in bytes */
+    short mReplication; /* the count of replicas */
+    tOffset mBlockSize; /* the block size for the file */
+} hdfsFileInfo;
+
+
+/**
+ * hdfsListDirectory - Get list of files/directories for a given
+ * directory-path. hdfsFreeFileInfo should be called to deallocate memory.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the directory.
+ * @param numEntries Set to the number of files/directories in path.
+ * @return Returns a dynamically-allocated array of hdfsFileInfo
+ * objects; NULL on error.
+ */
+hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,
+                                int *numEntries);
+
+
+/**
+ * hdfsGetPathInfo - Get information about a path as a (dynamically
+ * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be
+ * called when the pointer is no longer needed.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @return Returns a dynamically-allocated hdfsFileInfo object;
+ * NULL on error.
+ */
+hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);
+
+
+/**
+ * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields)
+ * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+ * objects.
+ * @param numEntries The size of the array.
+ */
+void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);
+
+
+/**
+ * hdfsGetHosts - Get hostnames where a particular block (determined by
+ * pos & blocksize) of a file is stored. The last element in the array
+ * is NULL. Due to replication, a single block could be present on
+ * multiple hosts.
+ * @param fs The configured filesystem handle.
+ * @param path The path of the file.
+ * @param start The start of the block.
+ * @param length The length of the block.
+ * @return Returns a dynamically-allocated 2-d array of blocks-hosts;
+ * NULL on error.
+ */
+char*** hdfsGetHosts(hdfsFS fs, const char* path,
+                     tOffset start, tOffset length);
+
+
+/**
+ * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts
+ * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo
+ * objects.
+ * @param numEntries The size of the array.
+ */
+void hdfsFreeHosts(char ***blockHosts);
+
+
+/**
+ * hdfsGetDefaultBlockSize - Get the optimum blocksize.
+ * @param fs The configured filesystem handle.
+ * @return Returns the blocksize; -1 on error.
+ */
+tOffset hdfsGetDefaultBlockSize(hdfsFS fs);
+
+
+/**
+ * hdfsGetCapacity - Return the raw capacity of the filesystem.
+ * @param fs The configured filesystem handle.
+ * @return Returns the raw-capacity; -1 on error.
+ */
+tOffset hdfsGetCapacity(hdfsFS fs);
+
+
+/**
+ * hdfsGetUsed - Return the total raw size of all files in the filesystem.
+ * @param fs The configured filesystem handle.
+ * @return Returns the total-size; -1 on error.
+ */
+tOffset hdfsGetUsed(hdfsFS fs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*LIBHDFS_HDFS_H*/
+
+/**
+ * vim: ts=4: sw=4: et
+ */
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,74 @@
+--- !ruby/object:Gem::Specification
+name: ruby-hdfs
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- Alexander Staubo
+autorequire:
+bindir: bin
+cert_chain: []
+
+date: 2010-03-04 00:00:00 +01:00
+default_executable:
+dependencies: []
+
+description: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+email: alex@bengler.no
+executables: []
+
+extensions:
+- ext/hdfs/extconf.rb
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- ext/hdfs/extconf.rb
+- ext/hdfs/hdfs.c
+- ext/hdfs/hdfs.h
+- test/helper.rb
+- test/test_ruby-hdfs.rb
+has_rdoc: true
+homepage: http://github.com/alexstaubo/ruby-hdfs
+licenses: []
+
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Native C bindings to Hadoop's libhdfs, for interacting with Hadoop HDFS.
+test_files:
+- test/helper.rb
+- test/test_ruby-hdfs.rb