ruby-hdf5 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0a5d336f6b45f913a829873d20aba6c80c5724db14038965e073235f36477733
4
+ data.tar.gz: 1fe31c4c72258d31d57ca6ce043aee21f7cd9083700c77405a501a79f1ea5323
5
+ SHA512:
6
+ metadata.gz: 786821e23be9bdc249352c34dc5e293be4a6f3e931c69cc89a1f51ee284d53e00d2ccdb130ee49a2e5bf467ff89cf60f184c455ffc3271309ea0c37a189b28eb
7
+ data.tar.gz: 98150386a48675d3208416cca2c1d4f1668f9519c2acafbb2935ae0d8111789a1de890d1516640c2649bc463d8c07f1144acad050a018f209fe429cf5d394821
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 kojix2
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,115 @@
1
+ # ruby-hdf5
2
+
3
+ [![test](https://github.com/red-data-tools/ruby-hdf5/actions/workflows/test.yml/badge.svg)](https://github.com/red-data-tools/ruby-hdf5/actions/workflows/test.yml)
4
+
5
+ Ruby bindings for the HDF5 library.
6
+
7
+ ## Scope
8
+
9
+ This gem currently provides practical high-level wrappers for:
10
+
11
+ - opening and creating files
12
+ - creating groups
13
+ - creating, writing, and reading one-dimensional numeric datasets
14
+ - reading attributes
15
+
16
+ Unsupported at this stage:
17
+
18
+ - string dataset read/write
19
+ - attribute write
20
+ - multidimensional array write
21
+
22
+ ## Supported HDF5 Versions
23
+
24
+ - HDF5 1.10
25
+ - HDF5 1.14
26
+ - HDF5 2.x
27
+
28
+ HDF5 versions older than 1.10 are not supported.
29
+
30
+ ## Install
31
+
32
+ Add to your Gemfile:
33
+
34
+ ```ruby
35
+ gem 'ruby-hdf5'
36
+ ```
37
+
38
+ Install:
39
+
40
+ ```sh
41
+ bundle install
42
+ ```
43
+
44
+ The HDF5 system library (`libhdf5`) must be installed separately.
45
+
46
+ ## Runtime Notes
47
+
48
+ - The gem loads `libhdf5` through FFI.
49
+ - If the shared library cannot be found automatically, set `HDF5_LIB_PATH`.
50
+
51
+ Examples:
52
+
53
+ ```sh
54
+ # Point to a directory containing libhdf5.so
55
+ export HDF5_LIB_PATH=/usr/lib
56
+
57
+ # Or point directly to the shared object
58
+ export HDF5_LIB_PATH=/usr/lib/libhdf5.so
59
+ ```
60
+
61
+ ## Quick Start
62
+
63
+ ### Read an existing file
64
+
65
+ ```ruby
66
+ require 'hdf5'
67
+
68
+ HDF5::File.open('example.h5') do |file|
69
+ group = file['foo']
70
+ dataset = group['bar_int']
71
+ p dataset.shape
72
+ p dataset.dtype
73
+ p dataset.read
74
+ end
75
+ ```
76
+
77
+ ### Create and write a file
78
+
79
+ ```ruby
80
+ require 'hdf5'
81
+
82
+ HDF5::File.create('numbers.h5') do |file|
83
+ file.create_group('values') do |group|
84
+ group.create_dataset('ints', [1, 2, 3, 4])
85
+ end
86
+ end
87
+
88
+ reopened = HDF5::File.open('numbers.h5')
89
+ p reopened['values']['ints'].read
90
+ reopened.close
91
+ ```
92
+
93
+ ## Error Handling
94
+
95
+ High-level API failures raise `HDF5::Error`.
96
+
97
+ ```ruby
98
+ begin
99
+ HDF5::File.open('missing.h5')
100
+ rescue HDF5::Error => e
101
+ warn e.message
102
+ end
103
+ ```
104
+
105
+ ## Development
106
+
107
+ After more than a decade, it is clear that the Ruby community does not have enough resources to sustainably maintain an HDF5 library. For that reason, development of this library is intentionally AI-assisted. Something is better than nothing.
108
+
109
+ ## Acknowledgement
110
+
111
+ [https://github.com/edmundhighcock/hdf5](https://github.com/edmundhighcock/hdf5)
112
+
113
+ ## License
114
+
115
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,60 @@
1
module HDF5
  # Read-only handle to a single HDF5 attribute.
  #
  # The attribute is opened in the constructor; call {#close} to release it.
  #
  # NOTE(review): failures here raise RuntimeError (plain string raises), while
  # the README documents HDF5::Error for high-level failures. The exception
  # class is left unchanged so existing callers keep working -- confirm whether
  # it should be migrated.
  class Attribute
    # Opens the attribute +attr_name+ on the object identified by +dataset_id+.
    #
    # @param dataset_id [Integer] HDF5 identifier of the object owning the attribute
    # @param attr_name [String] name of the attribute to open
    # @raise [RuntimeError] if H5Aopen returns a negative identifier
    def initialize(dataset_id, attr_name)
      @dataset_id = dataset_id
      @attr_name = attr_name
      # Third argument is the attribute access property list; 0 selects the default.
      @attr_id = FFI.H5Aopen(@dataset_id, @attr_name, 0)
      raise 'Failed to open attribute' if @attr_id < 0
    end

    # Reads the attribute value.
    #
    # Integer and float attributes are converted by the library to native
    # +int+ / +double+ on the way in, so the transferred element size always
    # matches the buffer allocated here regardless of the stored width.
    #
    # @return [Array<Integer>, Array<Float>, String] integer and float
    #   attributes yield an array with one entry per element; string
    #   attributes yield the first string only
    # @raise [RuntimeError] if a library call fails or the datatype class is
    #   not integer, float or string
    def read
      type_id = FFI.H5Aget_type(@attr_id)
      raise 'Failed to get attribute datatype' if type_id < 0

      space_id = FFI.H5Aget_space(@attr_id)
      raise 'Failed to get attribute dataspace' if space_id < 0

      size = FFI.H5Sget_simple_extent_npoints(space_id)
      raise 'Failed to get attribute size' if size < 0

      case FFI.H5Tget_class(type_id)
      when :H5T_INTEGER
        buffer = ::FFI::MemoryPointer.new(:int, size)
        read_into(buffer, FFI.H5T_NATIVE_INT)
        buffer.read_array_of_int(size)
      when :H5T_FLOAT
        buffer = ::FFI::MemoryPointer.new(:double, size)
        read_into(buffer, FFI.H5T_NATIVE_DOUBLE)
        buffer.read_array_of_double(size)
      when :H5T_STRING
        # NOTE(review): the buffer holds pointers, which presumably matches
        # variable-length strings only; fixed-length strings are not handled
        # here -- confirm before relying on this branch.
        buffer = ::FFI::MemoryPointer.new(:pointer, size)
        read_into(buffer, type_id)
        buffer.read_pointer.read_string
      else
        raise 'Unsupported data type'
      end
    ensure
      # Both identifiers are owned by this call and must be released even
      # when reading fails.
      FFI.H5Sclose(space_id) if space_id && space_id >= 0
      FFI.H5Tclose(type_id) if type_id && type_id >= 0
    end

    # Releases the attribute identifier.
    #
    # @return [Integer] status returned by H5Aclose
    def close
      FFI.H5Aclose(@attr_id)
    end

    private

    # Fills +buffer+ from the attribute using +mem_type_id+ as the in-memory type.
    def read_into(buffer, mem_type_id)
      status = FFI.H5Aread(@attr_id, mem_type_id, buffer)
      raise 'Failed to read attribute' if status < 0
    end
  end

  # Hash-style reader for the attributes of one HDF5 object.
  class AttributeManager
    # @param dataset_id [Integer] HDF5 identifier of the object whose
    #   attributes are read
    def initialize(dataset_id)
      @dataset_id = dataset_id
    end

    # Opens the named attribute, reads it and closes it again.
    #
    # @param attr_name [String] name of the attribute
    # @return [Array<Integer>, Array<Float>, String] the value produced by
    #   Attribute#read
    # @raise [RuntimeError] if the attribute cannot be opened or read
    def [](attr_name)
      attr = Attribute.new(@dataset_id, attr_name)
      attr.read
    ensure
      # attr is nil when the constructor itself raised.
      attr.close if attr
    end
  end
end
@@ -0,0 +1,176 @@
1
module HDF5
  # High-level wrapper around an HDF5 dataset identifier.
  #
  # Writing accepts a number or a flat array of numbers; reading handles
  # integer and float datasets.
  class Dataset
    class << self
      # Creates a one-dimensional dataset named +name+ under +parent_id+ and
      # writes +data+ into it.
      #
      # @param parent_id [Integer] HDF5 identifier of the containing file or group
      # @param name [String] dataset name
      # @param data [Numeric, Array<Numeric>] values to store
      # @yieldparam dataset [Dataset] the new dataset; closed when the block returns
      # @return [Dataset, Object] the open dataset, or the block's value when a
      #   block is given
      # @raise [HDF5::Error] if the data is invalid or a library call fails
      def create(parent_id, name, data)
        values = normalize_data(data)
        dims = ::FFI::MemoryPointer.new(:ulong_long, 1)
        dims.write_array_of_ulong_long([values.length])
        datatype_id = datatype_id_for(values)
        dataspace_id = HDF5::FFI.H5Screate_simple(1, dims, nil)
        raise HDF5::Error, "Failed to create dataspace for dataset: #{name}" if dataspace_id < 0

        dataset = from_id(
          HDF5::FFI.H5Dcreate2(parent_id, name, datatype_id, dataspace_id, HDF5::DEFAULT_PROPERTY_LIST,
                               HDF5::DEFAULT_PROPERTY_LIST, HDF5::DEFAULT_PROPERTY_LIST), name
        )
        begin
          dataset.write(values)
        rescue StandardError
          # Do not leak the freshly created handle when the initial write fails.
          dataset.close
          raise
        end
        return dataset unless block_given?

        begin
          yield dataset
        ensure
          dataset.close
        end
      ensure
        HDF5::FFI.H5Sclose(dataspace_id) if dataspace_id && dataspace_id >= 0
      end

      # Opens the existing dataset +name+ under +parent_id+.
      #
      # @param parent_id [Integer] HDF5 identifier of the containing file or group
      # @param name [String] dataset name
      # @yieldparam dataset [Dataset] the opened dataset; closed when the block returns
      # @return [Dataset, Object] the open dataset, or the block's value when a
      #   block is given
      # @raise [HDF5::Error] if the dataset cannot be opened
      def open(parent_id, name)
        dataset = from_id(HDF5::FFI.H5Dopen2(parent_id, name, HDF5::DEFAULT_PROPERTY_LIST), name)
        return dataset unless block_given?

        begin
          yield dataset
        ensure
          dataset.close
        end
      end

      # Wraps a scalar in an array and rejects empty or nested input.
      #
      # @param data [Object, Array] value or flat array of values
      # @return [Array] the values as a flat array
      # @raise [HDF5::Error] if the array is empty or contains arrays
      def normalize_data(data)
        values = data.is_a?(Array) ? data : [data]
        raise HDF5::Error, 'Dataset data must not be empty' if values.empty?
        raise HDF5::Error, 'Nested arrays are not supported' if values.any? { |value| value.is_a?(Array) }

        values
      end

      # Picks the native HDF5 datatype for +data+: native int when every value
      # is an Integer, native double when every value is Numeric.
      #
      # @param data [Array] flat array of values
      # @return [Integer] HDF5 datatype identifier
      # @raise [HDF5::Error] if any value is not numeric
      def datatype_id_for(data)
        if data.all? { |value| value.is_a?(Integer) }
          HDF5::FFI.H5T_NATIVE_INT
        elsif data.all? { |value| value.is_a?(Numeric) }
          HDF5::FFI.H5T_NATIVE_DOUBLE
        else
          raise HDF5::Error, 'Only numeric dataset data is supported'
        end
      end

      # Copies +data+ into a native buffer: an int array when every value is
      # an Integer, otherwise a double array built with +to_f+.
      #
      # @param data [Array] flat array of values
      # @return [::FFI::MemoryPointer] buffer holding the values
      def buffer_for(data)
        if data.all? { |value| value.is_a?(Integer) }
          buffer = ::FFI::MemoryPointer.new(:int, data.length)
          buffer.write_array_of_int(data)
        else
          buffer = ::FFI::MemoryPointer.new(:double, data.length)
          buffer.write_array_of_double(data.map(&:to_f))
        end

        buffer
      end

      private

      # Builds an instance around an already obtained dataset identifier,
      # bypassing #initialize (which would open the dataset again).
      def from_id(dataset_id, name)
        dataset = allocate
        dataset.send(:initialize_from_id, dataset_id, name)
        dataset
      end
    end

    # Opens the existing dataset +name+ under +parent_id+.
    #
    # @param parent_id [Integer] HDF5 identifier of the containing file or group
    # @param name [String] dataset name
    # @raise [HDF5::Error] if the dataset cannot be opened
    def initialize(parent_id, name)
      initialize_from_id(HDF5::FFI.H5Dopen2(parent_id, name, HDF5::DEFAULT_PROPERTY_LIST), name)
    end

    # @return [AttributeManager] memoized reader for this dataset's attributes
    def attrs
      @attrs ||= AttributeManager.new(@dataset_id)
    end

    # Overwrites the whole dataset with +data+.
    #
    # The library transfers as many elements as the dataset holds, so the
    # number of supplied values must equal the dataset's element count;
    # otherwise the native buffer would be read past its end.
    #
    # @param data [Numeric, Array<Numeric>] values to write
    # @return [Object] the +data+ argument as given
    # @raise [HDF5::Error] if the data is invalid, its length does not match
    #   the dataset, or the library write fails
    def write(data)
      values = self.class.normalize_data(data)
      # Validate the element type before building the buffer so non-numeric
      # input fails with HDF5::Error instead of a conversion error.
      mem_type_id = self.class.datatype_id_for(values)
      expected = element_count
      unless values.length == expected
        raise HDF5::Error, "Data length (#{values.length}) does not match dataset size (#{expected})"
      end

      buffer = self.class.buffer_for(values)
      status = HDF5::FFI.H5Dwrite(@dataset_id, mem_type_id, HDF5::DEFAULT_PROPERTY_LIST, HDF5::DEFAULT_PROPERTY_LIST,
                                  HDF5::DEFAULT_PROPERTY_LIST, buffer)
      raise HDF5::Error, 'Failed to write dataset' if status < 0

      data
    end

    # Releases the dataset identifier. Calling it again is a no-op.
    #
    # @return [nil]
    def close
      return if @dataset_id.nil?

      HDF5::FFI.H5Dclose(@dataset_id)
      @dataset_id = nil
    end

    # @return [Symbol] datatype class reported by H5Tget_class
    #   (e.g. +:H5T_INTEGER+, +:H5T_FLOAT+)
    # @raise [HDF5::Error] if the datatype cannot be obtained
    def dtype
      datatype_id = HDF5::FFI.H5Dget_type(@dataset_id)
      raise HDF5::Error, 'Failed to get datatype' if datatype_id < 0

      HDF5::FFI.H5Tget_class(datatype_id)
    ensure
      HDF5::FFI.H5Tclose(datatype_id) if datatype_id && datatype_id >= 0
    end

    # @return [Array<Integer>] extent of each dimension; empty for a
    #   rank-0 dataspace
    # @raise [HDF5::Error] if the dataspace or its dimensions cannot be queried
    def shape
      dataspace_id = HDF5::FFI.H5Dget_space(@dataset_id)
      raise HDF5::Error, 'Failed to get dataspace' if dataspace_id < 0

      ndims = HDF5::FFI.H5Sget_simple_extent_ndims(dataspace_id)
      raise HDF5::Error, 'Failed to get number of dimensions' if ndims < 0
      return [] if ndims.zero?

      dims = ::FFI::MemoryPointer.new(:ulong_long, ndims)
      status = HDF5::FFI.H5Sget_simple_extent_dims(dataspace_id, dims, nil)
      raise HDF5::Error, 'Failed to get dimensions' if status < 0

      dims.read_array_of_uint64(ndims)
    ensure
      HDF5::FFI.H5Sclose(dataspace_id) if dataspace_id && dataspace_id >= 0
    end

    # Reads every element of the dataset as a flat array.
    #
    # @return [Array<Integer>, Array<Float>] the dataset values
    # @raise [HDF5::Error] if the datatype class is unsupported or a library
    #   call fails
    # @raise [NotImplementedError] for string datasets
    def read
      current_dtype = dtype
      total_elements = element_count
      case current_dtype
      when :H5T_INTEGER
        read_integer_data(total_elements)
      when :H5T_FLOAT
        read_float_data(total_elements)
      when :H5T_STRING
        read_string_data(total_elements)
      else
        raise HDF5::Error, 'Unsupported datatype'
      end
    end

    # Reads +total_elements+ values as native ints.
    #
    # @param total_elements [Integer] number of elements in the dataset
    # @return [Array<Integer>]
    # @raise [HDF5::Error] if the library read fails
    def read_integer_data(total_elements)
      buffer = ::FFI::MemoryPointer.new(:int, total_elements)
      status = HDF5::FFI.H5Dread(@dataset_id, HDF5::FFI.H5T_NATIVE_INT, HDF5::DEFAULT_PROPERTY_LIST,
                                 HDF5::DEFAULT_PROPERTY_LIST, HDF5::DEFAULT_PROPERTY_LIST, buffer)
      raise HDF5::Error, 'Failed to read integer dataset' if status < 0

      buffer.read_array_of_int(total_elements)
    end

    # Reads +total_elements+ values as native doubles.
    #
    # @param total_elements [Integer] number of elements in the dataset
    # @return [Array<Float>]
    # @raise [HDF5::Error] if the library read fails
    def read_float_data(total_elements)
      buffer = ::FFI::MemoryPointer.new(:double, total_elements)
      status = HDF5::FFI.H5Dread(@dataset_id, HDF5::FFI.H5T_NATIVE_DOUBLE, HDF5::DEFAULT_PROPERTY_LIST,
                                 HDF5::DEFAULT_PROPERTY_LIST, HDF5::DEFAULT_PROPERTY_LIST, buffer)
      raise HDF5::Error, 'Failed to read float dataset' if status < 0

      buffer.read_array_of_double(total_elements)
    end

    # String datasets are not implemented.
    #
    # @raise [NotImplementedError] always
    def read_string_data(total_elements)
      raise NotImplementedError
    end

    private

    # Stores the identifier after checking that the open/create call succeeded.
    def initialize_from_id(dataset_id, name)
      raise HDF5::Error, "Failed to open dataset: #{name}" if dataset_id < 0

      @dataset_id = dataset_id
      @name = name
    end

    # Product of the dimensions. Seeded with 1 so that an empty shape
    # (rank-0 dataspace) counts as a single element instead of nil.
    def element_count
      shape.inject(1, :*)
    end
  end
end
data/lib/hdf5/ffi.rb ADDED
@@ -0,0 +1,59 @@
1
module HDF5
  # Low-level bindings to libhdf5.
  #
  # Loading this module binds the shared library, asks it for its version and
  # then requires the binding file that matches that version.
  module FFI
    extend ::FFI::Library

    class << self
      # Name of the binding file chosen for the loaded library version.
      attr_reader :backend
    end

    begin
      ffi_lib HDF5.lib_path
    rescue LoadError => e
      # Re-raise with the attempted path appended to the original message.
      raise LoadError, "#{e}\nCould not find #{HDF5.lib_path}"
    end

    # Binds a library function; a symbol missing from the loaded library
    # produces a warning instead of aborting the load.
    #
    # @!macro attach_function
    #   @!scope class
    #   @!method $1(${2--2})
    #   @return [${-1}] the return value of $0
    def self.attach_function(*)
      super
    rescue ::FFI::NotFoundError => e
      warn e.message # if $VERBOSE
    end

    # Binds a library variable; a symbol missing from the loaded library
    # produces a warning instead of aborting the load.
    def self.attach_variable(*)
      super
    rescue ::FFI::NotFoundError => e
      warn e.message # if $VERBOSE
    end

    attach_function 'H5get_libversion', %i[pointer pointer pointer], :int

    # One unsigned int out-parameter each for major, minor and release.
    version_ptrs = Array.new(3) { ::FFI::MemoryPointer.new(:uint) }
    HDF5::FFI.H5get_libversion(*version_ptrs)
    major, minor, release = version_ptrs.map(&:read_uint)

    # 1.10 through 1.13 use one binding file; 1.14 and later (including any
    # major version from 2 up) use the other.
    @backend =
      case [major, minor]
      in [1, 10..13]
        'ffi_10'
      in [1, 14..] | [2.., _]
        'ffi_14'
      else
        raise "Unsupported HDF5 version #{major}.#{minor}.#{release}"
      end

    require_relative @backend
  end
end