ruby-zstds 1.0.6 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +65 -31
- data/ext/extconf.rb +110 -25
- data/ext/zstds_ext/buffer.c +4 -5
- data/ext/zstds_ext/buffer.h +5 -5
- data/ext/zstds_ext/dictionary.c +122 -43
- data/ext/zstds_ext/dictionary.h +9 -1
- data/ext/zstds_ext/error.c +15 -16
- data/ext/zstds_ext/error.h +3 -1
- data/ext/zstds_ext/gvl.h +24 -0
- data/ext/zstds_ext/io.c +229 -138
- data/ext/zstds_ext/macro.h +6 -0
- data/ext/zstds_ext/main.c +3 -1
- data/ext/zstds_ext/option.c +44 -46
- data/ext/zstds_ext/option.h +62 -51
- data/ext/zstds_ext/stream/compressor.c +103 -53
- data/ext/zstds_ext/stream/compressor.h +4 -1
- data/ext/zstds_ext/stream/decompressor.c +46 -25
- data/ext/zstds_ext/stream/decompressor.h +4 -1
- data/ext/zstds_ext/string.c +93 -63
- data/lib/zstds/dictionary.rb +10 -4
- data/lib/zstds/error.rb +2 -1
- data/lib/zstds/file.rb +2 -2
- data/lib/zstds/option.rb +16 -4
- data/lib/zstds/stream/abstract.rb +5 -3
- data/lib/zstds/stream/reader.rb +3 -2
- data/lib/zstds/stream/reader_helpers.rb +1 -1
- data/lib/zstds/validation.rb +3 -1
- data/lib/zstds/version.rb +1 -1
- metadata +74 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80ccd3c8a7210dcf3fabdfadcca8e37ce82d5fe78c7f761f5ae7c355ff746854
|
4
|
+
data.tar.gz: a4ed4e5dd4c89b7f95438c5e3033d09eb25721ab32fda8e1eac9351774ab3215
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 967b5d866cd79b139bdbca97e1fdacf9063e3bc425f441be239980dd839d123173fcad15c0830576a32fe7c0bb5166f4fd5159717a9ac7f038fe57a435859f70
|
7
|
+
data.tar.gz: 78c54fc20f8facaa556eb68438407cfa7ff96d33425a9dce73584fc26435177dda8478c729a412f00d9b28a9d860b3dec6a059a728926a587f3a83dae9fe1836
|
data/README.md
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
# Ruby bindings for zstd library
|
2
2
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
| [![
|
3
|
+
| AppVeyor | Circle | Github actions | Codecov | Gem |
|
4
|
+
| :------: | :----: | :------------: | :-----: | :--: |
|
5
|
+
| [![AppVeyor test status](https://ci.appveyor.com/api/projects/status/github/andrew-aladev/ruby-zstds?branch=master&svg=true)](https://ci.appveyor.com/project/andrew-aladev/ruby-zstds/branch/master) | [![Circle test status](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master.svg?style=shield)](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master) | [![Github Actions test status](https://github.com/andrew-aladev/ruby-zstds/workflows/test/badge.svg?branch=master)](https://github.com/andrew-aladev/ruby-zstds/actions) | [![Codecov](https://codecov.io/gh/andrew-aladev/ruby-zstds/branch/master/graph/badge.svg)](https://codecov.io/gh/andrew-aladev/ruby-zstds) | [![Gem](https://img.shields.io/gem/v/ruby-zstds.svg)](https://rubygems.org/gems/ruby-zstds) |
|
6
6
|
|
7
7
|
See [zstd library](https://github.com/facebook/zstd).
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
Please install zstd library first, use latest 1.4.
|
11
|
+
Please install zstd library first, use latest 1.4.0+ version.
|
12
12
|
|
13
13
|
```sh
|
14
14
|
gem install ruby-zstds
|
@@ -70,16 +70,16 @@ require "zstds"
|
|
70
70
|
samples = (Array.new(8) { ::SecureRandom.random_bytes(1 << 8) } + ["sample string"]).shuffle
|
71
71
|
|
72
72
|
dictionary = ZSTDS::Dictionary.train samples
|
73
|
-
File.write "dictionary.bin", dictionary.buffer
|
73
|
+
File.write "dictionary.bin", dictionary.buffer, :mode => "wb"
|
74
74
|
|
75
|
-
dictionary_buffer = File.read "dictionary.bin"
|
75
|
+
dictionary_buffer = File.read "dictionary.bin", :mode => "rb"
|
76
76
|
dictionary = ZSTDS::Dictionary.new dictionary_buffer
|
77
77
|
|
78
78
|
data = ZSTDS::String.compress "sample string", :dictionary => dictionary
|
79
79
|
puts ZSTDS::String.decompress(data, :dictionary => dictionary)
|
80
80
|
```
|
81
81
|
|
82
|
-
You can create and read `tar.zst` archives with
|
82
|
+
You can create and read `tar.zst` archives with [minitar](https://github.com/halostatue/minitar).
|
83
83
|
|
84
84
|
```ruby
|
85
85
|
require "zstds"
|
@@ -101,12 +101,37 @@ ZSTDS::Stream::Reader.open "file.tar.zst" do |reader|
|
|
101
101
|
end
|
102
102
|
```
|
103
103
|
|
104
|
+
You can also use `Content-Encoding: zstd` with [sinatra](http://sinatrarb.com):
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
require "zstds"
|
108
|
+
require "sinatra"
|
109
|
+
|
110
|
+
get "/" do
|
111
|
+
headers["Content-Encoding"] = "zstd"
|
112
|
+
ZSTDS::String.compress "sample string"
|
113
|
+
end
|
114
|
+
```
|
115
|
+
|
116
|
+
All functionality (including streaming) can be used inside multiple threads with [parallel](https://github.com/grosser/parallel).
|
117
|
+
This code will put a heavy load on your CPU.
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
require "zstds"
|
121
|
+
require "parallel"
|
122
|
+
|
123
|
+
Parallel.each large_datas do |large_data|
|
124
|
+
ZSTDS::String.compress large_data
|
125
|
+
end
|
126
|
+
```
|
127
|
+
|
104
128
|
## Options
|
105
129
|
|
106
130
|
| Option | Values | Default | Description |
|
107
131
|
|---------------------------------|----------------|------------|-------------|
|
108
132
|
| `source_buffer_length` | 0 - inf | 0 (auto) | internal buffer length for source data |
|
109
133
|
| `destination_buffer_length` | 0 - inf | 0 (auto) | internal buffer length for destination data |
|
134
|
+
| `gvl` | true/false | false | enables global VM lock where possible |
|
110
135
|
| `compression_level` | -131072 - 22 | 0 (auto) | compression level |
|
111
136
|
| `window_log` | 10 - 31 | 0 (auto) | maximum back-reference distance (power of 2) |
|
112
137
|
| `hash_log` | 6 - 30 | 0 (auto) | size of the initial probe table (power of 2) |
|
@@ -134,6 +159,10 @@ There are internal buffers for compressed and decompressed data.
|
|
134
159
|
For example you want to use 1 KB as `source_buffer_length` for compressor - please use 256 B as `destination_buffer_length`.
|
135
160
|
You want to use 256 B as `source_buffer_length` for decompressor - please use 1 KB as `destination_buffer_length`.
|
136
161
|
|
162
|
+
`gvl` is disabled by default; this mode allows running multiple compressors/decompressors in different threads simultaneously.
|
163
|
+
Please consider enabling `gvl` if you don't want to launch processors in separate threads.
|
164
|
+
If `gvl` is enabled ruby won't waste time on acquiring/releasing VM lock.
|
165
|
+
|
137
166
|
`String` and `File` will set `:pledged_size` automatically.
|
138
167
|
|
139
168
|
You can also read zstd docs for more info about options.
|
@@ -161,6 +190,7 @@ Possible compressor options:
|
|
161
190
|
```
|
162
191
|
:source_buffer_length
|
163
192
|
:destination_buffer_length
|
193
|
+
:gvl
|
164
194
|
:compression_level
|
165
195
|
:window_log
|
166
196
|
:hash_log
|
@@ -188,6 +218,7 @@ Possible decompressor options:
|
|
188
218
|
```
|
189
219
|
:source_buffer_length
|
190
220
|
:destination_buffer_length
|
221
|
+
:gvl
|
191
222
|
:window_log_max
|
192
223
|
:dictionary
|
193
224
|
```
|
@@ -201,18 +232,6 @@ data = ZSTDS::String.compress "sample string", :compression_level => 5
|
|
201
232
|
puts ZSTDS::String.decompress(data, :window_log_max => 11)
|
202
233
|
```
|
203
234
|
|
204
|
-
HTTP encoding (`Content-Encoding: zstd`) using default options:
|
205
|
-
|
206
|
-
```ruby
|
207
|
-
require "zstds"
|
208
|
-
require "sinatra"
|
209
|
-
|
210
|
-
get "/" do
|
211
|
-
headers["Content-Encoding"] = "zstd"
|
212
|
-
ZSTDS::String.compress "sample string"
|
213
|
-
end
|
214
|
-
```
|
215
|
-
|
216
235
|
## String
|
217
236
|
|
218
237
|
String maintains destination buffer only, so it accepts `destination_buffer_length` option only.
|
@@ -237,7 +256,7 @@ File maintains both source and destination buffers, it accepts both `source_buff
|
|
237
256
|
|
238
257
|
## Stream::Writer
|
239
258
|
|
240
|
-
Its behaviour is similar to builtin [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib
|
259
|
+
Its behaviour is similar to builtin [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html).
|
241
260
|
|
242
261
|
Writer maintains destination buffer only, so it accepts `destination_buffer_length` option only.
|
243
262
|
|
@@ -275,7 +294,7 @@ Set another encodings, `nil` is just for compatibility with `IO`.
|
|
275
294
|
#tell
|
276
295
|
```
|
277
296
|
|
278
|
-
See [`IO`](https://ruby-doc.org/core
|
297
|
+
See [`IO`](https://ruby-doc.org/core/IO.html) docs.
|
279
298
|
|
280
299
|
```
|
281
300
|
#write(*objects)
|
@@ -285,7 +304,7 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
|
|
285
304
|
#closed?
|
286
305
|
```
|
287
306
|
|
288
|
-
See [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib
|
307
|
+
See [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
|
289
308
|
|
290
309
|
```
|
291
310
|
#write_nonblock(object, *options)
|
@@ -299,6 +318,9 @@ Special asynchronous methods missing in `Zlib::GzipWriter`.
|
|
299
318
|
So it is possible to have asynchronous variants for these synchronous methods.
|
300
319
|
Behaviour is the same as `IO#write_nonblock` method.
|
301
320
|
|
321
|
+
All nonblock operations for file will raise `EBADF` error on Windows.
|
322
|
+
Setting file into nonblocking mode is [not available on Windows](https://github.com/ruby/ruby/blob/master/win32/win32.c#L4388).
|
323
|
+
|
302
324
|
```
|
303
325
|
#<<(object)
|
304
326
|
#print(*objects)
|
@@ -307,11 +329,11 @@ Behaviour is the same as `IO#write_nonblock` method.
|
|
307
329
|
#puts(*objects)
|
308
330
|
```
|
309
331
|
|
310
|
-
Typical helpers, see [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib
|
332
|
+
Typical helpers, see [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
|
311
333
|
|
312
334
|
## Stream::Reader
|
313
335
|
|
314
|
-
Its behaviour is similar to builtin [`Zlib::GzipReader`](https://ruby-doc.org/stdlib
|
336
|
+
Its behaviour is similar to builtin [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html).
|
315
337
|
|
316
338
|
Reader maintains both source and destination buffers, it accepts both `source_buffer_length` and `destination_buffer_length` options.
|
317
339
|
|
@@ -346,7 +368,7 @@ Set another encodings.
|
|
346
368
|
#tell
|
347
369
|
```
|
348
370
|
|
349
|
-
See [`IO`](https://ruby-doc.org/core
|
371
|
+
See [`IO`](https://ruby-doc.org/core/IO.html) docs.
|
350
372
|
|
351
373
|
```
|
352
374
|
#read(bytes_to_read = nil, out_buffer = nil)
|
@@ -356,14 +378,14 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
|
|
356
378
|
#closed?
|
357
379
|
```
|
358
380
|
|
359
|
-
See [`Zlib::GzipReader`](https://ruby-doc.org/stdlib
|
381
|
+
See [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
|
360
382
|
|
361
383
|
```
|
362
384
|
#readpartial(bytes_to_read = nil, out_buffer = nil)
|
363
385
|
#read_nonblock(bytes_to_read, out_buffer = nil, *options)
|
364
386
|
```
|
365
387
|
|
366
|
-
See [`IO`](https://ruby-doc.org/core
|
388
|
+
See [`IO`](https://ruby-doc.org/core/IO.html) docs.
|
367
389
|
|
368
390
|
```
|
369
391
|
#getbyte
|
@@ -386,7 +408,7 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
|
|
386
408
|
#ungetline(line)
|
387
409
|
```
|
388
410
|
|
389
|
-
Typical helpers, see [`Zlib::GzipReader`](https://ruby-doc.org/stdlib
|
411
|
+
Typical helpers, see [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
|
390
412
|
|
391
413
|
## Dictionary
|
392
414
|
|
@@ -418,12 +440,24 @@ Please use regular constructor to create dictionary from buffer.
|
|
418
440
|
|
419
441
|
Read dictionary id from buffer.
|
420
442
|
|
443
|
+
## Thread safety
|
444
|
+
|
445
|
+
The `:gvl` option is disabled by default, so you can use the bindings effectively in multiple threads.
|
446
|
+
Please be careful: bindings are not thread safe.
|
447
|
+
You should lock all shared data between threads.
|
448
|
+
|
449
|
+
For example: you should not use same compressor/decompressor inside multiple threads.
|
450
|
+
Please make sure that each processor is used by only one thread at a time.
|
451
|
+
|
452
|
+
## Operating systems
|
453
|
+
|
454
|
+
GNU/Linux, FreeBSD, OSX, Windows (MinGW).
|
455
|
+
|
421
456
|
## CI
|
422
457
|
|
423
|
-
See universal test script [scripts/ci_test.sh](scripts/ci_test.sh) for CI.
|
424
458
|
Please visit [scripts/test-images](scripts/test-images).
|
425
|
-
|
459
|
+
See universal test script [scripts/ci_test.sh](scripts/ci_test.sh) for CI.
|
426
460
|
|
427
461
|
## License
|
428
462
|
|
429
|
-
MIT license, see LICENSE and AUTHORS.
|
463
|
+
MIT license, see [LICENSE](LICENSE) and [AUTHORS](AUTHORS).
|
data/ext/extconf.rb
CHANGED
@@ -3,57 +3,142 @@
|
|
3
3
|
|
4
4
|
require "mkmf"
|
5
5
|
|
6
|
+
have_func "rb_thread_call_without_gvl", "ruby/thread.h"
|
7
|
+
|
8
|
+
# Old zstd versions have a bug: underlinking against pthreads.
|
9
|
+
# https://bugs.gentoo.org/713940
|
6
10
|
$LDFLAGS << " -pthread" # rubocop:disable Style/GlobalVars
|
7
11
|
|
8
|
-
def require_header(name,
|
12
|
+
def require_header(name, constants: [], macroses: [], types: [])
|
9
13
|
abort "Can't find #{name} header" unless find_header name
|
10
14
|
|
15
|
+
constants.each do |constant|
|
16
|
+
abort "Can't find #{constant} constant in #{name} header" unless have_const constant, name
|
17
|
+
end
|
18
|
+
|
19
|
+
macroses.each do |macro|
|
20
|
+
abort "Can't find #{macro} macro in #{name} header" unless have_macro macro, name
|
21
|
+
end
|
22
|
+
|
11
23
|
types.each do |type|
|
12
24
|
abort "Can't find #{type} type in #{name} header" unless find_type type, nil, name
|
13
25
|
end
|
14
26
|
end
|
15
27
|
|
16
|
-
require_header "zstd_errors.h", %w[
|
17
|
-
ZSTD_ErrorCode
|
18
|
-
]
|
19
|
-
require_header "zstd.h", [
|
20
|
-
"ZSTD_CCtx *",
|
21
|
-
"ZSTD_DCtx *",
|
22
|
-
"ZSTD_strategy",
|
23
|
-
"ZSTD_bounds",
|
24
|
-
"ZSTD_inBuffer",
|
25
|
-
"ZSTD_outBuffer"
|
26
|
-
]
|
27
28
|
require_header "zdict.h"
|
28
29
|
|
30
|
+
require_header(
|
31
|
+
"zstd.h",
|
32
|
+
:constants => %w[
|
33
|
+
ZSTD_btlazy2
|
34
|
+
ZSTD_btopt
|
35
|
+
ZSTD_btultra
|
36
|
+
ZSTD_btultra2
|
37
|
+
ZSTD_c_chainLog
|
38
|
+
ZSTD_c_checksumFlag
|
39
|
+
ZSTD_c_compressionLevel
|
40
|
+
ZSTD_c_contentSizeFlag
|
41
|
+
ZSTD_c_dictIDFlag
|
42
|
+
ZSTD_c_enableLongDistanceMatching
|
43
|
+
ZSTD_c_hashLog
|
44
|
+
ZSTD_c_jobSize
|
45
|
+
ZSTD_c_ldmBucketSizeLog
|
46
|
+
ZSTD_c_ldmHashLog
|
47
|
+
ZSTD_c_ldmHashRateLog
|
48
|
+
ZSTD_c_ldmMinMatch
|
49
|
+
ZSTD_c_minMatch
|
50
|
+
ZSTD_c_nbWorkers
|
51
|
+
ZSTD_c_overlapLog
|
52
|
+
ZSTD_c_searchLog
|
53
|
+
ZSTD_c_strategy
|
54
|
+
ZSTD_c_targetLength
|
55
|
+
ZSTD_c_windowLog
|
56
|
+
ZSTD_dfast
|
57
|
+
ZSTD_d_windowLogMax
|
58
|
+
ZSTD_e_continue
|
59
|
+
ZSTD_e_end
|
60
|
+
ZSTD_e_flush
|
61
|
+
ZSTD_fast
|
62
|
+
ZSTD_greedy
|
63
|
+
ZSTD_lazy
|
64
|
+
ZSTD_lazy2
|
65
|
+
],
|
66
|
+
:macroses => %w[ZSTD_VERSION_STRING],
|
67
|
+
:types => [
|
68
|
+
"ZSTD_bounds",
|
69
|
+
"ZSTD_CCtx *",
|
70
|
+
"ZSTD_DCtx *",
|
71
|
+
"ZSTD_inBuffer",
|
72
|
+
"ZSTD_outBuffer",
|
73
|
+
"ZSTD_strategy"
|
74
|
+
]
|
75
|
+
)
|
76
|
+
|
77
|
+
require_header(
|
78
|
+
"zstd_errors.h",
|
79
|
+
:constants => %w[
|
80
|
+
ZSTD_error_checksum_wrong
|
81
|
+
ZSTD_error_corruption_detected
|
82
|
+
ZSTD_error_dictionaryCreation_failed
|
83
|
+
ZSTD_error_dictionary_corrupted
|
84
|
+
ZSTD_error_dictionary_wrong
|
85
|
+
ZSTD_error_dstBuffer_null
|
86
|
+
ZSTD_error_dstSize_tooSmall
|
87
|
+
ZSTD_error_frameParameter_unsupported
|
88
|
+
ZSTD_error_frameParameter_windowTooLarge
|
89
|
+
ZSTD_error_init_missing
|
90
|
+
ZSTD_error_maxSymbolValue_tooLarge
|
91
|
+
ZSTD_error_maxSymbolValue_tooSmall
|
92
|
+
ZSTD_error_memory_allocation
|
93
|
+
ZSTD_error_parameter_outOfBound
|
94
|
+
ZSTD_error_parameter_unsupported
|
95
|
+
ZSTD_error_prefix_unknown
|
96
|
+
ZSTD_error_srcSize_wrong
|
97
|
+
ZSTD_error_stage_wrong
|
98
|
+
ZSTD_error_tableLog_tooLarge
|
99
|
+
ZSTD_error_version_unsupported
|
100
|
+
ZSTD_error_workSpace_tooSmall
|
101
|
+
],
|
102
|
+
:types => %w[ZSTD_ErrorCode]
|
103
|
+
)
|
104
|
+
|
29
105
|
def require_library(name, functions)
|
30
106
|
functions.each do |function|
|
31
107
|
abort "Can't find #{function} function in #{name} library" unless find_library name, function
|
32
108
|
end
|
33
109
|
end
|
34
110
|
|
111
|
+
# rubocop:disable Style/GlobalVars
|
112
|
+
if find_library "zstd", "ZDICT_getDictHeaderSize"
|
113
|
+
$defs.push "-DHAVE_ZDICT_HEADER_SIZE"
|
114
|
+
end
|
115
|
+
# rubocop:enable Style/GlobalVars
|
116
|
+
|
35
117
|
require_library(
|
36
118
|
"zstd",
|
37
119
|
%w[
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
ZSTD_freeCCtx
|
43
|
-
ZSTD_freeDCtx
|
120
|
+
ZDICT_getDictID
|
121
|
+
ZDICT_isError
|
122
|
+
ZDICT_trainFromBuffer
|
123
|
+
ZSTD_CCtx_loadDictionary
|
44
124
|
ZSTD_CCtx_setParameter
|
45
|
-
ZSTD_DCtx_setParameter
|
46
125
|
ZSTD_CCtx_setPledgedSrcSize
|
47
|
-
ZSTD_cParam_getBounds
|
48
|
-
ZSTD_dParam_getBounds
|
49
126
|
ZSTD_CStreamInSize
|
50
127
|
ZSTD_CStreamOutSize
|
128
|
+
ZSTD_compressStream2
|
129
|
+
ZSTD_cParam_getBounds
|
130
|
+
ZSTD_createCCtx
|
131
|
+
ZSTD_createDCtx
|
132
|
+
ZSTD_DCtx_setParameter
|
133
|
+
ZSTD_DCtx_loadDictionary
|
51
134
|
ZSTD_DStreamInSize
|
52
135
|
ZSTD_DStreamOutSize
|
53
|
-
ZSTD_compressStream2
|
54
136
|
ZSTD_decompressStream
|
55
|
-
|
56
|
-
|
137
|
+
ZSTD_dParam_getBounds
|
138
|
+
ZSTD_freeCCtx
|
139
|
+
ZSTD_freeDCtx
|
140
|
+
ZSTD_getErrorCode
|
141
|
+
ZSTD_isError
|
57
142
|
]
|
58
143
|
)
|
59
144
|
|
@@ -82,7 +167,7 @@ $libs = $libs.split(%r{\s})
|
|
82
167
|
.uniq
|
83
168
|
.join " "
|
84
169
|
|
85
|
-
if ENV["CI"]
|
170
|
+
if ENV["CI"]
|
86
171
|
$CFLAGS << " --coverage"
|
87
172
|
$LDFLAGS << " --coverage"
|
88
173
|
end
|
data/ext/zstds_ext/buffer.c
CHANGED
@@ -5,17 +5,16 @@
|
|
5
5
|
|
6
6
|
#include <zstd.h>
|
7
7
|
|
8
|
-
#include "ruby.h"
|
9
|
-
|
10
8
|
VALUE zstds_ext_create_string_buffer(VALUE length)
|
11
9
|
{
|
12
10
|
return rb_str_new(NULL, NUM2SIZET(length));
|
13
11
|
}
|
14
12
|
|
15
|
-
VALUE zstds_ext_resize_string_buffer(VALUE
|
13
|
+
VALUE zstds_ext_resize_string_buffer(VALUE buffer_args)
|
16
14
|
{
|
17
|
-
VALUE buffer = rb_ary_entry(
|
18
|
-
VALUE length = rb_ary_entry(
|
15
|
+
VALUE buffer = rb_ary_entry(buffer_args, 0);
|
16
|
+
VALUE length = rb_ary_entry(buffer_args, 1);
|
17
|
+
|
19
18
|
return rb_str_resize(buffer, NUM2SIZET(length));
|
20
19
|
}
|
21
20
|
|
data/ext/zstds_ext/buffer.h
CHANGED
@@ -11,12 +11,12 @@ VALUE zstds_ext_create_string_buffer(VALUE length);
|
|
11
11
|
#define ZSTDS_EXT_CREATE_STRING_BUFFER(buffer, length, exception) \
|
12
12
|
VALUE buffer = rb_protect(zstds_ext_create_string_buffer, SIZET2NUM(length), &exception);
|
13
13
|
|
14
|
-
VALUE zstds_ext_resize_string_buffer(VALUE
|
14
|
+
VALUE zstds_ext_resize_string_buffer(VALUE buffer_args);
|
15
15
|
|
16
|
-
#define ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, length, exception)
|
17
|
-
VALUE
|
18
|
-
buffer
|
19
|
-
RB_GC_GUARD(
|
16
|
+
#define ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, length, exception) \
|
17
|
+
VALUE buffer_args = rb_ary_new_from_args(2, buffer, SIZET2NUM(length)); \
|
18
|
+
buffer = rb_protect(zstds_ext_resize_string_buffer, buffer_args, &exception); \
|
19
|
+
RB_GC_GUARD(buffer_args);
|
20
20
|
|
21
21
|
void zstds_ext_buffer_exports(VALUE root_module);
|
22
22
|
|
data/ext/zstds_ext/dictionary.c
CHANGED
@@ -6,39 +6,93 @@
|
|
6
6
|
#include <string.h>
|
7
7
|
#include <zdict.h>
|
8
8
|
|
9
|
-
#include "ruby.h"
|
10
9
|
#include "zstds_ext/buffer.h"
|
11
10
|
#include "zstds_ext/error.h"
|
12
|
-
#include "zstds_ext/
|
11
|
+
#include "zstds_ext/gvl.h"
|
13
12
|
#include "zstds_ext/option.h"
|
14
13
|
|
15
|
-
|
14
|
+
// -- initialization --
|
15
|
+
|
16
|
+
typedef struct
|
16
17
|
{
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
const char* data;
|
19
|
+
size_t size;
|
20
|
+
} sample_t;
|
21
|
+
|
22
|
+
typedef struct
|
23
|
+
{
|
24
|
+
const sample_t* samples;
|
25
|
+
size_t length;
|
26
|
+
char* buffer;
|
27
|
+
size_t capacity;
|
28
|
+
zstds_result_t result;
|
29
|
+
zstds_ext_result_t ext_result;
|
30
|
+
} train_args_t;
|
31
|
+
|
32
|
+
static inline void* train_wrapper(void* data)
|
33
|
+
{
|
34
|
+
train_args_t* args = data;
|
35
|
+
const sample_t* samples = args->samples;
|
36
|
+
size_t length = args->length;
|
37
|
+
size_t size = 0;
|
38
|
+
|
39
|
+
for (size_t index = 0; index < length; index++) {
|
40
|
+
size += samples[index].size;
|
20
41
|
}
|
21
42
|
|
22
|
-
|
43
|
+
zstds_ext_byte_t* group = malloc(size);
|
44
|
+
if (group == NULL) {
|
45
|
+
args->ext_result = ZSTDS_EXT_ERROR_ALLOCATE_FAILED;
|
46
|
+
return NULL;
|
47
|
+
}
|
48
|
+
|
49
|
+
size_t* sizes = malloc(length * sizeof(size_t));
|
50
|
+
if (sizes == NULL) {
|
51
|
+
free(group);
|
52
|
+
args->ext_result = ZSTDS_EXT_ERROR_ALLOCATE_FAILED;
|
53
|
+
return NULL;
|
54
|
+
}
|
55
|
+
|
56
|
+
size_t offset = 0;
|
57
|
+
|
58
|
+
for (size_t index = 0; index < length; index++) {
|
59
|
+
const sample_t* sample_ptr = &samples[index];
|
60
|
+
size_t sample_size = sample_ptr->size;
|
61
|
+
|
62
|
+
memmove(group + offset, sample_ptr->data, sample_size);
|
63
|
+
offset += sample_size;
|
64
|
+
|
65
|
+
sizes[index] = sample_size;
|
66
|
+
}
|
67
|
+
|
68
|
+
args->result = ZDICT_trainFromBuffer((void*) args->buffer, args->capacity, group, sizes, (unsigned int) length);
|
69
|
+
|
70
|
+
free(group);
|
71
|
+
free(sizes);
|
72
|
+
|
73
|
+
if (ZDICT_isError(args->result)) {
|
74
|
+
args->ext_result = zstds_ext_get_error(ZSTD_getErrorCode(args->result));
|
75
|
+
return NULL;
|
76
|
+
}
|
77
|
+
|
78
|
+
args->ext_result = 0;
|
79
|
+
|
80
|
+
return NULL;
|
23
81
|
}
|
24
82
|
|
25
|
-
VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE
|
83
|
+
VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE raw_samples, VALUE options)
|
26
84
|
{
|
27
|
-
Check_Type(
|
28
|
-
|
29
|
-
size_t sample_index;
|
30
|
-
unsigned int samples_length = (unsigned int)RARRAY_LEN(samples);
|
31
|
-
size_t samples_size = 0;
|
85
|
+
Check_Type(raw_samples, T_ARRAY);
|
32
86
|
|
33
|
-
|
34
|
-
VALUE sample = rb_ary_entry(samples, sample_index);
|
35
|
-
Check_Type(sample, T_STRING);
|
87
|
+
size_t length = RARRAY_LEN(raw_samples);
|
36
88
|
|
37
|
-
|
89
|
+
for (size_t index = 0; index < length; index++) {
|
90
|
+
Check_Type(rb_ary_entry(raw_samples, index), T_STRING);
|
38
91
|
}
|
39
92
|
|
40
93
|
Check_Type(options, T_HASH);
|
41
|
-
|
94
|
+
ZSTDS_EXT_GET_BOOL_OPTION(options, gvl);
|
95
|
+
ZSTDS_EXT_GET_SIZE_OPTION(options, capacity);
|
42
96
|
|
43
97
|
if (capacity == 0) {
|
44
98
|
capacity = ZSTDS_EXT_DEFAULT_DICTIONARY_CAPACITY;
|
@@ -51,53 +105,78 @@ VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE samp
|
|
51
105
|
zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
|
52
106
|
}
|
53
107
|
|
54
|
-
|
55
|
-
if (
|
108
|
+
sample_t* samples = malloc(sizeof(sample_t) * length);
|
109
|
+
if (samples == NULL) {
|
56
110
|
zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
|
57
111
|
}
|
58
112
|
|
59
|
-
size_t
|
60
|
-
|
61
|
-
|
62
|
-
|
113
|
+
for (size_t index = 0; index < length; index++) {
|
114
|
+
VALUE raw_sample = rb_ary_entry(raw_samples, index);
|
115
|
+
sample_t* sample = &samples[index];
|
116
|
+
|
117
|
+
sample->data = RSTRING_PTR(raw_sample);
|
118
|
+
sample->size = RSTRING_LEN(raw_sample);
|
63
119
|
}
|
64
120
|
|
65
|
-
|
121
|
+
train_args_t args = {
|
122
|
+
.samples = samples,
|
123
|
+
.length = length,
|
124
|
+
.buffer = RSTRING_PTR(buffer),
|
125
|
+
.capacity = capacity,
|
126
|
+
};
|
66
127
|
|
67
|
-
|
68
|
-
|
69
|
-
const char* sample_data = RSTRING_PTR(sample);
|
70
|
-
size_t sample_size = RSTRING_LEN(sample);
|
128
|
+
ZSTDS_EXT_GVL_WRAP(gvl, train_wrapper, &args);
|
129
|
+
free(samples);
|
71
130
|
|
72
|
-
|
73
|
-
|
131
|
+
if (args.ext_result != 0) {
|
132
|
+
zstds_ext_raise_error(args.ext_result);
|
133
|
+
}
|
74
134
|
|
75
|
-
|
135
|
+
ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, args.result, exception);
|
136
|
+
if (exception != 0) {
|
137
|
+
zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
|
76
138
|
}
|
77
139
|
|
78
|
-
|
79
|
-
|
80
|
-
samples_buffer, samples_sizes, samples_length);
|
140
|
+
return buffer;
|
141
|
+
}
|
81
142
|
|
82
|
-
|
83
|
-
free(samples_sizes);
|
143
|
+
// -- other --
|
84
144
|
|
85
|
-
|
86
|
-
|
145
|
+
VALUE zstds_ext_get_dictionary_buffer_id(VALUE ZSTDS_EXT_UNUSED(self), VALUE buffer)
|
146
|
+
{
|
147
|
+
unsigned int id = ZDICT_getDictID(RSTRING_PTR(buffer), RSTRING_LEN(buffer));
|
148
|
+
if (id == 0) {
|
149
|
+
zstds_ext_raise_error(ZSTDS_EXT_ERROR_VALIDATE_FAILED);
|
87
150
|
}
|
88
151
|
|
89
|
-
|
90
|
-
|
91
|
-
|
152
|
+
return UINT2NUM(id);
|
153
|
+
}
|
154
|
+
|
155
|
+
#if defined(HAVE_ZDICT_HEADER_SIZE)
|
156
|
+
VALUE zstds_ext_get_dictionary_header_size(VALUE self, VALUE buffer)
|
157
|
+
{
|
158
|
+
zstds_result_t result = ZDICT_getDictHeaderSize(RSTRING_PTR(buffer), RSTRING_LEN(buffer));
|
159
|
+
if (ZDICT_isError(result)) {
|
160
|
+
zstds_ext_raise_error(zstds_ext_get_error(ZSTD_getErrorCode(result)));
|
92
161
|
}
|
93
162
|
|
94
|
-
return
|
163
|
+
return SIZET2NUM(result);
|
95
164
|
}
|
96
165
|
|
166
|
+
#else
|
167
|
+
ZSTDS_EXT_NORETURN VALUE zstds_ext_get_dictionary_header_size(VALUE self, VALUE buffer)
|
168
|
+
{
|
169
|
+
zstds_ext_raise_error(ZSTDS_EXT_ERROR_NOT_IMPLEMENTED);
|
170
|
+
};
|
171
|
+
#endif
|
172
|
+
|
173
|
+
// -- exports --
|
174
|
+
|
97
175
|
void zstds_ext_dictionary_exports(VALUE root_module)
|
98
176
|
{
|
99
177
|
VALUE dictionary = rb_define_class_under(root_module, "Dictionary", rb_cObject);
|
100
178
|
|
101
179
|
rb_define_singleton_method(dictionary, "get_buffer_id", zstds_ext_get_dictionary_buffer_id, 1);
|
180
|
+
rb_define_singleton_method(dictionary, "get_header_size", zstds_ext_get_dictionary_header_size, 1);
|
102
181
|
rb_define_singleton_method(dictionary, "train_buffer", zstds_ext_train_dictionary_buffer, 2);
|
103
182
|
}
|