ruby-zstds 1.0.6 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 359ef05325e4af260831df1c3d71b70d64d1b4f235631ad3cd0bf10be63be308
4
- data.tar.gz: 39b138930c23a914317db548081db091a607b00867300788d3de3c22e7e56616
3
+ metadata.gz: 80ccd3c8a7210dcf3fabdfadcca8e37ce82d5fe78c7f761f5ae7c355ff746854
4
+ data.tar.gz: a4ed4e5dd4c89b7f95438c5e3033d09eb25721ab32fda8e1eac9351774ab3215
5
5
  SHA512:
6
- metadata.gz: 0c76ce3fe240fb963dab49e55ff167a2cfea690e1bb48b107d164821d5bba1b5090eb81b73ec221a86aa4c94c05f9082d540d6c0423ae99326face5deee199d8
7
- data.tar.gz: 00db3a70938e3ca436d253f9c2e715941837c252569bb6d72d1ddb94b1c8c9e85aeeb1bd67e9c85a0ab965c5393f785f273e67a0a3869a1ae770a915ae23ca9e
6
+ metadata.gz: 967b5d866cd79b139bdbca97e1fdacf9063e3bc425f441be239980dd839d123173fcad15c0830576a32fe7c0bb5166f4fd5159717a9ac7f038fe57a435859f70
7
+ data.tar.gz: 78c54fc20f8facaa556eb68438407cfa7ff96d33425a9dce73584fc26435177dda8478c729a412f00d9b28a9d860b3dec6a059a728926a587f3a83dae9fe1836
data/README.md CHANGED
@@ -1,14 +1,14 @@
1
1
  # Ruby bindings for zstd library
2
2
 
3
- | Travis | AppVeyor | Circle | Codecov |
4
- | :---: | :---: | :---: | :---: |
5
- | [![Travis test status](https://travis-ci.com/andrew-aladev/ruby-zstds.svg?branch=master)](https://travis-ci.com/andrew-aladev/ruby-zstds) | [![AppVeyor test status](https://ci.appveyor.com/api/projects/status/github/andrew-aladev/ruby-zstds?branch=master&svg=true)](https://ci.appveyor.com/project/andrew-aladev/ruby-zstds/branch/master) | [![Circle test status](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master.svg?style=shield)](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master) | [![Codecov](https://codecov.io/gh/andrew-aladev/ruby-zstds/branch/master/graph/badge.svg)](https://codecov.io/gh/andrew-aladev/ruby-zstds) |
3
+ | AppVeyor | Circle | Github actions | Codecov | Gem |
4
+ | :------: | :----: | :------------: | :-----: | :--: |
5
+ | [![AppVeyor test status](https://ci.appveyor.com/api/projects/status/github/andrew-aladev/ruby-zstds?branch=master&svg=true)](https://ci.appveyor.com/project/andrew-aladev/ruby-zstds/branch/master) | [![Circle test status](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master.svg?style=shield)](https://circleci.com/gh/andrew-aladev/ruby-zstds/tree/master) | [![Github Actions test status](https://github.com/andrew-aladev/ruby-zstds/workflows/test/badge.svg?branch=master)](https://github.com/andrew-aladev/ruby-zstds/actions) | [![Codecov](https://codecov.io/gh/andrew-aladev/ruby-zstds/branch/master/graph/badge.svg)](https://codecov.io/gh/andrew-aladev/ruby-zstds) | [![Gem](https://img.shields.io/gem/v/ruby-zstds.svg)](https://rubygems.org/gems/ruby-zstds) |
6
6
 
7
7
  See [zstd library](https://github.com/facebook/zstd).
8
8
 
9
9
  ## Installation
10
10
 
11
- Please install zstd library first, use latest 1.4.3+ version.
11
+ Please install zstd library first, use latest 1.4.0+ version.
12
12
 
13
13
  ```sh
14
14
  gem install ruby-zstds
@@ -70,16 +70,16 @@ require "zstds"
70
70
  samples = (Array.new(8) { ::SecureRandom.random_bytes(1 << 8) } + ["sample string"]).shuffle
71
71
 
72
72
  dictionary = ZSTDS::Dictionary.train samples
73
- File.write "dictionary.bin", dictionary.buffer
73
+ File.write "dictionary.bin", dictionary.buffer, :mode => "wb"
74
74
 
75
- dictionary_buffer = File.read "dictionary.bin"
75
+ dictionary_buffer = File.read "dictionary.bin", :mode => "rb"
76
76
  dictionary = ZSTDS::Dictionary.new dictionary_buffer
77
77
 
78
78
  data = ZSTDS::String.compress "sample string", :dictionary => dictionary
79
79
  puts ZSTDS::String.decompress(data, :dictionary => dictionary)
80
80
  ```
81
81
 
82
- You can create and read `tar.zst` archives with `minitar` for example.
82
+ You can create and read `tar.zst` archives with [minitar](https://github.com/halostatue/minitar).
83
83
 
84
84
  ```ruby
85
85
  require "zstds"
@@ -101,12 +101,37 @@ ZSTDS::Stream::Reader.open "file.tar.zst" do |reader|
101
101
  end
102
102
  ```
103
103
 
104
+ You can also use `Content-Encoding: zstd` with [sinatra](http://sinatrarb.com):
105
+
106
+ ```ruby
107
+ require "zstds"
108
+ require "sinatra"
109
+
110
+ get "/" do
111
+ headers["Content-Encoding"] = "zstd"
112
+ ZSTDS::String.compress "sample string"
113
+ end
114
+ ```
115
+
116
+ All functionality (including streaming) can be used inside multiple threads with [parallel](https://github.com/grosser/parallel).
117
+ This code will put a heavy load on your CPU.
118
+
119
+ ```ruby
120
+ require "zstds"
121
+ require "parallel"
122
+
123
+ Parallel.each large_datas do |large_data|
124
+ ZSTDS::String.compress large_data
125
+ end
126
+ ```
127
+
104
128
  ## Options
105
129
 
106
130
  | Option | Values | Default | Description |
107
131
  |---------------------------------|----------------|------------|-------------|
108
132
  | `source_buffer_length` | 0 - inf | 0 (auto) | internal buffer length for source data |
109
133
  | `destination_buffer_length` | 0 - inf | 0 (auto) | internal buffer length for destination data |
134
+ | `gvl` | true/false | false | enables global VM lock where possible |
110
135
  | `compression_level` | -131072 - 22 | 0 (auto) | compression level |
111
136
  | `window_log` | 10 - 31 | 0 (auto) | maximum back-reference distance (power of 2) |
112
137
  | `hash_log` | 6 - 30 | 0 (auto) | size of the initial probe table (power of 2) |
@@ -134,6 +159,10 @@ There are internal buffers for compressed and decompressed data.
134
159
  For example you want to use 1 KB as `source_buffer_length` for compressor - please use 256 B as `destination_buffer_length`.
135
160
  You want to use 256 B as `source_buffer_length` for decompressor - please use 1 KB as `destination_buffer_length`.
136
161
 
162
+ `gvl` is disabled by default; this mode allows running multiple compressors/decompressors in different threads simultaneously.
163
+ Please consider enabling `gvl` if you don't want to launch processors in separate threads.
164
+ If `gvl` is enabled, Ruby won't waste time acquiring and releasing the VM lock.
165
+
137
166
  `String` and `File` will set `:pledged_size` automatically.
138
167
 
139
168
  You can also read zstd docs for more info about options.
@@ -161,6 +190,7 @@ Possible compressor options:
161
190
  ```
162
191
  :source_buffer_length
163
192
  :destination_buffer_length
193
+ :gvl
164
194
  :compression_level
165
195
  :window_log
166
196
  :hash_log
@@ -188,6 +218,7 @@ Possible decompressor options:
188
218
  ```
189
219
  :source_buffer_length
190
220
  :destination_buffer_length
221
+ :gvl
191
222
  :window_log_max
192
223
  :dictionary
193
224
  ```
@@ -201,18 +232,6 @@ data = ZSTDS::String.compress "sample string", :compression_level => 5
201
232
  puts ZSTDS::String.decompress(data, :window_log_max => 11)
202
233
  ```
203
234
 
204
- HTTP encoding (`Content-Encoding: zstd`) using default options:
205
-
206
- ```ruby
207
- require "zstds"
208
- require "sinatra"
209
-
210
- get "/" do
211
- headers["Content-Encoding"] = "zstd"
212
- ZSTDS::String.compress "sample string"
213
- end
214
- ```
215
-
216
235
  ## String
217
236
 
218
237
  String maintains destination buffer only, so it accepts `destination_buffer_length` option only.
@@ -237,7 +256,7 @@ File maintains both source and destination buffers, it accepts both `source_buff
237
256
 
238
257
  ## Stream::Writer
239
258
 
240
- Its behaviour is similar to builtin [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipWriter.html).
259
+ Its behaviour is similar to builtin [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html).
241
260
 
242
261
  Writer maintains destination buffer only, so it accepts `destination_buffer_length` option only.
243
262
 
@@ -275,7 +294,7 @@ Set another encodings, `nil` is just for compatibility with `IO`.
275
294
  #tell
276
295
  ```
277
296
 
278
- See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
297
+ See [`IO`](https://ruby-doc.org/core/IO.html) docs.
279
298
 
280
299
  ```
281
300
  #write(*objects)
@@ -285,7 +304,7 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
285
304
  #closed?
286
305
  ```
287
306
 
288
- See [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
307
+ See [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
289
308
 
290
309
  ```
291
310
  #write_nonblock(object, *options)
@@ -299,6 +318,9 @@ Special asynchronous methods missing in `Zlib::GzipWriter`.
299
318
  So it is possible to have asynchronous variants for these synchronous methods.
300
319
  Behaviour is the same as `IO#write_nonblock` method.
301
320
 
321
+ All nonblocking operations on files will raise an `EBADF` error on Windows.
322
+ Setting a file into nonblocking mode is [not available on Windows](https://github.com/ruby/ruby/blob/master/win32/win32.c#L4388).
323
+
302
324
  ```
303
325
  #<<(object)
304
326
  #print(*objects)
@@ -307,11 +329,11 @@ Behaviour is the same as `IO#write_nonblock` method.
307
329
  #puts(*objects)
308
330
  ```
309
331
 
310
- Typical helpers, see [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
332
+ Typical helpers, see [`Zlib::GzipWriter`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipWriter.html) docs.
311
333
 
312
334
  ## Stream::Reader
313
335
 
314
- Its behaviour is similar to builtin [`Zlib::GzipReader`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipReader.html).
336
+ Its behaviour is similar to builtin [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html).
315
337
 
316
338
  Reader maintains both source and destination buffers, it accepts both `source_buffer_length` and `destination_buffer_length` options.
317
339
 
@@ -346,7 +368,7 @@ Set another encodings.
346
368
  #tell
347
369
  ```
348
370
 
349
- See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
371
+ See [`IO`](https://ruby-doc.org/core/IO.html) docs.
350
372
 
351
373
  ```
352
374
  #read(bytes_to_read = nil, out_buffer = nil)
@@ -356,14 +378,14 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
356
378
  #closed?
357
379
  ```
358
380
 
359
- See [`Zlib::GzipReader`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
381
+ See [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
360
382
 
361
383
  ```
362
384
  #readpartial(bytes_to_read = nil, out_buffer = nil)
363
385
  #read_nonblock(bytes_to_read, out_buffer = nil, *options)
364
386
  ```
365
387
 
366
- See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
388
+ See [`IO`](https://ruby-doc.org/core/IO.html) docs.
367
389
 
368
390
  ```
369
391
  #getbyte
@@ -386,7 +408,7 @@ See [`IO`](https://ruby-doc.org/core-2.7.0/IO.html) docs.
386
408
  #ungetline(line)
387
409
  ```
388
410
 
389
- Typical helpers, see [`Zlib::GzipReader`](https://ruby-doc.org/stdlib-2.7.0/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
411
+ Typical helpers, see [`Zlib::GzipReader`](https://ruby-doc.org/stdlib/libdoc/zlib/rdoc/Zlib/GzipReader.html) docs.
390
412
 
391
413
  ## Dictionary
392
414
 
@@ -418,12 +440,24 @@ Please use regular constructor to create dictionary from buffer.
418
440
 
419
441
  Read dictionary id from buffer.
420
442
 
443
+ ## Thread safety
444
+
445
+ The `:gvl` option is disabled by default, so you can use the bindings effectively in multiple threads.
446
+ Please be careful: bindings are not thread safe.
447
+ You should lock all shared data between threads.
448
+
449
+ For example: you should not use same compressor/decompressor inside multiple threads.
450
+ Please verify that each processor is used by only one thread at a time.
451
+
452
+ ## Operating systems
453
+
454
+ GNU/Linux, FreeBSD, OSX, Windows (MinGW).
455
+
421
456
  ## CI
422
457
 
423
- See universal test script [scripts/ci_test.sh](scripts/ci_test.sh) for CI.
424
458
  Please visit [scripts/test-images](scripts/test-images).
425
- You can run this test script using many native and cross images.
459
+ See universal test script [scripts/ci_test.sh](scripts/ci_test.sh) for CI.
426
460
 
427
461
  ## License
428
462
 
429
- MIT license, see LICENSE and AUTHORS.
463
+ MIT license, see [LICENSE](LICENSE) and [AUTHORS](AUTHORS).
data/ext/extconf.rb CHANGED
@@ -3,57 +3,142 @@
3
3
 
4
4
  require "mkmf"
5
5
 
6
+ have_func "rb_thread_call_without_gvl", "ruby/thread.h"
7
+
8
+ # Old zstd versions have a bug: underlinking against pthreads.
9
+ # https://bugs.gentoo.org/713940
6
10
  $LDFLAGS << " -pthread" # rubocop:disable Style/GlobalVars
7
11
 
8
- def require_header(name, types = [])
12
+ def require_header(name, constants: [], macroses: [], types: [])
9
13
  abort "Can't find #{name} header" unless find_header name
10
14
 
15
+ constants.each do |constant|
16
+ abort "Can't find #{constant} constant in #{name} header" unless have_const constant, name
17
+ end
18
+
19
+ macroses.each do |macro|
20
+ abort "Can't find #{macro} macro in #{name} header" unless have_macro macro, name
21
+ end
22
+
11
23
  types.each do |type|
12
24
  abort "Can't find #{type} type in #{name} header" unless find_type type, nil, name
13
25
  end
14
26
  end
15
27
 
16
- require_header "zstd_errors.h", %w[
17
- ZSTD_ErrorCode
18
- ]
19
- require_header "zstd.h", [
20
- "ZSTD_CCtx *",
21
- "ZSTD_DCtx *",
22
- "ZSTD_strategy",
23
- "ZSTD_bounds",
24
- "ZSTD_inBuffer",
25
- "ZSTD_outBuffer"
26
- ]
27
28
  require_header "zdict.h"
28
29
 
30
+ require_header(
31
+ "zstd.h",
32
+ :constants => %w[
33
+ ZSTD_btlazy2
34
+ ZSTD_btopt
35
+ ZSTD_btultra
36
+ ZSTD_btultra2
37
+ ZSTD_c_chainLog
38
+ ZSTD_c_checksumFlag
39
+ ZSTD_c_compressionLevel
40
+ ZSTD_c_contentSizeFlag
41
+ ZSTD_c_dictIDFlag
42
+ ZSTD_c_enableLongDistanceMatching
43
+ ZSTD_c_hashLog
44
+ ZSTD_c_jobSize
45
+ ZSTD_c_ldmBucketSizeLog
46
+ ZSTD_c_ldmHashLog
47
+ ZSTD_c_ldmHashRateLog
48
+ ZSTD_c_ldmMinMatch
49
+ ZSTD_c_minMatch
50
+ ZSTD_c_nbWorkers
51
+ ZSTD_c_overlapLog
52
+ ZSTD_c_searchLog
53
+ ZSTD_c_strategy
54
+ ZSTD_c_targetLength
55
+ ZSTD_c_windowLog
56
+ ZSTD_dfast
57
+ ZSTD_d_windowLogMax
58
+ ZSTD_e_continue
59
+ ZSTD_e_end
60
+ ZSTD_e_flush
61
+ ZSTD_fast
62
+ ZSTD_greedy
63
+ ZSTD_lazy
64
+ ZSTD_lazy2
65
+ ],
66
+ :macroses => %w[ZSTD_VERSION_STRING],
67
+ :types => [
68
+ "ZSTD_bounds",
69
+ "ZSTD_CCtx *",
70
+ "ZSTD_DCtx *",
71
+ "ZSTD_inBuffer",
72
+ "ZSTD_outBuffer",
73
+ "ZSTD_strategy"
74
+ ]
75
+ )
76
+
77
+ require_header(
78
+ "zstd_errors.h",
79
+ :constants => %w[
80
+ ZSTD_error_checksum_wrong
81
+ ZSTD_error_corruption_detected
82
+ ZSTD_error_dictionaryCreation_failed
83
+ ZSTD_error_dictionary_corrupted
84
+ ZSTD_error_dictionary_wrong
85
+ ZSTD_error_dstBuffer_null
86
+ ZSTD_error_dstSize_tooSmall
87
+ ZSTD_error_frameParameter_unsupported
88
+ ZSTD_error_frameParameter_windowTooLarge
89
+ ZSTD_error_init_missing
90
+ ZSTD_error_maxSymbolValue_tooLarge
91
+ ZSTD_error_maxSymbolValue_tooSmall
92
+ ZSTD_error_memory_allocation
93
+ ZSTD_error_parameter_outOfBound
94
+ ZSTD_error_parameter_unsupported
95
+ ZSTD_error_prefix_unknown
96
+ ZSTD_error_srcSize_wrong
97
+ ZSTD_error_stage_wrong
98
+ ZSTD_error_tableLog_tooLarge
99
+ ZSTD_error_version_unsupported
100
+ ZSTD_error_workSpace_tooSmall
101
+ ],
102
+ :types => %w[ZSTD_ErrorCode]
103
+ )
104
+
29
105
  def require_library(name, functions)
30
106
  functions.each do |function|
31
107
  abort "Can't find #{function} function in #{name} library" unless find_library name, function
32
108
  end
33
109
  end
34
110
 
111
+ # rubocop:disable Style/GlobalVars
112
+ if find_library "zstd", "ZDICT_getDictHeaderSize"
113
+ $defs.push "-DHAVE_ZDICT_HEADER_SIZE"
114
+ end
115
+ # rubocop:enable Style/GlobalVars
116
+
35
117
  require_library(
36
118
  "zstd",
37
119
  %w[
38
- ZSTD_isError
39
- ZSTD_getErrorCode
40
- ZSTD_createCCtx
41
- ZSTD_createDCtx
42
- ZSTD_freeCCtx
43
- ZSTD_freeDCtx
120
+ ZDICT_getDictID
121
+ ZDICT_isError
122
+ ZDICT_trainFromBuffer
123
+ ZSTD_CCtx_loadDictionary
44
124
  ZSTD_CCtx_setParameter
45
- ZSTD_DCtx_setParameter
46
125
  ZSTD_CCtx_setPledgedSrcSize
47
- ZSTD_cParam_getBounds
48
- ZSTD_dParam_getBounds
49
126
  ZSTD_CStreamInSize
50
127
  ZSTD_CStreamOutSize
128
+ ZSTD_compressStream2
129
+ ZSTD_cParam_getBounds
130
+ ZSTD_createCCtx
131
+ ZSTD_createDCtx
132
+ ZSTD_DCtx_setParameter
133
+ ZSTD_DCtx_loadDictionary
51
134
  ZSTD_DStreamInSize
52
135
  ZSTD_DStreamOutSize
53
- ZSTD_compressStream2
54
136
  ZSTD_decompressStream
55
- ZDICT_getDictID
56
- ZDICT_trainFromBuffer
137
+ ZSTD_dParam_getBounds
138
+ ZSTD_freeCCtx
139
+ ZSTD_freeDCtx
140
+ ZSTD_getErrorCode
141
+ ZSTD_isError
57
142
  ]
58
143
  )
59
144
 
@@ -82,7 +167,7 @@ $libs = $libs.split(%r{\s})
82
167
  .uniq
83
168
  .join " "
84
169
 
85
- if ENV["CI"] || ENV["COVERAGE"]
170
+ if ENV["CI"]
86
171
  $CFLAGS << " --coverage"
87
172
  $LDFLAGS << " --coverage"
88
173
  end
@@ -5,17 +5,16 @@
5
5
 
6
6
  #include <zstd.h>
7
7
 
8
- #include "ruby.h"
9
-
10
8
  VALUE zstds_ext_create_string_buffer(VALUE length)
11
9
  {
12
10
  return rb_str_new(NULL, NUM2SIZET(length));
13
11
  }
14
12
 
15
- VALUE zstds_ext_resize_string_buffer(VALUE args)
13
+ VALUE zstds_ext_resize_string_buffer(VALUE buffer_args)
16
14
  {
17
- VALUE buffer = rb_ary_entry(args, 0);
18
- VALUE length = rb_ary_entry(args, 1);
15
+ VALUE buffer = rb_ary_entry(buffer_args, 0);
16
+ VALUE length = rb_ary_entry(buffer_args, 1);
17
+
19
18
  return rb_str_resize(buffer, NUM2SIZET(length));
20
19
  }
21
20
 
@@ -11,12 +11,12 @@ VALUE zstds_ext_create_string_buffer(VALUE length);
11
11
  #define ZSTDS_EXT_CREATE_STRING_BUFFER(buffer, length, exception) \
12
12
  VALUE buffer = rb_protect(zstds_ext_create_string_buffer, SIZET2NUM(length), &exception);
13
13
 
14
- VALUE zstds_ext_resize_string_buffer(VALUE args);
14
+ VALUE zstds_ext_resize_string_buffer(VALUE buffer_args);
15
15
 
16
- #define ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, length, exception) \
17
- VALUE args = rb_ary_new_from_args(2, buffer, SIZET2NUM(length)); \
18
- buffer = rb_protect(zstds_ext_resize_string_buffer, args, &exception); \
19
- RB_GC_GUARD(args);
16
+ #define ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, length, exception) \
17
+ VALUE buffer_args = rb_ary_new_from_args(2, buffer, SIZET2NUM(length)); \
18
+ buffer = rb_protect(zstds_ext_resize_string_buffer, buffer_args, &exception); \
19
+ RB_GC_GUARD(buffer_args);
20
20
 
21
21
  void zstds_ext_buffer_exports(VALUE root_module);
22
22
 
@@ -6,39 +6,93 @@
6
6
  #include <string.h>
7
7
  #include <zdict.h>
8
8
 
9
- #include "ruby.h"
10
9
  #include "zstds_ext/buffer.h"
11
10
  #include "zstds_ext/error.h"
12
- #include "zstds_ext/macro.h"
11
+ #include "zstds_ext/gvl.h"
13
12
  #include "zstds_ext/option.h"
14
13
 
15
- VALUE zstds_ext_get_dictionary_buffer_id(VALUE ZSTDS_EXT_UNUSED(self), VALUE buffer)
14
+ // -- initialization --
15
+
16
+ typedef struct
16
17
  {
17
- unsigned int id = ZDICT_getDictID(RSTRING_PTR(buffer), RSTRING_LEN(buffer));
18
- if (id == 0) {
19
- zstds_ext_raise_error(ZSTDS_EXT_ERROR_VALIDATE_FAILED);
18
+ const char* data;
19
+ size_t size;
20
+ } sample_t;
21
+
22
+ typedef struct
23
+ {
24
+ const sample_t* samples;
25
+ size_t length;
26
+ char* buffer;
27
+ size_t capacity;
28
+ zstds_result_t result;
29
+ zstds_ext_result_t ext_result;
30
+ } train_args_t;
31
+
32
+ static inline void* train_wrapper(void* data)
33
+ {
34
+ train_args_t* args = data;
35
+ const sample_t* samples = args->samples;
36
+ size_t length = args->length;
37
+ size_t size = 0;
38
+
39
+ for (size_t index = 0; index < length; index++) {
40
+ size += samples[index].size;
20
41
  }
21
42
 
22
- return UINT2NUM(id);
43
+ zstds_ext_byte_t* group = malloc(size);
44
+ if (group == NULL) {
45
+ args->ext_result = ZSTDS_EXT_ERROR_ALLOCATE_FAILED;
46
+ return NULL;
47
+ }
48
+
49
+ size_t* sizes = malloc(length * sizeof(size_t));
50
+ if (sizes == NULL) {
51
+ free(group);
52
+ args->ext_result = ZSTDS_EXT_ERROR_ALLOCATE_FAILED;
53
+ return NULL;
54
+ }
55
+
56
+ size_t offset = 0;
57
+
58
+ for (size_t index = 0; index < length; index++) {
59
+ const sample_t* sample_ptr = &samples[index];
60
+ size_t sample_size = sample_ptr->size;
61
+
62
+ memmove(group + offset, sample_ptr->data, sample_size);
63
+ offset += sample_size;
64
+
65
+ sizes[index] = sample_size;
66
+ }
67
+
68
+ args->result = ZDICT_trainFromBuffer((void*) args->buffer, args->capacity, group, sizes, (unsigned int) length);
69
+
70
+ free(group);
71
+ free(sizes);
72
+
73
+ if (ZDICT_isError(args->result)) {
74
+ args->ext_result = zstds_ext_get_error(ZSTD_getErrorCode(args->result));
75
+ return NULL;
76
+ }
77
+
78
+ args->ext_result = 0;
79
+
80
+ return NULL;
23
81
  }
24
82
 
25
- VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE samples, VALUE options)
83
+ VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE raw_samples, VALUE options)
26
84
  {
27
- Check_Type(samples, T_ARRAY);
28
-
29
- size_t sample_index;
30
- unsigned int samples_length = (unsigned int)RARRAY_LEN(samples);
31
- size_t samples_size = 0;
85
+ Check_Type(raw_samples, T_ARRAY);
32
86
 
33
- for (sample_index = 0; sample_index < samples_length; sample_index++) {
34
- VALUE sample = rb_ary_entry(samples, sample_index);
35
- Check_Type(sample, T_STRING);
87
+ size_t length = RARRAY_LEN(raw_samples);
36
88
 
37
- samples_size += RSTRING_LEN(sample);
89
+ for (size_t index = 0; index < length; index++) {
90
+ Check_Type(rb_ary_entry(raw_samples, index), T_STRING);
38
91
  }
39
92
 
40
93
  Check_Type(options, T_HASH);
41
- ZSTDS_EXT_GET_BUFFER_LENGTH_OPTION(options, capacity);
94
+ ZSTDS_EXT_GET_BOOL_OPTION(options, gvl);
95
+ ZSTDS_EXT_GET_SIZE_OPTION(options, capacity);
42
96
 
43
97
  if (capacity == 0) {
44
98
  capacity = ZSTDS_EXT_DEFAULT_DICTIONARY_CAPACITY;
@@ -51,53 +105,78 @@ VALUE zstds_ext_train_dictionary_buffer(VALUE ZSTDS_EXT_UNUSED(self), VALUE samp
51
105
  zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
52
106
  }
53
107
 
54
- zstds_ext_byte_t* samples_buffer = malloc(samples_size);
55
- if (samples_buffer == NULL) {
108
+ sample_t* samples = malloc(sizeof(sample_t) * length);
109
+ if (samples == NULL) {
56
110
  zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
57
111
  }
58
112
 
59
- size_t* samples_sizes = malloc(samples_length * sizeof(size_t));
60
- if (samples_sizes == NULL) {
61
- free(samples_buffer);
62
- zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
113
+ for (size_t index = 0; index < length; index++) {
114
+ VALUE raw_sample = rb_ary_entry(raw_samples, index);
115
+ sample_t* sample = &samples[index];
116
+
117
+ sample->data = RSTRING_PTR(raw_sample);
118
+ sample->size = RSTRING_LEN(raw_sample);
63
119
  }
64
120
 
65
- size_t sample_offset = 0;
121
+ train_args_t args = {
122
+ .samples = samples,
123
+ .length = length,
124
+ .buffer = RSTRING_PTR(buffer),
125
+ .capacity = capacity,
126
+ };
66
127
 
67
- for (sample_index = 0; sample_index < samples_length; sample_index++) {
68
- VALUE sample = rb_ary_entry(samples, sample_index);
69
- const char* sample_data = RSTRING_PTR(sample);
70
- size_t sample_size = RSTRING_LEN(sample);
128
+ ZSTDS_EXT_GVL_WRAP(gvl, train_wrapper, &args);
129
+ free(samples);
71
130
 
72
- memmove(samples_buffer + sample_offset, sample_data, sample_size);
73
- sample_offset += sample_size;
131
+ if (args.ext_result != 0) {
132
+ zstds_ext_raise_error(args.ext_result);
133
+ }
74
134
 
75
- samples_sizes[sample_index] = sample_size;
135
+ ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, args.result, exception);
136
+ if (exception != 0) {
137
+ zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
76
138
  }
77
139
 
78
- zstds_result_t result = ZDICT_trainFromBuffer(
79
- RSTRING_PTR(buffer), capacity,
80
- samples_buffer, samples_sizes, samples_length);
140
+ return buffer;
141
+ }
81
142
 
82
- free(samples_buffer);
83
- free(samples_sizes);
143
+ // -- other --
84
144
 
85
- if (ZSTD_isError(result)) {
86
- zstds_ext_raise_error(zstds_ext_get_error(ZSTD_getErrorCode(result)));
145
+ VALUE zstds_ext_get_dictionary_buffer_id(VALUE ZSTDS_EXT_UNUSED(self), VALUE buffer)
146
+ {
147
+ unsigned int id = ZDICT_getDictID(RSTRING_PTR(buffer), RSTRING_LEN(buffer));
148
+ if (id == 0) {
149
+ zstds_ext_raise_error(ZSTDS_EXT_ERROR_VALIDATE_FAILED);
87
150
  }
88
151
 
89
- ZSTDS_EXT_RESIZE_STRING_BUFFER(buffer, result, exception);
90
- if (exception != 0) {
91
- zstds_ext_raise_error(ZSTDS_EXT_ERROR_ALLOCATE_FAILED);
152
+ return UINT2NUM(id);
153
+ }
154
+
155
+ #if defined(HAVE_ZDICT_HEADER_SIZE)
156
+ VALUE zstds_ext_get_dictionary_header_size(VALUE self, VALUE buffer)
157
+ {
158
+ zstds_result_t result = ZDICT_getDictHeaderSize(RSTRING_PTR(buffer), RSTRING_LEN(buffer));
159
+ if (ZDICT_isError(result)) {
160
+ zstds_ext_raise_error(zstds_ext_get_error(ZSTD_getErrorCode(result)));
92
161
  }
93
162
 
94
- return buffer;
163
+ return SIZET2NUM(result);
95
164
  }
96
165
 
166
+ #else
167
+ ZSTDS_EXT_NORETURN VALUE zstds_ext_get_dictionary_header_size(VALUE self, VALUE buffer)
168
+ {
169
+ zstds_ext_raise_error(ZSTDS_EXT_ERROR_NOT_IMPLEMENTED);
170
+ };
171
+ #endif
172
+
173
+ // -- exports --
174
+
97
175
  void zstds_ext_dictionary_exports(VALUE root_module)
98
176
  {
99
177
  VALUE dictionary = rb_define_class_under(root_module, "Dictionary", rb_cObject);
100
178
 
101
179
  rb_define_singleton_method(dictionary, "get_buffer_id", zstds_ext_get_dictionary_buffer_id, 1);
180
+ rb_define_singleton_method(dictionary, "get_header_size", zstds_ext_get_dictionary_header_size, 1);
102
181
  rb_define_singleton_method(dictionary, "train_buffer", zstds_ext_train_dictionary_buffer, 2);
103
182
  }