yaz0 0.3.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/ext/yaz0/ext_yaz0.c +165 -0
- data/ext/yaz0/ext_yaz0.h +8 -0
- data/ext/yaz0/extconf.rb +11 -1
- data/lib/yaz0/version.rb +1 -1
- data/lib/yaz0/yaz0.so +0 -0
- data/lib/yaz0.rb +42 -0
- data/libyaz0/include/yaz0.h +34 -0
- data/libyaz0/src/libyaz0/CMakeLists.txt +3 -0
- data/libyaz0/src/libyaz0/compress.c +392 -0
- data/libyaz0/src/libyaz0/decompress.c +197 -0
- data/libyaz0/src/libyaz0/libyaz0.c +64 -0
- data/libyaz0/src/libyaz0/libyaz0.h +49 -0
- data/libyaz0/src/libyaz0/util.c +6 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/yaz0_spec.rb +39 -0
- data/yaz0.gemspec +13 -3
- metadata +19 -16
- data/.gitignore +0 -19
- data/.travis.yml +0 -6
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/ext/yaz0/buffer.c +0 -29
- data/ext/yaz0/compress.c +0 -319
- data/ext/yaz0/decompress.c +0 -65
- data/ext/yaz0/yaz0.c +0 -48
- data/ext/yaz0/yaz0.h +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 857c2a0a5b230d48ad3e295e5877096d6287dd1c6bb1f072817a83e5a00b6851
|
4
|
+
data.tar.gz: 87bdadd4138b0f29d95061dc82073373c7cff344b00f0dd2b40cc581f2264c71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40aab200a225dbc5131a96500dd08011f5054fc8916525caf6fcfa7fed7a3344d559de56f0a36875bc9785773d1be9c9301bd899821e58f447f4fbfc155f699a
|
7
|
+
data.tar.gz: 29aa243dd3c0111414cd33a600cacc0dbc47c3dc159199ab7539813949c5b4fb8dd7726c6bd95c92c8f377a8bf68bb7354e73dc0a3c873258ced29b7797e1aa9
|
data/Gemfile
CHANGED
data/ext/yaz0/ext_yaz0.c
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
#include "ext_yaz0.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
#define BUFSIZE 0x4000
|
6
|
+
|
7
|
+
static void ext_yaz0_stream_free(void*);
|
8
|
+
|
9
|
+
static VALUE class_yaz0_stream;
|
10
|
+
static VALUE class_yaz0_error;
|
11
|
+
static VALUE class_yaz0_error_bad_magic;
|
12
|
+
static VALUE class_yaz0_error_end_of_file;
|
13
|
+
|
14
|
+
static struct rb_data_type_struct type_yaz0_stream = {
|
15
|
+
"yaz0_stream",
|
16
|
+
{ NULL, ext_yaz0_stream_free, NULL },
|
17
|
+
NULL,
|
18
|
+
NULL,
|
19
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
20
|
+
};
|
21
|
+
|
22
|
+
static void
|
23
|
+
ext_yaz0_stream_free(void* s)
|
24
|
+
{
|
25
|
+
yaz0Destroy((Yaz0Stream*)s);
|
26
|
+
}
|
27
|
+
|
28
|
+
static VALUE
|
29
|
+
ext_yaz0_stream_alloc(VALUE klass)
|
30
|
+
{
|
31
|
+
Yaz0Stream* s;
|
32
|
+
int ret;
|
33
|
+
|
34
|
+
ret = yaz0Init(&s);
|
35
|
+
if (ret == YAZ0_OUT_OF_MEMORY)
|
36
|
+
rb_raise(rb_eNoMemError, "Out of memory");
|
37
|
+
return TypedData_Wrap_Struct(klass, &type_yaz0_stream, s);
|
38
|
+
}
|
39
|
+
|
40
|
+
static void*
|
41
|
+
run_NoGVL(void* arg)
|
42
|
+
{
|
43
|
+
return (void*)(intptr_t)yaz0Run((Yaz0Stream*)arg);
|
44
|
+
}
|
45
|
+
|
46
|
+
static VALUE
|
47
|
+
run(VALUE self, VALUE io_in, VALUE io_out, int compress, int size, int level)
|
48
|
+
{
|
49
|
+
Yaz0Stream* s;
|
50
|
+
VALUE buffer_in;
|
51
|
+
VALUE buffer_out;
|
52
|
+
VALUE tmp;
|
53
|
+
int in_is_str;
|
54
|
+
int ret;
|
55
|
+
|
56
|
+
in_is_str = TYPE(io_in) == T_STRING;
|
57
|
+
|
58
|
+
TypedData_Get_Struct(self, Yaz0Stream, &type_yaz0_stream, s);
|
59
|
+
if (compress)
|
60
|
+
yaz0ModeCompress(s, size, level);
|
61
|
+
else
|
62
|
+
yaz0ModeDecompress(s);
|
63
|
+
|
64
|
+
/* Init the buffers */
|
65
|
+
if (!in_is_str)
|
66
|
+
{
|
67
|
+
buffer_in = rb_str_new(NULL, 0);
|
68
|
+
rb_str_resize(buffer_in, BUFSIZE);
|
69
|
+
rb_gc_register_address(&buffer_in);
|
70
|
+
rb_funcall(io_in, rb_intern("read"), 2, INT2FIX(BUFSIZE), buffer_in);
|
71
|
+
}
|
72
|
+
else
|
73
|
+
{
|
74
|
+
buffer_in = rb_obj_dup(io_in);
|
75
|
+
rb_gc_register_address(&buffer_in);
|
76
|
+
}
|
77
|
+
yaz0Input(s, RSTRING_PTR(buffer_in), (uint32_t)RSTRING_LEN(buffer_in));
|
78
|
+
|
79
|
+
buffer_out = rb_str_new(NULL, 0);
|
80
|
+
rb_str_resize(buffer_out, BUFSIZE);
|
81
|
+
rb_gc_register_address(&buffer_out);
|
82
|
+
yaz0Output(s, RSTRING_PTR(buffer_out), BUFSIZE);
|
83
|
+
|
84
|
+
for (;;)
|
85
|
+
{
|
86
|
+
ret = (int)(intptr_t)rb_thread_call_without_gvl(run_NoGVL, s, RUBY_UBF_IO, NULL);
|
87
|
+
switch (ret)
|
88
|
+
{
|
89
|
+
case YAZ0_NEED_AVAIL_IN:
|
90
|
+
/* Need more input */
|
91
|
+
if (in_is_str)
|
92
|
+
{
|
93
|
+
rb_gc_unregister_address(&buffer_in);
|
94
|
+
rb_gc_unregister_address(&buffer_out);
|
95
|
+
rb_raise(class_yaz0_error_end_of_file, "Unexpected end of file");
|
96
|
+
}
|
97
|
+
|
98
|
+
tmp = rb_funcall(io_in, rb_intern("read"), 2, INT2FIX(BUFSIZE), buffer_in);
|
99
|
+
if (tmp == Qnil)
|
100
|
+
{
|
101
|
+
rb_gc_unregister_address(&buffer_in);
|
102
|
+
rb_gc_unregister_address(&buffer_out);
|
103
|
+
rb_raise(class_yaz0_error_end_of_file, "Unexpected end of file");
|
104
|
+
}
|
105
|
+
yaz0Input(s, RSTRING_PTR(buffer_in), (uint32_t)RSTRING_LEN(buffer_in));
|
106
|
+
break;
|
107
|
+
case YAZ0_NEED_AVAIL_OUT:
|
108
|
+
/* Need more output */
|
109
|
+
rb_str_resize(buffer_out, yaz0OutputChunkSize(s));
|
110
|
+
rb_funcall(io_out, rb_intern("write"), 1, buffer_out);
|
111
|
+
rb_str_resize(buffer_out, BUFSIZE);
|
112
|
+
yaz0Output(s, RSTRING_PTR(buffer_out), BUFSIZE);
|
113
|
+
break;
|
114
|
+
case YAZ0_BAD_MAGIC:
|
115
|
+
rb_gc_unregister_address(&buffer_in);
|
116
|
+
rb_gc_unregister_address(&buffer_out);
|
117
|
+
rb_raise(class_yaz0_error_bad_magic, "Bad magic");
|
118
|
+
break;
|
119
|
+
case YAZ0_OK:
|
120
|
+
goto end;
|
121
|
+
}
|
122
|
+
}
|
123
|
+
|
124
|
+
end:
|
125
|
+
/* There might still be unflushed output */
|
126
|
+
rb_str_resize(buffer_out, yaz0OutputChunkSize(s));
|
127
|
+
rb_funcall(io_out, rb_intern("write"), 1, buffer_out);
|
128
|
+
|
129
|
+
rb_gc_unregister_address(&buffer_in);
|
130
|
+
rb_gc_unregister_address(&buffer_out);
|
131
|
+
|
132
|
+
return Qnil;
|
133
|
+
}
|
134
|
+
|
135
|
+
static VALUE
|
136
|
+
ext_yaz0_stream_raw_decompress(VALUE self, VALUE io_in, VALUE io_out)
|
137
|
+
{
|
138
|
+
return run(self, io_in, io_out, 0, 0, 0);
|
139
|
+
}
|
140
|
+
|
141
|
+
static VALUE
|
142
|
+
ext_yaz0_stream_raw_compress(VALUE self, VALUE io_in, VALUE io_out, VALUE size, VALUE level)
|
143
|
+
{
|
144
|
+
Check_Type(size, T_FIXNUM);
|
145
|
+
Check_Type(level, T_FIXNUM);
|
146
|
+
return run(self, io_in, io_out, 1, FIX2INT(size), FIX2INT(level));
|
147
|
+
}
|
148
|
+
|
149
|
+
void
|
150
|
+
Init_yaz0(void)
|
151
|
+
{
|
152
|
+
VALUE mod;
|
153
|
+
mod = rb_define_module("Yaz0");
|
154
|
+
|
155
|
+
/* Error classes */
|
156
|
+
class_yaz0_error = rb_define_class_under(mod, "Error", rb_eStandardError);
|
157
|
+
class_yaz0_error_bad_magic = rb_define_class_under(mod, "BadMagicError", class_yaz0_error);
|
158
|
+
class_yaz0_error_end_of_file = rb_define_class_under(mod, "EndOfFileError", class_yaz0_error);
|
159
|
+
|
160
|
+
/* Stream */
|
161
|
+
class_yaz0_stream = rb_define_class_under(mod, "Stream", rb_cObject);
|
162
|
+
rb_define_alloc_func(class_yaz0_stream, ext_yaz0_stream_alloc);
|
163
|
+
rb_define_method(class_yaz0_stream, "raw_decompress", ext_yaz0_stream_raw_decompress, 2);
|
164
|
+
rb_define_method(class_yaz0_stream, "raw_compress", ext_yaz0_stream_raw_compress, 4);
|
165
|
+
}
|
data/ext/yaz0/ext_yaz0.h
ADDED
data/ext/yaz0/extconf.rb
CHANGED
@@ -1,3 +1,13 @@
|
|
1
1
|
require "mkmf"
|
2
2
|
|
3
|
-
|
3
|
+
# Build the native extension together with the bundled libyaz0 sources.
extension_name = "yaz0/yaz0"
dir_config(extension_name)

# Locations inside the bundled library, relative to this file.
libyaz0_root = File.join(__dir__, "../../libyaz0")
libyaz0_src_dir = File.join(libyaz0_root, "src/libyaz0")
libyaz0_include_dir = File.join(libyaz0_root, "include")

# Compile every library source file plus the Ruby binding itself.
library_sources = Dir[File.join(libyaz0_src_dir, "**/*.c")].map { |path| File.expand_path(path) }
$srcs = library_sources + ["ext_yaz0.c"]

# Let make find the library sources and the public header.
$VPATH << File.expand_path(libyaz0_src_dir)
$INCFLAGS << " -I#{File.expand_path(libyaz0_include_dir)}"

create_makefile(extension_name)
|
data/lib/yaz0/version.rb
CHANGED
data/lib/yaz0/yaz0.so
ADDED
Binary file
|
data/lib/yaz0.rb
CHANGED
@@ -1,5 +1,47 @@
|
|
1
|
+
require "stringio"
|
1
2
|
require "yaz0/version"
|
2
3
|
require "yaz0/yaz0"
|
3
4
|
|
4
5
|
module Yaz0
  class Stream
    # Decompresses +src+.
    #
    # When +dst+ is given, the output is written to it and nil is returned.
    # Otherwise the output is collected in a StringIO and returned as a String.
    def decompress(src, dst = nil)
      if dst
        raw_decompress(src, dst)
        return nil
      end

      buffer = StringIO.new
      raw_decompress(src, buffer)
      buffer.string
    end

    # Compresses +src+.
    #
    # The second argument may be either a destination or the options Hash.
    # Options:
    #   :level - compression level (default 6)
    #   :size  - uncompressed size (default +src.size+)
    #
    # When a destination is given, the output is written to it and nil is
    # returned. Otherwise the compressed data is returned as a String.
    def compress(src, dst_or_opts = nil, opts = {})
      dst = dst_or_opts
      if dst_or_opts.is_a?(Hash)
        opts = dst_or_opts
        dst = nil
      end

      level = opts[:level] || 6
      size = opts[:size] || src.size

      if dst
        raw_compress(src, dst, size, level)
        return nil
      end

      buffer = StringIO.new
      raw_compress(src, buffer, size, level)
      buffer.string
    end
  end

  # Convenience wrapper: decompresses using a fresh Stream.
  def self.decompress(*args)
    Stream.new.decompress(*args)
  end

  # Convenience wrapper: compresses using a fresh Stream.
  def self.compress(*args)
    Stream.new.compress(*args)
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef YAZ0_H
#define YAZ0_H

#include <stddef.h>
#include <stdint.h>

/* Give the API C linkage when the header is consumed from C++. */
#ifdef __cplusplus
# define YAZ0_API extern "C"
#else
# define YAZ0_API
#endif

/* Status codes: zero is success, positive values ask the caller for more
 * buffer space, negative values are errors. */
#define YAZ0_OK                 0
#define YAZ0_NEED_AVAIL_IN      1       /* more input must be supplied */
#define YAZ0_NEED_AVAIL_OUT     2       /* more output space must be supplied */
#define YAZ0_BAD_MAGIC          (-1)
#define YAZ0_OUT_OF_MEMORY      (-2)

#define YAZ0_DEFAULT_LEVEL      6

/* Opaque stream state. */
typedef struct Yaz0Stream Yaz0Stream;

/* Lifetime */
YAZ0_API int yaz0Init(Yaz0Stream** stream);
YAZ0_API int yaz0Destroy(Yaz0Stream* stream);

/* Mode selection */
YAZ0_API int yaz0ModeDecompress(Yaz0Stream* stream);
YAZ0_API int yaz0ModeCompress(Yaz0Stream* stream, uint32_t size, int level);

/* Processing and buffer hand-over */
YAZ0_API int yaz0Run(Yaz0Stream* stream);
YAZ0_API int yaz0Input(Yaz0Stream* stream, const void* data, uint32_t size);
YAZ0_API int yaz0Output(Yaz0Stream* stream, void* data, uint32_t size);

/* Queries */
YAZ0_API uint32_t yaz0OutputChunkSize(const Yaz0Stream* stream);
YAZ0_API uint32_t yaz0DecompressedSize(const Yaz0Stream* stream);

#endif /* YAZ0_H */
|
@@ -0,0 +1,392 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include "libyaz0.h"
|
4
|
+
|
5
|
+
/*
 * Number of hash-table slots probed per lookup/insert, indexed by
 * compression level. Slot 0 exists only so that levels index directly;
 * yaz0ModeCompress clamps the level to 1..9.
 */
static const uint32_t kProbesPerLevel[] = {
    0,      /* level 0 */
    1,      /* level 1 */
    2,      /* level 2 */
    4,      /* level 3 */
    8,      /* level 4 */
    16,     /* level 5 */
    64,     /* level 6 */
    256,    /* level 7 */
    512,    /* level 8 */
    4096    /* level 9 */
};
|
17
|
+
|
18
|
+
/*
 * Hashes a three-byte sequence into 32 bits: the bytes are packed
 * little-endian (a in the low byte) and then mixed with two xor-shifts
 * around one multiplication.
 */
static uint32_t hash(uint8_t a, uint8_t b, uint8_t c)
{
    uint32_t key = ((uint32_t)c << 16) | ((uint32_t)b << 8) | (uint32_t)a;

    key ^= key >> 16;
    key *= 0x45d9f3b;
    key ^= key >> 16;
    return key;
}
|
25
|
+
|
26
|
+
/*
 * Records position (totalOut + offset) under hash `h`.
 *
 * Up to kProbesPerLevel[level] consecutive slots starting at the home slot
 * are examined, and the first of the following wins:
 *   - an empty slot (htSize is incremented),
 *   - a slot whose entry is more than 0x1000 bytes behind totalOut,
 *   - otherwise the probed slot holding the smallest (oldest) position.
 *
 * `bucket` starts at the home slot so it is never read uninitialized, even
 * if the probe count for the current level is zero.
 */
static void hashWrite(Yaz0Stream* s, uint32_t h, uint32_t offset)
{
    uint32_t maxProbes;
    uint32_t bucket;
    uint32_t tmpBucket;
    uint32_t oldest;
    uint32_t entry;
    int32_t pos;

    oldest = 0xffffffff;
    bucket = h % HASH_MAX_ENTRIES;
    maxProbes = kProbesPerLevel[s->level];
    for (uint32_t i = 0; i < maxProbes; ++i)
    {
        tmpBucket = (h + i) % HASH_MAX_ENTRIES;
        entry = s->htEntries[tmpBucket];
        if (entry == 0xffffffff)
        {
            /* Free slot: take it and grow the table */
            s->htSize++;
            bucket = tmpBucket;
            break;
        }
        /* Signed: entries may have been written ahead of totalOut via `offset` */
        pos = (int32_t)(s->totalOut - entry);
        if (pos > 0x1000)
        {
            /* Entry is too far back to be matched against: recycle it */
            bucket = tmpBucket;
            break;
        }
        if (entry < oldest)
        {
            oldest = entry;
            bucket = tmpBucket;
        }
    }
    s->htEntries[bucket] = s->totalOut + offset;
    s->htHashes[bucket] = h;
}
|
62
|
+
|
63
|
+
/*
 * Compacts the hash table: entries that are empty or lie more than 0x1000
 * bytes behind totalOut are dropped, the survivors are re-inserted with
 * linear probing from their home slot, and htSize is reset to the number
 * of survivors.
 */
static void rebuildHashTable(Yaz0Stream* s)
{
    uint32_t freshEntries[HASH_MAX_ENTRIES];
    uint32_t freshHashes[HASH_MAX_ENTRIES];
    uint32_t live = 0;

    /* 0xff fill marks every slot of the new tables as empty (0xffffffff) */
    memset(freshEntries, 0xff, sizeof(freshEntries));
    memset(freshHashes, 0xff, sizeof(freshHashes));

    for (uint32_t i = 0; i < HASH_MAX_ENTRIES; ++i)
    {
        uint32_t entry = s->htEntries[i];
        uint32_t age;
        uint32_t h;
        uint32_t slot;

        if (entry == 0xffffffff)
            continue;
        age = s->totalOut - entry;
        if (age > 0x1000)
            continue;

        /* Entry still good - find the first free slot from its home */
        h = s->htHashes[i];
        slot = h % HASH_MAX_ENTRIES;
        while (freshEntries[slot] != 0xffffffff)
            slot = (slot + 1) % HASH_MAX_ENTRIES;

        freshEntries[slot] = entry;
        freshHashes[slot] = h;
        live++;
    }

    s->htSize = live;
    memcpy(s->htEntries, freshEntries, sizeof(freshEntries));
    memcpy(s->htHashes, freshHashes, sizeof(freshHashes));
}
|
106
|
+
|
107
|
+
/*
 * Returns how many bytes feed() wants buffered in the window: the bytes
 * still to be consumed (decompSize - totalOut), capped at 0x888 + 1.
 */
static uint32_t maxSize(Yaz0Stream* stream)
{
    /* the extra byte is for look-aheads */
    static const uint32_t maxNecessary = 0x888 + 1;
    uint32_t remaining = stream->decompSize - stream->totalOut;

    return (remaining > maxNecessary) ? maxNecessary : remaining;
}
|
118
|
+
|
119
|
+
/* start: start of avail data */
|
120
|
+
/* end: end of avail data */
|
121
|
+
/* We need to write more data at the end of the window */
|
122
|
+
|
123
|
+
/*
 * Tops up the circular window from the caller's input buffer.
 *
 * Returns YAZ0_OK when the window holds at least maxSize() bytes (either
 * already, or after copying), and YAZ0_NEED_AVAIL_IN when the input buffer
 * could not supply enough. Whatever input is available is copied in either
 * case, up to WINDOW_SIZE - 0x1000 buffered bytes in total.
 */
static int feed(Yaz0Stream* s)
{
    uint32_t buffered;
    uint32_t wanted;
    uint32_t needed;
    uint32_t take;
    uint32_t tail;
    int status;

    /* Bytes currently buffered between window_start and window_end */
    if (s->window_start > s->window_end)
        buffered = WINDOW_SIZE - s->window_start + s->window_end;
    else
        buffered = s->window_end - s->window_start;

    wanted = maxSize(s);
    if (buffered >= wanted)
        return YAZ0_OK;

    /* We need more data: at least `needed`, at most `take` */
    needed = wanted - buffered;
    take = WINDOW_SIZE - 0x1000 - buffered;
    if (take > s->sizeIn - s->cursorIn)
        take = s->sizeIn - s->cursorIn;

    status = (take < needed) ? YAZ0_NEED_AVAIL_IN : YAZ0_OK;

    if (s->window_end + take >= WINDOW_SIZE)
    {
        /* The copy reaches the end of the window: fill the tail, then wrap */
        tail = WINDOW_SIZE - s->window_end;
        memcpy(s->window + s->window_end, s->in + s->cursorIn, tail);
        s->cursorIn += tail;
        s->window_end = 0;
        take -= tail;
    }
    memcpy(s->window + s->window_end, s->in + s->cursorIn, take);
    s->cursorIn += take;
    s->window_end += take;

    return status;
}
|
162
|
+
|
163
|
+
/*
 * Measures how many bytes match between the window position
 * (window_start + offset) and the position `pos` bytes before it.
 *
 * The result is capped at min(decompSize - totalOut, 0x111). When
 * `hintSize` is non-zero, the byte at index hintSize is compared first and
 * 0 is returned immediately on a mismatch, so candidates that cannot beat
 * the current best are rejected cheaply.
 */
static uint32_t matchSize(Yaz0Stream* s, uint32_t offset, uint32_t pos, uint32_t hintSize)
{
    uint32_t here = s->window_start + offset;
    uint32_t srcCursor = (here + WINDOW_SIZE - pos) % WINDOW_SIZE;
    uint32_t dstCursor = here % WINDOW_SIZE;
    uint32_t limit = s->decompSize - s->totalOut;
    uint32_t len = 0;

    if (limit > 0x111)
        limit = 0x111;

    if (hintSize
        && s->window[(srcCursor + hintSize) % WINDOW_SIZE] != s->window[(dstCursor + hintSize) % WINDOW_SIZE])
        return 0;

    while (s->window[(srcCursor + len) % WINDOW_SIZE] == s->window[(dstCursor + len) % WINDOW_SIZE])
    {
        len++;
        if (len == limit)
            break;
    }
    return len;
}
|
189
|
+
|
190
|
+
/*
 * Searches the hash table for the longest earlier occurrence of the data
 * at (window_start + offset), whose three-byte hash is `h`.
 *
 * Probing stops at the first empty slot or after kProbesPerLevel[level]
 * slots. Candidates more than 0x1000 bytes back are ignored.
 *
 * On return *outSize is the match length, or 0 when the best match is
 * shorter than 3 bytes. *outPos (the backwards distance) is written only
 * when *outSize is non-zero.
 */
static void findHashMatch(Yaz0Stream* s, uint32_t h, uint32_t offset, uint32_t* outSize, uint32_t* outPos)
{
    uint32_t bestLen = 0;
    uint32_t bestDist = 0;
    uint32_t probes = kProbesPerLevel[s->level];

    for (uint32_t i = 0; i < probes; ++i)
    {
        uint32_t slot = (h + i) % HASH_MAX_ENTRIES;
        uint32_t entry = s->htEntries[slot];
        uint32_t dist;
        uint32_t len;

        if (entry == 0xffffffff)
            break;
        if (s->htHashes[slot] != h)
            continue;

        dist = s->totalOut + offset - entry;
        if (dist > 0x1000)
            continue;

        len = matchSize(s, offset, dist, bestLen);
        if (len > bestLen)
        {
            bestLen = len;
            bestDist = dist;
        }
    }

    if (bestLen >= 3)
    {
        *outSize = bestLen;
        *outPos = bestDist;
    }
    else
    {
        *outSize = 0;
    }
}
|
233
|
+
|
234
|
+
/*
 * Serializes one group of up to eight items to the output buffer.
 *
 * A header byte comes first, with bit (7 - i) set when item i is a literal.
 * Each item then follows:
 *   - literal (arrSize[i] == 0): one byte, taken from arrPos[i];
 *   - match shorter than 0x12:   two bytes, length - 2 in the top nibble and
 *                                distance - 1 in the remaining 12 bits;
 *   - longer match:              three bytes, distance - 1 in the first two
 *                                and length - 0x12 in the third.
 */
static void emitGroup(Yaz0Stream* s, int count, const uint32_t* arrSize, const uint32_t* arrPos)
{
    uint8_t flags = 0;

    for (int i = 0; i < count; ++i)
    {
        if (arrSize[i] == 0)
            flags |= (1 << (7 - i));
    }
    s->out[s->cursorOut++] = flags;

    for (int i = 0; i < count; ++i)
    {
        uint32_t len = arrSize[i];
        uint32_t dist = arrPos[i];

        if (len == 0)
        {
            /* Literal: arrPos carries the byte value itself */
            s->out[s->cursorOut++] = (uint8_t)dist;
            continue;
        }

        dist--;
        if (len >= 0x12)
        {
            /* 3 bytes */
            s->out[s->cursorOut++] = (uint8_t)(dist >> 8);
            s->out[s->cursorOut++] = (uint8_t)dist;
            s->out[s->cursorOut++] = (uint8_t)(len - 0x12);
        }
        else
        {
            /* 2 bytes */
            s->out[s->cursorOut++] = (uint8_t)(dist >> 8) | (uint8_t)((len - 2) << 4);
            s->out[s->cursorOut++] = (uint8_t)dist;
        }
    }
}
|
272
|
+
|
273
|
+
/*
 * Compresses up to eight items (literals or back-references) from the
 * window and emits them as one group.
 *
 * For each item the best match at the current position is compared with
 * the best match one byte later; a literal is emitted when there is no
 * match here or the later match is longer. Every position consumed is
 * recorded in the hash table. The group ends early once totalOut reaches
 * decompSize.
 */
static void compressGroup(Yaz0Stream* s)
{
    uint32_t arrSize[8];
    uint32_t arrPos[8];
    int emitted = 0;

    while (emitted < 8)
    {
        uint8_t a = s->window[s->window_start];
        uint8_t b = s->window[(s->window_start + 1) % WINDOW_SIZE];
        uint8_t c = s->window[(s->window_start + 2) % WINDOW_SIZE];
        uint8_t d = s->window[(s->window_start + 3) % WINDOW_SIZE];
        uint32_t h;
        uint32_t size;
        uint32_t pos;
        uint32_t nextSize;
        uint32_t nextPos;

        /* Best match starting here, then register this position */
        h = hash(a, b, c);
        findHashMatch(s, h, 0, &size, &pos);
        hashWrite(s, h, 0);

        /* Best match starting one byte later (look-ahead only) */
        h = hash(b, c, d);
        findHashMatch(s, h, 1, &nextSize, &nextPos);

        if (size != 0 && nextSize <= size)
        {
            /* Back-reference: also register every position it covers */
            arrSize[emitted] = size;
            arrPos[emitted] = pos;
            for (uint32_t i = 1; i < size; ++i)
            {
                a = b;
                b = c;
                c = s->window[(s->window_start + 2 + i) % WINDOW_SIZE];
                h = hash(a, b, c);
                hashWrite(s, h, i);
            }
            s->window_start += size;
            s->totalOut += size;
        }
        else
        {
            /* Literal byte */
            arrSize[emitted] = 0;
            arrPos[emitted] = s->window[s->window_start];
            s->window_start += 1;
            s->totalOut += 1;
        }

        emitted++;
        s->window_start %= WINDOW_SIZE;
        if (s->totalOut >= s->decompSize)
            break;
    }

    if (s->htSize > HASH_REBUILD)
        rebuildHashTable(s);
    emitGroup(s, emitted, arrSize, arrPos);
}
|
334
|
+
|
335
|
+
/*
 * Puts the stream into compression mode.
 *
 * The whole stream structure is zeroed first, so every field set before
 * this call is discarded. `size` is stored as the uncompressed size and
 * `level` is clamped to 1..9. Every hash-table slot is marked empty
 * (0xffffffff). Always returns YAZ0_OK.
 */
int yaz0ModeCompress(Yaz0Stream* s, uint32_t size, int level)
{
    int clamped = level;

    if (clamped < 1)
        clamped = 1;
    else if (clamped > 9)
        clamped = 9;

    memset(s, 0, sizeof(*s));
    s->mode = MODE_COMPRESS;
    s->decompSize = size;
    s->level = clamped;

    for (int slot = 0; slot < HASH_MAX_ENTRIES; ++slot)
    {
        s->htHashes[slot] = 0xffffffff;
        s->htEntries[slot] = 0xffffffff;
    }

    return YAZ0_OK;
}
|
352
|
+
|
353
|
+
/*
 * Runs the compressor until it finishes or needs the caller's help.
 *
 * Returns:
 *   YAZ0_OK             - totalOut has reached decompSize.
 *   YAZ0_NEED_AVAIL_OUT - fewer than 16 bytes are free for the header, or
 *                         fewer than 1 + 8 * 3 bytes are free for the next
 *                         group (the largest a group can be).
 *   YAZ0_NEED_AVAIL_IN  - feed() could not buffer enough input.
 *
 * The 16-byte header ("Yaz0", the size passed through swap32, eight zero
 * bytes) is written once. Its space check and write position are relative
 * to cursorOut, like the group check below, so bytes already sitting in
 * the output buffer are never overwritten.
 */
int yaz0_RunCompress(Yaz0Stream* stream)
{
    uint32_t tmp;
    int ret;

    /* Write headers */
    if (!stream->headersDone)
    {
        if (stream->sizeOut - stream->cursorOut < 16)
            return YAZ0_NEED_AVAIL_OUT;
        memcpy(stream->out + stream->cursorOut, "Yaz0", 4);
        tmp = swap32(stream->decompSize);
        memcpy(stream->out + stream->cursorOut + 4, &tmp, 4);
        tmp = 0;
        memcpy(stream->out + stream->cursorOut + 8, &tmp, 4);
        memcpy(stream->out + stream->cursorOut + 12, &tmp, 4);
        stream->cursorOut += 16;
        stream->headersDone = 1;
    }

    /* Compress */
    for (;;)
    {
        /* Check EOF */
        if (stream->totalOut >= stream->decompSize)
            return YAZ0_OK;

        /* Check output space: one header byte plus eight 3-byte items */
        if (stream->sizeOut - stream->cursorOut < 1 + 8 * 3)
            return YAZ0_NEED_AVAIL_OUT;

        /* Check that we have consumed enough input */
        ret = feed(stream);
        if (ret)
            return ret;

        /* Compress one chunk */
        compressGroup(stream);
    }
}
|