yaz0 0.3.0 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/ext/yaz0/ext_yaz0.c +165 -0
- data/ext/yaz0/ext_yaz0.h +8 -0
- data/ext/yaz0/extconf.rb +11 -1
- data/lib/yaz0/version.rb +1 -1
- data/lib/yaz0/yaz0.so +0 -0
- data/lib/yaz0.rb +42 -0
- data/libyaz0/include/yaz0.h +34 -0
- data/libyaz0/src/libyaz0/CMakeLists.txt +3 -0
- data/libyaz0/src/libyaz0/compress.c +392 -0
- data/libyaz0/src/libyaz0/decompress.c +197 -0
- data/libyaz0/src/libyaz0/libyaz0.c +64 -0
- data/libyaz0/src/libyaz0/libyaz0.h +49 -0
- data/libyaz0/src/libyaz0/util.c +6 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/yaz0_spec.rb +39 -0
- data/yaz0.gemspec +13 -3
- metadata +19 -16
- data/.gitignore +0 -19
- data/.travis.yml +0 -6
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/ext/yaz0/buffer.c +0 -29
- data/ext/yaz0/compress.c +0 -319
- data/ext/yaz0/decompress.c +0 -65
- data/ext/yaz0/yaz0.c +0 -48
- data/ext/yaz0/yaz0.h +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 857c2a0a5b230d48ad3e295e5877096d6287dd1c6bb1f072817a83e5a00b6851
|
4
|
+
data.tar.gz: 87bdadd4138b0f29d95061dc82073373c7cff344b00f0dd2b40cc581f2264c71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40aab200a225dbc5131a96500dd08011f5054fc8916525caf6fcfa7fed7a3344d559de56f0a36875bc9785773d1be9c9301bd899821e58f447f4fbfc155f699a
|
7
|
+
data.tar.gz: 29aa243dd3c0111414cd33a600cacc0dbc47c3dc159199ab7539813949c5b4fb8dd7726c6bd95c92c8f377a8bf68bb7354e73dc0a3c873258ced29b7797e1aa9
|
data/Gemfile
CHANGED
data/ext/yaz0/ext_yaz0.c
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
#include "ext_yaz0.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
#define BUFSIZE 0x4000
|
6
|
+
|
7
|
+
static void ext_yaz0_stream_free(void*);
|
8
|
+
|
9
|
+
static VALUE class_yaz0_stream;
|
10
|
+
static VALUE class_yaz0_error;
|
11
|
+
static VALUE class_yaz0_error_bad_magic;
|
12
|
+
static VALUE class_yaz0_error_end_of_file;
|
13
|
+
|
14
|
+
static struct rb_data_type_struct type_yaz0_stream = {
|
15
|
+
"yaz0_stream",
|
16
|
+
{ NULL, ext_yaz0_stream_free, NULL },
|
17
|
+
NULL,
|
18
|
+
NULL,
|
19
|
+
RUBY_TYPED_FREE_IMMEDIATELY
|
20
|
+
};
|
21
|
+
|
22
|
+
static void
|
23
|
+
ext_yaz0_stream_free(void* s)
|
24
|
+
{
|
25
|
+
yaz0Destroy((Yaz0Stream*)s);
|
26
|
+
}
|
27
|
+
|
28
|
+
static VALUE
|
29
|
+
ext_yaz0_stream_alloc(VALUE klass)
|
30
|
+
{
|
31
|
+
Yaz0Stream* s;
|
32
|
+
int ret;
|
33
|
+
|
34
|
+
ret = yaz0Init(&s);
|
35
|
+
if (ret == YAZ0_OUT_OF_MEMORY)
|
36
|
+
rb_raise(rb_eNoMemError, "Out of memory");
|
37
|
+
return TypedData_Wrap_Struct(klass, &type_yaz0_stream, s);
|
38
|
+
}
|
39
|
+
|
40
|
+
static void*
|
41
|
+
run_NoGVL(void* arg)
|
42
|
+
{
|
43
|
+
return (void*)(intptr_t)yaz0Run((Yaz0Stream*)arg);
|
44
|
+
}
|
45
|
+
|
46
|
+
static VALUE
|
47
|
+
run(VALUE self, VALUE io_in, VALUE io_out, int compress, int size, int level)
|
48
|
+
{
|
49
|
+
Yaz0Stream* s;
|
50
|
+
VALUE buffer_in;
|
51
|
+
VALUE buffer_out;
|
52
|
+
VALUE tmp;
|
53
|
+
int in_is_str;
|
54
|
+
int ret;
|
55
|
+
|
56
|
+
in_is_str = TYPE(io_in) == T_STRING;
|
57
|
+
|
58
|
+
TypedData_Get_Struct(self, Yaz0Stream, &type_yaz0_stream, s);
|
59
|
+
if (compress)
|
60
|
+
yaz0ModeCompress(s, size, level);
|
61
|
+
else
|
62
|
+
yaz0ModeDecompress(s);
|
63
|
+
|
64
|
+
/* Init the buffers */
|
65
|
+
if (!in_is_str)
|
66
|
+
{
|
67
|
+
buffer_in = rb_str_new(NULL, 0);
|
68
|
+
rb_str_resize(buffer_in, BUFSIZE);
|
69
|
+
rb_gc_register_address(&buffer_in);
|
70
|
+
rb_funcall(io_in, rb_intern("read"), 2, INT2FIX(BUFSIZE), buffer_in);
|
71
|
+
}
|
72
|
+
else
|
73
|
+
{
|
74
|
+
buffer_in = rb_obj_dup(io_in);
|
75
|
+
rb_gc_register_address(&buffer_in);
|
76
|
+
}
|
77
|
+
yaz0Input(s, RSTRING_PTR(buffer_in), (uint32_t)RSTRING_LEN(buffer_in));
|
78
|
+
|
79
|
+
buffer_out = rb_str_new(NULL, 0);
|
80
|
+
rb_str_resize(buffer_out, BUFSIZE);
|
81
|
+
rb_gc_register_address(&buffer_out);
|
82
|
+
yaz0Output(s, RSTRING_PTR(buffer_out), BUFSIZE);
|
83
|
+
|
84
|
+
for (;;)
|
85
|
+
{
|
86
|
+
ret = (int)(intptr_t)rb_thread_call_without_gvl(run_NoGVL, s, RUBY_UBF_IO, NULL);
|
87
|
+
switch (ret)
|
88
|
+
{
|
89
|
+
case YAZ0_NEED_AVAIL_IN:
|
90
|
+
/* Need more input */
|
91
|
+
if (in_is_str)
|
92
|
+
{
|
93
|
+
rb_gc_unregister_address(&buffer_in);
|
94
|
+
rb_gc_unregister_address(&buffer_out);
|
95
|
+
rb_raise(class_yaz0_error_end_of_file, "Unexpected end of file");
|
96
|
+
}
|
97
|
+
|
98
|
+
tmp = rb_funcall(io_in, rb_intern("read"), 2, INT2FIX(BUFSIZE), buffer_in);
|
99
|
+
if (tmp == Qnil)
|
100
|
+
{
|
101
|
+
rb_gc_unregister_address(&buffer_in);
|
102
|
+
rb_gc_unregister_address(&buffer_out);
|
103
|
+
rb_raise(class_yaz0_error_end_of_file, "Unexpected end of file");
|
104
|
+
}
|
105
|
+
yaz0Input(s, RSTRING_PTR(buffer_in), (uint32_t)RSTRING_LEN(buffer_in));
|
106
|
+
break;
|
107
|
+
case YAZ0_NEED_AVAIL_OUT:
|
108
|
+
/* Need more output */
|
109
|
+
rb_str_resize(buffer_out, yaz0OutputChunkSize(s));
|
110
|
+
rb_funcall(io_out, rb_intern("write"), 1, buffer_out);
|
111
|
+
rb_str_resize(buffer_out, BUFSIZE);
|
112
|
+
yaz0Output(s, RSTRING_PTR(buffer_out), BUFSIZE);
|
113
|
+
break;
|
114
|
+
case YAZ0_BAD_MAGIC:
|
115
|
+
rb_gc_unregister_address(&buffer_in);
|
116
|
+
rb_gc_unregister_address(&buffer_out);
|
117
|
+
rb_raise(class_yaz0_error_bad_magic, "Bad magic");
|
118
|
+
break;
|
119
|
+
case YAZ0_OK:
|
120
|
+
goto end;
|
121
|
+
}
|
122
|
+
}
|
123
|
+
|
124
|
+
end:
|
125
|
+
/* There might still be unflushed output */
|
126
|
+
rb_str_resize(buffer_out, yaz0OutputChunkSize(s));
|
127
|
+
rb_funcall(io_out, rb_intern("write"), 1, buffer_out);
|
128
|
+
|
129
|
+
rb_gc_unregister_address(&buffer_in);
|
130
|
+
rb_gc_unregister_address(&buffer_out);
|
131
|
+
|
132
|
+
return Qnil;
|
133
|
+
}
|
134
|
+
|
135
|
+
static VALUE
|
136
|
+
ext_yaz0_stream_raw_decompress(VALUE self, VALUE io_in, VALUE io_out)
|
137
|
+
{
|
138
|
+
return run(self, io_in, io_out, 0, 0, 0);
|
139
|
+
}
|
140
|
+
|
141
|
+
static VALUE
|
142
|
+
ext_yaz0_stream_raw_compress(VALUE self, VALUE io_in, VALUE io_out, VALUE size, VALUE level)
|
143
|
+
{
|
144
|
+
Check_Type(size, T_FIXNUM);
|
145
|
+
Check_Type(level, T_FIXNUM);
|
146
|
+
return run(self, io_in, io_out, 1, FIX2INT(size), FIX2INT(level));
|
147
|
+
}
|
148
|
+
|
149
|
+
void
|
150
|
+
Init_yaz0(void)
|
151
|
+
{
|
152
|
+
VALUE mod;
|
153
|
+
mod = rb_define_module("Yaz0");
|
154
|
+
|
155
|
+
/* Error classes */
|
156
|
+
class_yaz0_error = rb_define_class_under(mod, "Error", rb_eStandardError);
|
157
|
+
class_yaz0_error_bad_magic = rb_define_class_under(mod, "BadMagicError", class_yaz0_error);
|
158
|
+
class_yaz0_error_end_of_file = rb_define_class_under(mod, "EndOfFileError", class_yaz0_error);
|
159
|
+
|
160
|
+
/* Stream */
|
161
|
+
class_yaz0_stream = rb_define_class_under(mod, "Stream", rb_cObject);
|
162
|
+
rb_define_alloc_func(class_yaz0_stream, ext_yaz0_stream_alloc);
|
163
|
+
rb_define_method(class_yaz0_stream, "raw_decompress", ext_yaz0_stream_raw_decompress, 2);
|
164
|
+
rb_define_method(class_yaz0_stream, "raw_compress", ext_yaz0_stream_raw_compress, 4);
|
165
|
+
}
|
data/ext/yaz0/ext_yaz0.h
ADDED
data/ext/yaz0/extconf.rb
CHANGED
@@ -1,3 +1,13 @@
|
|
1
1
|
require "mkmf"
|
2
2
|
|
3
|
-
|
3
|
+
# Builds the native extension as yaz0/yaz0, compiling the bundled libyaz0
# sources together with the Ruby binding (ext_yaz0.c).
extension_name = 'yaz0/yaz0'
dir_config(extension_name)

# Resolves a path below the bundled libyaz0 tree, relative to this directory.
libyaz0_path = ->(relative) { File.join(__dir__, "../../libyaz0", relative) }

libyaz0_sources = Dir[libyaz0_path.call("src/libyaz0/**/*.c")].map { |source| File.expand_path(source) }
libyaz0_source_dir = File.expand_path(libyaz0_path.call("src/libyaz0"))
libyaz0_include_dir = File.expand_path(libyaz0_path.call("include"))

$srcs = libyaz0_sources + ["ext_yaz0.c"]
$VPATH << libyaz0_source_dir
$INCFLAGS << " -I#{libyaz0_include_dir}"

create_makefile(extension_name)
|
data/lib/yaz0/version.rb
CHANGED
data/lib/yaz0/yaz0.so
ADDED
Binary file
|
data/lib/yaz0.rb
CHANGED
@@ -1,5 +1,47 @@
|
|
1
|
+
require "stringio"
|
1
2
|
require "yaz0/version"
|
2
3
|
require "yaz0/yaz0"
|
3
4
|
|
4
5
|
module Yaz0
  # Convenience front-end over the native raw_decompress / raw_compress
  # primitives.
  class Stream
    # Decompresses +src+.
    #
    # When +dst+ is given, the output is written to it and nil is returned.
    # Otherwise the output is collected in a StringIO and returned as a String.
    def decompress(src, dst = nil)
      unless dst
        sink = StringIO.new
        raw_decompress(src, sink)
        return sink.string
      end

      raw_decompress(src, dst)
      nil
    end

    # Compresses +src+.
    #
    # The second argument may be either the destination or the options Hash.
    # Options: +:level+ (defaults to 6) and +:size+ (defaults to +src.size+).
    #
    # When a destination is given, the output is written to it and nil is
    # returned. Otherwise the output is collected in a StringIO and returned
    # as a String.
    def compress(src, dst_or_opts = nil, opts = {})
      dst = dst_or_opts
      if dst.is_a?(Hash)
        opts = dst
        dst = nil
      end

      level = opts[:level] || 6
      size = opts[:size] || src.size

      unless dst
        sink = StringIO.new
        raw_compress(src, sink, size, level)
        return sink.string
      end

      raw_compress(src, dst, size, level)
      nil
    end
  end

  # Shortcut for Yaz0::Stream.new.decompress(*args).
  def self.decompress(*args)
    Stream.new.decompress(*args)
  end

  # Shortcut for Yaz0::Stream.new.compress(*args).
  def self.compress(*args)
    Stream.new.compress(*args)
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef YAZ0_H
#define YAZ0_H

#include <stddef.h>
#include <stdint.h>

/* Give the API C linkage when this header is consumed from C++. */
#ifdef __cplusplus
# define YAZ0_API extern "C"
#else
# define YAZ0_API
#endif

/*
 * Status codes returned by the API.
 * Zero is success, positive values ask the caller for more buffer space,
 * negative values are errors.
 */
#define YAZ0_OK                 0       /* finished / success */
#define YAZ0_NEED_AVAIL_IN      1       /* more input must be supplied via yaz0Input() */
#define YAZ0_NEED_AVAIL_OUT     2       /* output must be drained and yaz0Output() called again */
#define YAZ0_BAD_MAGIC          (-1)
#define YAZ0_OUT_OF_MEMORY      (-2)

/* Compression level used when the caller has no preference. */
#define YAZ0_DEFAULT_LEVEL      6

/* Opaque stream state; created by yaz0Init() and released by yaz0Destroy(). */
typedef struct Yaz0Stream Yaz0Stream;

/* Creates a stream and stores it in *stream. */
YAZ0_API int yaz0Init(Yaz0Stream** stream);

/* Releases a stream created by yaz0Init(). */
YAZ0_API int yaz0Destroy(Yaz0Stream* stream);

/* Switches the stream to decompression mode. */
YAZ0_API int yaz0ModeDecompress(Yaz0Stream* stream);

/*
 * Switches the stream to compression mode.
 * `size` is the total uncompressed size that will be fed to the stream;
 * `level` selects the compression effort.
 */
YAZ0_API int yaz0ModeCompress(Yaz0Stream* stream, uint32_t size, int level);

/* Processes data in the current mode and returns one of the YAZ0_* codes. */
YAZ0_API int yaz0Run(Yaz0Stream* stream);

/* Sets the input buffer the next yaz0Run() reads from. */
YAZ0_API int yaz0Input(Yaz0Stream* stream, const void* data, uint32_t size);

/* Sets the output buffer the next yaz0Run() writes to. */
YAZ0_API int yaz0Output(Yaz0Stream* stream, void* data, uint32_t size);

/* Presumably the number of bytes currently written to the output buffer - confirm in libyaz0.c. */
YAZ0_API uint32_t yaz0OutputChunkSize(const Yaz0Stream* stream);

/* Presumably the uncompressed size associated with the stream - confirm in libyaz0.c. */
YAZ0_API uint32_t yaz0DecompressedSize(const Yaz0Stream* stream);

#endif /* YAZ0_H */
|
@@ -0,0 +1,392 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include "libyaz0.h"
|
4
|
+
|
5
|
+
/*
 * Maximum number of hash-table buckets probed per lookup or insertion,
 * indexed by compression level.
 */
static const uint32_t kProbesPerLevel[] = {
    [0] = 0x0000,
    [1] = 0x0001,
    [2] = 0x0002,
    [3] = 0x0004,
    [4] = 0x0008,
    [5] = 0x0010,
    [6] = 0x0040,
    [7] = 0x0100,
    [8] = 0x0200,
    [9] = 0x1000
};
|
17
|
+
|
18
|
+
/*
 * Hashes three consecutive bytes: packs them little-end first into a 24-bit
 * key, then mixes it with an xor-shift / multiply / xor-shift sequence.
 */
static uint32_t hash(uint8_t a, uint8_t b, uint8_t c)
{
    uint32_t key;

    key = ((uint32_t)c << 16) | ((uint32_t)b << 8) | (uint32_t)a;
    key ^= key >> 16;
    key *= 0x45d9f3b;
    key ^= key >> 16;
    return key;
}
|
25
|
+
|
26
|
+
/*
 * Records position (totalOut + offset) under hash `h`.
 *
 * Probes up to kProbesPerLevel[level] consecutive buckets starting at `h`
 * and takes the first one that is empty (0xffffffff) or whose entry lies
 * more than 0x1000 bytes behind totalOut. If none qualifies, the probed
 * bucket holding the smallest (oldest) position is overwritten.
 * htSize is incremented only when an empty bucket gets used.
 *
 * Needs at least one probe to pick a bucket; yaz0ModeCompress() clamps the
 * level to 1..9, which all map to a non-zero probe count.
 */
static void hashWrite(Yaz0Stream* s, uint32_t h, uint32_t offset)
{
    const uint32_t probeLimit = kProbesPerLevel[s->level];
    uint32_t oldestSeen = 0xffffffff;
    uint32_t slot;

    for (uint32_t probe = 0; probe < probeLimit; ++probe)
    {
        const uint32_t candidate = (h + probe) % HASH_MAX_ENTRIES;
        const uint32_t stored = s->htEntries[candidate];
        const int isEmpty = (stored == 0xffffffff);

        /* signed distance: entries written ahead of totalOut must not look expired */
        if (isEmpty || (int32_t)(s->totalOut - stored) > 0x1000)
        {
            if (isEmpty)
                s->htSize++;
            slot = candidate;
            break;
        }

        if (stored < oldestSeen)
        {
            oldestSeen = stored;
            slot = candidate;
        }
    }

    s->htEntries[slot] = s->totalOut + offset;
    s->htHashes[slot] = h;
}
|
62
|
+
|
63
|
+
/*
 * Rebuilds the hash table, dropping empty buckets and entries that lie more
 * than 0x1000 bytes behind totalOut. Surviving entries are re-inserted by
 * linear probing from (hash % HASH_MAX_ENTRIES), and htSize is reset to the
 * number of survivors.
 */
static void rebuildHashTable(Yaz0Stream* s)
{
    uint32_t freshEntries[HASH_MAX_ENTRIES];
    uint32_t freshHashes[HASH_MAX_ENTRIES];
    uint32_t live = 0;

    /* 0xff fill marks every bucket as empty (0xffffffff) */
    memset(freshEntries, 0xff, sizeof(freshEntries));
    memset(freshHashes, 0xff, sizeof(freshHashes));

    for (uint32_t old = 0; old < HASH_MAX_ENTRIES; ++old)
    {
        const uint32_t entry = s->htEntries[old];
        uint32_t age;
        uint32_t h;
        uint32_t slot;

        if (entry == 0xffffffff)
            continue;
        age = s->totalOut - entry;
        if (age > 0x1000)
            continue;

        /* Entry still good - find the first free bucket for it */
        h = s->htHashes[old];
        slot = h % HASH_MAX_ENTRIES;
        while (freshEntries[slot] != 0xffffffff)
            slot = (slot + 1) % HASH_MAX_ENTRIES;

        freshEntries[slot] = entry;
        freshHashes[slot] = h;
        live++;
    }

    s->htSize = live;
    memcpy(s->htEntries, freshEntries, sizeof(freshEntries));
    memcpy(s->htHashes, freshHashes, sizeof(freshHashes));
}
|
106
|
+
|
107
|
+
/*
 * Number of window bytes that must be available before a group can be
 * compressed: the remaining uncompressed size, capped at 0x888 + 1.
 * 0x888 is 8 * 0x111 (eight maximum-length matches); the extra byte is for
 * look-aheads.
 */
static uint32_t maxSize(Yaz0Stream* stream)
{
    static const uint32_t maxNecessary = 0x888 + 1;
    const uint32_t remaining = stream->decompSize - stream->totalOut;

    return (remaining > maxNecessary) ? maxNecessary : remaining;
}
|
118
|
+
|
119
|
+
/* start: start of avail data */
|
120
|
+
/* end: end of avail data */
|
121
|
+
/* We need to write more data at the end of the window */
|
122
|
+
|
123
|
+
static int feed(Yaz0Stream* s)
|
124
|
+
{
|
125
|
+
uint32_t avail;
|
126
|
+
uint32_t min;
|
127
|
+
uint32_t max;
|
128
|
+
uint32_t size;
|
129
|
+
int ret;
|
130
|
+
|
131
|
+
/* Check how much data we have */
|
132
|
+
if (s->window_start > s->window_end)
|
133
|
+
avail = WINDOW_SIZE - s->window_start + s->window_end;
|
134
|
+
else
|
135
|
+
avail = s->window_end - s->window_start;
|
136
|
+
if (avail >= maxSize(s))
|
137
|
+
return YAZ0_OK;
|
138
|
+
|
139
|
+
/* We need more data */
|
140
|
+
min = maxSize(s) - avail;
|
141
|
+
max = WINDOW_SIZE - 0x1000 - avail;
|
142
|
+
if (max > s->sizeIn - s->cursorIn)
|
143
|
+
max = s->sizeIn - s->cursorIn;
|
144
|
+
if (max < min)
|
145
|
+
ret = YAZ0_NEED_AVAIL_IN;
|
146
|
+
else
|
147
|
+
ret = YAZ0_OK;
|
148
|
+
if (s->window_end + max >= WINDOW_SIZE)
|
149
|
+
{
|
150
|
+
/* We might need two copies */
|
151
|
+
size = WINDOW_SIZE - s->window_end;
|
152
|
+
memcpy(s->window + s->window_end, s->in + s->cursorIn, size);
|
153
|
+
s->cursorIn += size;
|
154
|
+
s->window_end = 0;
|
155
|
+
max -= size;
|
156
|
+
}
|
157
|
+
memcpy(s->window + s->window_end, s->in + s->cursorIn, max);
|
158
|
+
s->cursorIn += max;
|
159
|
+
s->window_end += max;
|
160
|
+
return ret;
|
161
|
+
}
|
162
|
+
|
163
|
+
/*
 * Length of the match between the data at (window_start + offset) and the
 * data `pos` bytes before it, both taken modulo WINDOW_SIZE.
 *
 * The length is capped at min(decompSize - totalOut, 0x111).
 * When hintSize is non-zero, the bytes at index hintSize are compared first
 * and 0 is returned on a mismatch, since such a candidate cannot be longer
 * than hintSize.
 *
 * NOTE(review): the cap does not subtract `offset`, so a look-ahead call
 * (offset 1) may count one byte past the remaining data - confirm whether
 * that is intended.
 */
static uint32_t matchSize(Yaz0Stream* s, uint32_t offset, uint32_t pos, uint32_t hintSize)
{
    const uint32_t here = s->window_start + offset;
    const uint32_t from = (here + WINDOW_SIZE - pos) % WINDOW_SIZE;
    const uint32_t at = here % WINDOW_SIZE;
    uint32_t limit = s->decompSize - s->totalOut;
    uint32_t len = 0;

    if (limit > 0x111)
        limit = 0x111;

    if (hintSize
        && s->window[(from + hintSize) % WINDOW_SIZE] != s->window[(at + hintSize) % WINDOW_SIZE])
        return 0;

    while (s->window[(from + len) % WINDOW_SIZE] == s->window[(at + len) % WINDOW_SIZE])
    {
        if (++len == limit)
            break;
    }
    return len;
}
|
189
|
+
|
190
|
+
/*
 * Searches the hash table for the longest match for the data at
 * (window_start + offset), whose three-byte hash is `h`.
 *
 * Probes up to kProbesPerLevel[level] buckets, stopping at the first empty
 * one. Candidates further than 0x1000 bytes back are skipped.
 *
 * On return *outSize is the best match length, or 0 when no match of at
 * least 3 bytes was found. *outPos (the backward distance) is written only
 * when *outSize is non-zero.
 */
static void findHashMatch(Yaz0Stream* s, uint32_t h, uint32_t offset, uint32_t* outSize, uint32_t* outPos)
{
    const uint32_t probeLimit = kProbesPerLevel[s->level];
    uint32_t longest = 0;
    uint32_t longestPos = 0;

    for (uint32_t probe = 0; probe < probeLimit; ++probe)
    {
        const uint32_t slot = (h + probe) % HASH_MAX_ENTRIES;
        const uint32_t stored = s->htEntries[slot];
        uint32_t distance;
        uint32_t len;

        if (stored == 0xffffffff)
            break;
        if (s->htHashes[slot] != h)
            continue;

        distance = s->totalOut + offset - stored;
        if (distance > 0x1000)
            continue;

        len = matchSize(s, offset, distance, longest);
        if (len > longest)
        {
            longest = len;
            longestPos = distance;
        }
    }

    /* Matches shorter than 3 bytes are reported as "no match" */
    if (longest < 3)
    {
        *outSize = 0;
        return;
    }
    *outSize = longest;
    *outPos = longestPos;
}
|
233
|
+
|
234
|
+
/*
 * Writes one group to the output: a header byte followed by `count` chunks.
 *
 * Header bit (7 - i) is set when chunk i is a literal (arrSize[i] == 0), in
 * which case arrPos[i] holds the literal byte. Otherwise the chunk is a
 * back-reference of arrSize[i] bytes at distance arrPos[i], encoded with
 * distance - 1 in 12 bits:
 *   - size <  0x12: 2 bytes, (size - 2) in the high nibble of the first byte
 *   - size >= 0x12: 3 bytes, high nibble zero, third byte is (size - 0x12)
 */
static void emitGroup(Yaz0Stream* s, int count, const uint32_t* arrSize, const uint32_t* arrPos)
{
    uint8_t flags = 0;
    int chunk;

    for (chunk = 0; chunk < count; ++chunk)
    {
        if (arrSize[chunk] == 0)
            flags |= (uint8_t)(0x80 >> chunk);
    }
    s->out[s->cursorOut++] = flags;

    for (chunk = 0; chunk < count; ++chunk)
    {
        const uint32_t len = arrSize[chunk];
        uint32_t dist = arrPos[chunk];

        if (len == 0)
        {
            /* literal byte */
            s->out[s->cursorOut++] = (uint8_t)dist;
            continue;
        }

        dist--;
        if (len < 0x12)
        {
            /* 2 bytes */
            s->out[s->cursorOut++] = (uint8_t)(((len - 2) << 4) | (dist >> 8));
            s->out[s->cursorOut++] = (uint8_t)dist;
        }
        else
        {
            /* 3 bytes */
            s->out[s->cursorOut++] = (uint8_t)(dist >> 8);
            s->out[s->cursorOut++] = (uint8_t)dist;
            s->out[s->cursorOut++] = (uint8_t)(len - 0x12);
        }
    }
}
|
272
|
+
|
273
|
+
/*
 * Compresses up to eight chunks starting at window_start and emits them as
 * one group.
 *
 * For each chunk the best match at the current position is compared with
 * the best match one byte further on. A literal is emitted when there is no
 * match here or the next position offers a longer one; otherwise the match
 * is emitted and every position it covers is entered into the hash table.
 * The group ends early once totalOut reaches decompSize. The hash table is
 * rebuilt before emitting when htSize exceeds HASH_REBUILD.
 */
static void compressGroup(Yaz0Stream* s)
{
    uint32_t chunkSize[8];
    uint32_t chunkPos[8];
    int emitted = 0;

    while (emitted < 8)
    {
        uint8_t b0 = s->window[s->window_start];
        uint8_t b1 = s->window[(s->window_start + 1) % WINDOW_SIZE];
        uint8_t b2 = s->window[(s->window_start + 2) % WINDOW_SIZE];
        uint8_t b3 = s->window[(s->window_start + 3) % WINDOW_SIZE];
        uint32_t key;
        uint32_t hereSize;
        uint32_t herePos;
        uint32_t aheadSize;
        uint32_t aheadPos;

        /* Best match at the current position, then register the position */
        key = hash(b0, b1, b2);
        findHashMatch(s, key, 0, &hereSize, &herePos);
        hashWrite(s, key, 0);

        /* Best match one byte ahead, used only for the comparison below */
        key = hash(b1, b2, b3);
        findHashMatch(s, key, 1, &aheadSize, &aheadPos);

        if (hereSize == 0 || aheadSize > hereSize)
        {
            /* literal */
            chunkSize[emitted] = 0;
            chunkPos[emitted] = s->window[s->window_start];
            s->window_start += 1;
            s->totalOut += 1;
        }
        else
        {
            /* back-reference: also hash the positions the match skips over */
            chunkSize[emitted] = hereSize;
            chunkPos[emitted] = herePos;
            for (uint32_t step = 1; step < hereSize; ++step)
            {
                b0 = b1;
                b1 = b2;
                b2 = s->window[(s->window_start + 2 + step) % WINDOW_SIZE];
                key = hash(b0, b1, b2);
                hashWrite(s, key, step);
            }
            s->window_start += hereSize;
            s->totalOut += hereSize;
        }
        s->window_start %= WINDOW_SIZE;

        emitted++;
        if (s->totalOut >= s->decompSize)
            break;
    }

    if (s->htSize > HASH_REBUILD)
        rebuildHashTable(s);
    emitGroup(s, emitted, chunkSize, chunkPos);
}
|
334
|
+
|
335
|
+
/*
 * Resets the stream and puts it in compression mode.
 *
 * s     - stream to reset; its whole state is zeroed first
 * size  - stored as the total uncompressed size (decompSize)
 * level - compression level, clamped to 1..9
 *
 * Every hash-table bucket is marked empty (0xffffffff).
 * Always returns YAZ0_OK.
 */
int yaz0ModeCompress(Yaz0Stream* s, uint32_t size, int level)
{
    int slot = 0;

    memset(s, 0, sizeof(*s));
    s->mode = MODE_COMPRESS;
    s->decompSize = size;

    if (level < 1)
        s->level = 1;
    else if (level > 9)
        s->level = 9;
    else
        s->level = level;

    while (slot < HASH_MAX_ENTRIES)
    {
        s->htHashes[slot] = 0xffffffff;
        s->htEntries[slot] = 0xffffffff;
        ++slot;
    }

    return YAZ0_OK;
}
|
352
|
+
|
353
|
+
/*
 * Runs the compressor until it finishes or needs more buffer space.
 *
 * On the first call a 16-byte header is written: the magic "Yaz0", the
 * uncompressed size passed through swap32(), and eight zero bytes.
 *
 * Returns YAZ0_OK once totalOut has reached decompSize,
 * YAZ0_NEED_AVAIL_OUT when fewer than 1 + 8 * 3 output bytes remain (the
 * largest possible group) or the header does not fit, and whatever non-zero
 * status feed() reports when input runs short.
 *
 * NOTE(review): the header is written at the start of `out` and its space
 * check looks at sizeOut only - this assumes cursorOut is still 0 on the
 * first call; confirm against yaz0Output().
 */
int yaz0_RunCompress(Yaz0Stream* stream)
{
    /* Write headers */
    if (!stream->headersDone)
    {
        uint32_t sizeField;

        if (stream->sizeOut < 16)
            return YAZ0_NEED_AVAIL_OUT;

        sizeField = swap32(stream->decompSize);
        memcpy(stream->out, "Yaz0", 4);
        memcpy(stream->out + 4, &sizeField, 4);
        memset(stream->out + 8, 0, 8);
        stream->cursorOut += 16;
        stream->headersDone = 1;
    }

    /* Compress one group per iteration until everything is consumed */
    while (stream->totalOut < stream->decompSize)
    {
        int status;

        /* Check output space */
        if (stream->sizeOut - stream->cursorOut < 1 + 8 * 3)
            return YAZ0_NEED_AVAIL_OUT;

        /* Check that we have consumed enough input */
        status = feed(stream);
        if (status)
            return status;

        compressGroup(stream);
    }

    return YAZ0_OK;
}
|