isomorfeus-ferret 0.13.6 → 0.13.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -32
- data/ext/isomorfeus_ferret_ext/frt_global.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_store.c +5 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 479f3df28144a3ca25194afc36590058e027af38ba43cc42d24ffd83a35a6da7
|
4
|
+
data.tar.gz: 95c80530e4012e0f7219e3b60cc82748f4e6d1824f0958cfda6cd4735c92a82a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a72ddf5c974896ac0e5d11cdab105c02894f12ffac43db12b7651393f0bfd5ebd5f99f4e03e7714a8d230c6e77f71a1f5ee1c243b01608b23555a170eabe5c9
|
7
|
+
data.tar.gz: 58a803a8de3afae0b2bdf6e0eeda41f6a2e29dc649d8e219d2d7d5c667bd27bea3fa8dd80f16af934456e9414cd0f6a2ec9b998d303593db94f43de208a3eaff
|
data/README.md
CHANGED
@@ -63,14 +63,8 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
|
67
|
-
On Windows
|
68
|
-
|
69
|
-
Search performance is still excellent and multiple times faster than Lucene.
|
70
|
-
|
71
|
-
Lucene achieves roughly double the indexing performance. This seems to be because of the different way strings and
|
72
|
-
encodings are handled in Java. For example, the Java WhitespaceTokenizer code requires only one method call per character (check for whitespace), but for Ruby, to support all the different encodings, several method calls are required per character (retrieve character according to encoding, check character for whitespace).
|
73
|
-
Ferret is internally using the standard Ruby string encoding methods.
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed; Ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
|
+
On Windows, performance is still not as good as on Linux, but that is equally true for Lucene and is a consequence of how the Windows filesystem works.
|
74
68
|
|
75
69
|
## Documentation
|
76
70
|
|
@@ -105,43 +99,35 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
105
99
|
|
106
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
107
101
|
|
108
|
-
Results on
|
109
|
-
|
110
|
-
Ferret
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
Searching took: 1.64s for 8000 queries
|
120
|
-
thats 4875 q/s
|
121
|
-
Total found: 41000
|
122
|
-
index size: 35Mb
|
123
|
-
|
124
|
-
JVM 11.0.14.1 (Ubuntu)
|
125
|
-
```
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer, on an old Intel Core i5 from 2015:
|
103
|
+
|
104
|
+
| OS | Task | Ferret | Lucene* |
|
105
|
+
|---------|------------|-----------------|----------------|
|
106
|
+
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
107
|
+
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
108
|
+
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
109
|
+
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
110
|
+
| | Index Size | 28 MB | 35 MB |
|
111
|
+
|
112
|
+
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
126
113
|
|
127
114
|
### Storing Fields with Compression, Indexing and Retrieval
|
128
115
|
- clone repo
|
129
116
|
- bundle install
|
130
117
|
- rake ferret_compression_benchmark
|
131
118
|
|
132
|
-
Results on Linux, 0.13.
|
119
|
+
Results on Linux, 0.13.7, on an old Intel Core i5 from 2015:
|
133
120
|
|
134
121
|
| Compression | Index & Store | Retrieve | Index size |
|
135
122
|
|-------------|---------------|---------------|------------|
|
136
|
-
| none |
|
137
|
-
| brotli |
|
138
|
-
| bzip2 |
|
139
|
-
| lz4 |
|
123
|
+
| none | 4866 docs/s | 153853 docs/s | 43 MB |
|
124
|
+
| brotli | 3539 docs/s | 58315 docs/s | 36 MB |
|
125
|
+
| bzip2 | 2624 docs/s | 15382 docs/s | 38 MB |
|
126
|
+
| lz4 | 4639 docs/s | 127100 docs/s | 41 MB |
|
140
127
|
|
141
128
|
## Future
|
142
129
|
|
143
130
|
Lots of things to do:
|
144
|
-
- Improve indexing performance on Windows (WriteFile is terribly slow, maybe use mapping, see libuv)
|
145
131
|
- Bring documentation in order in a docs directory
|
146
132
|
- Review code (especially for memory/stack issues, typical c issues)
|
147
133
|
- Take care of ruby GVL and threading
|
@@ -149,7 +149,7 @@ void frt_os_seek(FrtOutStream *os, off_t new_pos)
|
|
149
149
|
*/
|
150
150
|
void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
151
151
|
{
|
152
|
-
if (os->buf.pos >= FRT_BUFFER_SIZE) {
|
152
|
+
if (os->buf.pos >= (FRT_BUFFER_SIZE - 1)) {
|
153
153
|
frt_os_flush(os);
|
154
154
|
}
|
155
155
|
write_byte(os, b);
|
@@ -157,15 +157,12 @@ void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
|
157
157
|
|
158
158
|
void frt_os_write_bytes(FrtOutStream *os, const frt_uchar *buf, int len)
|
159
159
|
{
|
160
|
-
if (os->buf.pos
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
if (len < FRT_BUFFER_SIZE) {
|
165
|
-
os->m->flush_i(os, buf, len);
|
166
|
-
os->buf.start += len;
|
160
|
+
if (len < (FRT_BUFFER_SIZE - os->buf.pos)) {
|
161
|
+
memcpy(os->buf.buf + os->buf.pos, buf, len);
|
162
|
+
os->buf.pos += len;
|
167
163
|
}
|
168
164
|
else {
|
165
|
+
frt_os_flush(os);
|
169
166
|
int pos = 0;
|
170
167
|
int size;
|
171
168
|
while (pos < len) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|