batch_jaro_winkler 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/batch_jaro_winkler/batch_jaro_winkler.c +0 -0
- data/ext/batch_jaro_winkler/ext/LICENSE.uthash.txt +0 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler.c +26 -16
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler.h +0 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler_internal.h +1 -1
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler_runtime.h +14 -10
- data/ext/batch_jaro_winkler/ext/uthash.h +1 -1
- data/ext/batch_jaro_winkler/extconf.rb +0 -0
- data/lib/batch_jaro_winkler/version.rb +1 -1
- data/lib/batch_jaro_winkler.rb +0 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 432282dad48464ef1b3bbbbdf7f58d23482a4995dd19a71e76d77053dff0d075
|
4
|
+
data.tar.gz: 5062b7c8d7854fa7966fde862b827d95d9fe7f14633102c2d785dc09a9a24cd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc6a7605d46daa03aff1e33bde2d9047af693ad10d2618635eea4f3aff8515b00ab82d999f7b49e4b4754748a11b450c37683a4746123e6378e9b1ebde127692
|
7
|
+
data.tar.gz: 594362d2c8d4ec96f6fe8efaa879b5a38b3de430dfba5639f61b154034d004fadf501f108164daadc0822988426e8f395df106a9f825ac1dafc81637a48d774f
|
File without changes
|
File without changes
|
@@ -326,13 +326,13 @@ static uint8_t *build_exportable_model_for_thread(
|
|
326
326
|
if (store_original_candidates)
|
327
327
|
{
|
328
328
|
memcpy(
|
329
|
-
res_original_candidates + (candidates_char_decal * original_char_width),
|
329
|
+
(uint8_t*)res_original_candidates + (candidates_char_decal * original_char_width),
|
330
330
|
original_candidates[sorted_candidates[i_candidate].original_ind],
|
331
331
|
sorted_candidates[i_candidate].candidate_length * original_char_width
|
332
332
|
);
|
333
333
|
}
|
334
334
|
memcpy(
|
335
|
-
res_compressed_candidates + (candidates_char_decal * compressed_char_width),
|
335
|
+
(uint8_t*)res_compressed_candidates + (candidates_char_decal * compressed_char_width),
|
336
336
|
sorted_candidates[i_candidate].candidate,
|
337
337
|
sorted_candidates[i_candidate].candidate_length * compressed_char_width
|
338
338
|
);
|
@@ -415,8 +415,8 @@ static void *build_exportable_model(
|
|
415
415
|
{
|
416
416
|
uint32_t i_thread;
|
417
417
|
uint32_t i;
|
418
|
-
uint8_t *model_per_thread[nb_runtime_threads];
|
419
|
-
uint32_t model_size_per_thread[nb_runtime_threads];
|
418
|
+
uint8_t **model_per_thread;
|
419
|
+
uint32_t *model_size_per_thread;
|
420
420
|
uint8_t *res_buffer;
|
421
421
|
uint8_t *res_buffer_head;
|
422
422
|
void **original_candidates_for_thread;
|
@@ -427,6 +427,9 @@ static void *build_exportable_model(
|
|
427
427
|
uint32_t nb_taken_candidates;
|
428
428
|
uint32_t aligned_model_size;
|
429
429
|
|
430
|
+
model_per_thread = malloc(sizeof(uint8_t*) * nb_runtime_threads);
|
431
|
+
model_size_per_thread = malloc(sizeof(uint32_t) * nb_runtime_threads);
|
432
|
+
|
430
433
|
nb_taken_candidates = 0;
|
431
434
|
*res_model_size = 0;
|
432
435
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
@@ -445,6 +448,8 @@ static void *build_exportable_model(
|
|
445
448
|
{
|
446
449
|
for (i = 0; i < i_thread; i++)
|
447
450
|
free(model_per_thread[i]);
|
451
|
+
free(model_per_thread);
|
452
|
+
free(model_size_per_thread);
|
448
453
|
return (NULL);
|
449
454
|
}
|
450
455
|
// align on next 4 byte boundary
|
@@ -459,6 +464,8 @@ static void *build_exportable_model(
|
|
459
464
|
{
|
460
465
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
461
466
|
free(model_per_thread[i_thread]);
|
467
|
+
free(model_per_thread);
|
468
|
+
free(model_size_per_thread);
|
462
469
|
return (NULL);
|
463
470
|
}
|
464
471
|
*((uint32_t*)(res_buffer + sizeof(uint32_t) * 0)) = nb_runtime_threads;
|
@@ -478,6 +485,9 @@ static void *build_exportable_model(
|
|
478
485
|
free(model_per_thread[i_thread]);
|
479
486
|
}
|
480
487
|
|
488
|
+
free(model_per_thread);
|
489
|
+
free(model_size_per_thread);
|
490
|
+
|
481
491
|
return (res_buffer);
|
482
492
|
}
|
483
493
|
|
@@ -621,8 +631,8 @@ void bjw_free_runtime_model(void *runtime_model)
|
|
621
631
|
uint32_t char_width;
|
622
632
|
uint32_t char_access_width;
|
623
633
|
|
624
|
-
char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
|
625
|
-
char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
|
634
|
+
char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 2));
|
635
|
+
char_access_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 3));
|
626
636
|
|
627
637
|
void (*free_function)(void*) = NULL;
|
628
638
|
if (char_width == 4 && char_access_width == 4)
|
@@ -720,18 +730,18 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
|
|
720
730
|
uint32_t results_decal;
|
721
731
|
t_thread_data *threads_data;
|
722
732
|
#if BJW_USE_THREADS
|
723
|
-
#
|
733
|
+
# if defined(_WIN32) || defined(_WIN64)
|
724
734
|
HANDLE *threads;
|
725
735
|
# else
|
726
736
|
pthread_t *threads;
|
727
737
|
# endif
|
728
738
|
#endif
|
729
739
|
|
730
|
-
nb_runtime_threads = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 0));
|
731
|
-
nb_candidates = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 1));
|
732
|
-
char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
|
733
|
-
char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
|
734
|
-
original_char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 4));
|
740
|
+
nb_runtime_threads = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 0));
|
741
|
+
nb_candidates = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 1));
|
742
|
+
char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 2));
|
743
|
+
char_access_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 3));
|
744
|
+
original_char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 4));
|
735
745
|
|
736
746
|
// Characters after 256 won't be taken into consideration for score calculation anyway, and uint8_t won't be able to represent the indices.
|
737
747
|
if (char_access_width == 1 && input_length >= 256)
|
@@ -743,7 +753,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
|
|
743
753
|
n_best_results = nb_candidates;
|
744
754
|
|
745
755
|
#if BJW_USE_THREADS
|
746
|
-
#
|
756
|
+
# if defined(_WIN32) || defined(_WIN64)
|
747
757
|
if (!(threads = malloc(sizeof(HANDLE) * nb_runtime_threads)))
|
748
758
|
return (NULL);
|
749
759
|
# else
|
@@ -810,7 +820,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
|
|
810
820
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
811
821
|
{
|
812
822
|
threads_data[i_thread] = (t_thread_data){
|
813
|
-
.runtime_models = runtime_model + sizeof(uint32_t) * 5,
|
823
|
+
.runtime_models = ((uint8_t*)runtime_model) + sizeof(uint32_t) * 5,
|
814
824
|
.i_thread = i_thread,
|
815
825
|
.original_char_width = original_char_width,
|
816
826
|
.input = input,
|
@@ -824,7 +834,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
|
|
824
834
|
};
|
825
835
|
|
826
836
|
#if BJW_USE_THREADS
|
827
|
-
#
|
837
|
+
# if defined(_WIN32) || defined(_WIN64)
|
828
838
|
threads[i_thread] = CreateThread(NULL, 0, runtime_function, &(threads_data[i_thread]), 0, NULL);
|
829
839
|
# else
|
830
840
|
pthread_create(&(threads[i_thread]), NULL, runtime_function, &(threads_data[i_thread]));
|
@@ -838,7 +848,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
|
|
838
848
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
839
849
|
{
|
840
850
|
#if BJW_USE_THREADS
|
841
|
-
#
|
851
|
+
# if defined(_WIN32) || defined(_WIN64)
|
842
852
|
WaitForSingleObject(threads[i_thread], INFINITE);
|
843
853
|
CloseHandle(threads[i_thread]);
|
844
854
|
# else
|
File without changes
|
@@ -26,6 +26,10 @@ SOFTWARE.
|
|
26
26
|
#define BJW_SUFFIX_HANDLER(name, type1, type2) BJW_SUFFIX_PASTER(name, type1, type2)
|
27
27
|
#define BJW_SUFFIX(name) BJW_SUFFIX_HANDLER(name, BJW_CHAR_TYPE, BJW_CHAR_ACCESS_TYPE)
|
28
28
|
|
29
|
+
#if defined(_WIN32) || defined(_WIN64)
|
30
|
+
#define bzero(b,len) (memset((b), '\0', (len)), (void) 0)
|
31
|
+
#endif
|
32
|
+
|
29
33
|
// this represents the data needed for a candidate when finding matches
|
30
34
|
typedef struct
|
31
35
|
{
|
@@ -212,8 +216,8 @@ static void BJW_SUFFIX(free_runtime_model)
|
|
212
216
|
uint32_t i_thread;
|
213
217
|
uint32_t nb_runtime_threads;
|
214
218
|
|
215
|
-
nb_runtime_threads = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 0));
|
216
|
-
runtime_models = (BJW_SUFFIX(t_runtime_model)*)(runtime_model + sizeof(uint32_t) * 5);
|
219
|
+
nb_runtime_threads = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 0));
|
220
|
+
runtime_models = (BJW_SUFFIX(t_runtime_model)*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 5);
|
217
221
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
218
222
|
BJW_SUFFIX(free_runtime_model_for_thread)(&(runtime_models[i_thread]));
|
219
223
|
free(runtime_model);
|
@@ -230,12 +234,12 @@ static void *BJW_SUFFIX(build_runtime_model)
|
|
230
234
|
res = malloc(sizeof(BJW_SUFFIX(t_runtime_model)) * nb_runtime_threads + sizeof(uint32_t) * 5);
|
231
235
|
if (!res)
|
232
236
|
return (NULL);
|
233
|
-
*((uint32_t*)(res + sizeof(uint32_t) * 0)) = nb_runtime_threads;
|
234
|
-
*((uint32_t*)(res + sizeof(uint32_t) * 1)) = nb_candidates;
|
235
|
-
*((uint32_t*)(res + sizeof(uint32_t) * 2)) = sizeof(BJW_CHAR_TYPE);
|
236
|
-
*((uint32_t*)(res + sizeof(uint32_t) * 3)) = sizeof(BJW_CHAR_ACCESS_TYPE);
|
237
|
-
*((uint32_t*)(res + sizeof(uint32_t) * 4)) = original_char_width;
|
238
|
-
runtime_models = res + sizeof(uint32_t) * 5;
|
237
|
+
*((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 0)) = nb_runtime_threads;
|
238
|
+
*((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 1)) = nb_candidates;
|
239
|
+
*((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 2)) = sizeof(BJW_CHAR_TYPE);
|
240
|
+
*((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 3)) = sizeof(BJW_CHAR_ACCESS_TYPE);
|
241
|
+
*((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 4)) = original_char_width;
|
242
|
+
runtime_models = (BJW_SUFFIX(t_runtime_model)*)((uint8_t*)res + sizeof(uint32_t) * 5);
|
239
243
|
|
240
244
|
for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
|
241
245
|
{
|
@@ -426,7 +430,7 @@ static uint32_t BJW_SUFFIX(jaro_winkler_distance_from_flags)
|
|
426
430
|
{
|
427
431
|
if (candidate_min_score <= 0.0f)
|
428
432
|
{
|
429
|
-
results[nb_results].candidate = runtime_model->original_candidates + original_char_width * candidate_decal;
|
433
|
+
results[nb_results].candidate = ((uint8_t*)(runtime_model->original_candidates)) + original_char_width * candidate_decal;
|
430
434
|
results[nb_results].score = 0.0f;
|
431
435
|
results[nb_results].candidate_length = candidate_data->candidate_length;
|
432
436
|
nb_results++;
|
@@ -458,7 +462,7 @@ static uint32_t BJW_SUFFIX(jaro_winkler_distance_from_flags)
|
|
458
462
|
if (score < candidate_min_score)
|
459
463
|
continue ;
|
460
464
|
|
461
|
-
results[nb_results].candidate = runtime_model->original_candidates + original_char_width * candidate_decal;
|
465
|
+
results[nb_results].candidate = ((uint8_t*)(runtime_model->original_candidates)) + original_char_width * candidate_decal;
|
462
466
|
results[nb_results].score = score;
|
463
467
|
results[nb_results].candidate_length = candidate_data->candidate_length;
|
464
468
|
nb_results++;
|
@@ -63,7 +63,7 @@ do {
|
|
63
63
|
#endif
|
64
64
|
|
65
65
|
/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */
|
66
|
-
#if defined(_WIN32)
|
66
|
+
#if defined(_WIN32) || defined(_WIN64)
|
67
67
|
#if defined(_MSC_VER) && _MSC_VER >= 1600
|
68
68
|
#include <stdint.h>
|
69
69
|
#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
|
File without changes
|
data/lib/batch_jaro_winkler.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batch_jaro_winkler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominik Bousquet
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 1.16.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.16.0
|
27
27
|
description: This project gets its performance from the pre-calculation of an optimized
|
28
28
|
model in advance of the actual runtime calculations. Supports any encoding.
|
29
29
|
email: bousquet.dominik@gmail.com
|
@@ -62,7 +62,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
62
|
- !ruby/object:Gem::Version
|
63
63
|
version: '0'
|
64
64
|
requirements: []
|
65
|
-
rubygems_version: 3.
|
65
|
+
rubygems_version: 3.4.6
|
66
66
|
signing_key:
|
67
67
|
specification_version: 4
|
68
68
|
summary: Fast batch jaro winkler distance implementation in C99.
|